# Salt Lake Multivariable Regression

In [2]:
import pandas as pd
import numpy as np


In [3]:
# Load the raw Salt Lake water-elevation workbook and preview the frame
df = pd.read_excel(io='SaltLakeWaterElevation.xlsx')
df

Unnamed: 0,Dt,"Lake elevation, NGVD, ft (mean)"
0,10/18/1847,4199.80
1,10/19/1847,
2,10/20/1847,
3,10/21/1847,
4,10/22/1847,
...,...,...
64444,2024-03-27 00:00:00,4194.1
64445,2024-03-28 00:00:00,4194.2
64446,2024-03-29 00:00:00,4194.2
64447,2024-03-30 00:00:00,4194.2


In [4]:

file_path = 'SaltLakeWaterElevation.xlsx'
df_salt_lake = pd.read_excel(file_path)

# Parse the 'Dt' column into datetimes so the calendar year can be extracted below.
df_salt_lake['Dt'] = pd.to_datetime(df_salt_lake['Dt'])

# Clean the elevation column and convert it to numeric.
# Whitespace is stripped only from cells that are actually strings: the
# vectorised `.str.strip()` returns NaN for every non-string cell of an object
# column (and raises AttributeError when the column was already parsed as
# numeric), which would silently throw away real readings before averaging.
# Blank or otherwise unparseable cells end up as NaN through errors='coerce'.
elevation_col = 'Lake elevation, NGVD, ft (mean)'
df_salt_lake[elevation_col] = pd.to_numeric(
    df_salt_lake[elevation_col].map(
        lambda cell: cell.strip() if isinstance(cell, str) else cell
    ),
    errors='coerce',
)

# Mean elevation per calendar year; NaN readings are skipped by mean().
yearly_averages = df_salt_lake.groupby(df_salt_lake['Dt'].dt.year)[elevation_col].mean()

yearly_averages


Dt
1847    4199.800000
1848    4200.400000
1849    4200.500000
1850    4200.550000
1851    4201.300000
           ...     
2020    4193.559836
2021    4191.661370
2022    4190.047945
2023    4192.102466
2024    4193.308791
Name: Lake elevation, NGVD, ft (mean), Length: 178, dtype: float64

In [5]:
# Turn the year-indexed Series into a two-column frame: the index becomes
# 'year' and the values become 'average elevation'.
yearly_averages_df = (
    yearly_averages
    .rename_axis('year')
    .reset_index(name='average elevation')
)

yearly_averages_df.head(100)


Unnamed: 0,year,average elevation
0,1847,4199.800000
1,1848,4200.400000
2,1849,4200.500000
3,1850,4200.550000
4,1851,4201.300000
...,...,...
95,1942,4195.479167
96,1943,4195.425000
97,1944,4195.550000
98,1945,4195.620833


In [6]:
# Persist the yearly averages as waterlevel_yearly_averages.csv (row index omitted)
yearly_averages_df.to_csv(path_or_buf='waterlevel_yearly_averages.csv', index=False)

In [43]:
# Highest yearly-average elevation, and the first year in which it occurs
max_elevation = yearly_averages_df['average elevation'].max()
max_elevation_year = yearly_averages_df.loc[
    yearly_averages_df['average elevation'].eq(max_elevation), 'year'
].values[0]
max_elevation, max_elevation_year

(4211.05, 1873)

In [44]:
# Lowest yearly-average elevation, and the first year in which it occurs
min_elevation = yearly_averages_df['average elevation'].min()
min_elevation_year = yearly_averages_df.loc[
    yearly_averages_df['average elevation'].eq(min_elevation), 'year'
].values[0]
min_elevation, min_elevation_year

(4190.04794520548, 2022)

In [45]:
# Mean of the per-year averages: every year counts equally, regardless of how
# many readings went into that year's average.
average_elevation = yearly_averages_df.loc[:, 'average elevation'].mean()
average_elevation

4200.311450591144