In [1]:
import pandas as pd

In [2]:
# Loading Lake Mead CSV file
lake_powell_load = "../Resources/Lake_Powell_Storage.csv"

# Reading the Lake Mead data
lake_powell_df = pd.read_csv(lake_powell_load)
lake_powell_df.head()


Unnamed: 0,datetime,storage
0,6/28/1963,0.0
1,6/29/1963,5400.0
2,6/30/1963,7400.0
3,7/1/1963,9400.0
4,7/2/1963,13400.0


In [3]:
# Get data types
print (lake_powell_df.dtypes)

datetime     object
storage     float64
dtype: object


In [4]:
# Convert the 'datetime' column to datetime format
lake_powell_df['datetime'] = pd.to_datetime(lake_powell_df['datetime'])
lake_powell_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21768 entries, 0 to 21767
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   datetime  21768 non-null  datetime64[ns]
 1   storage   21768 non-null  float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 340.2 KB


In [5]:
# Extract month and year
lake_powell_df['year'] = lake_powell_df['datetime'].dt.year
lake_powell_df['month'] = lake_powell_df['datetime'].dt.month
lake_powell_df.head()

Unnamed: 0,datetime,storage,year,month
0,1963-06-28,0.0,1963,6
1,1963-06-29,5400.0,1963,6
2,1963-06-30,7400.0,1963,6
3,1963-07-01,9400.0,1963,7
4,1963-07-02,13400.0,1963,7


In [6]:
# Verify the 'datetime' conversion
lake_powell_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21768 entries, 0 to 21767
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   datetime  21768 non-null  datetime64[ns]
 1   storage   21768 non-null  float64       
 2   year      21768 non-null  int64         
 3   month     21768 non-null  int64         
dtypes: datetime64[ns](1), float64(1), int64(2)
memory usage: 680.4 KB


In [7]:
# Change the 'month' from a number to actual abbreviated name
lake_powell_df['month'] = pd.to_datetime(lake_powell_df['month'], format='%m').dt.month_name().str.slice(stop=3)
lake_powell_df.head()

Unnamed: 0,datetime,storage,year,month
0,1963-06-28,0.0,1963,Jun
1,1963-06-29,5400.0,1963,Jun
2,1963-06-30,7400.0,1963,Jun
3,1963-07-01,9400.0,1963,Jul
4,1963-07-02,13400.0,1963,Jul


In [8]:
# Reorder the dataframe columns
new_lake_powell_df = lake_powell_df.iloc[:,[3,2,1,0]]
new_lake_powell_df.head()

Unnamed: 0,month,year,storage,datetime
0,Jun,1963,0.0,1963-06-28
1,Jun,1963,5400.0,1963-06-29
2,Jun,1963,7400.0,1963-06-30
3,Jul,1963,9400.0,1963-07-01
4,Jul,1963,13400.0,1963-07-02


In [9]:
# Drop 'datetime' column
new_lake_powell_df = new_lake_powell_df.drop(['datetime'], axis=1)
new_lake_powell_df.head(10)

Unnamed: 0,month,year,storage
0,Jun,1963,0.0
1,Jun,1963,5400.0
2,Jun,1963,7400.0
3,Jul,1963,9400.0
4,Jul,1963,13400.0
5,Jul,1963,17400.0
6,Jul,1963,17400.0
7,Jul,1963,20400.0
8,Jul,1963,20400.0
9,Jul,1963,20400.0


In [12]:
# For Lake Mead calculate the storage average by month and year
final_lake_powell_df = new_lake_powell_df.groupby(['year', 'month'], as_index=False).mean()

final_lake_powell_df.head(20)

Unnamed: 0,year,month,storage
0,1963,Aug,127722.6
1,1963,Dec,900064.5
2,1963,Jul,33174.19
3,1963,Jun,4266.667
4,1963,Nov,736400.0
5,1963,Oct,580271.0
6,1963,Sep,436433.3
7,1964,Apr,809686.7
8,1964,Aug,3957335.0
9,1964,Dec,4088474.0
