In [None]:
#Import packages
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline

In [None]:
#Load the gage records from csv: 'site_no' is read as a string and 'datetime' is parsed into datetime values
read_options = {
    'dtype': {'site_no': 'str'},
    'parse_dates': ['datetime'],
}
df = pd.read_csv('./data/GageData.csv', **read_options)

Create new "month" and "year" columns in our dataframe

In [None]:
#Derive calendar "month" and "year" columns from the parsed "datetime" column
date_parts = df['datetime'].dt
df['month'] = date_parts.month
df['year'] = date_parts.year

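Calculate the "water year": records from October, November, and December are assigned to the following calendar year; all other records keep their own calendar year.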

In [None]:
#Water year is the calendar year, except for records in Oct, Nov, and Dec,
# which roll forward to year + 1
rolls_forward = df['month'] >= 10
df['water_year'] = df['year'].mask(rolls_forward, df['year'] + 1)

In [None]:
#Peek at five randomly chosen rows to check the new columns
df.sample(n=5)

Compute a new column indicating whether each record falls before Falls Lake dam construction (before 1980) or after completion of the dam (1984 and onwards). Records from 1980 through 1983 receive neither label.

In [None]:
#Step 1: Create boolean masks based on the calendar year of each record
record_year = df['year']
beforeMask = record_year.lt(1980)   #strictly before 1980
afterMask = record_year.ge(1984)    #1984 and onwards

In [None]:
#Step 2: Use each mask as a "loc" row filter to fill in the [new] 'Status' column;
# rows matched by neither mask are not assigned a label here
for rowMask, label in ((beforeMask, "Before dam"), (afterMask, "After dam")):
    df.loc[rowMask, 'Status'] = label

In [None]:
#Peek at five randomly chosen rows to check the 'Status' labels
df.sample(n=5)

### Plotting

In [None]:
#Create a basic line plot of the MeanFlow_cfs column
df['MeanFlow_cfs'].plot.line();

In [None]:
#Explicitly set which columns are used for the X and Y axes
df.plot.line(x='datetime', y='MeanFlow_cfs');

In [None]:
#Alternatively, make the datetime column the dataframe's index and plot
# the flow column against that index
df2 = df.set_index(keys='datetime')
df2.plot.line(y='MeanFlow_cfs');

In [None]:
#Same datetime vs. flow plot as above, now with some "aesthetics" applied
line_style = dict(
    kind='line',
    figsize=(10,5),   #figure size
    lw=0.2,           #line weight
    alpha=0.4,        #opacity
    color='green',    #color
    title='Neuse River near Clayton, NC',
)
df.plot(x='datetime', y='MeanFlow_cfs', **line_style);

In [None]:
#Changing the aesthetics alters the message: flow is now plotted against
# month as points rather than against datetime as a line
df.plot.scatter(
    x='month',
    y='MeanFlow_cfs',
    figsize=(10,5),
    lw=0.2,
    alpha=0.1,      #Decreased the opacity
    color='blue',
    title='Neuse River near Clayton, NC'
);

In [None]:
#Create a box and whiskers plot of our MeanFlow_cfs values
df.boxplot('MeanFlow_cfs');

In [None]:
#Create a box and whiskers plot of our MeanFlow_cfs values,
# with one box per 'Status' group
df.boxplot(
    column='MeanFlow_cfs',
    by='Status',
);

In [None]:
#Set aesthetics on the grouped boxplot
# (matplotlib.pyplot is imported as plt in the notebook's import cell)
df.boxplot(column='MeanFlow_cfs',by='Status')
plt.ylabel("Mean flow (cfs)")
plt.title("Mean flow before and after Falls Lake")
#Blank out the figure-level super-title so only the title above is shown
plt.suptitle("");

In [None]:
#Create the canvas; an explicit figure/axes pair keeps every call below
# aimed at the same plot
fig, ax = plt.subplots(figsize=(20,6))

#Add plot lines for the entire dataframe, then the two dataframe subsets
# (df2 is indexed by datetime, so the index supplies the x values)
ax.plot(df2['MeanFlow_cfs'],color='grey',linewidth=0.1)
ax.plot(df2.loc[df2['year']<1980,'MeanFlow_cfs'],color='green',alpha=0.5,linewidth=0.5)
ax.plot(df2.loc[df2['year']>=1984,'MeanFlow_cfs'],color='blue',alpha=0.5,linewidth=0.5)

#Add the aesthetics
#NOTE(review): title changed from "Goldsboro" to match the earlier plots of
# this same dataset, which are titled "near Clayton" - confirm the gage site
ax.set_title("Neuse River near Clayton, NC")
ax.set_ylabel("Discharge (cfs)")

#Dates are given as explicit Timestamps rather than bare strings, so their
# placement on the datetime x axis does not depend on how strings are converted
construction_start = pd.Timestamp('1979-12-31')
dam_completed = pd.Timestamp('1984-01-01')

#Dashed marker line and annotated arrow for the start of construction
ax.axvline(x=construction_start,color='red',ls='--')
ax.annotate('Construction begins',
            xy=(pd.Timestamp('1980'),16000),
            xytext=(pd.Timestamp('1965'),18000),
            arrowprops=dict(facecolor='orange',shrink=0.05))

#Dashed marker line and annotated arrow for the dam's completion
ax.axvline(x=dam_completed,color='red',ls='--')
ax.annotate('Dam completed',
            xy=(dam_completed,16000),
            xytext=(pd.Timestamp('1993'),21000),
            arrowprops=dict(facecolor='orange',shrink=0.05))

#Show the plot (the original referenced plt.show without calling it)
plt.show()