In [1]:
import pandas as pd
import numpy as np
import json as js

In [2]:
# Load the per-quarter summary CSVs, in the same order as name_list below.
temp_quarter_df, conductivity_quarter_df, ph_quarter_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../SourceMaterial/Measures/Results/water_result.csv',
        '../SourceMaterial/Measures/Results/conductivity_result.csv',
        '../SourceMaterial/Measures/Results/ph_result.csv',
    )
)

# Parallel lists: name_list[i] is the label used for df_list[i] when results are saved.
name_list = ['temp', 'conductivity', 'ph']
df_list = [temp_quarter_df, conductivity_quarter_df, ph_quarter_df]

# Quarter labels are taken from the first (temp) table only.
quarter_list = temp_quarter_df['q'].unique()

In [3]:
# Preview the first rows of the temp table to check the columns that were loaded.
temp_quarter_df.head()

Unnamed: 0.1,Unnamed: 0,huc,quarter,q,year,var,stdev,avg,max,min,max_time_diff,max_date,min_date
0,0,Totuskey Creek,Q1 2012,Q1,2012,18.94,4.352011,9.1,16.0,2.5,37,2019-12-19,2012-01-05
1,1,Totuskey Creek,Q2 2012,Q2,2012,5.0625,2.25,17.25,21.0,14.0,37,2019-12-19,2012-01-05
2,2,Totuskey Creek,Q3 2012,Q3,2012,4.888889,2.211083,23.166667,25.0,18.5,37,2019-12-19,2012-01-05
3,3,Totuskey Creek,Q4 2012,Q4,2012,20.333333,4.50925,11.5,20.5,7.0,37,2019-12-19,2012-01-05
4,4,Totuskey Creek,Q1 2013,Q1,2013,1.426649,1.194424,5.645833,9.0,4.5,37,2019-12-19,2012-01-05


In [4]:
# Preview the first rows of the conductivity table.
# NOTE(review): the saved output for this cell is row-for-row identical to the
# temp preview -- confirm conductivity_result.csv really holds conductivity data.
conductivity_quarter_df.head()

Unnamed: 0.1,Unnamed: 0,huc,quarter,q,year,var,stdev,avg,max,min,max_time_diff,max_date,min_date
0,0,Totuskey Creek,Q1 2012,Q1,2012,18.94,4.352011,9.1,16.0,2.5,37,2019-12-19,2012-01-05
1,1,Totuskey Creek,Q2 2012,Q2,2012,5.0625,2.25,17.25,21.0,14.0,37,2019-12-19,2012-01-05
2,2,Totuskey Creek,Q3 2012,Q3,2012,4.888889,2.211083,23.166667,25.0,18.5,37,2019-12-19,2012-01-05
3,3,Totuskey Creek,Q4 2012,Q4,2012,20.333333,4.50925,11.5,20.5,7.0,37,2019-12-19,2012-01-05
4,4,Totuskey Creek,Q1 2013,Q1,2013,1.426649,1.194424,5.645833,9.0,4.5,37,2019-12-19,2012-01-05


In [5]:
# Split HUC names into those present in every measure table (suitable_hucs)
# and those missing from at least one table (bad_hucs), then restrict every
# table in df_list to the suitable HUCs.

# Every HUC name once per table it appears in, in table order.
all_hucs = [huc for df in df_list for huc in df['huc'].unique()]

# Count how many tables each HUC appears in. A dict keeps first-seen order,
# so suitable_hucs/bad_hucs come out in the order the HUCs were first encountered.
huc_counts = {}
for huc in all_hucs:
    huc_counts[huc] = huc_counts.get(huc, 0) + 1

# A HUC is usable only if every table contains it.
required_count = len(df_list)
suitable_hucs = [huc for huc, count in huc_counts.items() if count == required_count]
bad_hucs = [huc for huc, count in huc_counts.items() if count < required_count]

# Store the filtered frames back into df_list. Assigning to the loop variable
# (`df = df[...]`) would only rebind a local name and leave df_list unfiltered.
# The individually named *_quarter_df variables still refer to the unfiltered tables.
df_list = [df[df['huc'].isin(suitable_hucs)] for df in df_list]

In [6]:
# Report how many HUCs are shared by all tables versus excluded.
print(f"Found {len(suitable_hucs)} suitable hucs, and {len(bad_hucs)} un-usable hucs")

Found 40 suitable hucs, and 6 un-usable hucs


In [7]:
# For every measure, compute per-HUC / per-quarter change in the quarterly
# average between the earliest and latest year on record, save one CSV per
# measure, and collect the resulting frames in result_list (same order as df_list).
result_list = []

# name_list[i] labels df_list[i]; iterate them together instead of keeping a counter.
for df, measure_name in zip(df_list, name_list):
    avg_over_time_list = []
    for huc in suitable_hucs:
        huc_df = df.loc[df['huc'] == huc]
        for quarter in quarter_list:
            # Build the mask from huc_df itself: a mask taken from the full df
            # has a different index and only works through implicit alignment.
            working_df = huc_df.loc[huc_df['q'] == quarter]

            # quarter_list comes from the first table only, so this HUC may have
            # no rows for the quarter in this measure; skip instead of raising.
            if working_df.empty:
                continue

            years = working_df['year']
            start_year = years.min()
            end_year = years.max()

            # First row recorded for the earliest / latest year.
            earliest_avg = working_df.loc[years == start_year, 'avg'].iloc[0]
            final_avg = working_df.loc[years == end_year, 'avg'].iloc[0]

            # Stored as a fraction (0.15 means +15%), not multiplied by 100.
            # NOTE: a zero earliest_avg is not guarded against here.
            percent_change = (final_avg - earliest_avg) / earliest_avg

            avg_over_time_list.append({
                'huc': huc,
                'quarter': quarter,
                'percent_change': percent_change,
                'highest_avg': working_df['avg'].max(),
                'lowest_avg': working_df['avg'].min(),
                'start': start_year,
                'end': end_year,
            })

    result_df = pd.DataFrame(avg_over_time_list)
    result_df.to_csv(f"../SourceMaterial/Measures/Results/Final_Analytics/{measure_name}_percent_change.csv")
    result_list.append(result_df)

  from ipykernel import kernelapp as app


In [8]:
# Percent-change table for the first measure ('temp' in name_list order).
result_list[0]

Unnamed: 0,huc,quarter,percent_change,highest_avg,lowest_avg,start,end
0,Totuskey Creek,Q1,-0.118263,9.100000,5.583333,2012,2019
1,Totuskey Creek,Q2,0.037681,20.712500,15.425000,2012,2019
2,Totuskey Creek,Q3,0.153777,27.600000,23.166667,2012,2019
3,Totuskey Creek,Q4,0.079710,13.391304,11.291667,2012,2019
4,Little Totuskey Creek,Q1,-0.132653,9.800000,5.083333,2012,2019
...,...,...,...,...,...,...,...
155,Muddy Run-Susquehanna River,Q4,-0.146512,10.750000,7.633333,2015,2018
156,Fishing Creek-Susquehanna River,Q1,-0.218447,9.133333,2.823077,2016,2019
157,Fishing Creek-Susquehanna River,Q2,-0.070842,15.014737,13.575000,2016,2019
158,Fishing Creek-Susquehanna River,Q3,-0.051615,19.836364,18.300000,2016,2019


In [9]:
# Percent-change table for the second measure ('conductivity' in name_list order).
# NOTE(review): in the saved output the visible percent_change values are identical
# to those shown for result_list[0] -- verify the input tables are distinct.
result_list[1]

Unnamed: 0,huc,quarter,percent_change,highest_avg,lowest_avg,start,end
0,Totuskey Creek,Q1,-0.118263,9.100000,5.583333,2012,2019
1,Totuskey Creek,Q2,0.037681,20.712500,6.707500,2012,2019
2,Totuskey Creek,Q3,0.153777,27.600000,6.776500,2012,2019
3,Totuskey Creek,Q4,0.079710,13.391304,6.595000,2012,2019
4,Little Totuskey Creek,Q1,-0.132653,9.800000,5.083333,2012,2019
...,...,...,...,...,...,...,...
155,Muddy Run-Susquehanna River,Q4,-0.146512,271.000000,7.197143,2015,2018
156,Fishing Creek-Susquehanna River,Q1,-0.218447,265.636364,2.823077,2016,2019
157,Fishing Creek-Susquehanna River,Q2,-0.070842,255.857143,7.147143,2016,2019
158,Fishing Creek-Susquehanna River,Q3,-0.051615,256.076923,7.132500,2016,2019


In [10]:
# Percent-change table for the third measure ('ph' in name_list order).
# NOTE(review): in the saved output the visible percent_change values are identical
# to those shown for result_list[0] -- verify the input tables are distinct.
result_list[2]

Unnamed: 0,huc,quarter,percent_change,highest_avg,lowest_avg,start,end
0,Totuskey Creek,Q1,-0.118263,9.100000,5.583333,2012,2019
1,Totuskey Creek,Q2,0.037681,20.712500,6.707500,2012,2019
2,Totuskey Creek,Q3,0.153777,27.600000,6.776500,2012,2019
3,Totuskey Creek,Q4,0.079710,13.391304,6.595000,2012,2019
4,Little Totuskey Creek,Q1,-0.132653,9.800000,5.083333,2012,2019
...,...,...,...,...,...,...,...
155,Muddy Run-Susquehanna River,Q4,-0.146512,10.750000,7.197143,2015,2018
156,Fishing Creek-Susquehanna River,Q1,-0.218447,9.133333,2.823077,2016,2019
157,Fishing Creek-Susquehanna River,Q2,-0.070842,15.014737,7.147143,2016,2019
158,Fishing Creek-Susquehanna River,Q3,-0.051615,19.836364,7.132500,2016,2019
