In [4]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt


In [5]:
# Read the weekly temperature table from disk into a DataFrame
file_path = 'final_temperature_data.csv'
df = pd.read_csv(filepath_or_buffer=file_path)

# Preview the first five rows to confirm the layout
# (county_name and state_id, followed by one column per week)
print(df.head(n=5))

  county_name  state_id  Week 1 (2013)  Week 2 (2013)  Week 3 (2013)  \
0     Autauga         1     280.562622     288.842010     281.019053   
1     Baldwin         1     283.186765     291.629847     284.186692   
2     Barbour         1     282.083567     290.148231     284.864902   
3        Bibb         1     279.695063     288.398234     279.948275   
4      Blount         1     278.090549     286.715763     278.540562   

   Week 4 (2013)  Week 5 (2013)  Week 6 (2013)  Week 7 (2013)  Week 8 (2013)  \
0     283.691543     283.189447     285.475239     280.999300     283.736256   
1     287.163466     286.215749     288.865460     283.877893     287.485569   
2     284.801261     284.175462     286.447901     281.902147     285.091628   
3     283.121328     282.845102     284.854205     280.734786     283.384290   
4     280.142595     280.622356     282.888349     279.140631     281.312564   

   ...  Week 43 (2023)  Week 44 (2023)  Week 45 (2023)  Week 46 (2023)  \
0  ...      

In [6]:
# keep only the weekly measurement columns (their headers contain 'Week'),
# which leaves out the identifier columns county_name and state_id
temperature_columns = list(filter(lambda label: 'Week' in label, df.columns))

In [15]:
# Collapse the weekly readings into one mean value per county per calendar year.
# Start from the two identifying columns so every row stays traceable to its county.
# NOTE(review): values sit around 270-295, which looks like Kelvin - confirm with the data source.
yearly_avg_temp = df[['county_name', 'state_id']].copy()

for year in range(2013, 2024):
    # weekly headers look like 'Week 1 (2013)', so the parenthesised year
    # selects exactly the columns belonging to that year
    year_tag = f'({year})'
    year_cols = [name for name in temperature_columns if year_tag in name]
    # row-wise mean across that year's weekly columns
    yearly_avg_temp[f'avg_temp_{year}'] = df[year_cols].mean(axis='columns')

# preview the per-year averages
yearly_avg_temp.head(n=5)

Unnamed: 0,county_name,state_id,avg_temp_2013,avg_temp_2014,avg_temp_2015,avg_temp_2016,avg_temp_2017,avg_temp_2018,avg_temp_2019,avg_temp_2020,avg_temp_2021,avg_temp_2022,avg_temp_2023
0,Autauga,1,290.90237,290.597038,292.018782,292.667079,292.345222,291.777345,292.594105,292.249862,291.517589,291.711859,292.581212
1,Baldwin,1,292.652161,292.089295,293.697071,293.681256,293.619363,293.29509,293.910049,293.734038,292.878258,293.017565,294.369468
2,Barbour,1,291.239236,291.16706,292.651214,293.120926,292.723436,292.108261,293.091958,292.93933,292.157328,292.245464,292.893423
3,Bibb,1,290.534336,290.348639,291.669195,292.52547,292.108974,291.513839,292.329667,291.814021,291.247235,291.503114,292.38186
4,Blount,1,289.067362,288.972093,290.309957,291.272173,290.774257,290.368187,291.096258,290.446524,290.148208,290.236192,290.985789


In [20]:
# Net change over the period: the 2023 yearly mean minus the 2013 yearly mean
# (a positive value means 2023 averaged warmer than 2013 for that county).
yearly_avg_temp['temp_change_2013_2023'] = yearly_avg_temp['avg_temp_2023'].sub(
    yearly_avg_temp['avg_temp_2013']
)

# Rank the counties from the largest increase down to the largest decrease.
temp_change_sorted = yearly_avg_temp.sort_values('temp_change_2013_2023', ascending=False)

# top of the ranking: the counties with the largest positive change
temp_change_sorted.head(n=5)


Unnamed: 0,county_name,state_id,avg_temp_2013,avg_temp_2014,avg_temp_2015,avg_temp_2016,avg_temp_2017,avg_temp_2018,avg_temp_2019,avg_temp_2020,avg_temp_2021,avg_temp_2022,avg_temp_2023,temp_change_2013_2023
1996,Cavalier,38,274.934917,275.28377,278.166821,278.615778,277.74854,276.598125,275.673176,278.002636,279.16643,275.756974,278.01428,3.079363
2034,Towner,38,275.031285,275.495342,278.341611,278.792439,277.79701,276.680784,275.781365,278.241253,279.279732,275.915436,278.084848,3.053563
3105,Washburn,55,278.093312,277.422869,280.25624,280.765702,279.798588,279.119414,278.579113,280.129881,281.003589,278.954749,281.137043,3.043731
3046,Burnett,55,278.472486,277.809513,280.713841,281.200479,280.264854,279.434177,278.878889,280.521184,281.385346,279.259859,281.508322,3.035836
2024,Renville,38,275.998149,276.504757,279.248085,279.778976,278.730663,277.708763,276.744335,279.375367,280.262922,277.213545,279.019422,3.021274


In [21]:
# bottom of the descending ranking: the counties with the smallest change values
temp_change_sorted.tail(n=5)

Unnamed: 0,county_name,state_id,avg_temp_2013,avg_temp_2014,avg_temp_2015,avg_temp_2016,avg_temp_2017,avg_temp_2018,avg_temp_2019,avg_temp_2020,avg_temp_2021,avg_temp_2022,avg_temp_2023,temp_change_2013_2023
1758,Storey,32,284.011058,285.261154,284.914358,284.427187,284.296681,284.822153,283.404235,285.383136,285.292631,284.221035,282.953467,-1.057591
199,Inyo,6,289.14941,290.348913,289.797593,289.633812,289.93678,290.203612,288.288654,290.094517,290.092035,289.527691,288.050654,-1.098755
3120,Hot Springs,56,278.400858,278.037558,279.320897,279.235242,278.415917,278.162387,277.082303,278.649575,279.549878,278.19761,277.277615,-1.123243
1749,Esmeralda,32,284.951498,286.67823,286.071331,285.635542,285.835372,286.40626,284.36799,286.526406,286.062302,285.430829,283.806538,-1.14496
241,Ventura,6,288.873547,290.014849,289.519537,289.377326,289.680208,289.440502,288.161756,289.688585,289.409099,289.541269,287.683956,-1.189591


In [22]:
# Persist the per-county yearly averages to CSV so the next step can load them.
export_file_path = 'temperature_yearly_change_data.csv'
# index=False keeps the DataFrame's row index out of the written file
yearly_avg_temp.to_csv(path_or_buf=export_file_path, index=False)

In [25]:
# Choose the counties to plot: the extremes at both ends of the change ranking.
# temp_change_sorted is ordered by temp_change_2013_2023 descending, so the head
# holds the largest increases and the tail holds the largest decreases.
n_extreme = 3  # how many counties to take from each end of the ranking

# sort_values places NaN last by default; drop rows without a computable change
# so that tail() can never pick them up and mislabel them as "cooling".
ranked_changes = temp_change_sorted.dropna(subset=['temp_change_2013_2023'])
warming_rows = ranked_changes.head(n_extreme)
cooling_rows = ranked_changes.tail(n_extreme)

warming_counties = warming_rows['county_name'].tolist()
cooling_counties = cooling_rows['county_name'].tolist()

# Combine them into a single list (warming first, then cooling)
selected_counties = warming_counties + cooling_counties

# county_name alone may not identify a county uniquely - the frame keys counties
# by name plus state_id - so also keep (county_name, state_id) pairs, in the same
# order as selected_counties, for any later filtering that must be unambiguous.
selected_county_keys = [
    (name, state)
    for rows in (warming_rows, cooling_rows)
    for name, state in zip(rows['county_name'], rows['state_id'])
]

In [24]:
# show both selections, one per line
print(
    f'warming counties: {warming_counties}',
    f'cooling counties: {cooling_counties}',
    sep='\n',
)

warming counties: ['Cavalier', 'Towner', 'Washburn']
cooling counties: ['Hot Springs', 'Esmeralda', 'Ventura']
