In [1]:
import os
import re
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Plot with seaborn's 'colorblind' palette so series stay distinguishable for colorblind readers
sns.set_palette(palette='colorblind')

In [2]:
def concat_weather_files(list_of_files=None):
  """Read every weather CSV in the current working directory into one DataFrame.

  A directory entry is treated as a weather file when its name matches the
  pattern ``^[0-9].+to+`` (starts with a digit and later contains "to").
  Entries that do not match are reported with a printed message and skipped.

  Parameters
  ----------
  list_of_files : list of str, optional
      Paths to read in addition to the discovered files. When a list is
      passed, newly discovered filenames are appended to it in place (names
      already present are not added twice). When omitted, a fresh list is
      used on every call, so repeated calls never accumulate filenames.

  Returns
  -------
  pandas.DataFrame
      Row-wise concatenation of all files, in list order. Each file's own
      row index is kept, so index labels may repeat.

  Raises
  ------
  ValueError
      If there is no file to read.
  """
  # A `[]` default is created once and shared by every call, which made each
  # re-run read the same files again and duplicate their rows.
  if list_of_files is None:
    list_of_files = []

  # os.listdir() order is arbitrary; sort so the row order is reproducible.
  for filename in sorted(os.listdir()):
    if re.search("^[0-9].+to+", filename) is None:
      print("ERROR: Not a weather file.")
      continue
    if filename not in list_of_files:
      list_of_files.append(filename)

  if not list_of_files:
    raise ValueError(f"No weather files found in {os.getcwd()!r}")

  frames = [pd.read_csv(f) for f in list_of_files]
  return pd.concat(frames)

In [None]:
# Load all weather files into one frame, then expose the 'name' column as
# 'zipcode' so it can be joined against the geo data's zipcode column.
files = []
weather_df = concat_weather_files(files)
weather_df = weather_df.assign(zipcode=weather_df['name']).drop(columns='name')

In [4]:
# retrieve the geo data
try:
  # read_csv already returns a DataFrame; no extra pd.DataFrame(...) wrap needed
  geo_data = pd.read_csv("geo-data.csv")
except Exception:
  # Later cells use geo_data, so swallowing the error would leave it undefined
  # (or silently stale from an earlier run). Report the problem, then stop here.
  print("ERROR: could not read in geo data.")
  raise

In [5]:
# gather the set of zipcodes we need from the geo data
# Sorted so the list has a reproducible order (a bare set() order is arbitrary).
weather_zips = sorted(weather_df['zipcode'].astype(str).unique())
# Compare as strings on both sides: weather_zips holds strings, and a numeric
# zipcode column would never equal them. .copy() makes geo_df an independent
# frame so later column assignments do not write into a slice of geo_data.
geo_df = geo_data.loc[geo_data['zipcode'].astype(str).isin(weather_zips)].copy()

In [None]:
# Per-column flag: True when the column holds at least one missing value
geo_df.isna().any()

In [None]:
# fill state null values by mapping their abbreviation to their full name
# Take each (abbreviation, name) pair from rows where both are present.
# Zipping two independently sorted lists mis-pairs states whose names and
# abbreviations sort differently (e.g. AK/AL vs. Alabama/Alaska).
known_states = geo_df.dropna(subset=['state', 'state_abbr']).drop_duplicates(subset='state_abbr')
states = np.sort(known_states['state'].astype(str).unique())
states_abbr = np.sort(known_states['state_abbr'].astype(str).unique())
state_identity = dict(zip(known_states['state_abbr'], known_states['state']))

# Only missing 'state' values are filled; abbreviations absent from the mapping
# stay null. assign() returns a new frame instead of writing into a slice.
geo_df = geo_df.assign(state=geo_df['state'].fillna(geo_df['state_abbr'].map(state_identity)))
geo_df.apply(lambda x: x.isnull().any())

In [8]:
# Before merging: work on fresh copies of both frames and give the join key
# the same pandas "string" dtype on each side.
geo_df = geo_df.assign(zipcode=geo_df['zipcode'].astype("string"))
weather_df = weather_df.assign(zipcode=weather_df['zipcode'].astype("string"))

In [9]:
# Inner-join weather rows with their geo rows on the shared zipcode key
weather_geo_df = weather_df.merge(geo_df, how='inner', on='zipcode')

In [None]:
# Stacked Bar Chart: for each month, gather the max temperature for that month in each zip code
weather_geo_df['datetime'] = pd.to_datetime(weather_geo_df['datetime'])
weather_geo_df['Month'] = weather_geo_df['datetime'].dt.month

# pivot_table aggregates with the mean unless told otherwise; ask for the max
# so the numbers match the chart's title and axis label.
pivot_table = weather_geo_df.pivot_table(index=['Month', 'zipcode'], values='tempmax', aggfunc='max')

# One column per zip code. Selecting 'tempmax' first keeps the column labels as
# plain zip codes, so the legend is generated from the plotted data itself
# rather than from a separately ordered list that may not line up with the bars.
monthly_max = pivot_table['tempmax'].unstack('zipcode')
ax = monthly_max.plot(kind='bar', stacked=True)

# Create Bar Chart
ax.set_xlabel('Month')
ax.set_ylabel('Max Temperature')
ax.set_title('Max Temperatures for Each Zip Code by Month')
ax.legend(title='Zip Code', fontsize=7)
plt.show()

In [None]:
# Line graph: mean 'temp' per (Month, zipcode), one line per zip code.
# Relies on the 'Month' column already present on weather_geo_df.
pivot_table = weather_geo_df.pivot_table(
    index=['Month', 'zipcode'],
    values='temp',
    aggfunc='mean',
).reset_index()

# Draw one marked line per zip code
for zip_label, monthly in pivot_table.groupby('zipcode'):
    months = monthly['Month']
    mean_temps = monthly['temp']
    plt.plot(months, mean_temps, marker='o', label=f'Zip Code {zip_label}')

plt.title('Average Temperature for Each Zip Code by Month')
plt.xlabel('Month')
plt.ylabel('Average Temperature')
plt.xticks(range(1, 13))  # one tick per calendar month
plt.legend(title='Zip Code', fontsize='7')
plt.show()