In [1]:
# Import pandas
import pandas as pd

In [2]:
FILE_PATHS = [r'dataset\2020 Annual Campaign - Archived.csv', r'dataset\2021 Annual Campaign - Archived.csv']

In [3]:
df_2020 = pd.read_csv(FILE_PATHS[0])
df_2021 = pd.read_csv(FILE_PATHS[1])
df_2020.drop(axis=1, columns=['Unnamed: 18'], inplace=True) # No idea why this column exists
df_2021.drop(axis=1, columns=['Unnamed: 18'], inplace=True)

In [4]:
df_2020 = df_2020[['State/Province', 'City', 'Donation Amount']]
df_2021 = df_2021[['State/Province', 'City', 'Donation Amount']]

In [5]:
df_2020.fillna('Unknown', inplace=True)
df_2020 = df_2020.applymap(lambda x: x.strip() if isinstance(x, str) else x) # strip everything

df_2021.fillna('Unknown', inplace=True)
df_2021 = df_2021.applymap(lambda x: x.strip() if isinstance(x, str) else x)

In [6]:
df_2020['City'] = df_2020['City'].apply(lambda x: x.lower())
df_2021['City'] = df_2021['City'].apply(lambda x: x.lower())

In [7]:
df_2020 = df_2020[(df_2020['City'] != 'unknown') & (df_2020['State/Province'] != 'Unknown')]
df_2021 = df_2021[(df_2021['City'] != 'unknown') & (df_2021['State/Province'] != 'Unknown')]

In [8]:
df_2020 = df_2020.groupby(['State/Province', 'City']).agg({'Donation Amount': 'sum'}).reset_index()
df_2021 = df_2021.groupby(['State/Province', 'City']).agg({'Donation Amount': 'sum'}).reset_index()

In [9]:
df_2020.head(5)

Unnamed: 0,State/Province,City,Donation Amount
0,CA,la mesa,26.06
1,CA,san francisco,100.0
2,DC,washington,50.0
3,FL,celebration,150.0
4,MN,richfield,275.0


In [10]:
df_2020[['lat', 'long']] = 0
df_2021[['lat', 'long']] = 0
df_2020.head(5)

Unnamed: 0,State/Province,City,Donation Amount,lat,long
0,CA,la mesa,26.06,0,0
1,CA,san francisco,100.0,0,0
2,DC,washington,50.0,0,0
3,FL,celebration,150.0,0,0
4,MN,richfield,275.0,0,0


In [11]:
df_2020.to_csv('intermediate_csvs/For asking AI/2020_unfilled_latlong.csv', index=False)
df_2021.to_csv('intermediate_csvs/For asking AI/2021_unfilled_latlong.csv', index=False)

In [12]:
# After asking AI
ai = pd.read_csv('intermediate_csvs/For asking AI/AI_response.csv')
ai.head(5)

Unnamed: 0,City,State/Province,lat,long
0,la mesa,CA,32.7695,-117.0203
1,san francisco,CA,37.773972,-122.431297
2,washington,DC,38.8951,-77.0364
3,celebration,FL,28.325289,-81.533127
4,richfield,MN,44.883244,-93.28624


In [13]:
df_2020 = df_2020.merge(ai, how='left', on='City')
df_2020.drop(axis=1, columns=['lat_x', 'long_x', 'State/Province_y'], inplace=True)
df_2020.rename(columns={'State/Province_x': 'State/Province', 'lat_y': 'lat', 'long_y': 'long'}, inplace=True)
df_2020.head(5)

Unnamed: 0,State/Province,City,Donation Amount,lat,long
0,CA,la mesa,26.06,32.7695,-117.0203
1,CA,san francisco,100.0,37.773972,-122.431297
2,DC,washington,50.0,38.8951,-77.0364
3,FL,celebration,150.0,28.325289,-81.533127
4,MN,richfield,275.0,44.883244,-93.28624


In [14]:
df_2021 = df_2021.merge(ai, how='left', on='City')
df_2021.drop(axis=1, columns=['lat_x', 'long_x', 'State/Province_y'], inplace=True)
df_2021.rename(columns={'State/Province_x': 'State/Province', 'lat_y': 'lat', 'long_y': 'long'}, inplace=True)
df_2021.head(5)

Unnamed: 0,State/Province,City,Donation Amount,lat,long
0,IL,willow springs,51.8,41.7359,-87.8778
1,MA,andover,150.0,42.6584,-71.137
2,MA,andover,150.0,45.2333,-93.2916
3,MA,boston,51.8,42.3601,-71.0589
4,MN,andover,51.8,42.6584,-71.137


In [15]:
df_2020.to_csv('intermediate_csvs/For asking AI/2020_loc_latlongai.csv', index=False)
df_2021.to_csv('intermediate_csvs/For asking AI/2021_loc_latlongai.csv', index=False)