In [1]:
# Import dependencies
import pandas as pd
from pathlib import Path
import requests
import json
from pprint import pprint

from api_keys import geoapify_key

In [2]:
# Read the cleaned CSV from the Resources folder into a DataFrame
clean_2_path = Path("Resources") / "clean_2.csv"
clean_2_df = pd.read_csv(clean_2_path)

# Preview the first rows to confirm the file loaded as expected
clean_2_df.head()

Unnamed: 0,School,Year,Date,Time,City,State,Killed,Injured,Longitude,Latitude
0,Columbine High School,1999,4/20/1999,11:19 AM,Littleton,Colorado,13,21,-105.075,39.60391
1,Scotlandville Middle School,1999,4/22/1999,12:30 PM,Baton Rouge,Louisiana,0,1,-91.169966,30.529958
2,Heritage High School,1999,5/20/1999,8:03 AM,Conyers,Georgia,0,6,-84.04796,33.626922
3,John Bartram High School,1999,10/4/1999,10:00 AM,Philadelphia,Pennsylvania,0,1,-75.234108,39.921509
4,Dorchester High School,1999,11/3/1999,7:40 AM,Boston,Massachusetts,0,1,-71.075901,42.285268


In [3]:
# Remove the existing "Longitude" and "Latitude" columns so freshly geocoded
# values can take their place; clean_2_df itself is left unchanged
clean_2_dropped = clean_2_df.drop(["Longitude", "Latitude"], axis="columns")
clean_2_dropped.head()

Unnamed: 0,School,Year,Date,Time,City,State,Killed,Injured
0,Columbine High School,1999,4/20/1999,11:19 AM,Littleton,Colorado,13,21
1,Scotlandville Middle School,1999,4/22/1999,12:30 PM,Baton Rouge,Louisiana,0,1
2,Heritage High School,1999,5/20/1999,8:03 AM,Conyers,Georgia,0,6
3,John Bartram High School,1999,10/4/1999,10:00 AM,Philadelphia,Pennsylvania,0,1
4,Dorchester High School,1999,11/3/1999,7:40 AM,Boston,Massachusetts,0,1


In [4]:
# Function to geocode a city and state with the Geoapify search endpoint
def geocode_and_update(city, state, api_key):
    """Look up the coordinates of "<city>, <state>" via the Geoapify geocoding API.

    Despite its name, this function does not modify any DataFrame; it only
    returns the coordinates so that the caller can store them.

    Args:
        city: City name used as the first part of the search text.
        state: State name used as the second part of the search text.
        api_key: Geoapify API key, sent as the ``apiKey`` query parameter.

    Returns:
        The ``coordinates`` value of the first returned feature's geometry
        (the caller reads index 0 as longitude and index 1 as latitude), or
        ``None`` when the request fails, the HTTP status is not 200, the body
        is not valid JSON, or no feature is returned.
    """
    url = "https://api.geoapify.com/v1/geocode/search"
    # Let requests build the query string so characters such as "&", "#" or "+"
    # inside a place name are URL-encoded instead of corrupting the request.
    params = {"text": f"{city}, {state}", "apiKey": api_key}
    try:
        # Bound the wait so one stalled connection cannot hang the notebook.
        response = requests.get(url, params=params, timeout=10)
    except requests.RequestException:
        # Network-level failure: report "not found" like every other failure path.
        return None
    if response.status_code != 200:
        return None
    try:
        data = response.json()
    except ValueError:
        # The body was not valid JSON.
        return None
    features = data.get('features')
    if features:
        return features[0]['geometry']['coordinates']
    return None

# Geocode each row's city and state and store the result in new coordinate columns
api_key = geoapify_key
clean_2_dropped['Latitude'] = None  # Create new columns for latitude and longitude
clean_2_dropped['Longitude'] = None

# Successful lookups keyed by (city, state), so a place that appears in several
# rows costs a single API call. Failures are not cached, which gives a later row
# for the same place another attempt.
geocode_cache = {}

# Iterate over a City/State slice of the frame being filled in; name=None makes
# itertuples yield plain (index, city, state) tuples.
for index, city, state in clean_2_dropped[['City', 'State']].itertuples(name=None):
    place = (city, state)
    coordinates = geocode_cache.get(place)
    if coordinates is None:
        coordinates = geocode_and_update(city, state, api_key)
        if coordinates:
            geocode_cache[place] = coordinates

    if coordinates:
        # The coordinates pair is ordered [longitude, latitude]
        clean_2_dropped.at[index, 'Latitude'] = coordinates[1]
        clean_2_dropped.at[index, 'Longitude'] = coordinates[0]
        print(f"Geocoded {city}, {state}")
    else:
        print(f"Failed to geocode {city}, {state}")

# View DataFrame with "Latitude" and "Longitude" columns added
clean_2_dropped.head()

Geocoded Littleton, Colorado
Geocoded Baton Rouge, Louisiana
Geocoded Conyers, Georgia
Geocoded Philadelphia, Pennsylvania
Geocoded Boston, Massachusetts
Geocoded Deming, New Mexico
Geocoded Fort Gibson, Oklahoma
Geocoded New Port Richey, Florida
Geocoded Merced, California
Geocoded Chicago, Illinois
Geocoded Horn Lake, Mississippi
Geocoded Tecumseh, Oklahoma
Geocoded Mount Morris Township, Michigan
Geocoded Hugo, Oklahoma
Geocoded Sierra Vista, Arizona
Geocoded Lake Worth, Florida
Geocoded Renton, Washington
Geocoded Mount Healthy, Ohio
Geocoded New Orleans, Louisiana
Geocoded Oxnard, California
Geocoded Baltimore, Maryland
Geocoded Dallas, Texas
Geocoded Santee, California
Geocoded Williamsport, Pennsylvania
Geocoded El Cajon, California
Geocoded Gary, Indiana
Geocoded Monroe, Louisiana
Geocoded Ennis, Texas
Geocoded Los Angeles, California
Geocoded Covington, Kentucky
Geocoded Redondo Beach, California
Geocoded Caro, Michigan
Geocoded New York City, New York
Geocoded Los Angeles, Ca

Unnamed: 0,School,Year,Date,Time,City,State,Killed,Injured,Latitude,Longitude
0,Columbine High School,1999,4/20/1999,11:19 AM,Littleton,Colorado,13,21,39.613321,-105.016649
1,Scotlandville Middle School,1999,4/22/1999,12:30 PM,Baton Rouge,Louisiana,0,1,30.449416,-91.186966
2,Heritage High School,1999,5/20/1999,8:03 AM,Conyers,Georgia,0,6,33.66761,-84.01769
3,John Bartram High School,1999,10/4/1999,10:00 AM,Philadelphia,Pennsylvania,0,1,39.952724,-75.163526
4,Dorchester High School,1999,11/3/1999,7:40 AM,Boston,Massachusetts,0,1,42.355433,-71.060511


In [5]:
# Save the updated DataFrame to a new CSV file
output_path = Path('Outputs/clean_2_updated.csv')
# to_csv does not create missing folders, so make sure the target directory exists
output_path.parent.mkdir(parents=True, exist_ok=True)
clean_2_dropped.to_csv(output_path, index=False)
