In [1]:
# Install haversine package for calculating distance between geometric coordinates
!pip install haversine



In [2]:
# Import dependencies
import pandas as pd
import haversine as hs
from haversine import Unit

# Load the county midpoint table; GEOID is parsed as text rather than as a number.
# NOTE(review): the previewed GEOID values (e.g. 1001) are 4 digits - confirm whether
# the source file already lost the leading zero of the 5-digit county code.
county_locs = pd.read_csv(
    '../cleaned_data/corrected_midpoints.csv',
    dtype={'GEOID': str},
)

# Load the operating nuclear plants: name, latitude/longitude and capacity per plant.
nuke_locs = pd.read_csv(
    '../cleaned_data/operating_reactors.csv',
)

In [3]:
# Preview the first five rows of the county midpoint table to check that it loaded.
county_locs.head()

Unnamed: 0,latitude,longitude,GEOID,County_State
0,32.53492,-86.642749,1001,"Autauga County, Alabama"
1,30.66097,-87.74984,1003,"Baldwin County, Alabama"
2,31.869603,-85.393197,1005,"Barbour County, Alabama"
3,32.998644,-87.126439,1007,"Bibb County, Alabama"
4,33.980867,-86.567371,1009,"Blount County, Alabama"


In [4]:
# Preview the first five rows of the nuclear plant table to check that it loaded.
nuke_locs.head()

Unnamed: 0,plant_name,latitude,longitude,capacity_Mwe
0,Arkansas Nuclear One,35.311,-93.2351,1845.0
1,Beaver Valley Power Station,40.6219,-80.4336,1846.8
2,Braidwood Station,41.2435,-88.2286,2449.8
3,Browns Ferry Nuclear Plant,34.7042,-87.1189,3567.5
4,Brunswick Steam Electric Plant,33.9597,-78.0114,2003.2


In [5]:
def closest_plant(county_lat, county_lng, plants=None):
    """Find the nuclear power plant nearest to a latitude/longitude point.

    The haversine distance from the given point to every plant is computed in
    miles, and the plant with the smallest distance is selected. The comparison
    uses the unrounded distances, so two plants that fall within the same whole
    mile are still ranked correctly; only the reported distance is rounded.
    If several plants are exactly equidistant, the first one in table order wins.

    The plant table is only read: no helper column is written back to it.

    Parameters
    ----------
    county_lat, county_lng : float
        Latitude and longitude of the point of interest, in decimal degrees.
    plants : pandas.DataFrame, optional
        Table with the columns ``plant_name``, ``latitude``, ``longitude`` and
        ``capacity_Mwe``. Defaults to the notebook-level ``nuke_locs`` frame.

    Returns
    -------
    tuple
        ``(name, distance, capacity)`` - the plant name, the distance to it in
        miles rounded to the nearest whole mile (as a float), and the plant's
        ``capacity_Mwe`` value.

    Raises
    ------
    ValueError
        If the plant table has no rows.
    """
    if plants is None:
        plants = nuke_locs
    if len(plants) == 0:
        raise ValueError("closest_plant() needs at least one plant to compare against")

    origin = (county_lat, county_lng)
    miles = [
        hs.haversine(origin, (plant_lat, plant_lng), unit=Unit.MILES)
        for plant_lat, plant_lng in zip(plants["latitude"], plants["longitude"])
    ]

    # min() returns the first minimal position, so exact ties keep table order.
    nearest = min(range(len(miles)), key=miles.__getitem__)
    plant = plants.iloc[nearest]

    # Columns are looked up by label so a change in column order cannot
    # silently swap the returned fields.
    return plant["plant_name"], float(round(miles[nearest])), plant["capacity_Mwe"]

In [6]:
# For every county midpoint, look up the closest nuclear plant and record its
# name, distance and capacity on that county's row.
# All results are collected first and attached as whole columns in one step:
# this avoids a per-row `.loc` write that enlarges the frame with a mixed-type
# list, and lets the numeric columns get their dtype from the full set of values.
result_cols = ["closest_plant", "distance", "plant_capacity"]
nearest_plants = pd.DataFrame(
    [
        closest_plant(county_lat, county_lng)
        for county_lat, county_lng in zip(county_locs["latitude"], county_locs["longitude"])
    ],
    columns=result_cols,
    index=county_locs.index,  # keep results aligned with their county rows
)
for column in result_cols:
    county_locs[column] = nearest_plants[column]
county_locs.head()

Unnamed: 0,latitude,longitude,GEOID,County_State,closest_plant,distance,plant_capacity
0,32.53492,-86.642749,1001,"Autauga County, Alabama",Joseph M. Farley Nuclear Plant,128.0,1776.4
1,30.66097,-87.74984,1003,"Baldwin County, Alabama",Joseph M. Farley Nuclear Plant,161.0,1776.4
2,31.869603,-85.393197,1005,"Barbour County, Alabama",Joseph M. Farley Nuclear Plant,48.0,1776.4
3,32.998644,-87.126439,1007,"Bibb County, Alabama",Browns Ferry Nuclear Plant,118.0,3567.5
4,33.980867,-86.567371,1009,"Blount County, Alabama",Browns Ferry Nuclear Plant,59.0,3567.5


In [7]:
# Write the county table, now including the closest-plant columns, to a csv file.
# index=False keeps the DataFrame index out of the file.
county_locs.to_csv(
    '../cleaned_data/closest_nuc_plant_in_each_county.csv',
    index=False,
)