# Prepare Illinois Energy Plant Data

In [None]:
# imports
import pandas as pd
import re

## Convert degree/minute/second coordinates (e.g. 38°16′40.2″N 89°40′05″W) from Wikipedia to decimal Lat/Long

In [None]:
import re

def convertDMSToDD(degrees: str, minutes: str, seconds: str, direction: str):
    """Convert a degrees/minutes/seconds reading to signed decimal degrees.

    ``degrees``, ``minutes`` and ``seconds`` are each passed through ``float``,
    so numeric strings and plain numbers are both accepted. The result is
    negated when ``direction`` is "S" or "W"; any other value leaves it
    positive.
    """
    whole_part = float(degrees)
    minute_part = float(minutes) / 60
    second_part = float(seconds) / 3600
    decimal_degrees = whole_part + minute_part + second_part

    # Southern and western hemispheres are the negative half of each axis.
    if direction in ("S", "W"):
        return decimal_degrees * -1
    return decimal_degrees

# Degrees, then optional minutes, then optional seconds (seconds only after
# minutes), then the hemisphere letter. Raw strings keep ``\d`` a regex escape
# instead of an invalid Python string escape.
_DMS_PATTERN = re.compile(
    r"(\d+(?:\.\d+)?)°"
    r"(?:(\d+(?:\.\d+)?)′(?:(\d+(?:\.\d+)?)″)?)?"
    r"([NESW])"
)


def castDMS(input: str):
    r"""Cast a single coordinate from degree notation to decimal degrees.

    Recognised shapes, matched from the start of ``input``:

    * ``38°16′40.2″N`` -- degrees, minutes, seconds
    * ``38°16.12′N``   -- degrees, minutes
    * ``38.204°N``     -- degrees only

    Text after the hemisphere letter is ignored, and the hemisphere is always
    the single matched letter (N, E, S or W).

    Returns the decimal-degree float, or ``input`` unchanged when it is not a
    string or does not start with one of the shapes above.

    old regex: /\d+(\.\d+)?°(\d+(\.\d+)?′)?(\d+(\.\d+)?″)?[NESW]/
    """
    # Missing values (None, NaN, numbers) are "something else": hand them back
    # just like any other unparseable input instead of raising inside ``re``.
    if not isinstance(input, str):
        return input

    match = _DMS_PATTERN.match(input)
    if match is None:
        # it's something else, just return original input
        return input

    degrees, minutes, seconds, direction = match.groups()
    # Components absent from the input count as zero.
    return convertDMSToDD(degrees, minutes or "0", seconds or "0", direction)

In [None]:
csv_path = "../data/energy/illinois/natural_gas_plants.tsv"
df = pd.read_csv(csv_path, delimiter="\t")


def _cast_coordinate_token(cell, position):
    """Convert the space-separated token at ``position`` of a coordinates cell.

    Returns None when ``cell`` is not a string (e.g. NaN for an empty field)
    or has no token at ``position``, so one incomplete row cannot abort the
    whole conversion.
    """
    if not isinstance(cell, str):
        return None
    tokens = cell.split(' ')
    if position >= len(tokens):
        return None
    return castDMS(tokens[position])


if 'coordinates' in df:
    # The first token feeds yCoordinate and the second feeds xCoordinate
    # (expected to be the N/S and E/W readings respectively).
    df['yCoordinate'] = df['coordinates'].map(lambda cell: _cast_coordinate_token(cell, 0))
    df['xCoordinate'] = df['coordinates'].map(lambda cell: _cast_coordinate_token(cell, 1))
    # Overwrites the source TSV in place with the two new columns added.
    df.to_csv(csv_path, sep="\t", index=False)
else:
    print("'coordinates' column not found in the file")

## Look up Lat/Long coordinates for plants that do not have them

In [76]:
# using geocoding API: https://console.cloud.google.com/apis/library/geocoding-backend.googleapis.com
import requests
import os
import pandas as pd
from pathlib import Path
from dotenv import load_dotenv
from time import sleep

# Load variables from the .env file one directory up, then read the geocoding
# key from the environment (None when the variable is not set).
env_file = Path('../.env')
load_dotenv(dotenv_path=env_file)
API_KEY = os.environ.get('API_KEY')

In [80]:
def get_lat_long(address: str, api_key: str = API_KEY):
    """Geocode ``address`` with the Google Geocoding API.

    Args:
        address: Free-text address or place description to look up.
        api_key: Google API key; defaults to the module-level ``API_KEY``.

    Returns:
        A ``(latitude, longitude)`` tuple taken from the first result, or
        ``None`` when the response status is anything other than ``'OK'``.

    Raises:
        requests.exceptions.RequestException: on connection failure or when
            the server does not answer within the timeout.
    """
    # Passing the query through ``params`` lets requests percent-encode it, so
    # addresses containing '&', '#' or '+' no longer corrupt the query string.
    # The timeout keeps a stalled connection from hanging the notebook.
    api_response = requests.get(
        'https://maps.googleapis.com/maps/api/geocode/json',
        params={'address': address, 'key': api_key},
        timeout=10,
    )
    api_response_dict = api_response.json()
    # Pause one second after every request (presumably to stay under the API
    # rate limit).
    sleep(1)

    if api_response_dict['status'] != 'OK':
        return None

    location = api_response_dict['results'][0]['geometry']['location']
    return (location['lat'], location['lng'])

In [81]:
csv_path = "../data/energy/illinois/solar_plants.tsv"
solar_df = pd.read_csv(csv_path, delimiter="\t")


def get_row_address(row):
    """Build the free-text geocoding query for one solar plant row."""
    return f"{row['name']} solar plant {row['location']} illinois"


# get_lat_long returns None when the lookup does not succeed; substitute an
# empty pair so a single failed lookup (or an empty table) cannot break the
# column assignment below.
solar_coordinates = [
    get_lat_long(get_row_address(row)) or (None, None)
    for _, row in solar_df.iterrows()
]
# Latitude goes to yCoordinate, longitude to xCoordinate.
solar_df['yCoordinate'] = [latitude for latitude, _ in solar_coordinates]
solar_df['xCoordinate'] = [longitude for _, longitude in solar_coordinates]

In [84]:
# Write the geocoded solar table back to its TSV without the index column.
solar_output_path = '../data/energy/illinois/solar_plants.tsv'
solar_df.to_csv(solar_output_path, sep="\t", index=False)

In [85]:
csv_path = "../data/energy/illinois/wind_plants.tsv"
wind_df = pd.read_csv(csv_path, delimiter="\t")


def get_row_address(row):
    """Build the free-text geocoding query for one wind farm row."""
    return f"{row['name']} wind farm {row['location']} illinois"


# get_lat_long returns None when the lookup does not succeed; substitute an
# empty pair so a single failed lookup (or an empty table) cannot break the
# column assignment below.
wind_coordinates = [
    get_lat_long(get_row_address(row)) or (None, None)
    for _, row in wind_df.iterrows()
]
# Latitude goes to yCoordinate, longitude to xCoordinate.
wind_df['yCoordinate'] = [latitude for latitude, _ in wind_coordinates]
wind_df['xCoordinate'] = [longitude for _, longitude in wind_coordinates]

In [None]:
# Write the geocoded wind table back to its TSV without the index column.
wind_output_path = '../data/energy/illinois/wind_plants.tsv'
wind_df.to_csv(wind_output_path, sep="\t", index=False)