In [1]:
%matplotlib inline
from matplotlib import style
style.use('fivethirtyeight')
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
import numpy as np
import pandas as pd
import datetime as dt
import gmaps
import gmaps.geojson_geometries

import sqlalchemy as sqlalchemy_package
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy.orm import sessionmaker
from sqlalchemy.orm import relationship
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.ext.declarative import DeferredReflection
from sqlalchemy import *

import requests
import pickle
import config as creds
from urllib.parse import urlencode, urlparse, parse_qsl

from my_func import GoogleMapClient
# Import my GoogleMap object with following methods:
        # extract_lat_lng(location)
        # search(keyword, radius = 1000, location=None)
        # detail(place_id, fields=["name", "rating", "formatted_phone_number", "formatted_address"])

In [2]:
# Set up a connection to the postgres server
DATABASE_URL = f"postgres://{creds.PGUSER}:{creds.PGPASSWORD}@{creds.PGHOST}:5433/{creds.PGDATABASE}"
engine = sqlalchemy_package.create_engine(DATABASE_URL)

# US Energy Information Administration API Query Browser, Open Data Source
class EIA_Client():
    api_key=None
    def __init__(self, api_key=creds.eai_key, data='category', *args, **kwargs):
        super().__init__(*args, **kwargs)
        if api_key == None:
            raise Exception('Api key is required')
        self.api_key = api_key
        self. eia_url = f"http://api.eia.gov/{data}/"
    def category(self, cat = 1293027):
        params = {'api_key': self.api_key, 'category_id':cat}
        params_url = urlencode(params)
        url = f"{self.eia_url}?{params_url}"
        response = requests.get(url).json()
        return response
    def series(self, ser = 'INTL.55-1-AFG-TBPD.M'):
        params = {'api_key': self.api_key, 'series_id':ser}
        params_url = urlencode(params)
        url = f"{self.eia_url}?{params_url}"
        response = requests.get(url).json()
        return response
eia_client = EIA_Client()

## Define Schema and Tables for States Data

In [3]:
Base = declarative_base()

# Creating Schema States
class States(Base):
    __tablename__ = "states"
    state_code = Column(String, primary_key=True)
    state = Column(String)
    state_lat = Column(Float)
    state_lng = Column(Float)
    state_refineries = relationship('Refineries', backref = 'states')
    state_ports = relationship('Ports', backref = 'states')

class Ports(Base):
    __tablename__ = "ports"
    port_eia_id = Column(Integer, primary_key=True)
    state_code = Column(String, ForeignKey('states.state_code'))
    port_name = Column(String)
    port_lat = Column(Float)
    port_lng = Column(Float)

class Refineries(Base):
    __tablename__ = "refineries"
    ref_eia_id = Column(Integer, primary_key=True)
    state_code = Column(String, ForeignKey('states.state_code'))
    refinery_name = Column(String)
    refinery_place = Column(String)
    refinery_lat = Column(Float)
    refinery_lng = Column(Float)
    
States.__table__.create(bind=engine, checkfirst=True)
Ports.__table__.create(bind=engine, checkfirst=True)
Refineries.__table__.create(bind=engine, checkfirst=True)

## Clean and Transform States Data

In [4]:
# Transform Nested JSON from API request into a list of dictionaries
# Ports Data
ports_json = eia_client.category()['category']['childcategories']
ports=[]
for result in ports_json:
    row = {}
    port_id, port_full_name = result.get('category_id'), result.get('name')
    port_full_name = port_full_name.split(", ", 2)
    port_name = port_full_name[0]
    state_code = port_full_name[1]
    client = GoogleMapClient(api_key = creds.g_key, address_or_postal_code = f"{port_name}, {state_code}")
    lat, lng = client.extract_lat_lng()
    row['port_eia_id'] = port_id
    row['state_code'] = state_code 
    row['port_name'] = port_name
    row['port_lat'] = lat
    row['port_lng'] = lng
    ports.append(row)
    num_of_ports = len(ports)
    if num_of_ports % 10 == 0: 
            print('Processing records: appended ', num_of_ports,' of ports')

Processing records: appended  10  of ports
Processing records: appended  20  of ports
Processing records: appended  30  of ports
Processing records: appended  40  of ports
Processing records: appended  50  of ports
Processing records: appended  60  of ports
Processing records: appended  70  of ports
Processing records: appended  80  of ports
Processing records: appended  90  of ports
Processing records: appended  100  of ports
Processing records: appended  110  of ports


In [5]:
# Refineries Data   
refinery_json = eia_client.category(cat=1292739)['category']['childcategories']
refineries = []
for result in refinery_json:
    row = {}
    refinery_id, refinery_full_name = result.get('category_id'), result.get('name')
    refinery_full_name = refinery_full_name.split(" / ", 3)
    refinery_name = refinery_full_name[0]
    refinery_place = refinery_full_name[1]
    state_code = refinery_full_name[2]
    client = GoogleMapClient(api_key = creds.g_key, address_or_postal_code = f"{refinery_full_name}")
    lat, lng = client.extract_lat_lng()
    if state_code == 'SUGARLAND': state_code = 'TX'
    if state_code == 'WILMINGTON': state_code = 'CA'
    row['ref_eia_id'] = refinery_id
    row['state_code'] = state_code
    row['refinery_name'] = refinery_name
    row['refinery_place'] = refinery_place
    row['refinery_lat'] = lat
    row['refinery_lng'] = lng 
    if 'UNKNOWN' not in refinery_name: refineries.append(row)
    num_of_refineries = len(ports)
    if num_of_refineries % 30 == 0: 
            print('Processing records: appended ', num_of_refineries,' of refineries')

In [7]:
# Transform States Data from CSV
states_df = pd.read_csv('../resources/us_states.csv')
states =[]
for result in states_df.itertuples():
    row = {}
    row['state_code'] = result.State
    row['state'] = result.City
    row['state_lat'] = round(result.Latitude, 4)
    row['state_lng'] = round(result.Longitude, 4)
    states.append(row)
virgin_islands = {'state_code': 'VI', 'state': 'US Virgin Islands',
                  'state_lat': 18.3434, 'state_lng': -64.8672}
states.append(virgin_islands)

## Load Data into the Database via Session API

In [9]:
Session = sessionmaker(bind=engine)
session = Session()

# Uploading states
for state in states:
    row = States(**state)
    session.add(row)

# Uploading ports
for port in ports:
    row = Ports(**port)
    session.add(row)

# Uploading refineries
for refinery in refineries:
    row = Refineries(**refinery)
    session.add(row)
    
session.commit()

IntegrityError: (psycopg2.errors.UniqueViolation) duplicate key value violates unique constraint "states_pkey"
DETAIL:  Key (state_code)=(AL) already exists.

[SQL: INSERT INTO states (state_code, state, state_lat, state_lng) VALUES (%(state_code)s, %(state)s, %(state_lat)s, %(state_lng)s)]
[parameters: ({'state_code': 'AL', 'state': 'Alabama', 'state_lat': 32.601, 'state_lng': -86.6807}, {'state_code': 'AK', 'state': 'Alaska', 'state_lat': 61.3025, 'state_lng': -158.775}, {'state_code': 'AZ', 'state': 'Arizona', 'state_lat': 34.1682, 'state_lng': -111.9309}, {'state_code': 'AR', 'state': 'Arkansas', 'state_lat': 34.7519, 'state_lng': -92.1314}, {'state_code': 'CA', 'state': 'California', 'state_lat': 37.2719, 'state_lng': -119.2704}, {'state_code': 'CO', 'state': 'Colorado', 'state_lat': 38.9979, 'state_lng': -105.5506}, {'state_code': 'CT', 'state': 'Connecticut', 'state_lat': 41.5188, 'state_lng': -72.7575}, {'state_code': 'DE', 'state': 'Delaware', 'state_lat': 39.1453, 'state_lng': -75.4189}  ... displaying 10 of 52 total bound parameter sets ...  {'state_code': 'WY', 'state': 'Wyoming', 'state_lat': 43.0003, 'state_lng': -107.5546}, {'state_code': 'VI', 'state': 'US Virgin Islands', 'state_lat': 18.3434, 'state_lng': -64.8672})]
(Background on this error at: http://sqlalche.me/e/13/gkpj)