# Geo Data of Imports by US Ports (October 2023)

In [1]:
import os
import time

import hvplot.pandas
import pandas as pd
import requests
from dotenv import load_dotenv
from geopy.exc import GeocoderTimedOut
from geopy.extra.rate_limiter import RateLimiter
from geopy.geocoders import Nominatim

# Load variables from a local .env file into the environment so that
# os.getenv can find CENSUS_API_KEY without the key living in the notebook.
load_dotenv()

True

In [2]:
# Census API key, taken from the environment; None when the variable is unset.
api_key = os.environ.get('CENSUS_API_KEY')

In [3]:
# Monthly general import value per US port, from the Census international trade API.
base_uri = 'https://api.census.gov/data/timeseries/intltrade/imports/porths'
fields = 'PORT,PORT_NAME,GEN_VAL_MO'
# NOTE(review): this name shadows the `time` module imported at the top of the
# notebook. It is kept as-is because a later cell reuses it for its own request.
time = '2023-10'

# Let requests build the query string: values are URL-encoded, and a parameter
# whose value is None (an unset API key) is omitted instead of being sent as
# the literal text "None".
r = requests.get(
    base_uri,
    params={'get': fields, 'time': time, 'key': api_key},
    timeout=30,
)
# Surface HTTP errors here rather than as a confusing JSON decode failure below.
r.raise_for_status()
data = r.json()

# Raw response as-is: row 0 is the header, row 1 the all-ports summary.
us_ports_imports_by_month_df = pd.DataFrame(data)
us_ports_imports_by_month_df.head()

Unnamed: 0,0,1,2,3
0,PORT,PORT_NAME,GEN_VAL_MO,time
1,-,TOTAL FOR ALL PORTS,276809669615,2023-10
2,0104,"JACKMAN, ME",45330556,2023-10
3,0101,"PORTLAND, ME",206842205,2023-10
4,0102,"BANGOR, ME",172499706,2023-10


In [4]:
# Set columns and drop header + summary rows.
# The table is rebuilt from the raw response (`data`) instead of being edited in
# place, so re-running this cell always gives the same result.
header, *records = data

us_ports_imports_by_month_df = (
    pd.DataFrame(records, columns=header)
    # The "TOTAL FOR ALL PORTS" summary row carries '-' as its PORT code;
    # match on that value rather than assuming it is always the first record.
    .loc[lambda df: df['PORT'] != '-']
    # Values arrive as strings; cast the import value so it can be sorted and plotted.
    .astype({'GEN_VAL_MO': float})
    .reset_index(drop=True)
)

us_ports_imports_by_month_df.head()

Unnamed: 0,PORT,PORT_NAME,GEN_VAL_MO,time
0,104,"JACKMAN, ME",45330556.0,2023-10
1,101,"PORTLAND, ME",206842205.0,2023-10
2,102,"BANGOR, ME",172499706.0,2023-10
3,103,"EASTPORT, ME",75600.0,2023-10
4,105,"VANCEBORO, ME",35203342.0,2023-10


In [5]:
# Use the port code as the row index
us_ports_imports_by_month_df = us_ports_imports_by_month_df.set_index(keys='PORT')

us_ports_imports_by_month_df.head(5)

Unnamed: 0_level_0,PORT_NAME,GEN_VAL_MO,time
PORT,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
104,"JACKMAN, ME",45330556.0,2023-10
101,"PORTLAND, ME",206842205.0,2023-10
102,"BANGOR, ME",172499706.0,2023-10
103,"EASTPORT, ME",75600.0,2023-10
105,"VANCEBORO, ME",35203342.0,2023-10


In [6]:
# Create empty placeholder columns for the coordinates filled in by geocoding
for coord_column in ('LAT', 'LON'):
    us_ports_imports_by_month_df[coord_column] = None

us_ports_imports_by_month_df.head(5)

Unnamed: 0_level_0,PORT_NAME,GEN_VAL_MO,time,LAT,LON
PORT,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
104,"JACKMAN, ME",45330556.0,2023-10,,
101,"PORTLAND, ME",206842205.0,2023-10,,
102,"BANGOR, ME",172499706.0,2023-10,,
103,"EASTPORT, ME",75600.0,2023-10,,
105,"VANCEBORO, ME",35203342.0,2023-10,,


In [8]:
# Look up coordinates for every port name with Nominatim (OpenStreetMap).
geolocator = Nominatim(user_agent="student_project", timeout=10)

# RateLimiter spaces calls at least one second apart (the public Nominatim
# service asks for no more than one request per second), retries geocoder
# service errors such as timeouts, and returns None once retries are exhausted,
# so a single failing lookup does not abort the whole run.
geocode_port = RateLimiter(geolocator.geocode, min_delay_seconds=1)

for index, port in us_ports_imports_by_month_df['PORT_NAME'].items():
    location = geocode_port(port)
    if location is None:
        # No match (or lookup failed): leave LAT/LON empty for this port.
        continue

    # Write both coordinates together. The check is against None rather than
    # truthiness so a legitimate 0.0 latitude or longitude is not discarded.
    us_ports_imports_by_month_df.at[index, 'LAT'] = float(location.latitude)
    us_ports_imports_by_month_df.at[index, 'LON'] = float(location.longitude)

In [9]:
# Drop any rows without geo data (ports the geocoder could not resolve).
# Only LAT/LON are considered, so a gap in an unrelated column cannot remove a port.
# .copy() gives an independent frame that later cells can add columns to.
us_ports_imports_by_month_df = (
    us_ports_imports_by_month_df
    .dropna(subset=['LAT', 'LON'])
    .copy()
)

# Sanity check: every remaining port has both coordinates.
assert not us_ports_imports_by_month_df[['LAT', 'LON']].isna().any().any()

us_ports_imports_by_month_df.shape

(339, 5)

In [11]:
# Map of all geocoded ports on OSM tiles; point size follows GEN_VAL_MO
# (scaled down by `scale`), and hovering shows the port name and value.
ports_map_options = dict(
    geo=True,
    tiles='OSM',
    size='GEN_VAL_MO',
    scale=0.00015,
    hover_cols=['PORT_NAME', 'GEN_VAL_MO'],
    title='Total USD Value of Monthly Imports by US Port (October 2023)',
    frame_width=800,
    frame_height=400,
)

us_ports_imports_by_month_df.hvplot.points(x='LON', y='LAT', **ports_map_options)

![US Ports](./plots/geo_us_ports.png)

In [12]:
# Split each port name at the first ', ' into a city part and a state part
name_parts = us_ports_imports_by_month_df['PORT_NAME'].str.split(', ', n=1, expand=True)
us_ports_imports_by_month_df[['city', 'state']] = name_parts
# Names without a ', ' separator have no state part and are removed here
us_ports_imports_by_month_df.dropna(inplace=True)

display(us_ports_imports_by_month_df.shape[0])

# Keep only rows whose state part is exactly two characters long
has_two_char_state = us_ports_imports_by_month_df['state'].str.len() == 2
us_ports_imports_by_month_df = us_ports_imports_by_month_df[has_two_char_state]
display(us_ports_imports_by_month_df.shape[0])

338

333

In [13]:
# Group ports by state and preview the first five rows of each group
group_by_state = us_ports_imports_by_month_df.groupby(by='state')
group_by_state.head(n=5)

Unnamed: 0_level_0,PORT_NAME,GEN_VAL_MO,time,LAT,LON,city,state
PORT,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0104,"JACKMAN, ME",4.533056e+07,2023-10,45.623763,-70.253983,JACKMAN,ME
0101,"PORTLAND, ME",2.068422e+08,2023-10,43.658974,-70.256958,PORTLAND,ME
0102,"BANGOR, ME",1.724997e+08,2023-10,44.801626,-68.771329,BANGOR,ME
0103,"EASTPORT, ME",7.560000e+04,2023-10,44.905057,-66.984564,EASTPORT,ME
0105,"VANCEBORO, ME",3.520334e+07,2023-10,45.5635,-67.429766,VANCEBORO,ME
...,...,...,...,...,...,...,...
5104,"CHRISTIANSTED, VI",3.361112e+07,2023-10,17.743948,-64.707982,CHRISTIANSTED,VI
5401,"WASHINGTON, DC",1.056634e+09,2023-10,38.895037,-77.036543,WASHINGTON,DC
5402,"ALEXANDRIA, VA",0.000000e+00,2023-10,38.80511,-77.047023,ALEXANDRIA,VA
5504,"OKLAHOMA CITY, OK",2.996600e+06,2023-10,35.472989,-97.517054,OKLAHOMA CITY,OK


In [14]:
# Same port map as before, now colored by state and drawn with a larger point scale.
state_map_options = dict(
    geo=True,
    tiles='OSM',
    color='state',
    size='GEN_VAL_MO',
    scale=0.0005,
    hover_cols=['PORT_NAME', 'GEN_VAL_MO'],
    title='Total USD Value of Monthly Imports by US Port (October 2023)',
    frame_width=800,
    frame_height=400,
)

us_ports_imports_by_month_df.hvplot.points(x='LON', y='LAT', **state_map_options)

![Geo US Ports by State](./plots/geo_us_ports_by_state.png)

In [15]:
# Select the single US port with the highest import value:
# rank all ports by GEN_VAL_MO, largest first, and take the first row.
ports_ranked_by_value = us_ports_imports_by_month_df.sort_values(
    by='GEN_VAL_MO',
    ascending=False,
)
top_port = ports_ranked_by_value.iloc[0]
top_port

0
PORT_NAME       CHICAGO, IL
GEN_VAL_MO    23721454967.0
time                2023-10
LAT               41.875562
LON              -87.624421
city                CHICAGO
state                    IL
Name: 3901, dtype: object

In [16]:
# Fetch data for imports by country across all ports
# (no PORT filter is sent, so each row is a total over every port).
# requests builds the query string: values are URL-encoded, and a None API key
# is left out of the request instead of being sent as the literal text "None".
r2 = requests.get(
    base_uri,
    params={'get': 'GEN_VAL_MO,CTY_NAME,CTY_CODE', 'time': time, 'key': api_key},
    timeout=30,
)
# Surface HTTP errors here rather than as a JSON decode failure below.
r2.raise_for_status()

d2 = r2.json()

# Raw response as-is: row 0 is the header, row 1 the all-countries summary.
imports_by_country = pd.DataFrame(d2)
imports_by_country.head()

Unnamed: 0,0,1,2,3
0,GEN_VAL_MO,CTY_NAME,CTY_CODE,time
1,276809669615,TOTAL FOR ALL COUNTRIES,-,2023-10
2,51062032428,EUROPEAN UNION,0003,2023-10
3,87660180397,PACIFIC RIM COUNTRIES,0014,2023-10
4,2913745193,CAFTA-DR,0017,2023-10


In [17]:
# Set columns with first row.
# The table is rebuilt from the raw response (`d2`) rather than edited in place,
# so re-running this cell cannot promote a data row to the header.
header, *records = d2

imports_by_country = (
    pd.DataFrame(records, columns=header)
    # Cast total value column to type float (the API returns strings)
    .astype({'GEN_VAL_MO': float})
)

imports_by_country.head()

Unnamed: 0,GEN_VAL_MO,CTY_NAME,CTY_CODE,time
0,276809700000.0,TOTAL FOR ALL COUNTRIES,-,2023-10
1,51062030000.0,EUROPEAN UNION,0003,2023-10
2,87660180000.0,PACIFIC RIM COUNTRIES,0014,2023-10
3,2913745000.0,CAFTA-DR,0017,2023-10
4,79552180000.0,NAFTA,0020,2023-10


In [19]:
# Drop summary row.
# "TOTAL FOR ALL COUNTRIES" carries '-' as its CTY_CODE; filtering on that value
# (instead of dropping whatever row is currently first) keeps the cell safe to
# re-run. .copy() gives an independent frame that later cells can add columns to.
imports_by_country = imports_by_country[imports_by_country['CTY_CODE'] != '-'].copy()

imports_by_country.head()

Unnamed: 0,GEN_VAL_MO,CTY_NAME,CTY_CODE,time
1,51062030000.0,EUROPEAN UNION,3,2023-10
2,87660180000.0,PACIFIC RIM COUNTRIES,14,2023-10
3,2913745000.0,CAFTA-DR,17,2023-10
4,79552180000.0,NAFTA,20,2023-10
5,53992660000.0,TWENTY LATIN AMERICAN REPUBLICS,21,2023-10


In [21]:
# Get geo data for countries and trade entities
# NOTE(review): the table also contains trade groupings (EUROPEAN UNION, NAFTA,
# OECD, ...). Geocoding a grouping name returns whatever place happens to match
# (in the recorded output NAFTA lands at about 52.3N, 21.2E), so those points
# are not meaningful on the map; consider excluding groupings before plotting.

# RateLimiter spaces calls at least one second apart (the public Nominatim
# service asks for no more than one request per second), retries geocoder
# service errors such as timeouts, and returns None once retries are exhausted.
geocode_country = RateLimiter(geolocator.geocode, min_delay_seconds=1)

# Start from empty coordinate columns so they exist even if no lookup succeeds.
imports_by_country['LAT'] = float('nan')
imports_by_country['LON'] = float('nan')

for index, country in imports_by_country['CTY_NAME'].items():
    location = geocode_country(country)
    if location is None:
        # No match (or lookup failed): leave LAT/LON empty for this row.
        continue

    # Write both coordinates together. The check is against None rather than
    # truthiness so a legitimate 0.0 latitude or longitude is not discarded.
    imports_by_country.at[index, 'LAT'] = float(location.latitude)
    imports_by_country.at[index, 'LON'] = float(location.longitude)

In [22]:
# Drop rows without geo data and set index.
# Only LAT/LON are considered, so a gap in an unrelated column cannot remove a row.
imports_by_country = (
    imports_by_country
    .dropna(subset=['LAT', 'LON'])
    .set_index('CTY_CODE')
)

# Sanity check: every remaining row has both coordinates.
assert not imports_by_country[['LAT', 'LON']].isna().any().any()

imports_by_country.head()

Unnamed: 0_level_0,GEN_VAL_MO,CTY_NAME,time,LAT,LON
CTY_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
3,51062030000.0,EUROPEAN UNION,2023-10,42.679596,23.321483
14,87660180000.0,PACIFIC RIM COUNTRIES,2023-10,41.903411,12.452853
17,2913745000.0,CAFTA-DR,2023-10,14.16667,36.9
20,79552180000.0,NAFTA,2023-10,52.349221,21.241482
22,170522400000.0,OECD,2023-10,48.861674,2.269236


In [32]:
# Map of import value by country of origin; point size follows GEN_VAL_MO
# (scaled down by `scale`), and hovering shows the country name and value.
# The title describes what was actually fetched: the request had no PORT
# filter (all ports) and used the 2023-10 period.
imports_by_country.hvplot.points(
    'LON',
    'LAT',
    tiles='OSM',
    geo=True,
    size='GEN_VAL_MO',
    scale=0.0001,
    frame_width=800,
    frame_height=400,
    title='Total USD Value of Monthly Imports by Country across All US Ports (October 2023)',
    hover_cols=['CTY_NAME', 'GEN_VAL_MO'],
)