In [101]:
#import required libraries

import os
import requests
import json
import pandas as pd
from dotenv import load_dotenv
from pathlib import Path
import hvplot.pandas
# didn't install beasy yet, still working from basic api call
#from beasy.beasy import Bea
%matplotlib inline

In [102]:
#load the API keys stored in .env file
load_dotenv()

True

In [103]:
# https://apps.bea.gov/api/signup/index.cfm to signup for API key for Bureau of Economic Analysis

# https://apps.bea.gov/api/_pdf/bea_web_service_api_user_guide.pdf for user guide

In [141]:
#get BEA API key from .env file
bea_api_key=os.getenv("BEA_API_KEY")

#Defining the beasy client to read the BEA Data using the Beasy API reader # disabled this temporarily
#client = Bea(bea_api_key)

In [142]:
# set up query URL
bea_url = "http://apps.bea.gov/api/data?UserID=" + bea_api_key + "&method=GETDATASETLIST&ResultFormat=JSON" 

In [143]:
#send request to BEA
bea_response = requests.get(bea_url).json()["BEAAPI"]["Results"]

In [144]:
#review response
bea_response


{'Dataset': [{'DatasetName': 'NIPA',
   'DatasetDescription': 'Standard NIPA tables'},
  {'DatasetName': 'NIUnderlyingDetail',
   'DatasetDescription': 'Standard NI underlying detail tables'},
  {'DatasetName': 'MNE', 'DatasetDescription': 'Multinational Enterprises'},
  {'DatasetName': 'FixedAssets',
   'DatasetDescription': 'Standard Fixed Assets tables'},
  {'DatasetName': 'ITA',
   'DatasetDescription': 'International Transactions Accounts'},
  {'DatasetName': 'IIP',
   'DatasetDescription': 'International Investment Position'},
  {'DatasetName': 'InputOutput', 'DatasetDescription': 'Input-Output Data'},
  {'DatasetName': 'IntlServTrade',
   'DatasetDescription': 'International Services Trade'},
  {'DatasetName': 'GDPbyIndustry', 'DatasetDescription': 'GDP by Industry'},
  {'DatasetName': 'Regional', 'DatasetDescription': 'Regional data sets'},
  {'DatasetName': 'UnderlyingGDPbyIndustry',
   'DatasetDescription': 'Underlying GDP by Industry'},
  {'DatasetName': 'APIDatasetMetaData'

In [145]:
#Beasy client to get the BEA Data set list
#client.getDatasetList()
# disabled this temporarily

In [146]:
# the following code was used to test the response for regional data

In [147]:
 bea_regional_url = "http://apps.bea.gov/api/data?UserID=" + bea_api_key + "&method=getparameterlist&datasetname=Regional&ResultFormat=JSON" 

In [148]:
 bea_regional_response = requests.get(bea_regional_url).json()["BEAAPI"]["Results"]

In [149]:
 bea_regional_response

{'Parameter': [{'ParameterName': 'GeoFips',
   'ParameterDataType': 'string',
   'ParameterDescription': 'Comma-delimited list of 5-character geographic codes; COUNTY for all counties, STATE for all states, MSA for all MSAs, MIC for all Micropolitan Areas, PORT for all state metro/nonmetro portions, DIV for all Metropolitan Divisions, CSA for all Combined Statistical Areas, state post office abbreviation for all counties in one state (e.g. NY)',
   'ParameterIsRequiredFlag': '1',
   'MultipleAcceptedFlag': '1'},
  {'ParameterName': 'LineCode',
   'ParameterDataType': 'integer',
   'ParameterDescription': 'Line code for a statistic or industry',
   'ParameterIsRequiredFlag': '1',
   'MultipleAcceptedFlag': '0'},
  {'ParameterName': 'TableName',
   'ParameterDataType': 'string',
   'ParameterDescription': 'Regional income or product table to retrieve',
   'ParameterIsRequiredFlag': '1',
   'ParameterDefaultValue': '',
   'MultipleAcceptedFlag': '0'},
  {'ParameterName': 'Year',
   'Param

In [150]:
# A few codes which may be useful for further API calls and data analysis:

# Codes (from the BEA API instructions pdf):

# SQINC1 Personal Income Summary: Personal Income, Population, Per Capita Personal Income by States

# SQGDP9 Real GDP by state 

# SQGDP2 Gross domestic product (GDP) by state 2

In [151]:
# this query will get the data for all states for the last five years for personal income per capita.

bea_states_personal_income_linecode_query_url = "http://apps.bea.gov/api/data?UserID=" + bea_api_key + "&method=GetData&datasetname=Regional&TableName=SQINC1&GeoFIPS=STATE&LineCode=3&Year=LAST5&ResultFormat=JSON"

In [152]:
#query for last five years personal income by state.
personal_income_by_state_5year = pd.DataFrame(requests.get(bea_states_personal_income_linecode_query_url).json()["BEAAPI"]["Results"]["Data"])

In [153]:
#review the results of the query
personal_income_by_state_5year

Unnamed: 0,Code,GeoFips,GeoName,TimePeriod,CL_UNIT,UNIT_MULT,DataValue,NoteRef
0,SQINC1-3,00000,United States,2018Q4,Dollars,0,54874,
1,SQINC1-3,00000,United States,2018Q3,Dollars,0,54409,
2,SQINC1-3,00000,United States,2018Q2,Dollars,0,53807,
3,SQINC1-3,00000,United States,2018Q1,Dollars,0,53287,
4,SQINC1-3,00000,United States,2017Q1,Dollars,0,51018,
...,...,...,...,...,...,...,...,...
1075,SQINC1-3,98000,Far West,2017Q4,Dollars,0,57919,
1076,SQINC1-3,98000,Far West,2018Q4,Dollars,0,60639,
1077,SQINC1-3,98000,Far West,2018Q3,Dollars,0,60178,
1078,SQINC1-3,98000,Far West,2018Q2,Dollars,0,59483,


In [154]:
#client.Regional.getParameterValues('LineCode')

In [155]:
# Using the Beasy client to read the Personal Income data from the BEA API

#client.Regional.getData(TableName='SQINC5N', LineCode= '100', GeoFIPS='STATE', Year='LAST5')

In [156]:
# clip out the unnecessary columns
personal_income_by_state_5year=personal_income_by_state_5year[["GeoName", "TimePeriod", "DataValue"]]
personal_income_by_state_5year


Unnamed: 0,GeoName,TimePeriod,DataValue
0,United States,2018Q4,54874
1,United States,2018Q3,54409
2,United States,2018Q2,53807
3,United States,2018Q1,53287
4,United States,2017Q1,51018
...,...,...,...
1075,Far West,2017Q4,57919
1076,Far West,2018Q4,60639
1077,Far West,2018Q3,60178
1078,Far West,2018Q2,59483


In [157]:
# Used str.replace() to remove the ',' from the Income datavalue and convert to float from string

personal_income_by_state_5year['DataValue'] = personal_income_by_state_5year['DataValue'].str.replace(',', '').astype('float')
personal_income_by_state_5year

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,GeoName,TimePeriod,DataValue
0,United States,2018Q4,54874.0
1,United States,2018Q3,54409.0
2,United States,2018Q2,53807.0
3,United States,2018Q1,53287.0
4,United States,2017Q1,51018.0
...,...,...,...
1075,Far West,2017Q4,57919.0
1076,Far West,2018Q4,60639.0
1077,Far West,2018Q3,60178.0
1078,Far West,2018Q2,59483.0


In [158]:
# Filter the dataframe to get just the states

list_of_states = [ 'Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado', 'Connecticut', 'Delaware', 'Florida', 'Georgia',
                  'Hawaii', 'Idaho', 'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Kentucky', 'Louisiana', 'Maine', 'Maryland', 'Massachusetts',
                  'Michigan', 'Minnesota', 'Mississippi', 'Missouri', 'Montana', 'Nebraska', 'Nevada', 'New Hampshire', 'New Jersey', 
                   'New Mexico', 'New York', 'North Carolina', 'North Dakota', 'Ohio', 'Oklahoma', 'Oregon', 'Pennsylvania', 'Rhode Island',
                   'South Carolina', 'South Dakota', 'Tennessee', 'Texas', 'Utah', 'Vermont', 'Virginia', 'Washington', 'West Virginia',
                  'Wisconsin', 'Wyoming']

personal_income_state_filter = personal_income_by_state_5year['GeoName'].isin(list_of_states)
personal_income_filter_by_state = personal_income_by_state_5year[personal_income_state_filter]
personal_income_filter_by_state

Unnamed: 0,GeoName,TimePeriod,DataValue
18,Alabama,2020Q1,45031.0
19,Alabama,2020Q2,48797.0
20,Alabama,2020Q3,46288.0
21,Alabama,2020Q4,45736.0
22,Alabama,2019Q1,43530.0
...,...,...,...
931,Wyoming,2021Q2,63589.0
932,Wyoming,2017Q1,55381.0
933,Wyoming,2017Q2,56299.0
934,Wyoming,2017Q3,57082.0


In [159]:
# Setting the time period to be the index and converting the datatype from string to dateTime
personal_income_filter_by_state_2 = personal_income_filter_by_state
personal_income_filter_by_state = personal_income_filter_by_state.set_index('TimePeriod')
personal_income_filter_by_state.index = pd.to_datetime(personal_income_filter_by_state.index)
personal_income_filter_by_state

Unnamed: 0_level_0,GeoName,DataValue
TimePeriod,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-01-01,Alabama,45031.0
2020-04-01,Alabama,48797.0
2020-07-01,Alabama,46288.0
2020-10-01,Alabama,45736.0
2019-01-01,Alabama,43530.0
...,...,...
2021-04-01,Wyoming,63589.0
2017-01-01,Wyoming,55381.0
2017-04-01,Wyoming,56299.0
2017-07-01,Wyoming,57082.0


In [160]:
# Group the data by state and calculate the average personal income for every year from 2017 to current year

# personal_income_filter_annual = personal_income_filter_by_state.groupby('GeoName').resample('A').mean()
# personal_income_filter_annual

In [161]:
personal_income_filter_annual = personal_income_filter_by_state.groupby('GeoName').resample('A').mean()
personal_income_filter_annual

Unnamed: 0_level_0,Unnamed: 1_level_0,DataValue
GeoName,TimePeriod,Unnamed: 2_level_1
Alabama,2017-12-31,40855.25
Alabama,2018-12-31,42319.25
Alabama,2019-12-31,43987.75
Alabama,2020-12-31,46463.00
Alabama,2021-12-31,50383.00
...,...,...
Wyoming,2017-12-31,56698.25
Wyoming,2018-12-31,59877.25
Wyoming,2019-12-31,61036.25
Wyoming,2020-12-31,61838.00


In [162]:
# Bar chart for states with an interactive slider

personal_income_filter_annual.sort_values('DataValue').hvplot.bar(groupby = 'TimePeriod', x = 'GeoName', rot = 90)

In [163]:
#Plots for data pivoted by states

# Convert TimePeriod to datetime to get results in a sorted time manner
personal_income_filter_by_state_2['TimePeriod'] = pd.to_datetime(personal_income_filter_by_state_2['TimePeriod'])

# Pivoting the dataframe around GeoName and TimePeriod
personal_income_melt = personal_income_filter_by_state_2.melt(id_vars = ['GeoName', 'TimePeriod'])

# PLots with x value = timeperiod
layout_plots = personal_income_melt.hvplot.bar(
    x='TimePeriod',
    y='value',
    by='variable',
    width=300,
    height=350,
    stacked=True,
    groupby='GeoName',
    legend=False,
    xlabel='',
    bar_width=1.0,
    rot = 90
).layout()

layout_plots

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


In [164]:
#Sort the States by Personal Income
personal_income_sorted = personal_income_filter_by_state_2.sort_values(by = 'DataValue')

#Pivoting the table around Geoname and Timeperiod
personal_income_melt_2 = personal_income_sorted.melt(id_vars = ['GeoName', 'TimePeriod'])

# Plots with x_value = states
layout_plots_2 = personal_income_melt_2.hvplot.bar(
    x='GeoName',
    y='value',
    by='variable',
    width=400,
    height=200,
    stacked=True,
    groupby='TimePeriod',
    legend=False,
    xlabel='',
    bar_width=1.0,
    rot = 90
).layout().cols(3)

layout_plots_2

In [165]:
# Dataframe grouped by state and extracted the last element from each group which is the personal income value for 2021Q4

personal_income_2021Q4 = personal_income_sorted.groupby('GeoName').last()
personal_income_2021Q4


Unnamed: 0_level_0,TimePeriod,DataValue
GeoName,Unnamed: 1_level_1,Unnamed: 2_level_1
Alabama,2021-01-01,52633.0
Arizona,2021-01-01,54620.0
Arkansas,2021-01-01,53643.0
California,2021-01-01,77642.0
Colorado,2021-01-01,70324.0
Connecticut,2021-01-01,85474.0
Delaware,2021-01-01,61798.0
Florida,2021-01-01,62357.0
Georgia,2021-01-01,58080.0
Idaho,2021-01-01,55043.0


In [166]:
# Importing Bokeh dependencies to plot chloropleth map of US

from bokeh.models import LogColorMapper
from bokeh.palettes import Oranges256 as oranges
from bokeh.sampledata.us_states import data as us_states
from bokeh.plotting import figure
from bokeh.io import output_notebook, show
%matplotlib inline

In [167]:
# loading US states boundary data which is available in bokeh as a part of bokeh.sampledata.us_states
us_states_df = pd.DataFrame(us_states).T
us_states_df

Unnamed: 0,name,region,lats,lons
NV,Nevada,Southwest,"[40.68928, 40.4958, 40.30302, 40.09896, 39.999...","[-114.04392, -114.04558, -114.04619, -114.0464..."
AZ,Arizona,Southwest,"[34.87057, 35.00186, 35.00332, 35.07971, 35.11...","[-114.63332, -114.63349, -114.63423, -114.6089..."
WI,Wisconsin,Central,"[42.49273, 42.49433, 42.49562, 42.49561, 42.49...","[-87.8156, -87.93137, -88.10268, -88.20645, -8..."
GA,Georgia,Southeast,"[32.29667, 32.24425, 32.09197, 32.03256, 32.02...","[-81.12387, -81.15654, -81.02071, -80.75203, -..."
KS,Kansas,Central,"[36.99927, 36.99879, 36.99914, 36.99903, 36.99...","[-96.28415, -96.55381, -96.91244, -97.1197, -9..."
CT,Connecticut,Northeast,"[42.0324, 42.025, 42.03006, 42.03398, 42.03198...","[-72.64313, -72.60487, -72.57337, -72.45609, -..."
IN,Indiana,Central,"[41.42618, 41.57005, 41.69559, 41.76096, 41.76...","[-87.5259, -87.52459, -87.52462, -87.20777, -8..."
ME,Maine,Northeast,"[45.41417, 45.36062, 45.31094, 45.27537, 45.16...","[-67.45613, -67.43075, -67.45703, -67.47637, -..."
MA,Massachusetts,Northeast,"[42.0324, 42.0324, 42.0362, 42.02907, 42.02112...","[-72.64313, -72.64329, -72.75134, -72.75203, -..."
MT,Montana,Northwest,"[44.79795, 44.8772, 45.03517, 45.40292, 45.621...","[-113.18786, -113.45913, -113.44991, -113.7509..."


In [168]:
# reset index for merging
personal_income_2021Q4 = personal_income_2021Q4.reset_index()
personal_income_2021Q4

Unnamed: 0,GeoName,TimePeriod,DataValue
0,Alabama,2021-01-01,52633.0
1,Arizona,2021-01-01,54620.0
2,Arkansas,2021-01-01,53643.0
3,California,2021-01-01,77642.0
4,Colorado,2021-01-01,70324.0
5,Connecticut,2021-01-01,85474.0
6,Delaware,2021-01-01,61798.0
7,Florida,2021-01-01,62357.0
8,Georgia,2021-01-01,58080.0
9,Idaho,2021-01-01,55043.0


In [169]:
# deleting states of Alaska and Hawaii
us_states_df = us_states_df[~us_states_df["name"].isin(['Alaska', "Hawaii"])]

us_states_df["lons"] = us_states_df.lons.values.tolist()
us_states_df["lats"] = us_states_df.lats.values.tolist()
#us_states_df = us_states_df.reset_index() # Needed initially, not after the index is already reset
#personal_income_2021Q4 = personal_income_2021Q4.reset_index()

# Merge the two dataframes
us_states_df = us_states_df.merge(personal_income_2021Q4[["GeoName", "DataValue"]], how="left", left_on="name", right_on="GeoName")
us_states_df.head()
#result_merge = pd.concat([us_states_df,personal_income_2021Q4], axis = 1 )

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """


Unnamed: 0,name,region,lats,lons,GeoName,DataValue
0,Nevada,Southwest,"[40.68928, 40.4958, 40.30302, 40.09896, 39.999...","[-114.04392, -114.04558, -114.04619, -114.0464...",Nevada,59691.0
1,Arizona,Southwest,"[34.87057, 35.00186, 35.00332, 35.07971, 35.11...","[-114.63332, -114.63349, -114.63423, -114.6089...",Arizona,54620.0
2,Wisconsin,Central,"[42.49273, 42.49433, 42.49562, 42.49561, 42.49...","[-87.8156, -87.93137, -88.10268, -88.20645, -8...",Wisconsin,61811.0
3,Georgia,Southeast,"[32.29667, 32.24425, 32.09197, 32.03256, 32.02...","[-81.12387, -81.15654, -81.02071, -80.75203, -...",Georgia,58080.0
4,Kansas,Central,"[36.99927, 36.99879, 36.99914, 36.99903, 36.99...","[-96.28415, -96.55381, -96.91244, -97.1197, -9...",Kansas,62072.0


In [170]:
us_states_datasource = {}
us_states_datasource["lons"] = us_states_df.lons.values.tolist()
us_states_datasource["lats"] = us_states_df.lats.values.tolist()
us_states_datasource["name"] = us_states_df.name.values.tolist()
us_states_datasource["StateCodes"] = us_states_df.index.values.tolist()
us_states_datasource["DataValue"] = us_states_df.DataValue.values.tolist()


In [171]:
# Creating the plot

fig = figure(plot_width=900, plot_height=500,
             title="Personal Income by state for 2020Q4",
             x_axis_location=None, y_axis_location=None,
             tooltips=[
                        ("State", "@name"), ("DataValue", "@DataValue"), ("(Long, Lat)", "($x, $y)")
                      ]
            )
#fig.grid.grid_line_color = None

fig.patches("lons", "lats", source=us_states_datasource,
            fill_color={'field': 'DataValue', 'transform': LogColorMapper(palette=oranges[::-1])},
            fill_alpha=0.7, line_color="white", line_width=0.5)

show(fig)

In [64]:
#Inserted GDP by state & industy csv file
gdp_country_state = pd.read_csv(Path('Resources/GDP2N__ALL_AREAS_1997_2020.csv'))


In [65]:
#Filtered the data to only include GeoName, Description, Years
gdp_state_10year = gdp_country_state[[
    'GeoName',
    'Description',
    '2009','2010','2011','2012','2013','2014','2015','2016','2017','2018','2019']]

#dropped NAN values
gdp_state_10year.set_index('GeoName').dropna().head()

Unnamed: 0_level_0,Description,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
GeoName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Alabama,All industry total,169128.5,175470.1,181929.9,186553.9,192166.5,195037.7,200197.5,204454.7,210895.7,221030.7,228142.6
Alabama,Private industries,138168.5,144017.0,149921.8,154601.2,159979.9,161985.5,166301.8,169903.9,175407.6,184432.7,189941.9
Alabama,"Agriculture, forestry, fishing and hunting",1916.0,1928.3,1591.0,1802.2,3113.4,2518.2,2665.5,2036.7,2408.8,2475.4,1936.0
Alabama,Farms,1427.6,1389.7,1062.3,1260.6,2579.4,1941.8,2068.1,1425.6,1803.9,1834.7,1255.8
Alabama,"Forestry, fishing, and related activities",488.4,538.6,528.7,541.5,534.1,576.4,597.4,611.0,604.9,640.7,680.2


In [66]:
#grouped by Description to pick out All industry total
gdp_state_10year_filter = gdp_state_10year.groupby('Description')

gdp_all_industry_df = gdp_state_10year_filter.get_group('All industry total')

gdp_all_industry_df.head()

Unnamed: 0,GeoName,Description,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,Alabama,All industry total,169128.5,175470.1,181929.9,186553.9,192166.5,195037.7,200197.5,204454.7,210895.7,221030.7,228142.6
92,Alaska,All industry total,49651.6,52947.7,56333.9,57717.5,56625.3,55750.6,50728.1,49755.6,51737.0,54292.9,54385.6
184,Arizona,All industry total,243764.6,248125.3,257881.1,268068.2,275007.5,284851.4,298615.0,313056.5,330146.6,350718.3,370119.1
276,Arkansas,All industry total,96983.7,100970.8,105107.8,107718.8,112764.6,116151.8,117734.3,119191.8,122978.5,127761.3,130954.1
368,California,All industry total,1919271.1,1973511.9,2049336.8,2144089.6,2261510.6,2399077.7,2559643.2,2671100.5,2831038.4,2975083.0,3132800.6


In [67]:
#melting the df arounf GeoName and Description
gdp_all_industry = gdp_all_industry_df.melt(['GeoName', 'Description'], var_name='Date', value_name='Value').set_index('GeoName')

#changing all values to integers
gdp_all_industry["Value"] = pd.to_numeric(gdp_all_industry["Value"], downcast="integer")

gdp_all_industry.tail()

Unnamed: 0_level_0,Description,Date,Value
GeoName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Virginia,All industry total,2019,556905.2
Washington,All industry total,2019,612996.5
West Virginia,All industry total,2019,78863.9
Wisconsin,All industry total,2019,349416.5
Wyoming,All industry total,2019,40420.1


In [91]:
#Plotting visual
gdp_all_industry.hvplot.bar(
    x='Date',
    y='Value',
    by='Description',
    width=300,
    height=150,
    stacked=True,
    groupby='GeoName',
    legend=False,
    xlabel='',
    bar_width=1.0,
    rot = 90
).layout()

In [69]:
#Creating a df for agriculture gdp in all states
gdp_agriculture_df = gdp_state_10year_filter.get_group("  Agriculture, forestry, fishing and hunting")

#Melting df around GeoName and Description
gdp_agriculture= gdp_agriculture_df.melt(['GeoName', 'Description'], var_name='Date', value_name='Value').set_index('GeoName')

#changing values to intergers
gdp_agriculture['Value'] = pd.to_numeric(gdp_agriculture["Value"], downcast="integer")

gdp_agriculture.tail()

Unnamed: 0_level_0,Description,Date,Value
GeoName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Virginia,"Agriculture, forestry, fishing and hunting",2019,1696.4
Washington,"Agriculture, forestry, fishing and hunting",2019,7764.1
West Virginia,"Agriculture, forestry, fishing and hunting",2019,356.9
Wisconsin,"Agriculture, forestry, fishing and hunting",2019,4944.0
Wyoming,"Agriculture, forestry, fishing and hunting",2019,746.6


In [90]:
#visualizing states ag gdp
gdp_agriculture.hvplot.bar(
    x='Date',
    y='Value',
    by='Description',
    width=300,
    height=150,
    stacked=True,
    groupby='GeoName',
    legend=False,
    xlabel='',
    bar_width=1.0,
    rot = 90
).layout()

In [71]:
#filtering out Healthcare df
gdp_healthcare_df = gdp_state_10year_filter.get_group("   Health care and social assistance")

#melting df around GeoName and Description
gdp_healthcare = gdp_healthcare_df.melt(['GeoName', 'Description'], var_name='Date', value_name='Value').set_index('GeoName')

#Turning values into integers
gdp_healthcare['Value'] = pd.to_numeric(gdp_healthcare["Value"], downcast="integer")

gdp_healthcare.tail()

Unnamed: 0_level_0,Description,Date,Value
GeoName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Virginia,Health care and social assistance,2019,35348.5
Washington,Health care and social assistance,2019,37457.2
West Virginia,Health care and social assistance,2019,8509.0
Wisconsin,Health care and social assistance,2019,30691.6
Wyoming,Health care and social assistance,2019,1786.5


In [89]:
#visualizing healthcare gdp by state
gdp_healthcare.hvplot.bar(
    x='Date',
    y='Value',
    by='Description',
    width=300,
    height=150,
    stacked=True,
    groupby='GeoName',
    legend=False,
    xlabel='',
    bar_width=1.0,
    rot = 90
).layout()

In [73]:
#filtering a df out for manufacturing
gdp_manufacturing_df = gdp_state_10year_filter.get_group("  Manufacturing")

#melting df around GeoName and Description
gdp_manufacturing= gdp_manufacturing_df.melt(['GeoName', 'Description'], var_name='Date', value_name='Value').set_index('GeoName')

#changing values to integers
gdp_manufacturing['Value'] = pd.to_numeric(gdp_manufacturing["Value"], downcast="integer")

gdp_manufacturing.tail()

Unnamed: 0_level_0,Description,Date,Value
GeoName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Virginia,Manufacturing,2019,45716.5
Washington,Manufacturing,2019,65200.2
West Virginia,Manufacturing,2019,7942.5
Wisconsin,Manufacturing,2019,64884.6
Wyoming,Manufacturing,2019,2179.7


In [88]:
#visualizing manufactuing gdp by state
gdp_manufacturing.hvplot.bar(
    x='Date',
    y='Value',
    by='Description',
    width=300,
    height=150,
    stacked=True,
    groupby='GeoName',
    legend=False,
    xlabel='',
    bar_width=1.0,
    rot = 90
).layout()

In [75]:
#filtering df for private gdp by state
gdp_private_df = gdp_state_10year_filter.get_group(' Private industries')


#melting df around GeoName and Description
gdp_private= gdp_private_df.melt(['GeoName', 'Description'], var_name='Date', value_name='Value').set_index('GeoName')

#turning values to integers
gdp_private['Value'] = pd.to_numeric(gdp_private["Value"], downcast="integer")

gdp_private.tail()

Unnamed: 0_level_0,Description,Date,Value
GeoName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Virginia,Private industries,2019,457146.6
Washington,Private industries,2019,537509.0
West Virginia,Private industries,2019,66244.2
Wisconsin,Private industries,2019,312247.7
Wyoming,Private industries,2019,33829.9


In [87]:
#visualizing private sector gdp by state
gdp_private.hvplot.bar(
    x='Date',
    y='Value',
    by='Description',
    width=300,
    height=150,
    stacked=True,
    groupby='GeoName',
    legend=False,
    xlabel='',
    bar_width=1.0,
    rot = 90
).layout()

In [77]:
#filteting out df for Real Estate gdp by state
gdp_real_estate_df = gdp_state_10year_filter.get_group('    Real estate')

#melting df around GeoName and Description
gdp_real_estate = gdp_real_estate_df.melt(['GeoName', 'Description'], var_name='Date', value_name='Value').set_index('GeoName')

#changing values to integers
gdp_real_estate['Value'] = pd.to_numeric(gdp_real_estate["Value"], downcast="integer")

gdp_real_estate.tail()

Unnamed: 0_level_0,Description,Date,Value
GeoName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Virginia,Real estate,2019,74908.7
Washington,Real estate,2019,81797.9
West Virginia,Real estate,2019,7261.3
Wisconsin,Real estate,2019,38779.8
Wyoming,Real estate,2019,4074.4


In [86]:
#visualizing real estate gdp by state
gdp_real_estate.hvplot.bar(
    x='Date',
    y='Value',
    by='Description',
    width=300,
    height=150,
    stacked=True,
    groupby='GeoName',
    legend=False,
    xlabel='',
    bar_width=1.0,
    rot = 90
).layout()

In [79]:
#filtering df for transportation gdp by state
gdp_transportation_df = gdp_state_10year_filter.get_group('  Transportation and warehousing')

#melting df around GeoName and Description
gdp_transportation = gdp_transportation_df.melt(['GeoName', 'Description'], var_name='Date', value_name='Value').set_index('GeoName')

#changing values to integers
gdp_transportation['Value'] = pd.to_numeric(gdp_transportation["Value"], downcast="integer")

gdp_transportation.tail()

Unnamed: 0_level_0,Description,Date,Value
GeoName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Virginia,Transportation and warehousing,2019,14921.4
Washington,Transportation and warehousing,2019,17058.2
West Virginia,Transportation and warehousing,2019,2280.7
Wisconsin,Transportation and warehousing,2019,10329.2
Wyoming,Transportation and warehousing,2019,4168.7


In [85]:
#visualizing transportation gdp by state
gdp_transportation.hvplot.bar(
    x='Date',
    y='Value',
    by='Description',
    width=300,
    height=150,
    stacked=True,
    groupby='GeoName',
    legend=False,
    xlabel='',
    bar_width=1.0,
    rot = 90
).layout()