In [1]:
# Get data from working directory
import os

base_dir = os.getcwd()
print(f'Working in: {base_dir}')

# The spreadsheet is expected to sit next to the notebook, in the working directory
ERP_GDP_Industry = os.path.join(base_dir, 'ERP_GDP_INDUSTRY.ods')

Working in: /home/rentfree/Documents/FIRE and Inequality/Contributors to GDP


In [2]:
# Move data into dataframe
import pandas as pd

# Data is GDP by industry.
# Scraped from the Economic Report of the President using
# 2021 Table B-8 (used years 1997-2019) https://www.govinfo.gov/app/collection/erp/2021
# 2006 Table B-12 (used years 1974-1996) https://www.govinfo.gov/app/collection/erp/2006
# Since the inflation-adjusted calculations don't exactly match up in overlapping years, we have a data integrity issue.
# This will be eliminated when we simply turn each industry's product into a percentage of total GDP for each year.
gdpi_df = pd.read_excel(ERP_GDP_Industry)

# Preview the first rows to check the sheet loaded with the expected columns
gdpi_df.head()

Unnamed: 0,YEAR,GDP,TOTAL PRIVATE INDUSTRIES,"AGRICULTURE, FORESTRY, FISHING, AND HUNTING",MINING,CONSTRUCTION,MANUFACTURING,UTILITIES,WHOLESALE TRADE,RETAIL TRADE,TRANSPORTATION AND WAREHOUSING,INFORMATION,"FINANCE, INSURANCE, REAL ESTATE, RENTAL, AND LEASING",PROFESSIONAL AND BUSINESS SERVICES,"EDUCATIONAL SERVICES, HEALTH CARE, AND SOCIAL ASSISTANCE","ARTS, ENTERTAINMENT, RECREATION, ACCOMMODATION, AND FOOD SERVICES","OTHER SERVICES, EXCEPT GOVERNMENT",GOVERNMENT,PRIVATE GOODS PRODUCING INDUSTRIES,PRIVATE SERVICES PRODUCING INDUSTRIES
0,1974,1500.0,1277.3,50.1,29.3,74.0,318.2,29.2,104.7,113.4,58.5,50.9,223.3,84.6,64.3,40.9,35.8,222.6,471.7,805.6
1,1975,1638.3,1391.5,51.4,33.8,74.8,337.1,37.1,114.6,127.3,59.4,56.5,248.2,92.9,74.2,45.7,38.4,246.9,497.2,894.3
2,1976,1825.3,1556.2,50.2,37.5,85.5,386.7,41.5,122.7,144.0,68.8,63.5,272.1,105.1,84.0,51.9,42.8,269.1,559.8,996.4
3,1977,2030.9,1739.4,51.3,43.4,94.2,438.6,45.9,134.9,158.5,76.2,71.1,304.0,122.7,93.8,58.8,46.1,291.5,627.5,1111.9
4,1978,2294.7,1977.0,59.8,49.5,111.5,489.9,50.4,153.4,177.6,86.7,81.4,347.4,141.9,106.4,67.9,53.2,317.7,710.6,1266.4


In [3]:
# Every column label of the raw frame except the year column
index = list(gdpi_df.columns)
index.remove('YEAR')

In [4]:
# Make a copy of the data and turn each industry's product into a share of GDP.
# The columns to normalise are taken from the raw frame itself (everything but
# YEAR) rather than from the mutable `index` list, so this cell produces the
# same result no matter how often, or in what order, the cells are re-run.
share_columns = gdpi_df.columns.drop('YEAR')

gdpir_df = gdpi_df.copy()
# axis=0 aligns the divisor on the row index: each row is divided by that year's GDP
gdpir_df[share_columns] = gdpi_df[share_columns].div(gdpi_df['GDP'], axis=0)

gdpir_df.tail()

Unnamed: 0,YEAR,GDP,TOTAL PRIVATE INDUSTRIES,"AGRICULTURE, FORESTRY, FISHING, AND HUNTING",MINING,CONSTRUCTION,MANUFACTURING,UTILITIES,WHOLESALE TRADE,RETAIL TRADE,TRANSPORTATION AND WAREHOUSING,INFORMATION,"FINANCE, INSURANCE, REAL ESTATE, RENTAL, AND LEASING",PROFESSIONAL AND BUSINESS SERVICES,"EDUCATIONAL SERVICES, HEALTH CARE, AND SOCIAL ASSISTANCE","ARTS, ENTERTAINMENT, RECREATION, ACCOMMODATION, AND FOOD SERVICES","OTHER SERVICES, EXCEPT GOVERNMENT",GOVERNMENT,PRIVATE GOODS PRODUCING INDUSTRIES,PRIVATE SERVICES PRODUCING INDUSTRIES
41,2015,1.0,0.871731,0.009995,0.014354,0.038101,0.116765,0.016405,0.062665,0.055932,0.031023,0.049725,0.205556,0.122648,0.086137,0.040947,0.021471,0.128269,0.179216,0.692515
42,2016,1.0,0.872772,0.008888,0.01163,0.039845,0.112013,0.016095,0.060474,0.056164,0.031048,0.051662,0.210508,0.122928,0.08813,0.042048,0.021334,0.127228,0.172376,0.700396
43,2017,1.0,0.874697,0.009036,0.01402,0.040823,0.111672,0.015868,0.059535,0.055442,0.03106,0.051446,0.210653,0.124392,0.087356,0.042189,0.021199,0.125298,0.175556,0.699145
44,2018,1.0,0.8763,0.008665,0.016049,0.041151,0.112285,0.015612,0.058748,0.054134,0.031569,0.051417,0.212106,0.124821,0.08645,0.042049,0.021245,0.1237,0.178145,0.698155
45,2019,1.0,0.876855,0.008184,0.01444,0.04165,0.109447,0.015644,0.058895,0.054224,0.032506,0.052605,0.211905,0.126472,0.087033,0.042327,0.021523,0.12315,0.173721,0.703133


In [5]:
# Total/aggregate columns to leave out of the per-industry list
# (presumably so the stacked chart does not double-count them - confirm)
removal_list = ['GDP', 'TOTAL PRIVATE INDUSTRIES',
                'PRIVATE GOODS PRODUCING INDUSTRIES', 'PRIVATE SERVICES PRODUCING INDUSTRIES']

# Rebuild the list instead of calling index.remove() in a loop: re-running this
# cell is then a no-op rather than a ValueError for labels that are already gone.
index = [col for col in index if col not in removal_list]

index

['AGRICULTURE, FORESTRY, FISHING, AND HUNTING',
 'MINING',
 'CONSTRUCTION',
 'MANUFACTURING',
 'UTILITIES',
 'WHOLESALE TRADE',
 'RETAIL TRADE',
 'TRANSPORTATION AND WAREHOUSING',
 'INFORMATION',
 'FINANCE, INSURANCE, REAL ESTATE, RENTAL, AND LEASING',
 'PROFESSIONAL AND BUSINESS SERVICES',
 'EDUCATIONAL SERVICES, HEALTH CARE, AND SOCIAL ASSISTANCE',
 'ARTS, ENTERTAINMENT, RECREATION, ACCOMMODATION, AND FOOD SERVICES',
 'OTHER SERVICES, EXCEPT GOVERNMENT',
 'GOVERNMENT']

In [6]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

In [7]:
# Stacked-area chart: one trace per remaining column in `index`; traces that
# share a stackgroup are drawn stacked on top of each other.
fig = go.Figure()
for col in index:
    fig.add_trace(
        go.Scatter(
            x=gdpir_df['YEAR'],
            y=gdpir_df[col],
            name=col,
            stackgroup='one',
        )
    )

# '.0%' pins tick labels to whole percentages. A bare '%' leaves the precision
# to the d3 formatter's default, which can render labels such as "20.000000%".
fig.update_yaxes(
    tickformat='.0%',
    title_text="Percent of Sector Contribution to GDP",
    range=[0, 1],
)
fig.update_xaxes(title_text="Year")

fig.update_layout(
    font_family="Times New Roman",
    title_font_family="Times New Roman",
    height=400,
    width=1000,
    title={
        'text': "Sector Contribution to GDP from 1974-2019",
        'y': 0.9,
        'x': 0.4,
        'xanchor': 'center',
        'yanchor': 'top',
    },
)

fig.show()

In [8]:
# Export the figure as a static PNG; the path is relative, so the file is written to the current working directory
fig.write_image("Sector Contribution to GDP from 1974-2019.png")