In [None]:
import pandas as pd
import requests
import numpy as np
from datetime import datetime as dt
from bs4 import BeautifulSoup as BS
from time import sleep
from random import randint
import regex as re
from babel.numbers import format_currency

In [None]:
# Load the GDP workbook: sheet row 3 (0-based) is used as the header row and
# at most 3219 data rows are read.
gdp = pd.read_excel(
    '../data/gdp.xlsx',
    header=[3],
    nrows=3219,
)
gdp.head()

In [None]:
# Give the 'Unnamed: 0' column a meaningful label: 'County'.
gdp = gdp.rename({'Unnamed: 0': 'County'}, axis='columns')
gdp.head()

In [None]:
# Keep every row whose County is Davidson, Fulton or Travis. Matching is on
# the county name only, so all rows sharing one of these names are retained;
# the specific rows wanted are picked out of this subset afterwards.
counties = gdp[
    gdp['County'].isin(['Davidson', 'Fulton', 'Travis'])
]
counties

In [None]:
# Take the row at position 1 of `counties` as the Fulton County record and
# rebuild it as a one-row DataFrame with a fresh 0-based index.
# NOTE(review): the position is hard-coded and relies on the row order of the
# filtered frame -- re-check it if the source workbook changes.
fulton = (
    counties.iloc[1]
    .to_frame()
    .T
    .reset_index(drop=True)
)
fulton

In [None]:
# Take the row at position 9 of `counties` as the Davidson County record and
# rebuild it as a one-row DataFrame with a fresh 0-based index.
# NOTE(review): the position is hard-coded and relies on the row order of the
# filtered frame -- re-check it if the source workbook changes.
davidson = (
    counties.iloc[9]
    .to_frame()
    .T
    .reset_index(drop=True)
)
davidson

In [None]:
# Take the row at position 10 of `counties` as the Travis County record and
# rebuild it as a one-row DataFrame with a fresh 0-based index.
# NOTE(review): the position is hard-coded and relies on the row order of the
# filtered frame -- re-check it if the source workbook changes.
travis = (
    counties.iloc[10]
    .to_frame()
    .T
    .reset_index(drop=True)
)
travis

In [None]:
# Tag the single Fulton row with the city it is paired with in this analysis.
fulton = fulton.assign(city=['Atlanta'])
fulton

In [None]:
# Tag the single Davidson row with the city it is paired with in this analysis.
davidson = davidson.assign(city=['Nashville'])
davidson

In [None]:
# Tag the single Travis row with the city it is paired with in this analysis.
travis = travis.assign(city=['Austin'])
travis

In [None]:
# Stack the three one-row county frames into a single table.
# Each input was reset to index label 0, so a plain concat would yield three
# rows that all share label 0; ignore_index=True renumbers them 0..2 so that
# label-based lookups on the result are unambiguous.
counties_gdp = pd.concat([fulton, davidson, travis], ignore_index=True)
counties_gdp

In [None]:
# Narrow the table to the identifying columns plus the yearly columns used
# below: the integer-labelled 2018-2021 columns feed the `gross` table and the
# string-labelled '2019.1', '2020.1', '2021.2' columns feed `perc_change`.
counties_gdp = counties_gdp[
    ['County', 'city', 2018, 2019, 2020, 2021, '2019.1', '2020.1', '2021.2']
]
counties_gdp

In [None]:
# Gross GDP table: city plus the integer-labelled year columns.
gross = counties_gdp.loc[:, ['city', 2018, 2019, 2020, 2021]]
gross

In [None]:
# Percent-change table: city plus the suffixed year columns.
perc_change = counties_gdp.loc[:, ['city', '2019.1', '2020.1', '2021.2']]
perc_change

In [None]:
# Strip the numeric suffixes so the percent-change columns are labelled with
# the plain year (as strings).
perc_change = perc_change.rename(
    {'2019.1': '2019', '2020.1': '2020', '2021.2': '2021'},
    axis='columns',
)
perc_change

In [None]:
# Reshape gross GDP from wide (one column per year) to long form:
# one row per (city, year) with the value in 'gross_gdp'.
gross = gross.melt(
    id_vars=['city'],
    var_name='year',
    value_name='gross_gdp',
)
gross

In [None]:
def _as_usd(amount):
    """Return ``amount`` formatted by ``format_currency`` as USD in the en_US locale."""
    return format_currency(amount, currency="USD", locale="en_US")


# Replace each gross GDP value with its USD-formatted representation.
# After this cell 'gross_gdp' holds the formatted output rather than the raw numbers.
gross['gross_gdp'] = gross['gross_gdp'].apply(_as_usd)
gross

In [None]:
# Reshape percent change from wide (one column per year) to long form:
# one row per (city, year) with the value in 'gdp_perc_change'.
perc_change = perc_change.melt(
    id_vars=['city'],
    var_name='year',
    value_name='gdp_perc_change',
)
perc_change

In [None]:
# Write the long-form gross GDP table to disk, omitting the row index.
gross.to_csv(
    path_or_buf='../data/cleaned/economic/gross_gdp.csv',
    index=False,
)

In [None]:
# Write the long-form percent-change table to disk, omitting the row index.
perc_change.to_csv(
    path_or_buf='../data/cleaned/economic/gdp_perc_change.csv',
    index=False,
)

In [None]:
# Load the price parity CSV; only the first 9 data rows are read.
price_parity = pd.read_csv(
    '../data/price_parity.csv',
    nrows=9,
)
price_parity

In [None]:
# Break 'Description' apart on ': ' and store the pieces as three new columns.
# The assignment needs the split to produce exactly three columns; rows with
# fewer pieces get a missing value in the trailing column(s).
price_parity[['rpp', 'desc', 'service']] = (
    price_parity['Description']
    .str.split(': ', expand=True)
)
price_parity

In [None]:
# Display the 'service' column cast to str, to inspect its missing entries.
# The cast result is only shown, not assigned back, so price_parity itself is
# left unchanged by this cell.
price_parity.loc[:, 'service'].astype(str)

In [None]:
# Rows with no third ': '-separated piece in 'Description' have a missing
# 'service' value; label those rows 'Goods'.
# Only the 'service' column is filled: a frame-wide replace of None would also
# rewrite any None that happens to sit in the other columns.
price_parity = price_parity.assign(
    service=price_parity['service'].fillna('Goods')
)
price_parity

In [None]:
# Derive 'city' as the text before the first '-' in 'GeoName'.
# Only the leading piece is kept, so this works however many '-' separated
# parts a GeoName contains (assigning the full split to a fixed list of six
# columns fails whenever the number of parts differs) and no throwaway
# columns are added to the frame.
price_parity['city'] = price_parity['GeoName'].str.split('-', n=1).str[0]
price_parity

In [None]:
# Keep only the columns written to the cleaned output: city, service and the
# 2017-2020 yearly values.
price_parity = price_parity[
    ['city', 'service', '2017', '2018', '2019', '2020']
]
price_parity

In [None]:
# Write the cleaned price parity table to disk, omitting the row index.
price_parity.to_csv(
    path_or_buf='../data/cleaned/economic/price_parity.csv',
    index=False,
)