In [1]:
import pandas
import requests
from GIS_Data_Library_ext import *
import os
from databaseUpdaterGIM import *

In [2]:
oe_api_key = os.environ['OE_API_KEY']

# Bring in data from OE databanks
Find saved databank selections

In [3]:
gem_selection = find_selection(oe_api_key, 'PPI_GEM_Q')
nac_selection = find_selection(oe_api_key, 'PPI_NAC_Q')
gis_selection = find_selection(oe_api_key, 'PPI_GIS_Q')
usind_selection = find_selection(oe_api_key, 'PPI_USIND_Q')

In [4]:
selections = [gem_selection, nac_selection, gis_selection, usind_selection]
databanks = ['gem', 'nac', 'gis', 'usind']

In [5]:
selection_dict = dict(zip(databanks, selections))

Download databank selections

In [6]:
data_main = {}
metadata_main = {}

for i in selection_dict.keys():
    # Pull Global Economic Service data
    response = requests.get("https://services.oxfordeconomics.com/api/download/" + selection_dict[i],
                            headers = {'Accept' : 'application/json', 
                                        'API-Key' : oe_api_key,
                                        'Content-Type' : 'application/json; charset=utf8'})

    data_pull = response.json()

    # extract mnemonics, quarterly data and metadata
    data = {}
    metadata = {}
    for j in range(0, len(data_pull)):
        vbl = data_pull[j]['VariableCode']
        data_ts = data_pull[j]['QuarterlyData']
        metadata = data_pull[j]['Metadata']
        data[vbl] = data_ts
        metadata[vbl] = metadata

    data_main[i] = data
    metadata_main[i] = metadata

Convert to dataframes

In [7]:
dataframes = {}

In [8]:
for i in data_main.keys():
    dataframes[i] = pd.DataFrame(data_main[i])
    dataframes[i].index = pd.period_range(start = '1980-01-01', end = '2050-12-31', freq = 'Q')

In [9]:
# calculate profit margin by industy as PRx/GYx!
gis_sectors = ['A', 'E', 'EXOIL', 'COAL', 'OTHE', 'OIL', 'M', 'FDBVT', 'FDBV', 'FD', 'BV', 'TOBC', 'TEXT', 'TETEX', 'TECLO', 'TELEA', 'WOOD', 'PAPR', 'PAPB', 'PAPT', 'FCHR', 'FUEL', 'CHEM', 'CHXPHA', 'CHBAS', 'CHAGR', 'CHPNT', 'CHSOP', 'CHOTH', 'CHMMF', 'CHPHA', 'RUBP', 'MINL', 'MLGL', 'MLCER', 'MLCSN', 'BMETP', 'BMET', 'BMIST', 'BMNFR', 'BMCST', 'METP', 'ELO', 'ELC', 'COMP', 'ELEC', 'ELEC32', 'ELCMP', 'ELTEL', 'ELCSM', 'PREC', 'PRMED', 'PROTH', 'ELEC31', 'ELPWR', 'MEDOM', 'ELDIS', 'ELOTH', 'MECH', 'MEGPM', 'MEPWR', 'MEOGN', 'MESPM', 'MEAGR', 'MEMAC', 'MEOTH', 'MEOTHMIN', 'MOTRA', 'MOTR', 'MOVEH', 'MOPRT', 'OTRA', 'OTAER', 'OTNES', 'OTHF', 'OTHM', 'REPAIR', 'U', 'UEGAS', 'ELECTR', 'UGAS', 'UWSWM', 'P', 'C', 'S', 'DTRC', 'DIS', 'DISTR', 'HOTL', 'TRACOM', 'TRA', 'TRALRP', 'TRAW', 'TRAA', 'TRAOTH', 'COM', 'COMPUB', 'COMTEL', 'COMIT', 'FIBU', 'FIN', 'BUS', 'REAL', 'BUSOTH', 'OTH', 'PAD', 'PUBLIC', 'EDUC', 'HEAL', 'OTHS', 'T', 'CONN', 'CON', 'INT', 'INV', 'ENG', 'COND', 'ALL']

for i in gis_sectors:
    if i == 'ALL':
        dataframes['gis']['PM'+i] = dataframes['gis']['PR'+i] / dataframes['gis']['GY'+'GVA'+'!']
    else:
        dataframes['gis']['PM'+i] = dataframes['gis']['PR'+i] / dataframes['gis']['GY'+i+'!']

  dataframes['gis']['PM'+i] = dataframes['gis']['PR'+i] / dataframes['gis']['GY'+i+'!']
  dataframes['gis']['PM'+i] = dataframes['gis']['PR'+i] / dataframes['gis']['GY'+'GVA'+'!']


In [10]:
# for i in dataframes.keys():
#     dataframes[i].to_csv(i+"_covariates.csv")

# Pull COSTx and PENGx variables from the current internal database

In [11]:
# dictionary Country : Variables
sel_vbls = {'US' : ['PENGIND',
                    'COSTBMMET',
                    'COSTBMNFR',
                    'COSTCHEM',
                    'COSTCOM',
                    'COSTELC',
                    'COSTELEC31',
                    'COSTFDBVT',
                    'COSTFUEL',
                    'COSTMECH',
                    'COSTMETP',
                    'COSTMINL',
                    'COSTMOTR',
                    'COSTOTHFMRE',
                    'COSTOTHM',
                    'COSTOTRA',
                    'COSTPAPR',
                    'COSTRUBP',
                    'COSTTEXT',
                    'COSTTOT',
                    'COSTTRAA',
                    'COSTTRALRP',
                    'COSTTRAOTH',
                    'COSTTRAW',
                    'COSTU',
                    'COSTWOOD',
                    'PENGTOT',
                    'PENGA',
                    'PENGBMMET',
                    'PENGBMNFR',
                    'PENGC',
                    'PENGCHEM',
                    'PENGCOMMER',
                    'PENGE',
                    'PENGELECTR',
                    'PENGFDBVT',
                    'PENGFUEL',
                    'PENGMINL',
                    'PENGMMAC',
                    'PENGMOTH',
                    'PENGMOTRA',
                    'PENGPAPR',
                    'PENGPUBLIC',
                    'PENGTEXT',
                    'PENGTRAA',
                    'PENGTRALRP',
                    'PENGTRAOTH',
                    'PENGTRAW',
                    'PENGUGAS',
                    'PENGWOOD']}

start = '1980'
end = '2050'
var = True
agg = 'L'

In [12]:
cost_peng_df = databaseUpdater(start, end, sel_vbls, var, agg)

sel file created in c:\DSWG_PPI\Databases\GIM\


In [13]:
cost_peng_df.columns = cost_peng_df.loc[2]
cost_peng_df = cost_peng_df.drop([0,1,2,3,4,5,6,7,8])

In [14]:
cost_peng_df.index = pd.period_range(start=start+'-01-01', end=end+'-12-31', freq='Q')

In [15]:
# cost_peng_df.to_csv('cost_peng_covariates.csv')

# Combine all covariates files

In [16]:
all_covariates = pd.concat([dataframes['gem'],dataframes['nac'],dataframes['usind'],dataframes['gis'],cost_peng_df],axis=1)

In [17]:
all_covariates.to_csv('all_covariates.csv')