In [1]:
import pandas as pd
from sqlalchemy import create_engine 
from sqlalchemy.orm import Session

In [None]:
# Updated reference for unemployment data:
# OECD (2021), Unemployment rate (indicator). doi: 10.1787/52570002-en (Accessed on 07 August 2021)
# Definition of Unemployment rate
# The unemployed are people of working age who are without work, are available for work, and have taken specific steps to find work. The uniform application of this definition results in estimates of unemployment rates that are more internationally comparable than estimates based on national definitions of unemployment. This indicator is measured in numbers of unemployed people as a percentage of the labour force and it is seasonally adjusted. The labour force is defined as the total number of unemployed people plus those in employment. Data are based on labour force surveys (LFS).  For European Union countries where monthly LFS information is not available, the monthly unemployed figures are estimated by Eurostat.

In [7]:
csv_file_unemp ="../group_project_2_copy/Resources/unemployment.csv"
unemployment_df= pd.read_csv(csv_file_unemp)
unemployment_df.head()

Unnamed: 0,LOCATION,INDICATOR,SUBJECT,MEASURE,FREQUENCY,TIME,Value,Flag Codes
0,AUS,HUR,TOT,PC_LF,A,1967,1.875,
1,AUS,HUR,TOT,PC_LF,A,1968,1.85,
2,AUS,HUR,TOT,PC_LF,A,1969,1.8,
3,AUS,HUR,TOT,PC_LF,A,1970,1.625,
4,AUS,HUR,TOT,PC_LF,A,1971,1.925,


In [None]:
# https://data.oecd.org/emp/employment-rate.htm
## OECD (2021), Employment rate (indicator). doi: 10.1787/1de68a9b-en (Accessed on 07 August 2021)
# Definition of Employment rate
# Employment rates are defined as a measure of the extent to which available labour resources (people available to work) are being used. They are calculated as the ratio of the employed to the working age population. Employment rates are sensitive to the economic cycle, but in the longer term they are significantly affected by governments' higher education and income support policies and by policies that facilitate employment of women and disadvantaged groups. Employed people are those aged 15 or over who report that they have worked in gainful employment for at least one hour in the previous week or who had a job but were absent from work during the reference week. The working age population refers to people aged 15 to 64. This indicator is seasonally adjusted and it is measured in terms of thousand persons aged 15 and over; and in numbers of employed persons aged 15 to 64 as a percentage of working age population.

In [12]:
csv_file_emp ="../group_project_2_copy/Resources/employment.csv"
employment_df= pd.read_csv(csv_file_emp)
employment_df.head()

Unnamed: 0,LOCATION,INDICATOR,SUBJECT,MEASURE,FREQUENCY,TIME,Value,Flag Codes
0,AUS,EMP,TOT,PC_WKGPOP,A,1979,64.38335,
1,AUS,EMP,TOT,PC_WKGPOP,A,1980,65.17445,
2,AUS,EMP,TOT,PC_WKGPOP,A,1981,65.36742,
3,AUS,EMP,TOT,PC_WKGPOP,A,1982,64.16441,
4,AUS,EMP,TOT,PC_WKGPOP,A,1983,61.94022,


In [10]:
rds_connection_string = "postgres:Class123@localhost:5432/stock_exchange"
engine = create_engine(f'postgresql://{rds_connection_string}')

In [11]:
engine.table_names()

['indexinfo', 'indexdata', 'redux_unemp']

In [14]:
unemployment_raw_df.to_sql(name='raw_unemployment', con=engine, if_exists='append', index=True,index_label='id')

In [16]:
employment_raw_df.to_sql(name='raw_employment', con=engine, if_exists='append', index=True, index_label='id')

In [17]:
pd.read_sql_query(f'select * from raw_unemployment', con=engine).head()

Unnamed: 0,id,LOCATION,INDICATOR,SUBJECT,MEASURE,FREQUENCY,TIME,Value,Flag Codes
0,0,AUS,HUR,TOT,PC_LF,A,1967,1.875,
1,1,AUS,HUR,TOT,PC_LF,A,1968,1.85,
2,2,AUS,HUR,TOT,PC_LF,A,1969,1.8,
3,3,AUS,HUR,TOT,PC_LF,A,1970,1.625,
4,4,AUS,HUR,TOT,PC_LF,A,1971,1.925,


In [18]:
pd.read_sql_query(f'select * from raw_employment', con=engine).head()

Unnamed: 0,id,LOCATION,INDICATOR,SUBJECT,MEASURE,FREQUENCY,TIME,Value,Flag Codes
0,0,AUS,EMP,TOT,PC_WKGPOP,A,1979,64.38335,
1,1,AUS,EMP,TOT,PC_WKGPOP,A,1980,65.17445,
2,2,AUS,EMP,TOT,PC_WKGPOP,A,1981,65.36742,
3,3,AUS,EMP,TOT,PC_WKGPOP,A,1982,64.16441,
4,4,AUS,EMP,TOT,PC_WKGPOP,A,1983,61.94022,


In [31]:
# removing countries that do not have possible stock exchange ties (e.g., Australia & New Zealand)
query="""select * from raw_unemployment where "LOCATION" <> 'AUS' OR "LOCATION" <> 'CHL' OR "LOCATION" <> 'COL' OR "LOCATION" <> 'CRI' OR "LOCATION" <> 'ISR' OR "LOCATION" <> 'MEX' OR "LOCATION" <> 'NZL'"""
redux_unemp_df=pd.read_sql_query(query, con=engine)

In [32]:
redux_unemp_df

Unnamed: 0,id,LOCATION,INDICATOR,SUBJECT,MEASURE,FREQUENCY,TIME,Value,Flag Codes
0,0,AUS,HUR,TOT,PC_LF,A,1967,1.875000,
1,1,AUS,HUR,TOT,PC_LF,A,1968,1.850000,
2,2,AUS,HUR,TOT,PC_LF,A,1969,1.800000,
3,3,AUS,HUR,TOT,PC_LF,A,1970,1.625000,
4,4,AUS,HUR,TOT,PC_LF,A,1971,1.925000,
...,...,...,...,...,...,...,...,...,...
1315,1315,CRI,HUR,TOT,PC_LF,A,2015,9.606182,
1316,1316,CRI,HUR,TOT,PC_LF,A,2016,9.472075,
1317,1317,CRI,HUR,TOT,PC_LF,A,2017,9.174437,
1318,1318,CRI,HUR,TOT,PC_LF,A,2018,10.178950,


In [33]:
# keeping the countries relevant to our stock exchanges
query1="""select * from unemployment where "LOCATION" = 'USA' OR "LOCATION" = 'CAN' OR "LOCATION" = 'CHE' OR "LOCATION" = 'DEU' OR "LOCATION" = 'JPN' OR "LOCATION" = 'KOR' OR "LOCATION" = 'EA19' OR "LOCATION" = 'EU27_2020'"""
redux_unemp_df=pd.read_sql_query(query1, con=engine)

In [34]:
redux_unemp_df

Unnamed: 0,id,LOCATION,INDICATOR,SUBJECT,MEASURE,FREQUENCY,TIME,Value,Flag Codes
0,117,CAN,HUR,TOT,PC_LF,A,1955,4.408333,
1,118,CAN,HUR,TOT,PC_LF,A,1956,3.433333,
2,119,CAN,HUR,TOT,PC_LF,A,1957,4.650000,
3,120,CAN,HUR,TOT,PC_LF,A,1958,7.066667,
4,121,CAN,HUR,TOT,PC_LF,A,1959,5.991667,
...,...,...,...,...,...,...,...,...,...
309,1306,EU27_2020,HUR,TOT,PC_LF,A,2015,10.075000,
310,1307,EU27_2020,HUR,TOT,PC_LF,A,2016,9.175000,
311,1308,EU27_2020,HUR,TOT,PC_LF,A,2017,8.175000,
312,1309,EU27_2020,HUR,TOT,PC_LF,A,2018,7.300000,


In [35]:
redux_unemp_df.to_sql(name='redux_unemp', con=engine, if_exists='append', index=False)

In [None]:
# keeping the countries relevant to our stock exchanges
query2="""select * from raw_employment where "LOCATION" = 'USA' OR "LOCATION" = 'CAN' OR "LOCATION" = 'CHE' OR "LOCATION" = 'DEU' OR "LOCATION" = 'JPN' OR "LOCATION" = 'KOR' OR "LOCATION" = 'EA19' OR "LOCATION" = 'EU27_2020'"""
redux_emp_df=pd.read_sql_query(query2, con=engine)

In [None]:
redux_emp_df

In [None]:
redux_emp_df.to_sql(name='redux_unemp', con=engine, if_exists='append', index=False)

In [None]:
# creating dictionary key to map countries
conversion_dict = {
    'Country': [], # this is standard country or group names (few different Euro groups)
    'Job_Loc': [], # this is the abbreviation from jobs files
    'Stock_Region': [], # this is Region from indexInfo
    'Unemp': [], # boolean
    'Emp': [] # boolean 
}