# Load Dependencies and Data

In [218]:
# Dependencies
import pandas as pd
import numpy as np
import datetime

In [219]:
# Store filepath in a variable
BigMac = "Resources/BigmacPrice.csv"
MeatConsumption = "Resources/meat_consumption.csv"
MinWage = "Resources/Min_wage.csv"
AvgIncome = "Resources/avg_income.csv"

In [220]:
# Read our Data file with the pandas library
# Not every CSV requires an encoding, but be aware this can come up
BigMac_df = pd.read_csv(BigMac)
MeatConsumption_df = pd.read_csv(MeatConsumption)
MinWage_df = pd.read_csv(MinWage)
GDP_df = pd.read_csv(GDP)
AvgIncome_df = pd.read_csv(AvgIncome)

# Preview data

In [221]:
BigMac_df.head()

Unnamed: 0,date,currency_code,name,local_price,dollar_ex,dollar_price
0,4/1/00,ARS,Argentina,2.5,1,2.5
1,4/1/00,AUD,Australia,2.59,1,2.59
2,4/1/00,BRL,Brazil,2.95,1,2.95
3,4/1/00,GBP,Britain,1.9,1,1.9
4,4/1/00,CAD,Canada,2.85,1,2.85


In [222]:
MeatConsumption_df.head()

Unnamed: 0,LOCATION,INDICATOR,SUBJECT,MEASURE,FREQUENCY,TIME,Value,Flag Codes
0,AUS,MEATCONSUMP,BEEF,KG_CAP,A,2000,26.515,
1,AUS,MEATCONSUMP,BEEF,KG_CAP,A,2001,24.606,
2,AUS,MEATCONSUMP,BEEF,KG_CAP,A,2002,25.263,
3,AUS,MEATCONSUMP,BEEF,KG_CAP,A,2003,25.958,
4,AUS,MEATCONSUMP,BEEF,KG_CAP,A,2004,28.719,


In [223]:
MinWage_df.head()

Unnamed: 0,COUNTRY,Country,SERIES,Series,PERIOD,Pay period,TIME,Time,Unit Code,Unit,PowerCode Code,PowerCode,Reference Period Code,Reference Period,Value,Flag Codes,Flags
0,ESP,Spain,PPP,In 2021 constant prices at 2021 USD PPPs,H,Hourly,2001,2001,USD,US Dollar,0,Units,,,6.97,,
1,ESP,Spain,PPP,In 2021 constant prices at 2021 USD PPPs,H,Hourly,2002,2002,USD,US Dollar,0,Units,,,6.9,,
2,ESP,Spain,PPP,In 2021 constant prices at 2021 USD PPPs,H,Hourly,2003,2003,USD,US Dollar,0,Units,,,6.83,,
3,ESP,Spain,PPP,In 2021 constant prices at 2021 USD PPPs,H,Hourly,2004,2004,USD,US Dollar,0,Units,,,7.01,,
4,ESP,Spain,PPP,In 2021 constant prices at 2021 USD PPPs,H,Hourly,2005,2005,USD,US Dollar,0,Units,,,7.33,,


In [224]:
AvgIncome_df.head()

Unnamed: 0,LOCATION,INDICATOR,SUBJECT,MEASURE,FREQUENCY,TIME,Value,Flag Codes
0,AUS,AVWAGE,TOT,USD,A,2000,45864.224684,
1,AUS,AVWAGE,TOT,USD,A,2001,46325.645206,
2,AUS,AVWAGE,TOT,USD,A,2002,46714.713801,
3,AUS,AVWAGE,TOT,USD,A,2003,47349.662983,
4,AUS,AVWAGE,TOT,USD,A,2004,48813.62697,


# Create/cleanup tables for database

In [225]:
#Create country_code table from MinWage_df
countries_df = MinWage_df[['Country','COUNTRY']].drop_duplicates().set_index(['Country']).sort_index()
countries_df = countries_df.rename(columns={'COUNTRY': 'country_code'})
countries_df.head()

Unnamed: 0_level_0,country_code
Country,Unnamed: 1_level_1
Australia,AUS
Belgium,BEL
Brazil,BRA
Canada,CAN
Chile,CHL


In [226]:
#BigMac_df cleanup
#Change date to yyyy
BigMac_df['date'] = pd.to_datetime(BigMac_df['date'])
BigMac_df['date'] = pd.DatetimeIndex(BigMac_df['date']).year
#rename 'name' to 'country' and rearrange columns
BigMac_df = BigMac_df.rename(columns={"name": "country"})
BigMac_df = BigMac_df[['country', 'date', 'currency_code', 'local_price', 'dollar_ex', 'dollar_price']]
#preview cleaned up df
BigMac_df.head()

Unnamed: 0,country,date,currency_code,local_price,dollar_ex,dollar_price
0,Argentina,2000,ARS,2.5,1,2.5
1,Australia,2000,AUD,2.59,1,2.59
2,Brazil,2000,BRL,2.95,1,2.95
3,Britain,2000,GBP,1.9,1,1.9
4,Canada,2000,CAD,2.85,1,2.85


In [227]:
#MeatConsumption_df cleanup
#Rename 'location' to 'country code' and 'time' to 'year'
MeatConsumption_df = MeatConsumption_df.rename(columns={"LOCATION": "country_code", "TIME": "year", "Value": "KG_person"})
#Drop Indicator, Subject, Measure, Frequency, Flag Codes
MeatConsumption_df = MeatConsumption_df.drop(columns=['INDICATOR', 'SUBJECT', 'MEASURE', 'FREQUENCY', 'Flag Codes'])
#Preview cleaned table
MeatConsumption_df.head()

Unnamed: 0,country_code,year,KG_person
0,AUS,2000,26.515
1,AUS,2001,24.606
2,AUS,2002,25.263
3,AUS,2003,25.958
4,AUS,2004,28.719


In [228]:
#Drop SERIES, series, PERIOD, pay period, TIME, Unit Code, Unit, Power Code, Power Code, Reference Period, reference period, flag code, flags
MinWage_df = MinWage_df.drop(columns=['COUNTRY', 'SERIES', 'Series', 'Pay period', 'PERIOD', 'Time', 'Unit Code', 'Unit', 'PowerCode Code', 'PowerCode', 'Reference Period Code', 'Flag Codes', 'Flags', 'Reference Period'])
#Rename TIME to year and Value is hourly wage
MinWage_df = MinWage_df.rename(columns={'TIME': 'year', 'Value': 'hourly_wage'})
MinWage_df.head()

Unnamed: 0,Country,year,hourly_wage
0,Spain,2001,6.97
1,Spain,2002,6.9
2,Spain,2003,6.83
3,Spain,2004,7.01
4,Spain,2005,7.33


In [229]:
#AvgIncome_df Cleanup
#Pull in location, time, and value columns
AvgIncome_df = AvgIncome_df[['LOCATION', 'TIME', 'Value']]
#rename columns for clarity and consistency
AvgIncome_df = AvgIncome_df.rename(columns={'LOCATION': 'country', 'TIME': 'year', 'Value': 'avg_income'})
#Round income column
AvgIncome_df.round(2)
#Display preview of cleaned table
AvgIncome_df.head()

Unnamed: 0,country,year,avg_income
0,AUS,2000,45864.224684
1,AUS,2001,46325.645206
2,AUS,2002,46714.713801
3,AUS,2003,47349.662983
4,AUS,2004,48813.62697
