# Load Dependencies and Data

In [865]:
# Dependencies
import pandas as pd
import numpy as np
import datetime
from pathlib import Path

In [866]:
# Locations of the four raw input CSVs (relative to the notebook's working directory)
ORIGINAL_DIR = "Resources/Original"
BigMac = f"{ORIGINAL_DIR}/BigmacPrice.csv"
MeatConsumption = f"{ORIGINAL_DIR}/meat_consumption.csv"
MinWage = f"{ORIGINAL_DIR}/Min_wage.csv"
AvgIncome = f"{ORIGINAL_DIR}/avg_income.csv"

In [867]:
# Load each raw CSV into its own DataFrame with pandas' default parsing options.
# The files are read in the same order as the names on the left-hand side.
BigMac_df, MeatConsumption_df, MinWage_df, AvgIncome_df = (
    pd.read_csv(csv_path)
    for csv_path in (BigMac, MeatConsumption, MinWage, AvgIncome)
)

# Preview data

In [868]:
# First five rows of the raw Big Mac price data
BigMac_df.head(n=5)


Unnamed: 0,date,currency_code,name,local_price,dollar_ex,dollar_price
0,4/1/00,ARS,Argentina,2.5,1,2.5
1,4/1/00,AUD,Australia,2.59,1,2.59
2,4/1/00,BRL,Brazil,2.95,1,2.95
3,4/1/00,GBP,Britain,1.9,1,1.9
4,4/1/00,CAD,Canada,2.85,1,2.85


In [869]:
# First five rows of the raw meat consumption data
MeatConsumption_df.head(n=5)

Unnamed: 0,LOCATION,INDICATOR,SUBJECT,MEASURE,FREQUENCY,TIME,Value,Flag Codes
0,AUS,MEATCONSUMP,BEEF,KG_CAP,A,2000,26.515,
1,AUS,MEATCONSUMP,BEEF,KG_CAP,A,2001,24.606,
2,AUS,MEATCONSUMP,BEEF,KG_CAP,A,2002,25.263,
3,AUS,MEATCONSUMP,BEEF,KG_CAP,A,2003,25.958,
4,AUS,MEATCONSUMP,BEEF,KG_CAP,A,2004,28.719,


In [870]:
# First five rows of the raw minimum wage data
MinWage_df.head(n=5)

Unnamed: 0,COUNTRY,Country,SERIES,Series,PERIOD,Pay period,TIME,Time,Unit Code,Unit,PowerCode Code,PowerCode,Reference Period Code,Reference Period,Value,Flag Codes,Flags
0,ESP,Spain,PPP,In 2021 constant prices at 2021 USD PPPs,H,Hourly,2001,2001,USD,US Dollar,0,Units,,,6.97,,
1,ESP,Spain,PPP,In 2021 constant prices at 2021 USD PPPs,H,Hourly,2002,2002,USD,US Dollar,0,Units,,,6.9,,
2,ESP,Spain,PPP,In 2021 constant prices at 2021 USD PPPs,H,Hourly,2003,2003,USD,US Dollar,0,Units,,,6.83,,
3,ESP,Spain,PPP,In 2021 constant prices at 2021 USD PPPs,H,Hourly,2004,2004,USD,US Dollar,0,Units,,,7.01,,
4,ESP,Spain,PPP,In 2021 constant prices at 2021 USD PPPs,H,Hourly,2005,2005,USD,US Dollar,0,Units,,,7.33,,


In [871]:
# First five rows of the raw average income data
AvgIncome_df.head(n=5)

Unnamed: 0,LOCATION,INDICATOR,SUBJECT,MEASURE,FREQUENCY,TIME,Value,Flag Codes
0,AUS,AVWAGE,TOT,USD,A,2000,45864.224684,
1,AUS,AVWAGE,TOT,USD,A,2001,46325.645206,
2,AUS,AVWAGE,TOT,USD,A,2002,46714.713801,
3,AUS,AVWAGE,TOT,USD,A,2003,47349.662983,
4,AUS,AVWAGE,TOT,USD,A,2004,48813.62697,


# Create/cleanup tables for database

In [872]:
#Distinct (country name, country code) pairs found in MinWage_df, ordered by name
country_codes = (
    MinWage_df[['Country', 'COUNTRY']]
    .drop_duplicates()
    .sort_values(['Country'])
)
country_codes

Unnamed: 0,Country,COUNTRY
419,Australia,AUS
461,Belgium,BEL
531,Brazil,BRA
293,Canada,CAN
146,Chile,CHL
615,Colombia,COL
636,Costa Rica,CRI
167,Czech Republic,CZE
188,Estonia,EST
21,France,FRA


In [873]:
#Distinct (country name, currency code) pairs found in BigMac_df, ordered by name
currency_codes = (
    BigMac_df[['name', 'currency_code']]
    .drop_duplicates()
    .sort_values(['name'])
)
currency_codes

Unnamed: 0,name,currency_code
0,Argentina,ARS
1,Australia,AUD
522,Austria,EUR
1301,Azerbaijan,AZN
1302,Bahrain,BHD
...,...,...
27,United States,USD
159,Uruguay,UYU
88,Venezuela,VEF
1801,Venezuela,VES


In [874]:
#Confirm that BigMac_df uses the spellings 'Russia', 'South Korea' and 'Turkey'
#(prints one True/False per name, in that order)
bigmac_names = BigMac_df['name'].unique()
for expected_name in ('Russia', 'South Korea', 'Turkey'):
    print(expected_name in bigmac_names)

True
True
True


In [875]:
#Align the spellings in country_codes with the ones used by BigMac_df:
#'Korea' -> 'South Korea', 'Türkiye' -> 'Turkey', 'Russian Federation' -> 'Russia'
country_codes = country_codes.replace({
    'Korea': 'South Korea',
    'Türkiye': 'Turkey',
    'Russian Federation': 'Russia',
})

In [876]:
#Create countries table: country names/codes (from MinWage_df) joined to the
#currency codes (from BigMac_df) on the country name; indexed and sorted by country.
#NOTE(review): this is an inner join on the exact name, so any country whose
#spelling differs between the two sources is silently left out - confirm that
#the three renames applied to country_codes cover every mismatch.
countries_df = (
    country_codes
    .merge(currency_codes, left_on='Country', right_on='name')
    .rename(columns={'COUNTRY': 'country_code'})
    .drop(columns=['name'])
    .set_index(['Country'])
    .sort_index()
)
countries_df

Unnamed: 0_level_0,country_code,currency_code
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
Australia,AUS,AUD
Belgium,BEL,EUR
Brazil,BRA,BRL
Canada,CAN,CAD
Chile,CHL,CLP
Colombia,COL,COP
Costa Rica,CRI,CRC
Czech Republic,CZE,CZK
Estonia,EST,EUR
France,FRA,EUR


In [877]:
#create currency exchange table
#Change date to yyyy in BigMac_df (the BigMac_df cleanup cell relies on this column holding the year).
#The dtype guard makes this cell safe to re-run: feeding already-converted integer
#years back into pd.to_datetime would read them as nanoseconds since the epoch
#and turn every year into 1970.
if not pd.api.types.is_numeric_dtype(BigMac_df['date']):
    BigMac_df['date'] = pd.to_datetime(BigMac_df['date']).dt.year
#Keep only the currencies used by the countries in countries_df.
#Several countries share one currency_code (e.g. EUR), so merging on currency_code
#would be many-to-many and multiply rows that are then thrown away again;
#a membership filter selects the same BigMac_df rows exactly once.
in_countries = BigMac_df['currency_code'].isin(countries_df['currency_code'])
CurrencyEx_df = BigMac_df.loc[in_countries, ['currency_code', 'date', 'dollar_ex']].rename(columns={'date': 'year'})
#One exchange rate per currency and year: the first row encountered wins
CurrencyEx_df = CurrencyEx_df.drop_duplicates(subset=['currency_code', 'year'], keep='first')
CurrencyEx_df = CurrencyEx_df.set_index('currency_code').sort_index()
CurrencyEx_df


Unnamed: 0_level_0,year,dollar_ex
currency_code,Unnamed: 1_level_1,Unnamed: 2_level_1
AUD,2000,1
AUD,2022,1
AUD,2021,1
AUD,2020,1
AUD,2019,1
...,...,...
USD,2004,1
USD,2003,1
USD,2002,1
USD,2010,1


In [878]:
#BigMac_df cleanup
#Rename 'name' -> 'country' and 'date' -> 'year', keep only country, year and
#dollar_price, then index and sort by country.
BigMac_df = (
    BigMac_df
    .rename(columns={"name": "country", "date": "year"})
    .loc[:, ['country', 'year', 'dollar_price']]
    .set_index(['country'])
    .sort_index()
)
#preview cleaned up df
BigMac_df

Unnamed: 0_level_0,year,dollar_price
country,Unnamed: 1_level_1,Unnamed: 2_level_1
Argentina,2000,2.50
Argentina,2002,0.83
Argentina,2021,3.76
Argentina,2020,3.52
Argentina,2003,2.05
...,...,...
Vietnam,2022,3.05
Vietnam,2019,2.80
Vietnam,2014,2.83
Vietnam,2016,2.69


In [879]:
#MeatConsumption_df cleanup
#Drop INDICATOR, SUBJECT, MEASURE, FREQUENCY and Flag Codes, rename
#LOCATION -> country_code, TIME -> year, Value -> kg_person, then index and
#sort by country_code.
#NOTE(review): SUBJECT and MEASURE are dropped without filtering on them, so
#kg_person is only meaningful if the raw file holds a single subject/measure
#(the preview shows BEEF / KG_CAP) - confirm against the source file.
MeatConsumption_df = (
    MeatConsumption_df
    .drop(columns=['INDICATOR', 'SUBJECT', 'MEASURE', 'FREQUENCY', 'Flag Codes'])
    .rename(columns={"LOCATION": "country_code", "TIME": "year", "Value": "kg_person"})
    .set_index(['country_code'])
    .sort_index()
)
#Preview cleaned table
MeatConsumption_df

Unnamed: 0_level_0,year,kg_person
country_code,Unnamed: 1_level_1,Unnamed: 2_level_1
ARG,2000,45.282
ARG,2003,41.864
ARG,2004,43.711
ARG,2005,42.824
ARG,2006,44.105
...,...,...
ZAF,2019,11.142
ZAF,2021,11.692
ZAF,2022,11.968
ZAF,2010,12.103


In [880]:
#MinWage_df cleanup
#Drop COUNTRY, SERIES, Series, PERIOD, Pay period, Time, Unit Code, Unit, PowerCode Code, PowerCode,
#Reference Period Code, Reference Period, Flag Codes, Flags (TIME is kept and renamed below)
#NOTE(review): SERIES and Pay period are dropped without filtering on them, so hourly_wage
#is only meaningful if the raw file holds a single series/pay period (the preview shows
#PPP / Hourly) - confirm against the source file.
MinWage_df = MinWage_df.drop(columns=['COUNTRY', 'SERIES', 'Series', 'Pay period', 'PERIOD', 'Time', 'Unit Code', 'Unit', 'PowerCode Code', 'PowerCode', 'Reference Period Code', 'Flag Codes', 'Flags', 'Reference Period'])
#Rename TIME to year, Value to hourly_wage and Country to country
MinWage_df = MinWage_df.rename(columns={'TIME': 'year', 'Value': 'hourly_wage', 'Country': 'country'})
#Use the same country spellings as country_codes and BigMac_df. The country code
#column is dropped above, so the name is the only key left for joining this table;
#without this step these three countries would not match the other tables.
MinWage_df['country'] = MinWage_df['country'].replace({
    'Korea': 'South Korea',
    'Türkiye': 'Turkey',
    'Russian Federation': 'Russia',
})
MinWage_df = MinWage_df.set_index(['country']).sort_index()
MinWage_df

Unnamed: 0_level_0,year,hourly_wage
country,Unnamed: 1_level_1,Unnamed: 2_level_1
Australia,2007,11.71
Australia,2006,11.33
Australia,2021,12.79
Australia,2020,12.88
Australia,2019,12.68
...,...,...
United States,2002,7.76
United States,2001,7.88
United States,2021,7.25
United States,2010,9.01


In [881]:
#AvgIncome_df Cleanup
#Keep LOCATION, TIME and Value, rename them to country_code, year and avg_income
#for consistency with the other tables, round to 2 decimals, then index and sort
#by country_code.
AvgIncome_df = (
    AvgIncome_df
    .loc[:, ['LOCATION', 'TIME', 'Value']]
    .rename(columns={'LOCATION': 'country_code', 'TIME': 'year', 'Value': 'avg_income'})
    .round(2)
    .set_index(['country_code'])
    .sort_index()
)
#Display preview of cleaned table
AvgIncome_df

Unnamed: 0_level_0,year,avg_income
country_code,Unnamed: 1_level_1,Unnamed: 2_level_1
AUS,2000,45864.22
AUS,2020,56454.64
AUS,2019,55170.16
AUS,2018,54585.74
AUS,2017,54446.19
...,...,...
USA,2003,59033.84
USA,2002,58405.48
USA,2001,57939.66
USA,2009,62711.31


# Write cleaned data to new CSVs to import to SQL

In [882]:
#Make a path for each df, all inside one output folder
cleaned_dir = Path('Resources/Cleaned')
CurrencyEx_filepath = cleaned_dir / 'CurrencyEx.csv'
Countries_filepath = cleaned_dir / 'Countries.csv'
BigMac_filepath = cleaned_dir / 'BigMac.csv'
MeatConsumption_filepath = cleaned_dir / 'MeatConsumption.csv'
MinWage_filepath = cleaned_dir / 'MinWage.csv'
AvgIncome_filepath = cleaned_dir / 'AvgIncome.csv'

#Create the output folder if it is missing: to_csv does not create parent
#directories and would otherwise fail on a fresh checkout
cleaned_dir.mkdir(parents=True, exist_ok=True)

#write dfs to csvs via path (each frame's index is written as the first column)
CurrencyEx_df.to_csv(CurrencyEx_filepath)
countries_df.to_csv(Countries_filepath)
BigMac_df.to_csv(BigMac_filepath)
MeatConsumption_df.to_csv(MeatConsumption_filepath)
MinWage_df.to_csv(MinWage_filepath)
AvgIncome_df.to_csv(AvgIncome_filepath)


