<h1>Gather and Clean Data</h1>

<h4>Import Dependencies</h4>

In [1]:
import requests 
import json
from pprint import pprint
import os
import pandas as pd
import csv
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

<h4>Read in GDP CSV data</h4>

In [6]:
# Import GDP data: one row per country, one column per year from 1991 to 2016.
path_gdp = os.path.join("Resources", "GDP_1991_to_2016.csv")

# The file begins with a UTF-8 byte-order mark. Decoding it as ISO-8859-1 turns
# the BOM into the literal characters 'ï»¿' glued onto the first header, and
# would also garble any non-ASCII text in the data. 'utf-8-sig' strips the BOM
# so the first column arrives as plain 'Country Name'.
gdp_df = pd.read_csv(path_gdp, encoding="utf-8-sig", engine='python')

# Identifier columns: replace the space with an underscore.
gdp_id_columns = {
    'Country Name': 'Country_Name',
    'Country Code': 'Country_Code',
    'Indicator Name': 'Indicator_Name',
    'Indicator Code': 'Indicator_Code',
}
# Year columns: '1991' -> '1991_GDPData', ..., '2016' -> '2016_GDPData'.
gdp_year_columns = {str(year): f'{year}_GDPData' for year in range(1991, 2017)}

gdp_df = gdp_df.rename(columns={**gdp_id_columns, **gdp_year_columns})
gdp_df.head()

Unnamed: 0,Country_Name,Country_Code,Indicator_Name,Indicator_Code,1991_GDPData,1992_GDPData,1993_GDPData,1994_GDPData,1995_GDPData,1996_GDPData,...,2007_GDPData,2008_GDPData,2009_GDPData,2010_GDPData,2011_GDPData,2012_GDPData,2013_GDPData,2014_GDPData,2015_GDPData,2016_GDPData
0,Aruba,ABW,GDP (current US$),NY.GDP.MKTP.CD,872138700.0,958463200.0,1082980000.0,1245688000.0,1320475000.0,1379961000.0,...,2615084000.0,2745251000.0,2498883000.0,2390503000.0,2549721000.0,2534637000.0,2701676000.0,2765363000.0,2919553000.0,2965922000.0
1,Afghanistan,AFG,GDP (current US$),NY.GDP.MKTP.CD,,,,,,,...,9747880000.0,10109230000.0,12439090000.0,15856570000.0,17804290000.0,20001600000.0,20561070000.0,20484890000.0,19907110000.0,19362640000.0
2,Angola,AGO,GDP (current US$),NY.GDP.MKTP.CD,10603780000.0,8307811000.0,5768720000.0,4438321000.0,5538749000.0,7526447000.0,...,65266450000.0,88538610000.0,70307160000.0,83799500000.0,111790000000.0,128053000000.0,136710000000.0,145712000000.0,116194000000.0,101124000000.0
3,Albania,ALB,GDP (current US$),NY.GDP.MKTP.CD,1099559000.0,652175000.0,1185315000.0,1880952000.0,2392765000.0,3199643000.0,...,10677320000.0,12881350000.0,12044220000.0,11926930000.0,12890770000.0,12319830000.0,12776220000.0,13228140000.0,11386850000.0,11861200000.0
4,Andorra,AND,GDP (current US$),NY.GDP.MKTP.CD,1106929000.0,1210014000.0,1007026000.0,1017549000.0,1178739000.0,1223945000.0,...,3952601000.0,4085631000.0,3674410000.0,3449967000.0,3629204000.0,3188809000.0,3193704000.0,3271808000.0,2789870000.0,2896679000.0


In [7]:
# Build weather_df: one row per (ISO3, Year, Country) holding the mean
# temperature (Fahrenheit) and mean rainfall (mm), then save it to CSV.

# Headers shared by both weather files are renamed to drop their leading space.
shared_headers = {' Year': 'Year', ' Statistics': 'Statistics', ' Country': 'Country', ' ISO3': 'ISO3'}
group_keys = ['Year', 'Country', 'ISO3']

# --- Temperature ---
path_temp = os.path.join("Resources", "Temp_Data.csv")
temp_df = (
    pd.read_csv(path_temp, encoding="ISO-8859-1", engine='python')
    .rename(columns={'Temp_C': 'Temperature_Celsius', **shared_headers})
    # Celsius -> Fahrenheit
    .assign(Temp_Farenheit=lambda frame: frame['Temperature_Celsius'] * (9/5) + 32)
)

# Mean temperature per year for each country.
# Despite the name this is a Series, indexed by (Year, Country, ISO3).
meanTemp_df = temp_df.groupby(group_keys)['Temp_Farenheit'].mean()

# --- Rainfall ---
rain_df = pd.read_csv('Resources/Precip_Data.csv').rename(
    columns={'Rainfall - (MM)': 'Rainfall_mm', **shared_headers}
)

# Mean rainfall per year for each country (also a Series).
meanRain_df = rain_df.groupby(group_keys)['Rainfall_mm'].mean()

# --- Combine ---
# Left-join rainfall onto temperature, order by country code then year, turn
# the group keys back into ordinary columns, and drop every row that is
# missing either measurement.
weather_df = (
    pd.merge(meanTemp_df, meanRain_df, how='left', on=['ISO3', 'Year', 'Country'])
    .sort_values(by=['ISO3', 'Year'])
    .reset_index()
    .dropna(how='any')
)

# Save the combined table for later use.
weather_df.to_csv('Resources/weather_df.csv')

<h4>Look up country ISO3 codes from an API and store them in a dictionary</h4>

In [22]:
# Look up the three-letter (alpha-3) code for every country name in weather_df
# and collect the results in code_dict as {name: code}. A name the service
# cannot resolve is stored as None. `requests` comes from the imports cell.
# NOTE(review): confirm the restcountries.eu endpoint below is still being served.

# Seconds to wait per request; without a timeout a stalled connection would
# block this cell indefinitely.
REQUEST_TIMEOUT_S = 10

code_dict = {}
# Strip each name before querying: the stored Country values may carry
# surrounding whitespace.
my_request_thing = [x.strip() for x in weather_df['Country'].unique().tolist()]
for name in my_request_thing:
    url = f"https://restcountries.eu/rest/v2/name/{name}?fullText=true"
    print(name, end='\r')  # progress indicator, overwritten in place
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT_S)
        code_dict[name] = response.json()[0]['alpha3Code']
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError):
        # RequestException: connection failure or timeout.
        # ValueError: the response body was not JSON.
        # KeyError / IndexError / TypeError: the payload was not a non-empty
        # list of records carrying 'alpha3Code' (e.g. an error object).
        # One failed lookup must not abort the remaining ones.
        code_dict[name] = None
code_dict

Zimbabwericaand the Grenadinesthe)

{'Afghanistan': 'AFG',
 'Angola': 'AGO',
 'Albania': 'ALB',
 'Andorra': 'AND',
 'United Arab Emirates': 'ARE',
 'Argentina': 'ARG',
 'Armenia': 'ARM',
 'Antigua and Barbuda': 'ATG',
 'Australia': 'AUS',
 'Austria': 'AUT',
 'Azerbaijan': 'AZE',
 'Burundi': 'BDI',
 'Belgium': 'BEL',
 'Benin': 'BEN',
 'Burkina Faso': 'BFA',
 'Bangladesh': 'BGD',
 'Bulgaria': 'BGR',
 'Bahrain': 'BHR',
 'Bahamas': 'BHS',
 'Bosnia and Herzegovina': 'BIH',
 'Belarus': 'BLR',
 'Belize': 'BLZ',
 'Bolivia': None,
 'Brazil': 'BRA',
 'Barbados': 'BRB',
 'Brunei': None,
 'Bhutan': 'BTN',
 'Botswana': 'BWA',
 'Central African Republic': 'CAF',
 'Canada': 'CAN',
 'Switzerland': 'CHE',
 'Chile': 'CHL',
 'China': 'CHN',
 "Cote d'Ivoire": 'CIV',
 'Cameroon': 'CMR',
 'Congo (Democratic Republic of the)': 'COD',
 'Congo (Republic of the)': None,
 'Colombia': 'COL',
 'Comoros': 'COM',
 'Cape Verde': None,
 'Costa Rica': 'CRI',
 'Cuba': 'CUB',
 'Cyprus': 'CYP',
 'Czech Republic': 'CZE',
 'Germany': 'DEU',
 'Djibouti': 'DJI'

In [11]:
# Build, for plotting, one list of yearly values per country:
#   YearGDPDict     - GDP per year, keyed by gdp_df's Country_Name
#   CountryTempDict - mean temperature (Fahrenheit) per year, keyed by
#                     weather_df's Country

# --- GDP ---
YearGDPList = []  # not used below; left defined in case another cell expects the name
gdp_edit_df = gdp_df.sort_values(by=['Country_Code'])
# Year columns in chronological order: '1991_GDPData' ... '2016_GDPData'.
gdp_year_cols = [f'{year}_GDPData' for year in range(1991, 2017)]
# One list of 26 yearly values per row, paired with that row's country name.
YearGDPDict = dict(zip(gdp_edit_df['Country_Name'],
                       gdp_edit_df[gdp_year_cols].values.tolist()))

# --- Distinct countries / ISO3 codes / years present in the weather data ---
YrRows_step1 = weather_df[['ISO3', 'Country', 'Year']]
YrRows_step2 = YrRows_step1.drop_duplicates(subset=['Country', 'ISO3', 'Year'])
CountryList = YrRows_step2['Country'].unique()
ISO3List = YrRows_step2['ISO3'].unique()
YrsList = YrRows_step2['Year'].unique()

# --- Temperature ---
# Restrict to the plotting window once, up front. Previously the year filter
# was overwritten by the country filter on the next line and so never applied,
# and weather_df was re-sorted and reassigned on every loop iteration.
# NOTE(review): the upper bound 2017 is one year past the last GDP column
# (2016); confirm which bound is intended.
weather_in_range = weather_df.loc[(weather_df['Year'] >= 1991) & (weather_df['Year'] <= 2017)]

# Keys are the Country values exactly as stored in weather_df; the displayed
# output shows they include a leading space (e.g. ' Afghanistan').
CountryTempList = []
CountryTempDict = {}
for country in CountryList:
    Country_df = weather_in_range.loc[weather_in_range['Country'] == country].sort_values(by=['Year'])
    CountryTempList = Country_df['Temp_Farenheit'].values.tolist()
    CountryTempDict[country] = CountryTempList

CountryTempDict

{' Afghanistan': [55.210077500000004,
  54.580185500000006,
  55.35884899999999,
  55.544033000000006,
  55.473611000000005,
  54.950481499999995,
  55.162229,
  56.1918785,
  57.3330575,
  56.79299600000001,
  57.21512600000002,
  56.867807,
  55.96416499999999,
  57.4775435,
  56.08794500000001,
  57.501470000000005,
  56.3200535,
  56.755069999999996,
  56.807911999999995,
  57.39057349999999,
  56.710691,
  55.3573055,
  57.230246,
  56.566340000000004,
  56.96045899999999,
  58.124417],
 ' Angola': [71.09039,
  71.49297500000002,
  71.36321000000001,
  71.356685,
  72.227525,
  71.60528,
  72.055775,
  72.83079500000001,
  71.26287500000001,
  71.464925,
  72.026045,
  72.38690000000001,
  72.46475,
  71.95104500000001,
  72.93929000000003,
  71.65607,
  71.58845000000001,
  71.34569,
  72.11462,
  72.361655,
  71.12657,
  71.38154,
  71.538035,
  71.56205,
  72.06878,
  72.60063500000001],
 ' Albania': [51.867768500000004,
  52.81496150000001,
  52.872212,
  54.54822199999999,
  

In [4]:
# Line plot of Andorra's GDP for each year from 1991 through 2016.
# Requires the cell that builds YearGDPDict to have been run first; running
# this cell on a fresh kernel raises NameError.
PlotYrList = list(range(1991, 2017))

fig, ax = plt.subplots()
# The series was previously labelled 'Savings'; it is the GDP indicator.
ax.plot(PlotYrList, YearGDPDict['Andorra'], marker='o', color='red', linewidth=1.5,
        label='GDP (current US$)')
ax.set_title('Andorra GDP, 1991-2016')
ax.set_xlabel('Year')
ax.set_ylabel('GDP (current US$)')
ax.legend();

NameError: name 'YearGDPDict' is not defined