In [1]:
#Create Dependencies
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as st
from scipy.stats import linregress

In [2]:
#Read in DataFrames
#Every indicator file is parsed by pd.read_csv with its first 4 rows skipped

#Locations of the Indicator Files
gdp_per_capita_file = "Baseline_Indicator_Data/GDP_per_Capita/GDP_per_Capita_Data.csv"          #GDP per Capita
gni_per_capita_file = "Baseline_Indicator_Data/GNI_per_Capita/GNI_per_Capita_Data.csv"          #GNI per Capita
life_expectancy_file = "Baseline_Indicator_Data/Life_Expectancy/Life_Expectancy_Data.csv"       #Life Expectancy
infant_mortality_file = "Baseline_Indicator_Data/Infant_Mortality/Infant_Mortality_Data.csv"    #Infant Mortality
exports_file = "Baseline_Indicator_Data/Exports/Export_Data.csv"                                #Exports
imports_file = "Baseline_Indicator_Data/Imports/Import_Data.csv"                                #Imports

#Load the Files in the Order Listed Above, One DataFrame per Indicator
(
    gdp_per_capita_df,
    gni_per_capita_df,
    life_expectancy_df,
    infant_mortality_df,
    exports_df,
    imports_df,
) = [
    pd.read_csv(indicator_path, skiprows=4)
    for indicator_path in (
        gdp_per_capita_file,
        gni_per_capita_file,
        life_expectancy_file,
        infant_mortality_file,
        exports_file,
        imports_file,
    )
]

In [3]:
#Add Suffixes to Columns Except Country Code
#Code found on https://stackoverflow.com/questions/34049618/how-to-add-a-suffix-or-prefix-to-each-column-name
#Each chain first tags every column with the indicator's suffix (so the year columns
#stay distinguishable after merging), then restores the plain 'CountryCode' name so
#it can serve as the shared merge key.
#NOTE: gni_per_capita_df is not given a suffix in this cell.

#GDP per Capita
gdp_per_capita_df = (
    gdp_per_capita_df
    .add_suffix("_GDP_Cap")
    .rename(columns={"CountryCode_GDP_Cap": "CountryCode"})
)

#Life Expectancy
life_expectancy_df = (
    life_expectancy_df
    .add_suffix("_Life")
    .rename(columns={"CountryCode_Life": "CountryCode"})
)

#Infant Mortality
infant_mortality_df = (
    infant_mortality_df
    .add_suffix("_Mortality")
    .rename(columns={"CountryCode_Mortality": "CountryCode"})
)

#Exports
exports_df = (
    exports_df
    .add_suffix("_Exports")
    .rename(columns={"CountryCode_Exports": "CountryCode"})
)

#Imports
imports_df = (
    imports_df
    .add_suffix("_Imports")
    .rename(columns={"CountryCode_Imports": "CountryCode"})
)

In [4]:
#Merge the DataFrames
#GDP per Capita & Life Expectancy
#Inner join on 'CountryCode': only codes present in both frames are kept
summary_df = gdp_per_capita_df.merge(life_expectancy_df, on="CountryCode", how="inner")
#Write the merged table out (the same file name is reused by every merge cell)
#NOTE(review): the recorded output of this cell is a PermissionError raised on this
#write - presumably the CSV was open in another program; confirm before re-running
summary_df.to_csv("World_Development_indicators.csv")
#Display the merged table
summary_df

PermissionError: [Errno 13] Permission denied: 'World_Development_indicators.csv'

In [None]:
#Merge the DataFrames
#Summary & Infant Mortality
#Inner join on 'CountryCode'; the result replaces summary_df
summary_df = summary_df.merge(infant_mortality_df, on="CountryCode", how="inner")
#Overwrite the exported CSV with the widened table
summary_df.to_csv("World_Development_indicators.csv")
#Display the merged table
summary_df

In [None]:
#Merge the DataFrames
#Summary & Exports
#Inner join on 'CountryCode'; the result replaces summary_df
summary_df = summary_df.merge(exports_df, on="CountryCode", how="inner")
#Overwrite the exported CSV with the widened table
summary_df.to_csv("World_Development_indicators.csv")
#Display the merged table
summary_df

In [None]:
#Merge the DataFrames
#Summary & Imports
#Inner join on 'CountryCode'; the result replaces summary_df
summary_df = summary_df.merge(imports_df, on="CountryCode", how="inner")
#Overwrite the exported CSV with the widened table
summary_df.to_csv("World_Development_indicators.csv")
#Display the merged table
summary_df

In [None]:
#Create a Scatter Plot of Exports vs GDP per Capita
#Drop Null Values
#NOTE: the plots below do not read summary_df1; each year masks its own NaN pairs instead
summary_df1 = summary_df.dropna(how = "any")


def _plot_exports_vs_gdp(data, year, point_color, annotate_xy, y_label, png_name):
    """Scatter one year's Exports against GDP per Capita with its own fitted line.

    Parameters
    ----------
    data : DataFrame
        Must hold the columns '<year>_GDP_Cap' and '<year>_Exports'.
    year : int or str
        Year prefix of the two columns to plot.
    point_color : str
        Face color of the scatter markers.
    annotate_xy : tuple
        (x, y) position, in data coordinates, of the line-equation text.
    y_label : str
        Label for the y axis.
    png_name : str
        File name the figure is saved under.

    Returns
    -------
    tuple
        (mask, fit, regress_values, line_eq): the boolean mask of rows where both
        values are present, the linregress result, the fitted y value for every
        row of `data`, and the line-equation text drawn on the plot.
    """
    gdp_cap = data[f"{year}_GDP_Cap"]
    exports = data[f"{year}_Exports"]

    #Apply Mask to Handle NaN Data
    #Code Found on https://stackoverflow.com/questions/13643363/linear-regression-of-arrays-containing-nans-in-python-numpy
    valid = ~np.isnan(gdp_cap) & ~np.isnan(exports)
    #Make the Regression Parameters (fit only on rows where both values exist)
    fit = linregress(gdp_cap[valid], exports[valid])
    (fit_slope, fit_intercept, fit_rvalue, _, _) = fit
    #Calculate the Regress Values
    regress_values = fit_slope * gdp_cap + fit_intercept
    #Create the Line Equation
    line_eq = "y= " + str(round(fit_slope,0)) + "x+ " + str(round(fit_intercept,0))

    #Plot the Export vs GDP per Capita Data
    plt.scatter(gdp_cap, exports, c = point_color, edgecolors = "black")
    #Plot the Regress Values (always the line fitted for this same year)
    plt.plot(gdp_cap, regress_values, c = "red")
    #Annotate the Line Equation
    plt.annotate(line_eq, xy = annotate_xy, fontsize = 15, color = "red")
    #Create the Labels
    plt.title(f"Exports vs GDP per Capita - {year}")
    plt.xlabel("GDP per Capita (Current US$)")
    plt.ylabel(y_label)
    #Display the R squared Value
    #linregress returns the correlation coefficient r, so it is squared here
    print(f"The Rsquared value is {round(fit_rvalue**2,2)}.")
    #Save the Plot as a PNG
    plt.savefig(png_name)
    #Show the Plot
    plt.show()

    return valid, fit, regress_values, line_eq


#1990
#The regression names are unpacked at cell level so they stay available to other cells
mask, (slope, intercept, rvalue, pvalue, stderr), regress_1990, line_eq_1990 = _plot_exports_vs_gdp(
    summary_df,
    1990,
    "blue",
    (15000, 2.5*10**12),
    "Exports of Goods and Services (Current US$) * 10**12",
    "Exports_vs_GDP_per_Capita_1990.png",
)


#2017
mask, (slope, intercept, rvalue, pvalue, stderr), regress_2017, line_eq_2017 = _plot_exports_vs_gdp(
    summary_df,
    2017,
    "red",
    (40000, 1.0*10**13),
    "Exports of Goods and Services (Current US$) * 10 **12",
    "Exports_vs_GDP_per_Capita_2017.png",
)

In [None]:
#Create a Scatter Plot of Imports vs GDP per Capita
#Pull out the two 1990 series being compared
gdp_cap_1990 = summary_df['1990_GDP_Cap']
imports_1990 = summary_df['1990_Imports']
#Plot the Imports vs GDP per Capita Data
plt.scatter(
    gdp_cap_1990,
    imports_1990,
    c="blue",
    edgecolors="black",
)
#Labels
plt.title("Imports vs GDP per Capita - 1990")
plt.xlabel("GDP per Capita (Current US$)")
plt.ylabel("Imports of Goods and Services (Current US$) * 10**12")
#Save the Plot as a PNG
plt.savefig("Imports_vs_GDP_per_Capita_1990.png")
#Show the Plot
plt.show()


