# [INDEX](#INDEX)
* [Time_Series](#Time_Series)
* [Country_Code](#Country_Code)
* [Day_Change](#Day_Change)
* [Death_Rate](#Death_Rate)
* [Country_Profiles](#Country_Profiles)
* [gapminder](#gapminder)
* [US_by_County](#US_by_County)
* [UPLOAD](#UPLOAD)


In [1]:
%matplotlib inline
import numpy as np
import scipy.stats as stats
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import random
import statsmodels.api as sm
import os
import re
import io
import requests
import sys
import math

sns.set(style="darkgrid")

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)


# [Time_Series](#INDEX)

In [2]:

url="https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
s=requests.get(url).content
data_confirmed=pd.read_csv(io.StringIO(s.decode('utf-8')))

url="https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv"
s=requests.get(url).content
data_death= pd.read_csv(io.StringIO(s.decode('utf-8')))

url="https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv"
s=requests.get(url).content
data_recovered= pd.read_csv(io.StringIO(s.decode('utf-8')))

In [3]:
last_day= data_confirmed.columns[-1]
first_day= data_confirmed.columns[4]
days = data_confirmed.columns[4::]

# [Country_Code](#INDEX)

In [4]:
import io
import requests
url="https://raw.githubusercontent.com/lukes/ISO-3166-Countries-with-Regional-Codes/master/all/all.csv"
s=requests.get(url).content
country_code=pd.read_csv(io.StringIO(s.decode('utf-8')))
#country_code.to_csv('./resources/country_codes.csv')

In [5]:
country_code.loc[country_code['alpha-3']== 'GBR', 'name'] = 'United Kingdom'
country_code.loc[country_code['alpha-3']== 'USA', 'name'] = 'US'
country_code.loc[country_code['alpha-3']== 'KOR', 'name'] = 'Korea, South'
country_code.loc[country_code['alpha-3']== 'IRN', 'name'] = 'Iran'
country_code.loc[country_code['alpha-3']== 'VEN', 'name'] = 'Venezuela'
country_code.loc[country_code['alpha-3']== 'BOL', 'name'] = 'Bolivia'
country_code.loc[country_code['alpha-3']== 'BRN', 'name'] = 'Brunei'
country_code.loc[country_code['alpha-3']== 'LAO', 'name'] = 'Laos'
country_code.loc[country_code['alpha-3']== 'COG', 'name'] = 'Congo (Kinshasa)'
country_code.loc[country_code['alpha-3']== 'COD', 'name'] = 'Congo (Brazzaville)'
country_code.loc[country_code['alpha-3']== 'RUS', 'name'] = 'Russia'
country_code.loc[country_code['alpha-3']== 'CIV', 'name'] = 'Cote d\'Ivoire'
country_code.loc[country_code['alpha-3']== 'MDA', 'name'] = 'Moldova'
country_code.loc[country_code['alpha-3']== 'TWN', 'name'] = 'Taiwan*'
country_code.loc[country_code['alpha-3']== 'TZA', 'name'] = 'Tanzania'
country_code.loc[country_code['alpha-3']== 'VNM', 'name'] = 'Vietnam'
country_code.loc[country_code['alpha-3']== 'SYR', 'name'] = 'Syria'
country_code.loc[country_code['alpha-3']== 'PSE', 'name'] = 'West Bank and Gaza'
country_code.loc[country_code['alpha-3']== 'SRB', 'name'] = 'Serbia'
country_code.loc[country_code['alpha-3']== 'MMR', 'name'] = 'Burma'

In [6]:
confirmed =pd.merge(country_code[['name','alpha-3']], data_confirmed,  how='right', right_on='Country/Region', left_on = 'name')
deaths =pd.merge(country_code[['name','alpha-3']], data_death,  how='right', right_on='Country/Region', left_on = 'name')
recovered =pd.merge(country_code[['name','alpha-3']], data_recovered,  how='right', right_on='Country/Region', left_on = 'name')
# manually adding country_codes for Kosovo
confirmed.loc[confirmed['Country/Region']=='Kosovo','alpha-3'] = 'XKS'
confirmed[confirmed['alpha-3'].isnull()] # We are going to ingore these since they are boats, not countries

Unnamed: 0,name,alpha-3,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,2/1/20,2/2/20,2/3/20,2/4/20,2/5/20,2/6/20,2/7/20,2/8/20,2/9/20,2/10/20,2/11/20,2/12/20,2/13/20,2/14/20,2/15/20,2/16/20,2/17/20,2/18/20,2/19/20,2/20/20,2/21/20,2/22/20,2/23/20,2/24/20,2/25/20,2/26/20,2/27/20,2/28/20,2/29/20,3/1/20,3/2/20,3/3/20,3/4/20,3/5/20,3/6/20,3/7/20,3/8/20,3/9/20,3/10/20,3/11/20,3/12/20,3/13/20,3/14/20,3/15/20,3/16/20,3/17/20,3/18/20,3/19/20,3/20/20,3/21/20,3/22/20,3/23/20,3/24/20,3/25/20,3/26/20,3/27/20,3/28/20,3/29/20,3/30/20,3/31/20,4/1/20,4/2/20,4/3/20,4/4/20,4/5/20,4/6/20,4/7/20,4/8/20,4/9/20,4/10/20,4/11/20,4/12/20,4/13/20,4/14/20,4/15/20,4/16/20,4/17/20,4/18/20,4/19/20,4/20/20,4/21/20,4/22/20,4/23/20,4/24/20,4/25/20,4/26/20,4/27/20,4/28/20,4/29/20,4/30/20,5/1/20,5/2/20,5/3/20,5/4/20,5/5/20,5/6/20,5/7/20,5/8/20,5/9/20
263,,,,Diamond Princess,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,61,61,64,135,135,175,175,218,285,355,454,542,621,634,634,634,691,691,691,705,705,705,705,705,705,706,706,706,706,706,706,706,706,706,706,706,706,706,706,706,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712
265,,,,MS Zaandam,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9


In [64]:
confirmed_ = confirmed.iloc[:,1::]
confirmed_.to_csv('./resources/confirmed_time_series.csv')
deaths_ = deaths.iloc[:,1::]
deaths_.to_csv('./resources/deaths_time_series.csv')
recovered_ = recovered.iloc[:,1::]
recovered_.to_csv('./resources/recovered_time_series.csv')

In [8]:
#confirmed_.groupby(['alpha-3', 'Country/Region']).mean().reset_index().drop(['alpha-3','Lat', 'Long'], axis = 1)

In [9]:
# Latest Confirmed Numbers
confirmed_last = confirmed_.groupby(['alpha-3', 'Country/Region']).mean().reset_index().iloc[:,[0,1,2,3,-1]] 

In [10]:
# Latest Deaths Numbers
deaths_last = deaths_.groupby(['alpha-3', 'Country/Region']).mean().reset_index().iloc[:,[0,1,2,3,-1]]

In [11]:
# Latest Recovered Numbers
recovered_last = recovered_.groupby(['alpha-3', 'Country/Region']).mean().reset_index().iloc[:,[0,1,2,3,-1]]

In [12]:
last_day= confirmed_last.columns[-1]
top_20 = confirmed_last.sort_values(by= last_day,ascending = False).iloc[0:20,:].set_index(keys='Country/Region')
top_20

Unnamed: 0_level_0,alpha-3,Lat,Long,5/9/20
Country/Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
US,USA,37.0902,-95.7129,1309550.0
Spain,ESP,40.0,-4.0,223578.0
Italy,ITA,43.0,12.0,218268.0
Russia,RUS,60.0,90.0,198676.0
Germany,DEU,51.0,9.0,171324.0
Brazil,BRA,-14.235,-51.9253,156061.0
Turkey,TUR,38.9637,35.2433,137115.0
Iran,IRN,32.0,53.0,106220.0
Peru,PER,-9.19,-75.0152,65015.0
India,IND,21.0,78.0,62808.0


# [Day_Change](#INDEX)

In [13]:
confirmed_day_change=pd.DataFrame()
for i in range(len(days)-1):
    confirmed_day_change[days[i+1]]= confirmed[days[i+1]] -confirmed[days[i]]
confirmed_day_change_ = pd.concat((confirmed_.iloc[:,0:5], confirmed_day_change), axis = 1)
confirmed_day_change_.to_csv('./resources/confirmed_day_change.csv')

In [14]:
deaths_day_change=pd.DataFrame()
for i in range(len(days)-1):
    deaths_day_change[days[i+1]]= deaths[days[i+1]] -deaths[days[i]]
deaths_day_change_ = pd.concat((deaths_.iloc[:,0:5], deaths_day_change), axis = 1)
deaths_day_change_.to_csv('./resources/deaths_day_change.csv')

In [15]:
recovered_day_change=pd.DataFrame()
for i in range(len(days)-1):
    recovered_day_change[days[i+1]]= recovered[days[i+1]] -recovered[days[i]]
recovered_day_change_ = pd.concat((recovered_.iloc[:,0:5], recovered_day_change), axis = 1)
recovered_day_change_.to_csv('./resources/recovered_day_change.csv')

In [16]:
confirmed_day_change_last = confirmed_day_change_.groupby(['alpha-3', 'Country/Region']).mean().reset_index().iloc[:,[0,1,2,3,-1]]

# [Death_Rate](#INDEX)

In [17]:
deaths_rate = pd.concat((deaths_.iloc[:, 0:5], deaths_.iloc[:,5::]/(confirmed_.iloc[:,5::]+1)), axis = 1)
deaths_rate.to_csv('./resources/deaths_rate.csv')

In [18]:
deaths_rate.sort_values(by= last_day, ascending = False ).iloc[:, [0,1,2,3,4, -1]].head()

Unnamed: 0,alpha-3,Province/State,Country/Region,Lat,Long,5/9/20
49,CAN,Diamond Princess,Canada,0.0,0.0,0.5
189,NIC,,Nicaragua,12.8654,-85.2072,0.294118
265,,,MS Zaandam,0.0,0.0,0.2
260,YEM,,Yemen,15.552727,48.516388,0.2
185,NLD,Sint Maarten,Netherlands,18.0425,-63.0548,0.194805


In [19]:
# Latest Deaths Rate
deaths_rate_last = deaths_rate.groupby('alpha-3').mean().reset_index().iloc[:,[0,1,2,-1]]

In [20]:
latest_summary = pd.merge(pd.merge(pd.merge(pd.merge(confirmed_last,
                                            deaths_last.iloc[:,[0,-1]],on='alpha-3'),
                                   recovered_last.iloc[:,[0,-1]],on='alpha-3'), 
                          deaths_rate_last.iloc[:,[0,-1]],on='alpha-3'),
                confirmed_day_change_last.iloc[:,[0,-1]],on='alpha-3')
latest_summary.columns = ['alpha-3', 'Country/Region', 'Lat','Long','Confirmed', 'Deaths', 'Recovered', 'Deaths_Rate', 'Day_Change']
latest_summary

Unnamed: 0,alpha-3,Country/Region,Lat,Long,Confirmed,Deaths,Recovered,Deaths_Rate,Day_Change
0,AFG,Afghanistan,33.0,65.0,4033.0,115.0,502.0,0.028508,255.0
1,AGO,Angola,-11.2027,17.8739,43.0,2.0,13.0,0.045455,0.0
2,ALB,Albania,41.1533,20.1683,856.0,31.0,627.0,0.036173,6.0
3,AND,Andorra,42.5063,1.5218,754.0,48.0,545.0,0.063576,2.0
4,ARE,United Arab Emirates,24.0,54.0,17417.0,185.0,4295.0,0.010621,624.0
5,ARG,Argentina,-38.4161,-63.6167,5776.0,300.0,1728.0,0.05193,165.0
6,ARM,Armenia,40.0691,45.0382,3175.0,44.0,1267.0,0.013854,146.0
7,ATG,Antigua and Barbuda,17.0608,-61.7964,25.0,3.0,19.0,0.115385,0.0
8,AUS,Australia,-31.996188,141.232788,867.375,12.125,767.625,0.0178,2.625
9,AUT,Austria,47.5162,14.5501,15833.0,615.0,13928.0,0.03884,59.0


# [Country_Profiles](#INDEX)
We need to do this only once.
Skip to the next line. 

In [21]:
Country_Profile_ = pd.read_csv("C:/Users/ituki/Documents/Projects/COVID19/resources/World Bank Extracts/World_Bank_Country_Profile.csv")

In [22]:
Country_Profile = Country_Profile_.iloc[0:11352, :]
measurements_list = dict(zip(Country_Profile_['Series Name'].unique(), Country_Profile_['Series Code'].unique()))
measurements_list

{'Agriculture, forestry, and fishing, value added (% of GDP)': 'NV.AGR.TOTL.ZS',
 'Annual freshwater withdrawals, total (% of internal resources)': 'ER.H2O.FWTL.ZS',
 'CO2 emissions (metric tons per capita)': 'EN.ATM.CO2E.PC',
 'GDP (current US$)': 'NY.GDP.MKTP.CD',
 'GDP growth (annual %)': 'NY.GDP.MKTP.KD.ZG',
 'GNI per capita, Atlas method (current US$)': 'NY.GNP.PCAP.CD',
 'GNI, PPP (current international $)': 'NY.GNP.MKTP.PP.CD',
 'Immunization, measles (% of children ages 12-23 months)': 'SH.IMM.MEAS',
 'Income share held by lowest 20%': 'SI.DST.FRST.20',
 'Industry (including construction), value added (% of GDP)': 'NV.IND.TOTL.ZS',
 'Inflation, GDP deflator (annual %)': 'NY.GDP.DEFL.KD.ZG',
 'Life expectancy at birth, total (years)': 'SP.DYN.LE00.IN',
 'Merchandise trade (% of GDP)': 'TG.VAL.TOTL.GD.ZS',
 'Military expenditure (% of GDP)': 'MS.MIL.XPND.GD.ZS',
 'Population density (people per sq. km of land area)': 'EN.POP.DNST',
 'Population, total': 'SP.POP.TOTL',
 'Poverty h

In [23]:
names = Country_Profile['Country Name']
names = pd.Series(names).replace({'United States' :'US', 
                                  'Iran, Islamic Rep.': 'Iran', 
                                  'Russian Federation':'Russia', 
                                  'Korea, Rep.' : 'Korea, South'})

* Total Population (SP.POP.TOTL)
* Urban population (SP.URB.TOTL)
* Urban area 
* Urban population growth (SP.URB.GROW)
* Urban population density
* Rural Population density
* Total Population density (EN.POP.DNST)
* GDP PPP (NY.GDP.MKTP.PP.CD)
* Life Expectancy at birth  (SP.DYN.LE00.IN)
* Population over 65  (SP.POP.65UP.TO)
* Coverage of Health Insurance (%) (per_si_allsi.cov_pop_tot)
* Coverage of Social Protection (%) (per_allsp.cov_pop_tot)
* Hospital Beds / 1000 ppl (SH.MED.BEDS.ZS)
* Physician / 1000 ppl  (SH.MED.PHYS.ZS)
* Vulnerable employment, total (% of total employment) (SL.EMP.VULN.ZS)

In [24]:
Total_Pop = Country_Profile[Country_Profile.loc[:,'Series Code']=='SP.POP.TOTL'].reset_index().iloc[:, 1::]

In [25]:
Urban_Pop = Country_Profile[Country_Profile.loc[:,'Series Code']=='SP.URB.TOTL'].reset_index().iloc[:, 1::]

In [26]:
Urban_Pop_Growth = Country_Profile[Country_Profile.loc[:,'Series Code']=='SP.URB.GROW'].reset_index().iloc[:, 1::]

In [27]:
Pop_Desity = Country_Profile[Country_Profile.loc[:,'Series Code']=='EN.POP.DNST'].reset_index().iloc[:, 1::]

In [28]:
GNI_Per_Capita = Country_Profile[Country_Profile.loc[:,'Series Code']=='NY.GNP.PCAP.CD'].reset_index().iloc[:, 1::]

In [29]:
Life_Expectancy = Country_Profile[Country_Profile.loc[:,'Series Code']=='SP.DYN.LE00.IN'].reset_index().iloc[:, 1::]

In [30]:
Pop_Over_65 = Country_Profile[Country_Profile.loc[:,'Series Code']=='SP.POP.65UP.TO'].reset_index().iloc[:, 1::]

In [31]:
Health_Coverage = Country_Profile[Country_Profile.loc[:,'Series Code']== 'per_si_allsi.cov_pop_tot'].reset_index().iloc[:, 1::]

In [32]:
Social_Coverage = Country_Profile[Country_Profile.loc[:,'Series Code']== 'per_allsp.cov_pop_tot'].reset_index().iloc[:, 1::]

In [33]:
Hospital_beds_per_1000 = Country_Profile[Country_Profile.loc[:,'Series Code']== 'SH.MED.BEDS.ZS'].reset_index().iloc[:, 1::]

In [34]:
Physician_per_1000 = Country_Profile[Country_Profile.loc[:,'Series Code']== 'SH.MED.PHYS.ZS'].reset_index().iloc[:, 1::]

In [35]:
Vulnerable_emp = Country_Profile[Country_Profile.loc[:,'Series Code']== 'SL.EMP.VULN.ZS'].reset_index().iloc[:, 1::]

In [36]:
# Function for getting the latest data for each country
def get_latest(df):
    latest = []
    n = len(df)
    for i in range(n):
        row = df.iloc[i,:]
        m = len(row)-1
        j= 0
        while math.isnan(row[m-1-j])&(j<m-5):
            j= j+1
        latest.append(row[m-1-j])
    return latest


In [37]:
Total_Pop_last =  pd.DataFrame(get_latest(Total_Pop), columns=['Total_Pop'])

In [38]:
Pop_Over_65_last =  pd.DataFrame(get_latest(Pop_Over_65), columns=['Pop_Over_65'])

In [39]:
Urban_Pop_last =  pd.DataFrame(get_latest(Urban_Pop), columns=['Urban_Pop'])

In [40]:
Life_Expectancy_last =  pd.DataFrame(get_latest(Life_Expectancy), columns=['Life_Expectancy'])

In [41]:
Pop_Desity_last =  pd.DataFrame(get_latest(Pop_Desity), columns=['Pop_Desity'])

In [42]:
GNI_Per_Capita_last = pd.DataFrame(get_latest(GNI_Per_Capita), columns=['GNI_Per_Capita'])

In [43]:
Health_Coverage_last = pd.DataFrame(get_latest(Health_Coverage), columns=['Health_Coverage'])

In [44]:
Social_Coverage_last = pd.DataFrame(get_latest(Social_Coverage), columns=['Social_Coverage_Coverage'])

In [45]:
Hospital_beds_per_1000_last =  pd.DataFrame(get_latest(Hospital_beds_per_1000), columns=['Hospital_beds_per_1000'])

In [46]:
Physician_per_1000_last =  pd.DataFrame(get_latest(Physician_per_1000), columns=['Physician_per_1000'])

In [47]:
Vulnerable_emp_last =  pd.DataFrame(get_latest(Vulnerable_emp), columns=['Vulnerable_emp'])

In [48]:
Country_Profile_Latest = pd.concat([Total_Pop.iloc[:,[0,1]],
                    Total_Pop_last, 
                    Pop_Over_65_last,
                    Urban_Pop_last,
                    Life_Expectancy_last, 
                    Pop_Desity_last,
                    GNI_Per_Capita_last,
                    Health_Coverage_last,
                    Social_Coverage_last,
                    Hospital_beds_per_1000_last,
                    Physician_per_1000_last,
                    Vulnerable_emp_last
                   ],
                    axis = 1,
                    ignore_index = True
                     )
Country_Profile_Latest.columns = ['Country Name', 
                  'Country Code',
                  'Total_Pop',
                  'Pop_Over_65',
                  'Urban_Pop',
                  'Life_Expectancy',
                  'Pop_Desity',
                  'GNI_Per_Capita',
                  'Health_Coverage_last',
                  'Social_Coverage_last',
                  'Hospital_beds_per_1000_last',
                  'Physician_per_1000_last',
                  'Vulnerable_emp_last'
                 ]

In [49]:
Country_Profile_Latest['Country Name']= Country_Profile_Latest['Country Name'].replace({'United States' :'US', 
                                  'Iran, Islamic Rep.': 'Iran', 
                                  'Russian Federation':'Russia', 
                                  'Korea, Rep.' : 'Korea, South'})

In [50]:
pd.DataFrame.to_csv(Country_Profile_Latest, './resources/Country_Profile.csv')

# [gapminder](#INDEX)

In [51]:
gm_temp = confirmed_.groupby(['alpha-3', 'Country/Region']).mean().reset_index().drop(['alpha-3','Lat', 'Long'], axis = 1)

In [52]:
colnames = gm_temp.columns
dates = colnames[1::]
#dates

In [53]:
gm_confirmed = pd.melt(gm_temp, id_vars= ['Country/Region'], value_vars= dates, value_name='confirmed', var_name='date')
#gm_confirmed

In [54]:
gm_deaths_temp = deaths_.groupby(['alpha-3', 'Country/Region']).mean().reset_index().drop(['alpha-3','Lat', 'Long'], axis = 1)
gm_deaths = pd.melt(gm_deaths_temp, id_vars= ['Country/Region'], value_vars= dates, value_name='deaths', var_name='date')
#gm_deaths

In [55]:
gm_df = pd.merge(gm_deaths, gm_confirmed,left_on=['Country/Region', 'date'], right_on=['Country/Region', 'date'])
#gm_df

In [56]:
gapminder_df = pd.merge(gm_df, Country_Profile_Latest, how='left', left_on='Country/Region', right_on='Country Name').drop('Country Name', axis =1)
gapminder_df = gapminder_df.drop(["Country Code","Health_Coverage_last", "Social_Coverage_last", "Hospital_beds_per_1000_last","Physician_per_1000_last" ], axis = 1)

In [57]:
gapminder_df['Total_Pop']= gapminder_df['Total_Pop'].fillna(gapminder_df['Total_Pop'].median())
gapminder_df['Urban_Pop']= gapminder_df['Urban_Pop'].fillna(gapminder_df['Urban_Pop'].median())
gapminder_df['Pop_Over_65']= gapminder_df['Pop_Over_65'].fillna(gapminder_df['Pop_Over_65'].median())
gapminder_df['Life_Expectancy']= gapminder_df['Life_Expectancy'].fillna(gapminder_df['Life_Expectancy'].median())
gapminder_df['Pop_Desity']= gapminder_df['Pop_Desity'].fillna(gapminder_df['Pop_Desity'].median())
gapminder_df['GNI_Per_Capita']= gapminder_df['GNI_Per_Capita'].fillna(gapminder_df['GNI_Per_Capita'].median())
gapminder_df['Vulnerable_emp_last']= gapminder_df['Vulnerable_emp_last'].fillna(gapminder_df['Vulnerable_emp_last'].median())

In [58]:
gapminder_df['confirmed'] = gapminder_df['confirmed'].replace(0,1)
gapminder_df['deaths'] = gapminder_df['deaths'].replace(0,1)
gapminder_df['log_Total_Pop'] = np.log(gapminder_df['Total_Pop']+1)
gapminder_df['log_GNI_Per_Capita'] = np.log(gapminder_df['GNI_Per_Capita']+1)
gapminder_df['log_Pop_Over_65'] = np.log(gapminder_df['Pop_Over_65']+1)

In [59]:
gapminder_df.isna().sum()

Country/Region         0
date                   0
deaths                 0
confirmed              0
Total_Pop              0
Pop_Over_65            0
Urban_Pop              0
Life_Expectancy        0
Pop_Desity             0
GNI_Per_Capita         0
Vulnerable_emp_last    0
log_Total_Pop          0
log_GNI_Per_Capita     0
log_Pop_Over_65        0
dtype: int64

In [60]:
gapminder_df.to_csv('./resources/gapminder_df.csv')

## [Data Frame for Analysis](#INDEX)

In [61]:
Country_Profile_Latest = pd.read_csv('./resources/Country_Profile.csv').iloc[:, 1::]
#Country_Profile_Latest

In [62]:
df = pd.merge(Country_Profile_Latest, latest_summary, left_on='Country Code', right_on='alpha-3')
df = df.drop(['alpha-3', 'Country/Region'], axis = 1)
df = df.replace([np.inf, -np.inf], 0)
df.to_csv('./resources/Country_Summary.csv')

In [63]:
Country_Summary = pd.read_csv('./resources/Country_Summary.csv')
Country_Summary

Unnamed: 0.1,Unnamed: 0,Country Name,Country Code,Total_Pop,Pop_Over_65,Urban_Pop,Life_Expectancy,Pop_Desity,GNI_Per_Capita,Health_Coverage_last,Social_Coverage_last,Hospital_beds_per_1000_last,Physician_per_1000_last,Vulnerable_emp_last,Lat,Long,Confirmed,Deaths,Recovered,Deaths_Rate,Day_Change
0,0,Argentina,ARG,44494500.0,4946805.0,40877099.0,76.372,16.25851,12390.0,30.590558,46.3608,5.0,3.96,21.502,-38.4161,-63.6167,5776.0,300.0,1728.0,0.05193,165.0
1,1,Australia,AUS,24992370.0,3912924.0,21496436.0,82.497561,3.249129,53230.0,,,3.8,3.5874,10.729,-31.996188,141.232788,867.375,12.125,767.625,0.0178,2.625
2,2,Brazil,BRA,209469300.0,18690609.0,181335507.0,75.456,25.061716,9140.0,30.535312,53.808711,2.2,2.1499,27.524001,-14.235,-51.9253,156061.0,10656.0,61685.0,0.068281,9167.0
3,3,China,CHN,1392730000.0,152098421.0,823827650.0,76.47,148.348833,9460.0,35.579581,63.053001,4.2,1.7855,43.828001,32.828385,111.649082,2545.152,140.515152,2397.787879,0.010969,0.424242
4,4,France,FRA,66987240.0,13420643.0,53887219.0,82.52439,122.338396,41080.0,,,6.5,3.2349,7.442,8.305673,5.429018,16071.09,2392.090909,5104.363636,0.036566,52.727273
5,5,Germany,DEU,82927920.0,17797959.0,64113235.0,80.990244,237.37097,47090.0,,,8.3,4.2087,5.825,51.0,9.0,171324.0,7549.0,143300.0,0.044062,736.0
6,6,India,IND,1352617000.0,83591151.0,460295677.0,69.165,454.938073,2020.0,17.663517,93.988803,0.7,0.7776,76.695003,21.0,78.0,62808.0,2101.0,19301.0,0.033451,3113.0
7,7,Indonesia,IDN,267663400.0,15677491.0,148084795.0,71.282,147.75219,3840.0,8.178754,57.412937,1.2,0.3777,47.222001,-0.7893,113.9213,13645.0,959.0,2607.0,0.070277,533.0
8,8,Italy,ITA,60431280.0,13749132.0,42566587.0,83.243902,205.450748,33730.0,,,3.4,4.0931,17.0,43.0,12.0,218268.0,30395.0,103031.0,0.139255,1083.0
9,9,Japan,JPN,126529100.0,34892133.0,115920900.0,84.099756,347.073458,41310.0,,,13.4,2.4118,8.393,36.0,138.0,15663.0,607.0,5906.0,0.038751,88.0


# [UPLOAD](#INDEX)