<a href="https://colab.research.google.com/github/Datangels/COVID19_analysis/blob/master/CoronaVirus_GLOBAL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **DEFINE LIBRARIES**

In [0]:
import pandas as pd
import numpy as np
import io
import requests
from google.colab import files
pd.options.mode.chained_assignment = None
! pip install geopandas

# **DOWNLOAD REGION DATA**

In [0]:
url_first_part = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/'

file_dates = ['02-02-2020.csv', '02-03-2020.csv', '02-04-2020.csv',
              '02-05-2020.csv', '02-06-2020.csv', '02-07-2020.csv', '02-08-2020.csv', '02-09-2020.csv', '02-10-2020.csv', '02-11-2020.csv',
              '02-12-2020.csv', '02-13-2020.csv', '02-14-2020.csv', '02-15-2020.csv', '02-16-2020.csv', '02-17-2020.csv', '02-18-2020.csv',
              '02-19-2020.csv', '02-20-2020.csv', '02-21-2020.csv', '02-22-2020.csv', '02-23-2020.csv', '02-24-2020.csv', '02-25-2020.csv',
              '02-26-2020.csv', '02-27-2020.csv', '02-28-2020.csv', '02-29-2020.csv', '03-01-2020.csv', '03-02-2020.csv', '03-03-2020.csv',
              '03-04-2020.csv', '03-05-2020.csv', '03-06-2020.csv', '03-07-2020.csv', '03-08-2020.csv', '03-09-2020.csv', '03-10-2020.csv',
              '03-11-2020.csv', '03-12-2020.csv', '03-13-2020.csv', '03-14-2020.csv', '03-15-2020.csv', '03-16-2020.csv', '03-17-2020.csv',
              '03-18-2020.csv', '03-19-2020.csv', '03-20-2020.csv', '03-21-2020.csv', '03-22-2020.csv', '03-23-2020.csv', '03-24-2020.csv']


full_data_list = []

for i in range(0, len(file_dates)):
  # print('Processing: ' + file_dates[i])
  url_complete = url_first_part + file_dates[i]
  response = requests.get(url_complete).content
  temp_df = pd.read_csv(io.StringIO(response.decode('utf-8')))
  try:
    temp_df['Last Update'] = temp_df['Last_Update']
    temp_df['Country/Region'] = temp_df['Country_Region']
    temp_df['Province/State'] = temp_df['Province_State']
  except:
    pass
  full_data_list.append(temp_df)

global_df = pd.concat(full_data_list, ignore_index=True)

# global_df.to_csv('global_df.csv')
# files.download('province_df.csv')

# **FIX AND ADAPT DATA**

In [0]:
global_df_to_manage = global_df

global_df_to_manage = global_df_to_manage.fillna(0)

global_df_to_manage['Last Update'] = global_df_to_manage['Last Update'].str[:10]

global_df_to_manage = global_df_to_manage[global_df_to_manage['Last Update'].str.contains('2020')]

global_df_to_manage = global_df_to_manage.groupby(['Last Update', 'Country/Region']).agg({'Confirmed':'sum','Deaths':'sum','Recovered':'sum'}).reset_index()

global_df_to_manage['days_from_last_obs'] = (pd.to_datetime(global_df_to_manage['Last Update'].max()) - pd.to_datetime(global_df_to_manage['Last Update'], errors='coerce')).dt.days

# **ANALYZE DATA**

In [0]:
kpi_columns = ['Confirmed', 'Deaths', 'Recovered']

global_df_to_manage_daily = pd.DataFrame()

temp_global_daily_df = global_df_to_manage.sort_values(by=['Country/Region', 'days_from_last_obs'], ascending=[False, False])

for q in range(0, len(kpi_columns)): 
  temp_global_daily_df['d_' + kpi_columns[q]] = temp_global_daily_df.groupby(['Country/Region'])[kpi_columns[q]].diff().fillna(0).replace([np.inf, -np.inf], 0)
  temp_global_daily_df['dp_' + kpi_columns[q]] = temp_global_daily_df.groupby(['Country/Region'])[kpi_columns[q]].pct_change().apply(lambda x: round(x, 2)* 100).fillna(0).replace([np.inf, -np.inf], 0)
global_df_to_manage_daily = global_df_to_manage_daily.append(temp_global_daily_df)

# **SINGLE COUNTRY RECAP**

In [148]:
COLUMNS_TO_DISPLAY = ['days_from_last_obs', 'Last Update', 'Country/Region', 'Confirmed', 'd_Confirmed', 'dp_Confirmed']

RECAP_COUNTRY = 'Argentina'

LAST_n_DAYS = 10

global_df_to_manage_recap = global_df_to_manage_daily[COLUMNS_TO_DISPLAY]
global_df_to_manage_recap[(global_df_to_manage_recap['Country/Region'] == RECAP_COUNTRY) & (global_df_to_manage_recap['days_from_last_obs'] < LAST_n_DAYS)]

Unnamed: 0,days_from_last_obs,Last Update,Country/Region,Confirmed,d_Confirmed,dp_Confirmed
1311,9,2020-03-15,Argentina,45,11.0,32.0
1400,8,2020-03-16,Argentina,56,11.0,24.0
1523,7,2020-03-17,Argentina,68,12.0,21.0
1643,6,2020-03-18,Argentina,79,11.0,16.0
1768,5,2020-03-19,Argentina,97,18.0,23.0
1894,4,2020-03-20,Argentina,128,31.0,32.0
2021,3,2020-03-21,Argentina,158,30.0,23.0
2158,1,2020-03-23,Argentina,266,108.0,68.0
2326,0,2020-03-24,Argentina,387,121.0,45.0


# **MULTI COUNTRY RECAP**

In [0]:
COLUMNS_TO_DISPLAY = ['days_from_last_obs', 'Last Update', 'Country/Region', 'Confirmed', 'd_Confirmed', 'dp_Confirmed']

RECAP_COUNTRIES = ['Mexico', 'Japan', 'South Africa', 'Australia', 'Uruguay']

DAYS_AGO = 0

global_df_to_manage_daily_recap = global_df_to_manage_daily[COLUMNS_TO_DISPLAY]
global_df_to_manage_daily_recap[(global_df_to_manage_daily_recap['days_from_last_obs'] == DAYS_AGO) & (global_df_to_manage_daily_recap['Country/Region'].isin(RECAP_COUNTRIES))]

# **SHOW PIVOT**

In [0]:
PIVOT_MAIN_KPI = 'dp_Confirmed'

PIVOT_COUNTIRES = ['Italy', 'Spain', 'Germany', 'France', 'Switzerland']

LAST_n_DAYS = 20

global_df_to_manage_daily_for_pivot = global_df_to_manage_daily.groupby(['Country/Region', 'days_from_last_obs']).last().reset_index()
global_df_to_manage_daily_pivot = global_df_to_manage_daily_for_pivot[global_df_to_manage_daily_for_pivot['Country/Region'].isin(PIVOT_COUNTIRES)].pivot(index='Country/Region', columns='days_from_last_obs', values=[PIVOT_MAIN_KPI]).fillna(0)
global_df_to_manage_daily_pivot[global_df_to_manage_daily_pivot.columns[::-1][len(list(global_df_to_manage_daily_pivot.columns))-LAST_n_DAYS:]]

# **PLOT GEODATA**

In [0]:
import os
import numpy as np
import folium
from folium import plugins
import geopandas as gpd

In [0]:
# Download Geodata
file_name = 'https://raw.githubusercontent.com/datasets/geo-countries/master/data/countries.geojson'
global_geo_data = gpd.read_file(file_name)
global_geo_data['Country/Region'] = global_geo_data['ADMIN'].str.lower()

In [0]:
KPI_MAP = 'dp_Confirmed'
DAYS_AGO = 0 # 0 is the last data point

map_global = folium.Map([41.9028, 12.4964], zoom_start=3)

popups = global_df_to_manage_daily[global_df_to_manage_daily['days_from_last_obs'] == DAYS_AGO]
popups = popups[['Country/Region', KPI_MAP]]

popups.columns = ['Country/Region', KPI_MAP]
popups['Country/Region'] = popups['Country/Region'].str.lower()

global_geo_data_short = global_geo_data.merge(popups,on="Country/Region")

global_geo_data_short = global_geo_data_short[['Country/Region', KPI_MAP, 'geometry']]

folium.Choropleth(
    geo_data=global_geo_data_short,
    data=global_geo_data_short,
    columns=['Country/Region', KPI_MAP],
    key_on='feature.properties.Country/Region',
    fill_color='YlOrRd'
).add_to(map_global)

# COLAB IS CRASHING WHEN TRYING TO ADD OVERLAP LABELS SAVE

map_global