In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import matplotlib.patches as mpatches
from matplotlib import pyplot as plt
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from scipy.cluster.hierarchy import dendrogram
from sklearn import metrics

In [None]:
# Location of the country-boundary shapefile read by the map cells
# (the file name suggests the Natural Earth 1:110m admin-0 dataset).
shapefile = "data/ne_110m_admin_0_countries.shp"

In [None]:
# Read the country polygons, keep only the name/code/geometry columns and
# give the attribute columns short names used throughout the notebook.
gdf = (
    gpd.read_file(shapefile)
    .loc[:, ['ADMIN', 'NAME_PL', 'ADM0_A3', 'geometry']]
    .rename(columns={
        'ADMIN': 'country',
        'NAME_PL': 'country_pl',
        'ADM0_A3': 'country_code',
    })
)

In [None]:
# Preview the first rows of the country GeoDataFrame.
gdf.head()

In [None]:
# Draw every country polygon in a single grey colour as a plain base map.
gdf.plot(figsize=(20,8), color="grey")

In [None]:
# Load the COVID records from CSV; later cells read its 'cases', 'deaths',
# 'dateRep' and 'countryterritoryCode' columns.
covid_data = pd.read_csv("data/cases_covid.csv")

In [None]:
# Attach the COVID rows to the country polygons by three-letter code.
# The default inner join is spelled out: rows without a match on either
# side are dropped.
merged = gdf.merge(
    covid_data,
    how='inner',
    left_on='country_code',
    right_on='countryterritoryCode',
)
merged

In [None]:
# Summary statistics for the columns of the raw COVID table.
covid_data.describe()

In [None]:
# Largest 'cases' value in the raw table, then the merged (map) rows that
# carry exactly that value.
max_cases = covid_data['cases'].max()
merged.loc[merged['cases'].eq(max_cases)]

In [None]:
# Largest 'deaths' value in the raw table, then the merged (map) rows that
# carry exactly that value.
max_deaths = covid_data['deaths'].max()
merged.loc[merged['deaths'].eq(max_deaths)]

In [None]:
# All raw rows whose territory code is CHN.
covid_data.loc[covid_data['countryterritoryCode'] == 'CHN']

In [None]:
# First ten raw rows whose territory code is CHN.
covid_data.loc[covid_data['countryterritoryCode'].eq('CHN')].head(10)

In [None]:
# Last fifty raw rows whose territory code is CHN.
covid_data.loc[lambda frame: frame['countryterritoryCode'] == 'CHN'].tail(50)

In [None]:
# Build a date-indexed copy of the COVID table for time-series plots.
#
# The parsed dates are written only to the copy. `covid_data['dateRep']`
# keeps its original day/month/year strings, so it stays consistent with
# `merged` (built from the same strings) and with later cells that compare
# or parse 'dateRep' as text. Overwriting the column in place made those
# cells depend on whether this one had already been executed.
covid_data_date_index = (
    covid_data
    .assign(dateRep=pd.to_datetime(covid_data['dateRep'], dayfirst=True))
    .set_index('dateRep')
)

In [None]:
# Line plot of the 'deaths' column against the report-date index.
covid_data_date_index['deaths'].plot(legend=True, figsize=(20, 8))

In [None]:
# Line plot of the 'cases' column against the report-date index.
covid_data_date_index['cases'].plot(legend=True, figsize=(20, 8))

In [None]:
# Print the month number followed by the total of 'cases' over the merged
# rows of that month, for months 1 through 5.
for i in range(1, 6):
    cases_in_month = merged.loc[merged['month'] == i, 'cases']
    print(i)
    print(cases_in_month.sum())
    

In [None]:
import sys
!conda install --yes --prefix {sys.prefix} seaborn
import seaborn as sns

In [None]:
# Load the table used for the clustering cells; judging by the file name it
# presumably holds pre-scaled values (not verifiable from this notebook).
covid_data_scaler = pd.read_csv("data/covid_data_scaler.csv")

In [None]:
from mpl_toolkits.mplot3d import Axes3D

# 3D scatter of month vs. cases vs. deaths.
fig = plt.figure(figsize=(6,6))
# Create the 3D axes through the figure. Instantiating Axes3D(fig) directly
# no longer attaches the axes to the figure on current matplotlib releases,
# which leaves the rendered figure empty.
ax = fig.add_subplot(projection='3d')
ax.scatter(covid_data_scaler['month'], covid_data_scaler['cases'], covid_data_scaler['deaths'], depthshade=False)
# The x data is the 'month' column, so label it as such (was 'year').
ax.set_xlabel('month')
ax.set_ylabel('cases')
ax.set_zlabel('deaths')
# Camera elevation 39 degrees, azimuth 30 degrees.
ax.view_init(39, 30)
plt.show()

In [None]:
# Scatter of deaths (x) against cases (y) for every raw row, using the
# explicit Axes interface; axis labels are kept in Polish as in the data story.
fig, ax = plt.subplots()
ax.scatter(covid_data['deaths'], covid_data['cases'])
ax.set_xlabel('Ofiary')
ax.set_ylabel('Przypadki')
ax.set_title('Covid')

In [None]:
# Feature columns handed to the clustering cells, in (x, y) plotting order.
covid = ["deaths", "cases"]

In [None]:
def print_clustering(covid_data_scaler, labels):
    """Scatter-plot the first two columns of a frame, one marker per cluster.

    Parameters
    ----------
    covid_data_scaler : pandas.DataFrame
        Frame whose first column is drawn on the x axis and second on the y axis.
    labels : sequence of int
        Cluster label for each row, in row order (e.g. ``KMeans.labels_``).

    Labels 0-6 map to the markers ``v * + x D o p``; any other label falls
    back to ``*`` instead of raising ``KeyError``. All points are blue.
    """
    # Use every available marker (the previous range(5) silently dropped two).
    labels_and_styles = dict(enumerate("v*+xDop"))

    labels = np.asarray(labels)
    xs = covid_data_scaler.iloc[:, 0].to_numpy()
    ys = covid_data_scaler.iloc[:, 1].to_numpy()

    # One scatter call per cluster instead of one per point.
    for lab in np.unique(labels):
        members = labels == lab
        marker = labels_and_styles.get(lab, '*')
        plt.scatter(xs[members], ys[members], marker=marker, c='b')

    plt.show()

In [None]:
# Three-cluster k-means on the (deaths, cases) features.
# random_state makes the cluster assignment reproducible across runs;
# n_init is pinned so the result does not depend on the scikit-learn
# version's default number of restarts.
k_3 = KMeans(n_clusters=3, n_init=10, random_state=42)
k_3.fit(covid_data_scaler[covid])
print_clustering(covid_data_scaler[covid], k_3.labels_)

In [None]:
# Four-cluster k-means on the (deaths, cases) features.
# random_state makes the cluster assignment reproducible across runs;
# n_init is pinned so the result does not depend on the scikit-learn
# version's default number of restarts.
k_4 = KMeans(n_clusters=4, n_init=10, random_state=42)
k_4.fit(covid_data_scaler[covid])
print_clustering(covid_data_scaler[covid], k_4.labels_)

In [None]:
# Two-cluster k-means on the (deaths, cases) features.
# random_state makes the cluster assignment reproducible across runs;
# n_init is pinned so the result does not depend on the scikit-learn
# version's default number of restarts.
k_2 = KMeans(n_clusters=2, n_init=10, random_state=42)
k_2.fit(covid_data_scaler[covid])
print_clustering(covid_data_scaler[covid], k_2.labels_)

In [None]:
# Distinct country codes present in the base map.
all_countries_on_map = gdf['country_code'].unique()

In [None]:
# Country codes that have a merged row dated '01/01/2020'.
countries_on_date = merged.loc[merged['dateRep'].eq('01/01/2020'), 'country_code'].tolist()

In [None]:
# Map countries that have no row for the selected date.
countries_with_no_data = list(set(all_countries_on_map).difference(set(countries_on_date)))

In [None]:
# Distinct country codes present in the base map, in first-seen order
# (recomputes the value an earlier cell already assigned to this name).
all_countries_on_map = gdf['country_code'].drop_duplicates().to_numpy()

In [None]:
import sys
!conda install --yes --prefix {sys.prefix} imageio

In [None]:
from datetime import datetime

# Chronologically sorted list of distinct report dates as 'dd/mm/YYYY'
# strings (the format the map loops compare against merged['dateRep']).
#
# pd.to_datetime accepts both the original day-first strings and
# already-parsed datetimes, so this cell works whether or not an earlier
# cell converted covid_data['dateRep']; datetime.strptime raised TypeError
# on the converted column.
all_dates = covid_data['dateRep'].unique().tolist()
parsed_dates = (
    pd.to_datetime(covid_data['dateRep'], dayfirst=True)
    .dropna()
    .drop_duplicates()
    .sort_values()
)
dates = parsed_dates.dt.strftime("%d/%m/%Y").tolist()

In [None]:
import imageio

# Render one world map of daily deaths per report date, save each as a PNG
# and combine the frames into an animated GIF.
images = []
for date in dates:
    # Rows reported on this date, and the map countries with no row for it.
    daily = merged[merged['dateRep'] == date]
    countries_on_date = daily['country_code'].tolist()
    countries_with_no_data = list(set(all_countries_on_map) - set(countries_on_date))

    # Base choropleth onto which the death counts are drawn.
    fig, ax = plt.subplots(figsize=(20,8))
    daily.plot(column='deaths', legend=False, ax=ax, cmap="RdYlGn_r")

    # Extra legend entry that represents missing data.
    no_data_label = mpatches.Patch(color='grey', label='No data')
    ax.legend(handles=[no_data_label], loc='lower center')

    ax.set_title("Daily deaths: " + date)
    ax.axis('off')

    # Draw all countries lacking data for this date in grey with a single
    # plot call instead of one call per country.
    no_data = gdf[gdf['country_code'].isin(countries_with_no_data)]
    if not no_data.empty:
        no_data.plot(ax=ax, color="grey")

    # Export the frame to PNG, then release the figure: without close()
    # every frame's figure stays alive for the rest of the session.
    # Closed figures are not echoed inline; the PNG/GIF files are the output.
    filename = (date.replace("/", "_") + "_123.png")
    fig.savefig(filename, dpi=300)
    plt.close(fig)
    images.append(imageio.imread(filename))
imageio.mimsave('timelapse_deaths_daily.gif', images)

In [None]:
# Inline preview of the deaths-per-population maps; nothing is written to
# disk here (the export lines at the bottom are intentionally disabled).
# import imageio
# images = []

# Loop-invariant, so computed once rather than on every iteration.
merged['deaths_per_pop'] = merged['deaths']/merged['popData2018']

# `list_of_dates` is not defined anywhere in this notebook; iterate over the
# sorted `dates` list built earlier so the cell runs on a fresh kernel.
for date in dates:
    daily = merged[merged['dateRep'] == date]
    countries_on_date = daily['country_code'].tolist()
    countries_with_no_data = list(set(all_countries_on_map) - set(countries_on_date))

    fig, ax = plt.subplots(figsize=(20,8))
    daily.plot(column='deaths_per_pop', legend=True, ax=ax, cmap="RdYlGn_r")

    # Extra legend entry that represents missing data.
    no_data_label = mpatches.Patch(color='grey', label='No data')
    ax.legend(handles=[no_data_label], loc='lower center')
    ax.axis('off')

    # Grey overlay for countries without a row on this date (single call).
    no_data = gdf[gdf['country_code'].isin(countries_with_no_data)]
    if not no_data.empty:
        no_data.plot(ax=ax, color="grey")

    # Show the frame now and release it so figures do not pile up in memory.
    plt.show()
    plt.close(fig)

#     filename = (date.replace("/", "_") + "_123.png")
#     fig.savefig(filename, dpi=300)
#     images.append(imageio.imread(filename))
# imageio.mimsave('123.gif', images)

In [None]:
import imageio

# Render one world map of daily deaths divided by population per report
# date, save each as a PNG and combine the frames into an animated GIF.
images = []
merged['deaths_per_pop'] = merged['deaths']/merged['popData2018']
for date in dates:
    # Rows reported on this date, and the map countries with no row for it.
    daily = merged[merged['dateRep'] == date]
    countries_on_date = daily['country_code'].tolist()
    countries_with_no_data = list(set(all_countries_on_map) - set(countries_on_date))

    # Base choropleth onto which the per-population values are drawn.
    fig, ax = plt.subplots(figsize=(20,8))
    daily.plot(column='deaths_per_pop', legend=False, ax=ax, cmap="RdYlGn_r")

    # Extra legend entry that represents missing data.
    no_data_label = mpatches.Patch(color='grey', label='No data')
    ax.legend(handles=[no_data_label], loc='lower center')

    ax.set_title("Daily deaths per country population: " + date)
    ax.axis('off')

    # Draw all countries lacking data for this date in grey with a single
    # plot call instead of one call per country.
    no_data = gdf[gdf['country_code'].isin(countries_with_no_data)]
    if not no_data.empty:
        no_data.plot(ax=ax, color="grey")

    # Export the frame to PNG, then release the figure: without close()
    # every frame's figure stays alive for the rest of the session.
    # Closed figures are not echoed inline; the PNG/GIF files are the output.
    filename = ("abc_" + date.replace("/", "_") + ".png")
    fig.savefig(filename, dpi=300)
    plt.close(fig)
    images.append(imageio.imread(filename))
imageio.mimsave('2_timelapse_deaths_per_pop_daily.gif', images)

In [None]:
import imageio

# Render one world map of cumulative deaths per report date, save each as a
# PNG and combine the frames into an animated GIF.
images = []

# Running total of deaths per country in true chronological order.
# Sorting by the parsed report date removes the previous reliance on the
# rows happening to be stored newest-first; the result is aligned back to
# `merged` by index, so the row order of `merged` itself is unchanged.
report_day = pd.to_datetime(merged['dateRep'], dayfirst=True)
chronological = merged.assign(_report_day=report_day).sort_values('_report_day', kind='mergesort')
merged = merged.assign(
    cumulative_deaths=chronological.groupby('country_code')['deaths'].cumsum()
)

for date in dates:
    # Rows reported on this date, and the map countries with no row for it.
    daily = merged[merged['dateRep'] == date]
    countries_on_date = daily['country_code'].tolist()
    countries_with_no_data = list(set(all_countries_on_map) - set(countries_on_date))

    # Base choropleth onto which the cumulative counts are drawn.
    fig, ax = plt.subplots(figsize=(20,8))
    daily.plot(column='cumulative_deaths', legend=False, ax=ax, cmap="RdYlGn_r")

    # Extra legend entry that represents missing data.
    no_data_label = mpatches.Patch(color='grey', label='No data')
    ax.legend(handles=[no_data_label], loc='lower center')

    ax.set_title("Cumulative deaths for each country: " + date)
    ax.axis('off')

    # Draw all countries lacking data for this date in grey with a single
    # plot call instead of one call per country.
    no_data = gdf[gdf['country_code'].isin(countries_with_no_data)]
    if not no_data.empty:
        no_data.plot(ax=ax, color="grey")

    # Export the frame to PNG, then release the figure: without close()
    # every frame's figure stays alive for the rest of the session.
    # Closed figures are not echoed inline; the PNG/GIF files are the output.
    filename = ("abc_" + date.replace("/", "_") + ".png")
    fig.savefig(filename, dpi=300)
    plt.close(fig)
    images.append(imageio.imread(filename))
imageio.mimsave('timelapse_cumulative_deaths.gif', images)

In [None]:
import imageio

# Render one world map of cumulative deaths divided by cumulative cases per
# report date, save each as a PNG and combine the frames into an animated
# GIF. Requires the 'cumulative_deaths' column added by the preceding cell.
images = []

# Running total of cases per country in true chronological order (sorted by
# parsed report date rather than relying on newest-first row storage); the
# result is aligned back to `merged` by index.
report_day = pd.to_datetime(merged['dateRep'], dayfirst=True)
chronological = merged.assign(_report_day=report_day).sort_values('_report_day', kind='mergesort')
merged = merged.assign(
    cumulative_cases=chronological.groupby('country_code')['cases'].cumsum()
)

# Ratio is left as NaN where no cases have accumulated yet, so a zero
# denominator cannot produce inf and distort the colour scale.
confirmed = merged['cumulative_cases']
merged['deaths_per_cases'] = merged['cumulative_deaths'] / confirmed.where(confirmed > 0)

for date in dates:
    # Rows reported on this date, and the map countries with no row for it.
    daily = merged[merged['dateRep'] == date]
    countries_on_date = daily['country_code'].tolist()
    countries_with_no_data = list(set(all_countries_on_map) - set(countries_on_date))

    # Base choropleth onto which the ratio is drawn.
    fig, ax = plt.subplots(figsize=(20,8))
    daily.plot(column='deaths_per_cases', legend=False, ax=ax, cmap="RdYlGn_r")

    # Extra legend entry that represents missing data.
    no_data_label = mpatches.Patch(color='grey', label='No data')
    ax.legend(handles=[no_data_label], loc='lower center')

    ax.set_title("Cumulative deaths per open cases: " + date)
    ax.axis('off')

    # The legend promises a grey "No data" class, so draw it: all countries
    # lacking data for this date go in grey in one plot call (the per-country
    # loop that used to do this had been commented out).
    no_data = gdf[gdf['country_code'].isin(countries_with_no_data)]
    if not no_data.empty:
        no_data.plot(ax=ax, color="grey")

    # Export the frame to PNG, then release the figure: without close()
    # every frame's figure stays alive for the rest of the session.
    # Closed figures are not echoed inline; the PNG/GIF files are the output.
    filename = ("abc_" + date.replace("/", "_") + ".png")
    fig.savefig(filename, dpi=300)
    plt.close(fig)
    images.append(imageio.imread(filename))
imageio.mimsave('timelapse_deaths_per_cases.gif', images)