In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from collections import Counter
%matplotlib inline

In [None]:
# Load each CSV once and work on a copy; the *_m frames stay exactly as read.
covid_m = pd.read_csv("data/covid/covid_data.csv")
covid = covid_m.copy()

vax_m = pd.read_csv("data/covid/country_vaccinations.csv")
vax = vax_m.copy()

In [None]:
# Sum the three counters for every (date, country) pair.
covid = (
    covid
    .groupby(["date", "country"], as_index=False)[["confirmed", "deaths", "recovered"]]
    .sum()
)

# Parse the dates and derive the number of active cases.
covid["date"] = pd.to_datetime(covid["date"])
covid["active"] = covid["confirmed"] - covid["deaths"] - covid["recovered"]

# Day-over-day difference of each counter within a country.
# Rows must be ordered by date inside each country for diff() to be meaningful;
# the first row of every country has no predecessor and stays NaN.
covid = covid.sort_values(by=["country", "date"])
covid = covid.assign(**{
    f"daily_{metric}": covid.groupby(by="country")[metric].diff()
    for metric in ("confirmed", "deaths", "recovered")
})

In [None]:
# Parse the dates, then keep only the listed columns (in this order).
vax = vax.assign(date=pd.to_datetime(vax["date"])).loc[
    :,
    [
        'country', 'date', 'total_vaccinations',
        'people_vaccinated', 'people_vaccinated_per_hundred',
        'people_fully_vaccinated', 'people_fully_vaccinated_per_hundred',
        'daily_vaccinations', 'vaccines',
    ],
]

# print(vax["date"].max())
# print(vax["date"].min())

In [None]:
# Left join: every case row is kept, vaccination columns are NaN where no
# vaccination record exists for that (date, country).
# Both rates are percentages of the confirmed count.
covid_df = pd.merge(covid, vax, how="left", on=["date", "country"]).assign(
    death_rate=lambda df: df["deaths"] / df["confirmed"] * 100,
    recover_rate=lambda df: df["recovered"] / df["confirmed"] * 100,
)

In [None]:
# Mean recover_rate over all rows for Russia (NaN values are skipped by mean()).
covid_df.loc[covid_df["country"] == "Russia", "recover_rate"].mean()

---

# PANDAS PLOT()

In [None]:
# Sum of daily_confirmed over all countries for each date, drawn as a line.
grouped_cases = covid_df.groupby('date')['daily_confirmed'].sum()
grouped_cases.plot.line(
    title='Ежедневная заболеваемость по всем странам',
    figsize=(12, 4),
    lw=3,
    grid=True,
);

In [None]:
# Distribution of the per-date totals computed in the previous cell, in 10 bins.
grouped_cases.plot.hist(
    bins=10,
    title='Распределение ежедневной заболеваемости',
    figsize=(10, 6),
    color='black',
    grid=True,
);

In [None]:
# Last `confirmed` value of each country, reduced to the ten largest.
grouped_country = (
    covid_df
    .groupby(['country'])['confirmed']
    .last()
    .nlargest(10)
)
grouped_country.plot.bar(
    figsize=(12, 4),
    grid=True,
    colormap='plasma',
);

In [None]:
# Last `confirmed` and `deaths` values of each country; the ten countries with
# the largest `confirmed` are shown as grouped bars.
grouped_country = (
    covid_df
    .groupby(['country'])[['confirmed', 'deaths']]
    .last()
    .nlargest(10, columns=['confirmed'])
)
grouped_country.plot.bar(figsize=(12, 4), grid=True);

In [None]:
# Last total_vaccinations value of each country (GroupBy.last() picks the last
# non-null entry), reduced to the five smallest.
grouped = covid_df.groupby(by="country")["total_vaccinations"].last()
grouped = grouped.nsmallest(5)
grouped.plot(
    kind="bar",
    grid=True,
    figsize=(12, 4)
);  # trailing semicolon keeps the Axes repr out of the cell output, like the other plotting cells

# MATPLOTLIB 

In [None]:
# Two side-by-side axes: each takes half of the figure width and its full height.
fig = plt.figure(figsize=(12, 4))
axes1, axes2 = (fig.add_axes([left, 0, 0.5, 1]) for left in (0, 0.5))

In [None]:
# Scatter of daily confirmed cases against the number of fully vaccinated
# people, using the United States rows only.
us_data = covid_df.loc[covid_df['country'] == 'United States']

fig = plt.figure(figsize=(8, 4))
axes = fig.add_axes([0, 0, 1, 1])
axes.scatter(
    us_data['people_fully_vaccinated'],
    us_data['daily_confirmed'],
    c='blue',
    marker='o',
    s=100,
);

In [None]:
# Up to ten most frequent values of the `vaccines` column (counted over rows).
vaccine_combinations = covid_df['vaccines'].value_counts()[:10]
fig = plt.figure(figsize=(5, 5))
axes = fig.add_axes([0, 0, 1, 1])
axes.pie(
    vaccine_combinations,
    labels=vaccine_combinations.index,
    autopct='%.1f%%',
    # Pull out only the first (largest) wedge. The list is sized from the data
    # because pie() requires len(explode) == len(x), and value_counts() may
    # return fewer than ten distinct combinations.
    explode=[0.1 if position == 0 else 0 for position in range(len(vaccine_combinations))]
);

In [None]:
def get_vax_list(arg):
    """Split a comma-separated vaccine string into a list of vaccine names.

    Whitespace around each name is stripped, but spaces inside a name are
    preserved (so "Sputnik V" is not turned into "SputnikV"). Empty items,
    such as those produced by a trailing comma or an empty string, are dropped.

    Args:
        arg: String of vaccine names separated by commas.

    Returns:
        List of vaccine names in their original order.
    """
    return [name.strip() for name in arg.split(",") if name.strip()]

# Last `vaccines` string of every country, ignoring rows where it is missing.
vax_no_nan = covid_df.dropna(subset=["vaccines"])
vax_last = vax_no_nan.groupby(by="country")["vaccines"].last()

# Tally every vaccine name across the per-country lists.
counter = Counter(
    name
    for cell in vax_last
    for name in get_vax_list(cell)
)

# Keep the seven most frequent names and fold everything after them into
# a single 'other' entry, then re-sort so 'other' lands at its own rank.
vaccines = pd.Series(dict(counter)).sort_values(ascending=False)
vaccines.loc['other'] = vaccines.iloc[7:].sum()
vaccines = vaccines.sort_values(ascending=False).head(8)

fig = plt.figure(figsize=(5, 5))
axes = fig.add_axes([0, 0, 1, 1])
axes.pie(vaccines, labels=vaccines.index, autopct='%.1f%%');


In [None]:
# Per-date totals of the four counters for China.
china_data = covid_df.loc[covid_df['country'] == 'China']
china_grouped = china_data.groupby(['date'])[['confirmed', 'active', 'deaths', 'recovered']].sum()

# Draw the curves
fig = plt.figure(figsize=(10, 4))
axes = fig.add_axes([0, 0, 1, 1])
# (column, legend label, extra line options); the order fixes the colour cycle.
curves = [
    ('confirmed', 'Общее число зафиксированных случаев', {}),
    ('deaths', 'Общее число смертей', {}),
    ('recovered', 'Общее число выздоровевших пациентов', {}),
    ('active', 'Общее число активных случаев', {'linestyle': 'dashed'}),
]
for column, legend_label, line_options in curves:
    axes.plot(china_grouped[column], label=legend_label, lw=3, **line_options)

# Configure the axes
axes.set_title('Статистика Covid-19 в Китае', fontsize=16)
axes.set(xlabel='Даты', ylabel='Число случаев')
axes.set_yticks(range(0, 100000, 10000))
axes.xaxis.set_tick_params(rotation=30)
axes.grid()
axes.legend();

In [None]:
# Five largest last-known values per country, in absolute numbers and per hundred people.
vacc_country = (
    covid_df.groupby('country')['people_fully_vaccinated']
    .last()
    .nlargest(5)
)
vacc_country_per_hundred = (
    covid_df.groupby('country')['people_fully_vaccinated_per_hundred']
    .last()
    .nlargest(5)
)

# Main chart: absolute numbers, filling the whole figure
fig = plt.figure(figsize=(13, 4))
main_axes = fig.add_axes([0, 0, 1, 1])
main_axes.bar(vacc_country.index, vacc_country);
main_axes.set_title('Топ 5 стран по числу полностью привитых людей')
main_axes.set_ylabel('Число вакцинированных (2 компонент)')

# Inset chart: per-hundred values, placed in the upper-right corner of the figure
insert_axes = fig.add_axes([0.6, 0.6, 0.38, 0.38])
insert_axes.bar(vacc_country_per_hundred.index, vacc_country_per_hundred, width=0.5);
insert_axes.set_ylabel('На 100 человек')
insert_axes.xaxis.set_tick_params(rotation=45)

In [None]:
# Empty 2 x 3 grid of axes.
fig, axes = plt.subplots(2, 3, figsize=(10, 5))

In [None]:
# Per-date sums of daily vaccinations and daily confirmed cases for Russia.
data = (
    covid_df.loc[covid_df["country"] == "Russia"]
    .groupby(by="date")[["daily_vaccinations", "daily_confirmed"]]
    .sum()
)

# One row, three panels: vaccination bars, case line, case histogram.
fig, axes = plt.subplots(1, 3, figsize=(15, 4))
vaccination_ax, cases_ax, hist_ax = axes

vaccination_ax.bar(
    data.index,
    data["daily_vaccinations"],
    label="Число вакцинированных"
)
vaccination_ax.set_title("Ежедневная вакцинация в России")
vaccination_ax.xaxis.set_tick_params(rotation=45)

cases_ax.plot(
    data["daily_confirmed"],
    lw=2,
    color="tomato",
    label="Число заболевших"
)
cases_ax.set_title("Ежедневная заболеваемость в России")
cases_ax.xaxis.set_tick_params(rotation=45)

hist_ax.hist(
    data["daily_confirmed"],
    bins=20,
    color="lime",
    label=["Число заболевших"]
)
hist_ax.set_title("Гистограмма заболеваемости в России")
hist_ax.xaxis.set_tick_params(rotation=30);

# SEABORN

In [None]:
import seaborn as sns
print(sns.__version__)

In [None]:
countries = ['Russia', 'Australia', 'Germany', 'Canada', 'United Kingdom']

# Hard-coded population figure for each selected country.
populations = pd.DataFrame(
    {
        'country': ['Canada', 'Germany', 'Russia', 'Australia', 'United Kingdom'],
        'population': [37664517, 83721496, 145975300, 25726900, 67802690],
    }
)

# Restrict to the selected countries, attach the population and express the
# daily confirmed cases as a percentage of it.
croped_covid_df = (
    covid_df[covid_df['country'].isin(countries)]
    .merge(populations, on=['country'])
    .assign(
        daily_confirmed_per_hundred=lambda df: df['daily_confirmed'] / df['population'] * 100
    )
)
croped_covid_df.head()

In [None]:
fig, axes = plt.subplots(2, 1, figsize=(10, 8))

# Top panel: one-dimensional histogram with a KDE curve over all selected countries.
sns.histplot(
    croped_covid_df,
    x='daily_confirmed_per_hundred',
    kde=True,
    bins=25,
    ax=axes[0]
);
axes[0].set_title('Гистограмма ежедневной заболеваемости на 100 человек', fontsize=16)

# Bottom panel: two-dimensional histogram, split by country along the y axis.
sns.histplot(
    croped_covid_df,
    x='daily_confirmed_per_hundred',
    y='country',
    color='red',
    bins=25,
    ax=axes[1]
);

In [None]:
# Calendar quarter (1-4) of each row; used below as the hue of the bars.
croped_covid_df['quarter'] = croped_covid_df['date'].dt.quarter

# barplot() draws on the current figure and shows the mean of y per (country, quarter) by default.
fig = plt.figure(figsize=(10, 7))
barplot = sns.barplot(
    x='country',
    y='daily_confirmed_per_hundred',
    hue='quarter',
    data=croped_covid_df,
)
barplot.set_title('Средний процент болеющего населения по кварталам');

In [None]:
# Joint distribution of full-vaccination coverage and daily cases (both per
# hundred people), coloured by country; both axes are clipped to fixed ranges.
jointplot = sns.jointplot(
    x='people_fully_vaccinated_per_hundred',
    y='daily_confirmed_per_hundred',
    hue='country',
    data=croped_covid_df,
    height=8,
    xlim=(0, 40),
    ylim=(0, 0.1),
)

In [None]:
# Confirmed cases as a percentage of the country's population.
croped_covid_df['confirmed_per_hundred'] = (
    croped_covid_df['confirmed'].div(croped_covid_df['population']).mul(100)
)

In [None]:
# Countries as rows, dates as columns, confirmed cases per hundred people as
# cell values (pivot_table averages duplicates by default).
pivot = croped_covid_df.pivot_table(
    values='confirmed_per_hundred',
    columns='date',
    index='country',
)
# Convert the datetime column labels to strings before they are used as tick labels.
pivot.columns = pivot.columns.astype('string')
heatmap = sns.heatmap(data=pivot, cmap='YlGnBu')
# The title now names the plotted quantity (confirmed cases per hundred); it previously said "vaccination".
heatmap.set_title('Тепловая карта числа подтверждённых случаев на 100 человек', fontsize=16);

In [None]:
# Horizontal box plot of recover_rate, one box per selected country.
boxplot = sns.boxplot(
    x='recover_rate',
    y='country',
    data=croped_covid_df,
    width=0.9,
    orient='h'
)
boxplot.grid()

# Plotly

In [None]:
import plotly
import plotly.express as px
print(plotly.__version__)

In [None]:
# Worldwide totals per date.
# numeric_only=True keeps the text columns (`country`, `vaccines`) out of the
# aggregation: since pandas 2.0 sum() no longer drops them silently, and
# summing them is both meaningless and expensive.
line_data = covid_df.groupby('date', as_index=False).sum(numeric_only=True)
fig = px.line(
    data_frame=line_data,  # DataFrame
    x='date',  # x axis
    y=['confirmed', 'recovered', 'deaths', 'active'],  # y axis: one line per column
    height=500,  # height
    width=1000,  # width
    title='Confirmed, Recovered, Deaths, Active cases over Time'  # title
)
# fig.show()

In [None]:
# Mean of the daily recovered counts for every country
treemap_data = (
    covid_df
    .groupby(by='country', as_index=False)[['daily_recovered']]
    .mean()
)

# Build the chart: one rectangle per country, sized by its mean daily_recovered
fig = px.treemap(
    treemap_data,
    path=['country'],
    values='daily_recovered',
    title='Daily Recovered Cases by Country',
    width=1000,
    height=500,
)

# Display the chart
fig.show()

In [None]:
# Sum of daily vaccinations over all countries for each date.
plot = (
    covid_df
    .groupby(by='date', as_index=False)[['daily_vaccinations']]
    .sum()
)

fig = px.line(
    plot,
    x='date',
    y='daily_vaccinations',
    width=1000,  # width
    height=500,  # height
)

#fig.show()

In [None]:
# Order the rows by date, then turn the dates into strings so they can be used
# as animation frame labels.
plot = covid_df.sort_values(by='date')
plot['date'] = plot['date'].astype('string')

# Build the chart
fig = px.choropleth(
    data_frame=plot,  # DataFrame
    locations="country",  # column holding the locations
    locationmode="country names",  # how locations are matched against Plotly's database
    color="total_vaccinations",  # value that drives the colour
    animation_frame="date",  # animation slider
    range_color=[0, 30e6],  # colour range
    # The map colours countries by total_vaccinations, so the title says so;
    # it previously read "Global Spread of COVID-19".
    title='Global COVID-19 Vaccinations',  # title
    width=800,  # width
    height=500,  # height
    color_continuous_scale='Reds'  # colour palette
)

# Display the chart
#fig.show(renderer='notebook')