In [1]:
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import plotly
import plotly.express as px
import seaborn as sns
# Switch matplotlib to its built-in 'default' style sheet for the figures drawn below.
plt.style.use('default')
# Print the installed seaborn version so the environment that produced the plots is visible.
print(sns.__version__)

0.11.2


In [2]:
# Read the case table from disk.
covid_data = pd.read_csv('data/covid_data.csv')

# Only these vaccination columns are kept; every other column of the CSV is dropped.
vaccination_columns = [
    'country', 'date', 'total_vaccinations',
    'people_vaccinated', 'people_vaccinated_per_hundred',
    'people_fully_vaccinated', 'people_fully_vaccinated_per_hundred',
    'daily_vaccinations', 'vaccines',
]
vaccinations_data = pd.read_csv('data/country_vaccinations.csv')[vaccination_columns]


# Cumulative counters that are summed per (date, country) and later differenced per country.
case_columns = ['confirmed', 'deaths', 'recovered']

covid_data = (
    covid_data
    .groupby(['date', 'country'], as_index=False)[case_columns]
    .sum()
    .assign(
        # Parse dates so that the sort below is chronological.
        date=lambda frame: pd.to_datetime(frame['date']),
        # Active cases = confirmed minus deaths minus recovered.
        active=lambda frame: frame['confirmed'] - frame['deaths'] - frame['recovered'],
    )
    .sort_values(by=['country', 'date'])
)

# Row-to-row difference inside each country; the first row of every country is NaN.
for case_column in case_columns:
    covid_data[f'daily_{case_column}'] = covid_data.groupby('country')[case_column].diff()

# Both frames need a datetime 'date' column for the join keys to match.
vaccinations_data['date'] = pd.to_datetime(vaccinations_data['date'])

# Left join: every case row is kept, vaccination columns are NaN where no match exists.
covid_df = pd.merge(
    covid_data,
    vaccinations_data,
    how='left',
    on=['date', 'country'],
)

# Deaths and recoveries as a percentage of confirmed cases.
confirmed_total = covid_df['confirmed']
covid_df['death_rate'] = covid_df['deaths'] / confirmed_total * 100
covid_df['recover_rate'] = covid_df['recovered'] / confirmed_total * 100

In [None]:
# Countries compared in the seaborn charts below.
countries = ['Russia', 'Australia', 'Germany', 'Canada', 'United Kingdom']

# Hard-coded population figures used to normalise the counts.
populations = pd.DataFrame({
    'country': ['Canada', 'Germany', 'Russia', 'Australia', 'United Kingdom'],
    'population': [37664517, 83721496, 145975300, 25726900, 67802690],
})

# Keep only the selected countries and attach each country's population.
croped_covid_df = (
    covid_df[covid_df['country'].isin(countries)]
    .merge(populations, on=['country'])
)

# Daily new confirmed cases as a percentage of the population.
croped_covid_df['daily_confirmed_per_hundred'] = (
    croped_covid_df['daily_confirmed'] / croped_covid_df['population'] * 100
)
croped_covid_df.head()

In [None]:
# Two stacked panels: the overall distribution on top, the per-country 2D histogram below.
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(10, 8))
overall_ax, by_country_ax = axes
rate_column = 'daily_confirmed_per_hundred'

# Top panel: histogram with a KDE curve over all selected countries.
sns.histplot(data=croped_covid_df, x=rate_column, bins=25, kde=True, ax=overall_ax)
overall_ax.set_title('Гистограмма ежедневной заболеваемости на 100 человек', fontsize=16)

# Bottom panel: the same variable split by country along the y axis.
sns.histplot(
    data=croped_covid_df,
    x=rate_column,
    y='country',
    bins=25,
    color='red',
    ax=by_country_ax,
);

In [None]:
# Horizontal boxplot of the death rate, one box per country.
fig, ax = plt.subplots(figsize=(10, 6))
boxplot = sns.boxplot(
    data=croped_covid_df,
    x='death_rate',
    y='country',
    orient='h',
    width=0.5,
    ax=ax,
)
boxplot.set(
    title='Распределение летальности по странам',
    xlabel='Летальность',
    ylabel='Страна',
)
boxplot.grid()

In [None]:
# Calendar quarter (1-4) of every observation, used as the bar grouping.
croped_covid_df['quarter'] = croped_covid_df['date'].dt.quarter

# Grouped bars: country on the x axis, one bar per quarter.
fig, ax = plt.subplots(figsize=(10, 7))
barplot = sns.barplot(
    data=croped_covid_df,
    x='country',
    y='daily_confirmed_per_hundred',
    hue='quarter',
    ax=ax,
)
barplot.set_title('Средний процент болеющего населения по кварталам');

In [None]:
# Axis windows: 0-40 on the fully-vaccinated axis, 0-0.1 on the daily-cases axis.
vaccinated_limits = (0, 40)
daily_confirmed_limits = (0, 0.1)

# Joint distribution of full vaccination and daily cases, coloured by country.
joinplot = sns.jointplot(
    x='people_fully_vaccinated_per_hundred',
    y='daily_confirmed_per_hundred',
    hue='country',
    data=croped_covid_df,
    height=8,
    xlim=vaccinated_limits,
    ylim=daily_confirmed_limits,
)

In [None]:
# Country x date table of people vaccinated per hundred.
vaccinated_column = 'people_vaccinated_per_hundred'
pivot = croped_covid_df.pivot_table(
    index='country',
    columns='date',
    values=vaccinated_column,
)
# Turn the datetime column labels into strings.
pivot.columns = pivot.columns.astype('string')
display(pivot)

In [None]:
# Heatmap of the vaccination pivot: one row per country, one column per date.
heatmap_title = 'Тепловая карта вакцинации'
heatmap = sns.heatmap(pivot, cmap='YlGnBu')
heatmap.set_title(heatmap_title, fontsize=16);

In [None]:
# Cumulative confirmed cases as a percentage of the population.
croped_covid_df['confirmed_per_hundred'] = (
    croped_covid_df['confirmed']
    .div(croped_covid_df['population'])
    .mul(100)
)

# Country x date table of that percentage.
pivot1 = croped_covid_df.pivot_table(
    index='country',
    columns='date',
    values='confirmed_per_hundred',
)
# Turn the datetime column labels into strings.
pivot1.columns = pivot1.columns.astype('string')
display(pivot1)


In [None]:
# Heatmap of confirmed cases per hundred people (pivot1): one row per country, one column per date.
heatmap = sns.heatmap(data = pivot1, cmap = 'YlGnBu')
# The title names the plotted metric; it previously repeated the vaccination heatmap's title.
heatmap.set_title('Тепловая карта подтверждённых случаев на 100 человек', fontsize =16);

In [None]:
# Horizontal boxplot of the recovery rate, one box per country.
fig, ax = plt.subplots(figsize=(10, 6))
boxplot = sns.boxplot(
    data=croped_covid_df,
    x='recover_rate',
    y='country',
    orient='h',
    width=0.5,
    ax=ax,
)
boxplot.set(
    title='Распределение выздоровлений по странам',
    xlabel='Выздоровление',
    ylabel='Страна',
)
boxplot.grid()

In [None]:
# Mean daily recovered count per country, with 'country' kept as a regular column.
treemap_data = (
    covid_df
    .groupby('country')['daily_recovered']
    .mean()
    .reset_index()
)

# Treemap with one tile per country, sized by that mean.
fig = px.treemap(
    treemap_data,
    path=['country'],
    values='daily_recovered',
    title='Daily recovered cases by country',
    width=1000,
    height=500,
)

fig.show()

In [None]:
# Sort chronologically and convert the dates to strings for the animation frames.
choropleth_data = (
    covid_df
    .sort_values(by='date')
    .assign(date=lambda frame: frame['date'].astype('string'))
)

# Animated world map coloured by the confirmed-case count.
fig = px.choropleth(
    choropleth_data,
    locations="country",               # column holding the locations
    locationmode="country names",      # match locations by country name
    color="confirmed",                 # column that drives the colour
    animation_frame="date",            # one animation frame per date
    range_color=[0, 30e6],             # fixed colour range across all frames
    color_continuous_scale='Reds',     # colour palette
    title='Global Spread of COVID-19',
    width=800,
    height=500,
)

# Render the figure.
fig.show()

In [None]:
# Countries compared in the 3D scatter (this rebinds the `countries` list used earlier).
countries = ['United States', 'Russia', 'United Kingdom', 'Brazil', 'France']
scatter_data = covid_df[covid_df['country'].isin(countries)]

# Daily confirmed cases vs. daily deaths vs. daily vaccinations, coloured by country.
# The x and y axes are logarithmic.
fig = px.scatter_3d(
    data_frame=scatter_data,
    x = 'daily_confirmed',
    y = 'daily_deaths',
    z = 'daily_vaccinations',
    color = 'country',
    log_x=True,
    log_y=True,
    width=1000,
    height=700
)
fig.show()

# write_html does not create missing directories, so make sure the output
# folder exists before saving; otherwise a fresh checkout fails here.
scatter_html_path = Path("ploty/scatter_3d.html")
scatter_html_path.parent.mkdir(parents=True, exist_ok=True)
fig.write_html(str(scatter_html_path))

In [None]:
# Sum of daily vaccinations over all countries for every date.
line_data = (
    covid_df
    .groupby('date')['daily_vaccinations']
    .sum()
    .reset_index()
)

# Line chart of that total over time.
fig = px.line(line_data, x='date', y='daily_vaccinations')
fig.show()

In [None]:
# Sort chronologically and convert the dates to strings for the animation frames.
choropleth_data = covid_df.sort_values(by='date')
choropleth_data['date'] = choropleth_data['date'].astype('string')

# Animated world map coloured by the total number of vaccinations.
fig = px.choropleth(
    data_frame=choropleth_data, # DataFrame
    locations="country", # column holding the locations
    locationmode = "country names", # match locations by country name
    color="total_vaccinations", # column that drives the colour
    animation_frame="date", # one animation frame per date
    range_color=[0, 30e5], # fixed colour range across all frames
    # The title names the plotted metric; it previously repeated the
    # confirmed-cases map's title ('Global Spread of COVID-19').
    title='Global COVID-19 Vaccinations',
    width=800, # width
    height=500, # height
    color_continuous_scale='Reds' # colour palette
)

# Render the figure with the 'notebook' renderer.
fig.show(renderer='notebook')