# Import

In [152]:
import numpy as np
import pandas as pd

# visualization
import seaborn as sns
import plotly.express as px
import plotly.graph_objs as go
import plotly.figure_factory as ff

# Data preparation
Data exported from https://datalens.yandex/7o7is1q6ikh23?tab=0Ze

RS.csv - Russian data export

WS.csv - World data export

ID.csv - Isolation index by day data export

cities.csv - cached translation of city names

regions.csv - cached translation of region names

countries.csv - cached translation of country names

In [193]:
# Paths of the three exported datasets: Russia, world, and isolation index
file_name_rus, file_name_wor, file_name_id = 'RS.csv', 'WS.csv', 'ID.csv'

# Read each export into its own DataFrame (same order as the paths above)
rus_df, world_df, iso_df = (
    pd.read_csv(path) for path in (file_name_rus, file_name_wor, file_name_id)
)

In [194]:
# Map the Russian column headers used in the exports to English names
rename_dict = {
    'Страна': 'Country_ru',
    'Регион': 'Region_ru',
    'Город': 'City_ru',
    'Дата': 'Date',
    'Заражений': 'Confirmed',
    'Выздоровлений': 'Recovered',
    'Смертей': 'Deaths',
    'Смертей за день': 'Day_Deaths',
    'Заражений за день': 'Day_Confirmed',
    'Выздоровлений за день': 'Day_Recovered',
    'Индекс': 'Index',
}

# One shared mapping is applied to all three frames
rus_df, world_df, iso_df = (
    frame.rename(columns=rename_dict) for frame in (rus_df, world_df, iso_df)
)

rus_df.head()

Unnamed: 0,Date,Region_ru,Confirmed,Recovered,Deaths,Day_Deaths,Day_Confirmed,Day_Recovered
0,02.03.2020,Московская обл.,1,0,0,0,1,0
1,03.03.2020,Московская обл.,1,0,0,0,0,0
2,04.03.2020,Московская обл.,1,0,0,0,0,0
3,05.03.2020,Московская обл.,1,0,0,0,0,0
4,06.03.2020,Москва,5,0,0,0,5,0


In [195]:
# The exports store dates as DD.MM.YYYY, so parse them day-first
rus_df['Date'] = pd.to_datetime(rus_df['Date'], dayfirst=True)
world_df['Date'] = pd.to_datetime(world_df['Date'], dayfirst=True)
iso_df['Date'] = pd.to_datetime(iso_df['Date'], dayfirst=True)

# Report how fresh each dataset is. Use the maximum date instead of the last
# row: the world and isolation exports are grouped by country/city, so the
# last row is only the final date of whichever group happens to come last.
print('Rus data actual date:', rus_df['Date'].max().strftime('%d, %b %Y'))
print('World data actual date:', world_df['Date'].max().strftime('%d, %b %Y'))
print('Isolation data actual date:', iso_df['Date'].max().strftime('%d, %b %Y'))

Rus data actual date: 30, May 2020
World data actual date: 29, May 2020
Isolation data actual date: 29, May 2020


## Russian data

In [196]:
# Preview the first rows of the Russian dataset (Date is now a parsed datetime)
rus_df.head()

Unnamed: 0,Date,Region_ru,Confirmed,Recovered,Deaths,Day_Deaths,Day_Confirmed,Day_Recovered
0,2020-03-02,Московская обл.,1,0,0,0,1,0
1,2020-03-03,Московская обл.,1,0,0,0,0,0
2,2020-03-04,Московская обл.,1,0,0,0,0,0
3,2020-03-05,Московская обл.,1,0,0,0,0,0
4,2020-03-06,Москва,5,0,0,0,5,0


In [197]:
# Expand the abbreviation "обл." to "область".
# The vectorized .str accessor replaces the per-row lambda; regex=False keeps
# the match literal (the dot is not a wildcard) and missing values stay
# missing instead of becoming the string 'nan'.
rus_df.Region_ru = rus_df.Region_ru.str.replace('обл.', 'область', regex=False)

# Rename regions: short names used in the export -> full names.
# Series.replace with a dict only substitutes values equal to a key, so
# names not listed here pass through unchanged.
rename_dict = {
    'Адыгея' : 'Республика Адыгея',
    'Алтай'  : 'Республика Алтай',
    'Башкортостан' : 'Республика Башкортостан',
    'Бурятия' : 'Республика Бурятия',
    'Дагестан' : 'Республика Дагестан',
    'Еврейская АО' : 'Еврейская автономная область',
    'Ингушетия' : 'Республика Ингушетия',
    'Кабардино-Балкария' : 'Кабардино-Балкарская Республика',
    'Калмыкия' : 'Республика Калмыкия',
    'Карачаево-Черкессия' : 'Карачаево-Черкесская Республика',
    'Карелия' : 'Республика Карелия',
    'Коми' : 'Республика Коми',
    'Крым' : 'Республика Крым',
    'Марий Эл' : 'Республика Марий Эл',
    'Мордовия' : 'Республика Мордовия',
    # 'Ненецкий АО' : 'Ненецкий автономный округ',
    'Саха (Якутия)' : 'Республика Саха (Якутия)',
    # 'Северная Осетия' : 'Республика Северная Осетия - Алания',
    'Татарстан' : 'Республика Татарстан',
    'Тыва' : 'Республика Тыва',
    'Удмуртия' : 'Удмуртская Республика',
    'ХМАО – Югра' : 'Ханты-Мансийский АО',
    'Хакасия' : 'Республика Хакасия',
    'Чечня' : 'Чеченская Республика',
    'Чувашия' : 'Республика Чувашия',
    'Чукотский АО' : 'Чукотский автономный округ'
}

rus_df.Region_ru = rus_df.Region_ru.replace(rename_dict)

In [198]:
# Use google translation.
# The googletrans client is created lazily on the first translate() call:
# the notebook normally reads cached translations from CSV files, so a plain
# "Run All" should not need the package to be installed.
translator = None

def translate(rus):
    """Translate a Russian string to English with googletrans.

    Parameters
    ----------
    rus : str
        Text in Russian.

    Returns
    -------
    str
        The ``text`` attribute of the translation result.
    """
    global translator
    if translator is None:
        # Deferred import: only needed when the translation cache is rebuilt
        from googletrans import Translator
        translator = Translator()
    eng = translator.translate(rus, src='ru', dest='en')
    return eng.text

# Translate names and save to file (translation base)
# region_df = pd.DataFrame({'Region_ru' : np.unique(rus_df.Region_ru.values)})
# region_df['Region'] = region_df.Region_ru.apply(translate)
# region_df.to_csv('regions.csv', index=False)
# region_df.head()

In [199]:
# Numeric value columns shared by the Russian and world datasets
val_cols = ['Confirmed', 'Recovered', 'Deaths', 'Day_Deaths', 'Day_Confirmed', 'Day_Recovered']

# Add translated column from the cached translation table
translate_df = pd.read_csv('regions.csv')
rus_df = rus_df.merge(translate_df, on='Region_ru', how='left')
rus_df = rus_df[['Date', 'Region', 'Region_ru'] + val_cols]

# Check names integrity: the left merge leaves Region missing wherever the
# cache has no translation. Test that column directly instead of calling
# fillna('') on the whole frame, which would also put empty strings into the
# date and numeric columns if they contain gaps.
missed = rus_df[rus_df.Region.isna()]
assert len(missed) == 0, f'Regenerate translation base. Missed translation for: \n{np.unique(missed.Region_ru.astype(str).values)}'

rus_df.head()

Unnamed: 0,Date,Region,Region_ru,Confirmed,Recovered,Deaths,Day_Deaths,Day_Confirmed,Day_Recovered
0,2020-03-02,Moscow region,Московская область,1,0,0,0,1,0
1,2020-03-03,Moscow region,Московская область,1,0,0,0,0,0
2,2020-03-04,Moscow region,Московская область,1,0,0,0,0,0
3,2020-03-05,Moscow region,Московская область,1,0,0,0,0,0
4,2020-03-06,Moscow,Москва,5,0,0,0,5,0


## World data

In [200]:
# Preview the first rows of the world dataset
world_df.head()

Unnamed: 0,Country_ru,Date,Confirmed,Recovered,Deaths,Day_Confirmed,Day_Recovered,Day_Deaths
0,Австралия,2020-01-22,0,0,0,0,0,0
1,Австралия,2020-01-23,0,0,0,0,0,0
2,Австралия,2020-01-24,0,0,0,0,0,0
3,Австралия,2020-01-25,0,0,0,0,0,0
4,Австралия,2020-01-26,4,0,0,4,0,0


In [201]:
# Country names translation
# country_df = pd.DataFrame({'Country_ru' : np.unique(world_df.Country_ru.values)})
# country_df['Country'] = country_df.Country_ru.apply(translate)
# country_df.to_csv('countries.csv', index=False)
# country_df.head()

In [202]:
# Add translated column from the cached translation table
translate_df = pd.read_csv('countries.csv')
world_df = world_df.merge(translate_df, on='Country_ru', how='left')
world_df = world_df[['Country', 'Country_ru', 'Date'] + val_cols]

# Check names integrity: the left merge leaves Country missing wherever the
# cache has no translation. Test that column directly instead of calling
# fillna('') on the whole frame, which would also put empty strings into the
# date and numeric columns if they contain gaps.
missed = world_df[world_df.Country.isna()]
assert len(missed) == 0, f'Regenerate translation base. Missed translation for: \n{np.unique(missed.Country_ru.astype(str).values)}'

world_df.head()

Unnamed: 0,Country,Country_ru,Date,Confirmed,Recovered,Deaths,Day_Deaths,Day_Confirmed,Day_Recovered
0,Australia,Австралия,2020-01-22,0,0,0,0,0,0
1,Australia,Австралия,2020-01-23,0,0,0,0,0,0
2,Australia,Австралия,2020-01-24,0,0,0,0,0,0
3,Australia,Австралия,2020-01-25,0,0,0,0,0,0
4,Australia,Австралия,2020-01-26,4,0,0,0,4,0


## Isolation data

In [203]:
# Preview the first rows of the isolation index dataset
iso_df.head()

Unnamed: 0,Country_ru,City_ru,Date,Index
0,Азербайджан,Баку,2020-02-23,2.8
1,Азербайджан,Баку,2020-02-24,1.7
2,Азербайджан,Баку,2020-02-25,1.8
3,Азербайджан,Баку,2020-02-26,1.8
4,Азербайджан,Баку,2020-02-27,1.7


In [204]:
# Renames, grouped per column as {old name: new name}
# NOTE(review): presumably these align the spellings with the cached
# translation tables - confirm against countries.csv / cities.csv
name_fixes = {
    'Country_ru': {'Молдова' : 'Молдавия'},
    'City_ru': {'Московский' : 'Город Московский'},
}
for column, mapping in name_fixes.items():
    iso_df[column] = iso_df[column].replace(mapping)

In [205]:
# City names translation
# city_df = pd.DataFrame({'City_ru' : np.unique(iso_df.City_ru.values)})
# city_df['City'] = city_df.City_ru.apply(translate)
# city_df.to_csv('cities.csv', index=False)
# city_df.head()

In [206]:
# Add translated columns: country names first, then city names
translate_df = pd.read_csv('countries.csv')
iso_df = iso_df.merge(translate_df, on='Country_ru', how='left')
translate_df = pd.read_csv('cities.csv')
iso_df = iso_df.merge(translate_df, on='City_ru', how='left')
iso_df = iso_df[['Country', 'City', 'Country_ru', 'City_ru', 'Date', 'Index']]

# Check names integrity: the left merges leave Country / City missing wherever
# a cache has no translation. Test those columns directly instead of calling
# fillna('') on the whole frame, which would also put empty strings into the
# Date and Index columns if they contain gaps.
missed = iso_df[iso_df.Country.isna() | iso_df.City.isna()]
print(missed)
assert len(missed) == 0, 'Regenerate translation base. Missed translation for some values.'

iso_df.head()

Empty DataFrame
Columns: [Country, City, Country_ru, City_ru, Date, Index]
Index: []


Unnamed: 0,Country,City,Country_ru,City_ru,Date,Index
0,Azerbaijan,Baku,Азербайджан,Баку,2020-02-23,2.8
1,Azerbaijan,Baku,Азербайджан,Баку,2020-02-24,1.7
2,Azerbaijan,Baku,Азербайджан,Баку,2020-02-25,1.8
3,Azerbaijan,Baku,Азербайджан,Баку,2020-02-26,1.8
4,Azerbaijan,Baku,Азербайджан,Баку,2020-02-27,1.7


# Visualization

In [207]:
# Confirmed cases per (date, region), ordered from the largest count down
region_confirmed = (
    rus_df
    .groupby(['Date', 'Region'])['Confirmed']
    .max()
    .reset_index()
    .sort_values(['Confirmed', 'Date'], ascending=False)
)

px.line(region_confirmed, x="Date", y="Confirmed", color='Region', title='Cases Spread', height=600)

In [208]:
# Daily and total confirmed cases per (date, region), ordered by total count
region_daily = (
    rus_df
    .groupby(['Date', 'Region'])[['Day_Confirmed', 'Confirmed']]
    .sum()
    .reset_index()
    .sort_values(['Confirmed', 'Date'], ascending=False)
)

px.line(region_daily, x="Date", y="Day_Confirmed", color='Region', title='Daily confirmed cases spread', height=600)

In [209]:
# Confirmed cases per (date, country), ordered from the largest count down
country_confirmed = (
    world_df
    .groupby(['Date', 'Country'])['Confirmed']
    .max()
    .reset_index()
    .sort_values(['Confirmed', 'Date', 'Country'], ascending=False)
)

px.line(country_confirmed, x="Date", y="Confirmed", color='Country', title='World cases Spread', height=600)

In [210]:
# Daily and total confirmed cases per (date, country), ordered by total count
country_daily = (
    world_df
    .groupby(['Date', 'Country'])[['Day_Confirmed', 'Confirmed']]
    .sum()
    .reset_index()
    .sort_values(['Confirmed', 'Date'], ascending=False)
)

px.line(country_daily, x="Date", y="Day_Confirmed", color='Country', title='World daily confirmed cases spread', height=600)

In [211]:
# Moscow rows only, in date order, reshaped to long form (Date, variable, value)
moscow_index = (
    iso_df.loc[iso_df.City == 'Moscow', ['Date', 'Index']]
    .sort_values('Date')
    .melt(id_vars="Date", value_vars=['Index'])
)

# Bar height and bar colour both encode the index value
fig = px.bar(moscow_index, x="Date", y="value", color='value', title='Moscow Yandex self-isolation index')
fig.update_layout(barmode='group', yaxis_title='Index')

fig.show()

In [212]:
# Save the processed frames without the index column.
# NOTE(review): these are the same paths the raw exports were read from, so
# the inputs are overwritten with the processed data; a second run from the
# top would then read already-processed files. Confirm this is intended.
for frame, path in ((rus_df, file_name_rus), (world_df, file_name_wor), (iso_df, file_name_id)):
    frame.to_csv(path, index=False)