In [0]:
# Clone the author's repository containing the input data (creates ./covid-19-solution).
!git clone https://github.com/Tixonmavrin/covid-19-solution

In [0]:
import json
import re
import urllib
import urllib.error
from datetime import datetime, timedelta, timezone

import pandas as pd
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm

In [6]:
# Country metadata shipped with the cloned repository (not used by the scraping below).
countries = pd.read_csv('covid-19-solution/data/features/countries-2.csv')
# Wikipedia article whose first "wikitable" is parsed into quarantine records.
URL = "https://en.wikipedia.org/wiki/National_responses_to_the_2019%E2%80%9320_coronavirus_pandemic"
page = requests.get(URL)
# Fail here with a clear HTTP error rather than later while parsing an error page.
page.raise_for_status()
# Name the parser explicitly: without it Beautiful Soup picks whichever parser
# happens to be installed (and warns), so the parse tree can differ between machines.
soup = BeautifulSoup(page.text, "html.parser")
tables = soup.find_all("table", {"class": "wikitable"})
# Accumulates one [country, place, start, end, level] record per parsed table row.
data = []
def get_text(col):
    """Return the text of a table cell without bracketed footnote markers.

    Args:
        col: An object exposing a ``text`` attribute (here, a parsed table cell).

    Returns:
        ``col.text`` with every non-empty ``[...]`` group removed and the
        surrounding whitespace trimmed.
    """
    without_refs = re.sub(r"\[[^]]+\]", "", col.text)
    # Strip *after* removing the markers: "Moscow [5]" would otherwise keep the
    # space that preceded the footnote.
    return without_refs.strip()
# ``country`` carries over between rows: a row that does not name a country
# reuses the one from the previous row. Seed it so that such a row appearing
# before any country row cannot raise NameError.
country = ""
for row in tables[0].find_all("tr"):
    columns = row.find_all("td")
    if len(columns) < 4:
        # Too few cells to hold place/start/end/level - skip the row.
        continue
    if len(columns) == 5:
        # Full row: country, place, then the three value cells.
        country = get_text(columns[0])
        place = get_text(columns[1])
        columns = columns[2:]
    elif "colspan" in columns[0].attrs:
        # The first cell spans columns: it names a country with no separate place.
        country = get_text(columns[0])
        place = ""
        columns = columns[1:]
    else:
        # Place-only row: keep the country seen on an earlier row.
        place = get_text(columns[0])
        columns = columns[1:]
    start = get_text(columns[0])
    end = get_text(columns[1])
    level = get_text(columns[2])
    data.append([country, place, start, end, level])
df = pd.DataFrame(data, columns=["Country", "Place", "Start date", "End date", "Level"])
df.to_csv("covid-19-solution/data/mobility/quarantine.csv", index=False)
# Show one random row as a quick sanity check of the parsed table.
df.sample()

Unnamed: 0,Country,Place,Start date,End date,Level
77,Russia,Moscow,2020-03-30,2020-05-12,Metropolitan area


In [7]:
def read_csv_from_url(url):
    """Load a CSV from ``url`` into a DataFrame, treating HTTP 404 as "no data".

    Args:
        url: Location passed straight to ``pandas.read_csv``.

    Returns:
        The parsed ``DataFrame``, or ``None`` when the server answers 404.

    Raises:
        urllib.error.HTTPError: For any HTTP error other than 404.
    """
    try:
        return pd.read_csv(url)
    except urllib.error.HTTPError as e:
        # ``code`` is the documented status attribute of HTTPError; ``status``
        # is not guaranteed to exist on every Python version.
        if e.code == 404:
            return None
        # Bare ``raise`` re-raises the active exception with its traceback untouched.
        raise
# Place categories fetched per country; each entry is both the file-name suffix
# in the download URL and the column name the values end up under.
PLACES = ['parks', 'residential', 'retail-and-recreation', 'transit-stations', 'workplaces', 'grocery-and-pharmacy']
# na_filter=False keeps blank cells as '' and keeps a literal code such as "NA"
# (Namibia's ISO alpha-2) as text instead of NaN. The driver loop filters on
# `iso_alpha2 != ''`, a comparison that never excludes NaN.
countries = pd.read_csv('covid-19-solution/data/features/countries-2.csv', na_filter=False)
def google_mobility_by_country(code):
    """Collect every downloadable place category for one country into one frame.

    Args:
        code: Country code inserted into the per-country download URL.

    Returns:
        A DataFrame indexed by ``date`` in which the ``value`` column of each
        downloaded file is renamed to its category from ``PLACES``, or ``None``
        when no category could be downloaded.
    """
    frames = []
    for place in PLACES:
        frame = read_csv_from_url(f'https://pastelsky.github.io/covid-19-mobility-tracker/output/{code}/mobility-{place}.csv')
        if frame is None:
            # Nothing was returned for this category - move on to the next one.
            continue
        frames.append(frame.rename(columns={'value': place}))
    if not frames:
        return None
    # Each frame only fills its own category column; taking the first non-null
    # value per date lines the categories up side by side.
    stacked = pd.concat(frames)
    return stacked.groupby('date').first()
# Download mobility data for every row of `countries` whose `iso_alpha2` is not
# the empty string, tag each result with its country and stack everything.
# NOTE(review): the `!= ''` comparison does not exclude NaN codes; it only
# filters as intended when blank cells were read as '' - confirm.
with_code = countries.loc[countries['iso_alpha2'] != '', ['country', 'iso_alpha2']]
data = []
for _, entry in tqdm(with_code.iterrows()):
    mobility = google_mobility_by_country(entry['iso_alpha2'])
    if mobility is None:
        # No mobility files were found for this code.
        continue
    mobility['country'] = entry['country']
    data.append(mobility)
# `date` is the index of every per-country frame; grouping on it together with
# the `country` column yields one row per (country, date).
stacked = pd.concat(data)
df = stacked.groupby(['country', 'date']).first().reset_index()
df.to_csv('covid-19-solution/data/mobility/mobility-google.csv', index=False)
# Display one random row as a quick look at the result.
df.sample()

254it [01:48,  2.33it/s]


Unnamed: 0,country,date,parks,residential,retail-and-recreation,transit-stations,workplaces,grocery-and-pharmacy
3632,LAO,2020-04-04,-24.0,16.0,-59.0,-68.0,-26.0,-40.0


In [0]:
# Lookup from each region row's `region_center` value to that row's `country` label.
countries = pd.read_csv('covid-19-solution/data/features/countries-2.csv')
regions = countries[countries['is_region'] == 1]
city_map = dict(zip(regions['region_center'], regions['country']))
# The page carries its data as JSON right after a `class="config-view">` marker;
# capture the text up to the next `<` and parse it.
body = requests.get('https://yandex.ru/web-maps/covid19/isolation').content
config_match = re.search(r'class="config-view">(.+?)<', body.decode('utf-8'))
data = json.loads(config_match[1])
def ts_to_date(ts):
    """Convert a Unix timestamp to a ``YYYY-MM-DD`` date string at UTC+3.

    Args:
        ts: Seconds since the Unix epoch.

    Returns:
        The calendar date at UTC+3, formatted with ``'%Y-%m-%d'``.
    """
    # Timezone-aware equivalent of ``datetime.utcfromtimestamp(ts + 3 * 60 * 60)``,
    # which is deprecated since Python 3.12. UTC+3 is presumably Moscow time,
    # given that the timestamps come from a Yandex page - confirm.
    utc_plus_3 = timezone(timedelta(hours=3))
    return datetime.fromtimestamp(ts, tz=utc_plus_3).strftime('%Y-%m-%d')
# Build one small frame per city that appears in `city_map`, then stack them
# into a single table and save it.
city_frames = []
for city in data['covidData']['cities']:
    if city['name'] not in city_map:
        # Cities without a matching entry in `city_map` are ignored.
        continue
    country = city_map[city['name']]
    rows = [[ts_to_date(day['ts']), country, day['value']] for day in city['histogramDays']]
    city_frames.append(pd.DataFrame(rows, columns=['date', 'country', 'isolation']))
result = pd.concat(city_frames).reset_index(drop=True)
result.to_csv('covid-19-solution/data/mobility/mobility-yandex.csv', index=False)

In [18]:
# The download link changes over time; look up the current one at
# https://covid19-static.cdn-apple.com/covid19-mobility-data/current/v2/index.json
# and update URL below (at least the date in the file name).
URL = 'https://covid19-static.cdn-apple.com/covid19-mobility-data/2007HotfixDev45/v2/en-us/applemobilitytrends-2020-05-01.csv'
countries = pd.read_csv('covid-19-solution/data/features/countries-2.csv', na_filter=False)
apple_raw = pd.read_csv(URL)
# Keep country-level rows whose region name also appears in `countries['name']`.
is_country = apple_raw['geo_type'] == 'country/region'
is_known = apple_raw['region'].isin(countries['name'])
trends = apple_raw[is_country & is_known].drop(columns=['geo_type', 'alternative_name'])
per_mode = trends.groupby(['region', 'transportation_type']).first()
# stack() moves the remaining (per-date) columns into an unnamed index level,
# which reset_index() later exposes as 'level_1'; unstack(1) spreads
# transportation_type out into one column per mode.
long_form = per_mode.stack().unstack(1).reset_index()
renamed = long_form.rename(columns={
    'level_1': 'date',
    'driving': 'apl-driving',
    'transit': 'apl-transit',
    'walking': 'apl-walking',
})
# All columns of `countries` are kept in the merged output (no column subset is selected).
df = pd.merge(renamed, countries, left_on='region', right_on='name')
df.to_csv('covid-19-solution/data/mobility/mobility-apple.csv', index=False)
# Display one random row as a quick look at the result.
df.sample()

Unnamed: 0,region,date,apl-driving,apl-transit,apl-walking,country,name,iso_alpha2,population,area,density,lat,long,is_region,region_center
3262,Luxembourg,2020-03-25,24.68,15.96,32.97,LUX,Luxembourg,LU,625978,2590,242.0,49.8153,6.1296,0,
