In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import numpy as np
import seaborn as sns
import firebase_admin
from firebase_admin import credentials, firestore

In [2]:
# Source CSVs: JHU CSSE COVID-19 time series (confirmed, deaths, recovered).
# NOTE(review): upstream appears to have later replaced these files with
# "time_series_covid19_*_global.csv" -- confirm the URLs still resolve.
confirmed_url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv'
deaths_url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Deaths.csv'
recovered_url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Recovered.csv'

# Each frame is downloaded on every run; nothing is cached locally.
confirmedCases_raw = pd.read_csv(confirmed_url)
deaths_raw = pd.read_csv(deaths_url)
recoveries_raw = pd.read_csv(recovered_url)

In [3]:
def cleanData(data):
    """Return a copy of ``data`` with province-level rows relabelled.

    For every row whose "Province/State" is present and differs from its
    "Country/Region", the "Country/Region" value is replaced by
    ``"<Province/State> (<Country/Region>)"``. Rows with a missing province,
    or a province equal to the country, are left unchanged.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns "Province/State" and "Country/Region".

    Returns
    -------
    pandas.DataFrame
        A new frame; ``data`` itself is not modified.
    """
    df = data.copy()
    province = df["Province/State"]
    country = df["Country/Region"]
    needs_label = province.notna() & (province != country)
    # Guard: an all-missing province column can have a float dtype, and
    # concatenating a string onto it would raise even with nothing selected.
    if needs_label.any():
        # Single .loc assignment instead of df[col][index] = ...: chained
        # indexing triggers SettingWithCopyWarning and, with pandas
        # Copy-on-Write enabled, does not update ``df`` at all.
        df.loc[needs_label, "Country/Region"] = (
            province[needs_label] + " (" + country[needs_label] + ")"
        )
    return df

In [4]:
def send_firebase(df, collection_name=u'contamination_areas', del_collection=False):
    """Upload every row of ``df`` as a new document in a Firestore collection.

    Parameters
    ----------
    df : pandas.DataFrame
        Each row is converted to a dict and added as one document.
    collection_name : str
        Name of the target Firestore collection.
    del_collection : bool
        When true, all existing documents in the collection are deleted
        before the new ones are added.

    Credentials are read from "../credentials-firebase.json", relative to the
    working directory.
    """
    def delete_collection(coll_ref, batch_size):
        # Delete page by page; a full page means more documents may remain.
        while True:
            removed = 0
            for snapshot in coll_ref.limit(batch_size).stream():
                snapshot.reference.delete()
                removed += 1
            if removed < batch_size:
                break
        print("Collection deleted!")

    cred = credentials.Certificate(r"../credentials-firebase.json")
    try:
        firebase_admin.initialize_app(cred)
    except ValueError:
        # Presumably the default app already exists (e.g. the cell was re-run
        # in the same kernel); carry on with the existing app.
        pass
    records = [dict(row) for _, row in df.iterrows()]
    store = firestore.client()
    collection = store.collection(collection_name)
    if del_collection:
        delete_collection(collection, 500)
    for record in records:
        collection.add(record)
    print("Collection updated!")

In [5]:
def generateNewestAreas(df, day=None):
    """Build the per-area snapshot for a single day of a time-series table.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain "Province/State", "Country/Region", "Lat", "Long" and a
        column named after the requested day in un-padded ``M/D/YY`` form
        (e.g. "3/5/20").
    day : datetime.date or datetime.datetime, optional
        Day to extract. Defaults to yesterday, based on the local clock.

    Returns
    -------
    pandas.DataFrame
        Columns "country", "latitude", "longitude", "num_of_infected", "id"
        (the row's index label in ``df``), "marker_type" (always 1) and
        "radius" (``log(num_of_infected) + 1``). Rows whose count is 0 are
        dropped.

    Raises
    ------
    KeyError
        If any required column, including the day column, is missing.
    """
    if day is None:
        day = datetime.now() - timedelta(days=1)
    # Build the header without zero padding on month AND day. Stripping only
    # the first character of "%m/%d/%y" left days 1-9 as e.g. "3/05/20".
    dayCol = "{}/{}/{}".format(day.month, day.day, day.strftime("%y"))
    colMapping = {
        "Province/State": "province",
        "Country/Region": "country",
        "Lat": "latitude",
        "Long": "longitude",
        dayCol: "num_of_infected",
    }
    dayDf = df[list(colMapping)].rename(columns=colMapping)
    # Drop the leading "province" column; .copy() so the column assignments
    # below write to an independent frame rather than a slice of ``df``.
    dayDf = dayDf.iloc[:, 1:].copy()
    dayDf["id"] = dayDf.index
    dayDf["marker_type"] = 1
    # Filter zero counts before taking the logarithm, so log(0) = -inf (and
    # its RuntimeWarning) is never computed for rows that are discarded anyway.
    dayDf = dayDf[dayDf["num_of_infected"] != 0].copy()
    dayDf["radius"] = np.log(dayDf["num_of_infected"]) + 1
    return dayDf

def generateDailyData():
    """Combine the latest confirmed, recovery and death counts per area.

    Reads the notebook-level frames ``confirmedCasesDf``, ``recoveriesDf`` and
    ``deathsDf``. The confirmed snapshot is the base table; recovery and death
    counts are attached by index as "num_of_recoveries" and "num_of_deaths",
    and any value still missing afterwards is filled with 0.
    """
    daily = generateNewestAreas(confirmedCasesDf)
    extra_counts = (
        ("num_of_recoveries", recoveriesDf),
        ("num_of_deaths", deathsDf),
    )
    for column, frame in extra_counts:
        # Each snapshot exposes its count under "num_of_infected".
        daily[column] = generateNewestAreas(frame)["num_of_infected"]
    return daily.fillna(0)

In [6]:
# Apply cleanData to each raw table, in the order confirmed, deaths, recoveries.
confirmedCasesDf, deathsDf, recoveriesDf = map(
    cleanData, (confirmedCases_raw, deaths_raw, recoveries_raw)
)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """


In [7]:
# Build the combined per-area snapshot (confirmed, recoveries, deaths).
# The bare name on the last line renders the frame as this cell's output.
dayDf = generateDailyData()
dayDf

Unnamed: 0,country,latitude,longitude,num_of_infected,id,marker_type,radius,num_of_recoveries,num_of_deaths
0,Thailand,15.0000,101.0000,411,0,1,7.018593,42.0,1.0
1,Japan,36.0000,138.0000,1007,1,1,7.914731,232.0,35.0
2,Singapore,1.2833,103.8333,432,2,1,7.068426,140.0,2.0
3,Nepal,28.1667,84.2500,1,3,1,1.000000,1.0,0.0
4,Malaysia,2.5000,112.5000,1183,4,1,8.075809,114.0,4.0
...,...,...,...,...,...,...,...,...,...
477,Northwest Territories (Canada),64.8255,-124.8457,1,477,1,1.000000,0.0,0.0
478,Cape Verde,15.1111,-23.6167,1,478,1,1.000000,0.0,0.0
479,East Timor,-8.5500,125.5600,1,479,1,1.000000,0.0,0.0
480,Eritrea,15.1794,39.7823,1,480,1,1.000000,0.0,0.0


In [8]:
# send_firebase(dayDf, del_collection=True)

### Generate time series for each country

In [9]:
def getFirstOccurance(series, skip=5):
    """Return the label of the first non-zero entry after the leading entries.

    Parameters
    ----------
    series : pandas.Series
        One row of a time-series table.
    skip : int, optional
        Number of leading entries that are never inspected. Defaults to 5.
        NOTE(review): the tables built in this notebook have four leading
        metadata columns, so the default also skips the first date column --
        confirm whether that is intended.

    Returns
    -------
    The index label of the first inspected entry that is not equal to 0, or
    ``None`` when there is none. This includes series with ``skip`` or fewer
    entries, which previously raised UnboundLocalError.
    """
    for position, (label, value) in enumerate(series.items()):
        if position >= skip and value != 0:
            return label
    return None

def getPositiveData(firstOccurence, series, name):
    """Keep the columns from ``firstOccurence`` onward and prefix their names.

    Parameters
    ----------
    firstOccurence : label or None
        Column label at which the kept range starts. If no column equals it
        (for example when it is ``None``), no columns are kept.
    series : pandas.DataFrame
        Despite the parameter name, the ``.columns`` access requires a
        DataFrame.
    name : str
        Prefix; each kept column is renamed to ``"<name>_<original label>"``.

    Returns
    -------
    pandas.DataFrame
        The selected columns, renamed, with the original index.
    """
    labels = [label for label, _ in series.items()]
    # Position of the first matching column, or past the end when none matches.
    start = next(
        (pos for pos, label in enumerate(labels) if label == firstOccurence),
        len(labels),
    )
    selected = series[labels[start:]]
    selected.columns = [name + "_" + str(label) for label in selected.columns]
    return selected
    
def generateCountryData(country):
    """Assemble one area's confirmed/deaths/recoveries time series as strings.

    Reads the notebook-level frames ``confirmedCasesDf``, ``deathsDf`` and
    ``recoveriesDf``. For each, the rows whose "Country/Region" equals
    ``country`` are selected and trimmed to start at the first non-zero entry
    reported by ``getFirstOccurance`` for the first matching row. Columns are
    prefixed with "confirmed", "deaths" or "recoveries", the three pieces are
    joined on their index, and every value is cast to ``str``.

    Raises IndexError if ``country`` has no row in one of the three frames.
    """
    sources = (
        ("confirmed", confirmedCasesDf),
        ("deaths", deathsDf),
        ("recoveries", recoveriesDf),
    )
    trimmed = []
    for label, frame in sources:
        rows = frame[frame["Country/Region"] == country]
        # The start column is taken from the first matching row only.
        start = getFirstOccurance(rows.iloc[0])
        trimmed.append(getPositiveData(start, rows, label))
    confirmed, deaths, recoveries = trimmed
    combined = confirmed.join(deaths).join(recoveries)
    return combined.astype(str)

In [10]:
# For every area in the daily snapshot, build its time-series frame and join
# the snapshot row (transposed into a one-row frame) onto it by index.
countryDfList = []
for index, row in dayDf.iterrows():
    # generateCountryData looks the area up by its "country" label.
    countryDf = generateCountryData(row["country"])
    countryDfList.append(countryDf.join(pd.DataFrame(row).transpose()))