In [1]:
import pandas as pd
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, BatchNormalization
from datetime import datetime, timedelta,date
from mapper import mapper_ru,mapper_global

Using TensorFlow backend.


In [2]:
# Directory that holds the daily time-series CSV files read by the loading cells.
path_time_series = './data/COVID-19_plus_Russia/csse_covid_19_data/csse_covid_19_time_series/'
# NOTE(review): the global series is the *recovered* file, yet it ends up in the
# `prediction_confirmed` submission column — confirm this is the intended input.
recovered_global_name = 'time_series_covid19_recovered_global.csv'
# NOTE: despite the "recovered" variable name, this is the *confirmed* cases file for Russia.
recovered_ru_name = 'time_series_covid19_confirmed_RU.csv'
deaths_global_name = 'time_series_covid19_deaths_global.csv'
deaths_ru_name = 'time_series_covid19_deaths_RU.csv'
# The four names below are not referenced by any of the cells shown in this notebook.
path = './data/'
countries_data_name = 'countries.csv'
russia_regions_name = 'russia_regions.csv'
sample_submission = 'sample_submission.csv'

# Prediction

In [3]:
class Prediction:
    """Roll a fitted one-step-ahead model forward day by day.

    The model must expose ``predict(x)`` taking an array of shape
    ``(1, n_steps, n_features)`` and returning an array whose first row holds
    the ``n_features`` values forecast for the next day.

    Attributes:
        columns_df: labels used as column names of the prediction frame.
        model: the fitted model.
        last_date: last observed date (a ``datetime.date``).
        end_date: first date that is NOT forecast (exclusive upper bound).
        predion_date: list of forecast dates, ``last_date + 1 day`` up to
            ``end_date`` exclusive (historic spelling kept: other cells read it).
        size_prediction: number of forecast dates.
    """

    def __init__(self, model, last_date, columns_df, end_date=None):
        """Store the model and pre-compute the forecast dates.

        Args:
            model: fitted model with a ``predict`` method.
            last_date: last date present in the training data.
            columns_df: one label per feature, in dataset column order.
            end_date: exclusive end of the horizon; defaults to 2021-01-01,
                the value that used to be hard-coded.
        """
        self.columns_df = columns_df
        self.model = model
        self.last_date = last_date
        self.end_date = date(2021, 1, 1) if end_date is None else end_date
        self.predion_date = self.__make_date_predition()
        self.size_prediction = len(self.predion_date)

    def predict(self, dataset, n_features, n_steps):
        """Forecast every date in ``predion_date``.

        Args:
            dataset: 2-D array, one row per day and one column per feature.
            n_features: number of columns of ``dataset``.
            n_steps: number of trailing days the model consumes per prediction.

        Returns:
            DataFrame with one column per label in ``columns_df`` plus ``date``.

        Raises:
            ValueError: if ``n_steps`` is smaller than 1 or ``dataset`` has
                fewer than ``n_steps`` rows.
        """
        dataset = np.asarray(dataset)
        if n_steps < 1:
            raise ValueError('n_steps must be at least 1')
        if dataset.shape[0] < n_steps:
            raise ValueError('dataset has fewer rows than n_steps')
        # Misspelled attribute name kept so existing readers of it keep working.
        self.dateset = dataset
        matrix_prediction = self.__make_prediction_matrix(n_features, n_steps)
        return self.__make_dataframe_prediction(matrix_prediction)

    def __make_dataframe_prediction(self, matrix_prediction):
        """Wrap the list of predicted rows in a labelled frame with a date column."""
        df = pd.DataFrame(matrix_prediction, columns=self.columns_df)
        df['date'] = self.predion_date
        return df

    def __make_prediction_matrix(self, n_features, n_steps):
        """Predict ``size_prediction`` rows, feeding each prediction back as input."""
        # Sliding window of exactly n_steps rows. Slicing window[1:] (instead of
        # the former dataset[-n_steps+1:]) stays correct for n_steps == 1, where
        # the old expression degenerated to "the whole dataset".
        window = self.dateset[-n_steps:, :]
        result = []
        for _ in range(self.size_prediction):
            y_pred = self.model.predict(window[np.newaxis, ...])[0]
            result.append(list(y_pred))
            next_row = np.reshape(y_pred, (1, n_features))
            window = np.vstack((window[1:], next_row))
        return result

    def __make_date_predition(self):
        """List every date after ``last_date`` and strictly before ``end_date``."""
        one_day = timedelta(days=1)
        current = self.last_date + one_day
        res_date = []
        while current < self.end_date:
            res_date.append(current)
            current += one_day
        return res_date
        
        
    
class Submission:
    """Reshape two wide prediction frames into one long submission frame."""

    def __init__(self,columns_df,date_prediction):
        """
        Args:
            columns_df: iterable of region labels; each must be a column of the
                frames later passed to ``submit``.
            date_prediction: sequence of forecast dates, one per prediction row.
        """
        self.columns_df = columns_df
        self.date_prediction = date_prediction

    def submit(self,rec_df,deaths_df,mapper):
        """Build the long-format submission.

        Args:
            rec_df: wide frame whose column ``c`` fills ``prediction_confirmed``
                for region ``c``.
            deaths_df: wide frame whose column ``c`` fills ``prediction_deaths``.
            mapper: passed to ``Series.map`` to translate region labels.

        Returns:
            DataFrame with columns ``date``, ``region``, ``prediction_confirmed``
            and ``prediction_deaths``, indexed 0..n-1, one block of rows per region.
        """
        out_columns = ['date', 'region', 'prediction_confirmed', 'prediction_deaths']
        # Collect the per-region frames and concatenate once: growing a frame
        # inside the loop is quadratic and starts from a column-less frame.
        frames = []
        for column in self.columns_df:
            part = pd.DataFrame({'date': self.date_prediction})
            part['region'] = column
            # Series assignment aligns on the row index, as before.
            part['prediction_confirmed'] = rec_df[column]
            part['prediction_deaths'] = deaths_df[column]
            frames.append(part)

        if not frames:
            # No regions requested: return an empty, correctly shaped frame
            # instead of failing on a missing `region` column.
            return pd.DataFrame(columns=out_columns)

        df_res = pd.concat(frames, axis=0, ignore_index=True)
        df_res['region'] = df_res['region'].map(mapper)
        return df_res

# Model timeseries

In [4]:
# Number of training epochs passed to every model.fit(...) call in this notebook.
EPOCHS = 1000

In [5]:
def split_sequences(sequences, n_steps):
    """Cut a multivariate series into supervised (window, next row) pairs.

    Args:
        sequences: 2-D array, one row per time step.
        n_steps: number of consecutive rows in each input window.

    Returns:
        Tuple ``(X, y)`` of arrays: ``X[k]`` is rows ``k .. k+n_steps-1`` and
        ``y[k]`` is row ``k+n_steps``. Both are empty arrays when the series
        is too short to yield a single pair.
    """
    total_rows = sequences.shape[0]
    # A window may start anywhere its target row still exists; the count is
    # capped at total_rows so it matches a plain scan over every row.
    starts = range(min(total_rows, total_rows - n_steps))
    windows = [sequences[start:start + n_steps, :] for start in starts]
    targets = [sequences[start + n_steps, :] for start in starts]
    return np.array(windows), np.array(targets)

def build_model_simple(n_steps,n_features):
    """Build and compile the stacked-LSTM regressor used for every series.

    Args:
        n_steps: length of each input window (time steps).
        n_features: number of series per time step; also the output width.

    Returns:
        A compiled ``Sequential`` model (Adam optimizer, MSE loss).
    """
    layer_stack = [
        # Recurrent part: two LSTM layers, the first one emitting full sequences.
        LSTM(n_features, activation='relu', return_sequences=True, input_shape=(n_steps, n_features)),
        LSTM(n_features, activation='relu'),
        # Dense head narrowing 3x -> 2x -> 1x, with dropout after each hidden layer.
        Dense(n_features*3,activation='relu'),
        Dropout(0.5),
        Dense(n_features*2,activation='relu'),
        Dropout(0.5),
        Dense(n_features,activation='relu'),
        Dropout(0.5),
        # Linear output layer: one value per feature.
        Dense(n_features),
    ]
    model = Sequential(layer_stack)
    model.compile(optimizer='adam', loss='mse')
    return model

In [6]:
def build_model_recovered_ru(n_steps,n_features):
    """Build and compile a stacked-LSTM regressor with a single dropout layer.

    Args:
        n_steps: length of each input window (time steps).
        n_features: number of series per time step; also the output width.

    Returns:
        A compiled ``Sequential`` model (Adam optimizer, MSE loss).
    """
    network = Sequential()
    layer_stack = (
        LSTM(n_features, activation='relu', return_sequences=True, input_shape=(n_steps, n_features)),
        LSTM(n_features, activation='relu'),
        Dense(n_features*3,activation='relu'),
        # The only dropout layer sits after the widest dense layer.
        Dropout(0.5),
        Dense(n_features*2,activation='relu'),
        Dense(n_features,activation='relu'),
        Dense(n_features),
    )
    for layer in layer_stack:
        network.add(layer)
    network.compile(optimizer='adam', loss='mse')
    return network

def build_model_deaths_ru(n_steps,n_features):
    """Build and compile a small stacked-LSTM regressor without dropout.

    Args:
        n_steps: length of each input window (time steps).
        n_features: number of series per time step; also the output width.

    Returns:
        A compiled ``Sequential`` model (Adam optimizer, MSE loss).
    """
    network = Sequential([
        # First LSTM is twice as wide as the input and returns the full sequence.
        LSTM(n_features*2, activation='relu', return_sequences=True, input_shape=(n_steps, n_features)),
        LSTM(n_features, activation='relu'),
        Dense(n_features*2,activation='relu'),
        Dense(n_features),
    ])
    network.compile(optimizer='adam', loss='mse')
    return network

# RU

In [7]:
def pre_ru(df):
    """Keep only the column at position 6 and every column from position 11 on.

    Args:
        df: raw frame as read from the CSV file.

    Returns:
        A new frame: column 6 first, followed by columns 11, 12, ...
    """
    name_part = df.iloc[:, 6:7]
    series_part = df.iloc[:, 11:]
    return pd.concat([name_part, series_part], axis=1)
def get_dataset_ru(df):
    """Turn a region-by-date frame into a date-by-region value matrix.

    Args:
        df: frame whose first column is ``Province_State`` and whose remaining
            columns are the per-date values.

    Returns:
        2-D array with one row per date column and one column per row of ``df``.

    Raises:
        KeyError: if ``df`` has no ``Province_State`` column.
    """
    if 'Province_State' not in df.columns:
        raise KeyError('Province_State')
    # Read the value columns directly. The previous version joined them to a
    # freshly built name frame by index, which padded the matrix with NaN rows
    # whenever `df` did not carry a default 0..n-1 index.
    return df.iloc[:, 1:].values.T

In [8]:
# Read the two Russian time-series files from path_time_series.
recovered_ru = pd.read_csv(path_time_series+ recovered_ru_name)
deaths_ru = pd.read_csv(path_time_series + deaths_ru_name)
# Reduce each frame to the columns kept by pre_ru; the raw frames are overwritten.
recovered_ru = pre_ru(recovered_ru)
deaths_ru = pre_ru(deaths_ru)

RECOVERED RU

In [9]:
# Value matrix for the first Russian series: one row per date, one column per region.
dataset_rec = get_dataset_ru(recovered_ru) 

# n_steps and n_features are notebook-level names; later cells rebind them.
n_steps = 3
n_features = dataset_rec.shape[1]
model_rec = build_model_simple(n_steps,n_features)
# Windows of n_steps consecutive rows, each paired with the row that follows.
X_rec,y_rec = split_sequences(dataset_rec,n_steps)
# The fit result is stored but not read again in the cells shown ("hisoty" spelling kept).
hisoty_rec = model_rec.fit(X_rec, y_rec, epochs=EPOCHS, verbose=0)




DEATHS RU

In [10]:

# Value matrix for Russian deaths: one row per date, one column per region.
dataset_deaths = get_dataset_ru(deaths_ru) 
# This cell rebinds the notebook-level n_steps / n_features set by the previous training cell.
n_steps = 3
n_features =dataset_deaths.shape[1]
model_deaths = build_model_simple(n_steps,n_features)


# Windows of n_steps consecutive rows, each paired with the row that follows.
X_deaths,y_deaths = split_sequences(dataset_deaths,n_steps)
# The fit result is stored but not read again in the cells shown ("hisoty" spelling kept).
hisoty_deaths = model_deaths.fit(X_deaths, y_deaths, epochs=EPOCHS, verbose=0)

SOLUTION

In [11]:
# Region names, in the same order as the columns of dataset_rec.
columns_ru = recovered_ru['Province_State']
# The last column header of the frame, parsed as a date, is the final observed day.
last_date = pd.to_datetime(recovered_ru.columns.values[-1:]).date[0]

In [12]:
# Each predictor gets the labels of the frame its own dataset was built from and the
# feature count of its own dataset. Previously both used the labels of recovered_ru and
# whatever n_features the last training cell left behind, which silently mislabels the
# deaths predictions if the two frames ever differ in row order.
predictioner_rec = Prediction(model_rec,last_date,columns_ru)
df_pred_rec = predictioner_rec.predict(dataset_rec,dataset_rec.shape[1],n_steps)
predictioner_deaths = Prediction(model_deaths,last_date,deaths_ru['Province_State'])
df_pred_deaths = predictioner_deaths.predict(dataset_deaths,dataset_deaths.shape[1],n_steps)
# Submission looks every region up by name in both prediction frames.
submissioner = Submission(columns_ru,predictioner_rec.predion_date)
subm_ru = submissioner.submit(df_pred_rec,df_pred_deaths,mapper_ru)

# GLOBAL

In [13]:
def pre_global(df, columns_uniq):
    """Select the country/date columns and collapse multi-row countries.

    Keeps column 1 (``Country/Region``) and every column from position 4 on.
    For each name in ``columns_uniq`` all rows of that country are replaced by
    one row holding their column-wise sum; that row is appended at the end.

    Args:
        df: raw frame as read from the CSV file.
        columns_uniq: country names whose rows must be summed into one.

    Returns:
        New frame indexed 0..n-1: untouched countries in their original order,
        followed by the aggregated countries in ``columns_uniq`` order.
    """
    key = 'Country/Region'
    df = pd.concat([df.iloc[:, 1:2], df.iloc[:, 4:]], axis=1)
    for country in columns_uniq:
        is_country = df[key] == country
        if not is_country.any():
            # A name with no rows (e.g. a misspelling) used to add an all-zero
            # phantom country, i.e. an extra feature for the model. Skip it.
            continue
        totals = df[is_country].iloc[:, 1:].sum(axis=0)
        summed_row = totals.to_frame().T
        summed_row.insert(0, key, country)
        df = pd.concat([df[~is_country], summed_row], axis=0)
    return df.reset_index(drop=True)

def get_dataset_global(df):
    """Turn a country-by-date frame into a date-by-country value matrix.

    Args:
        df: frame whose first column is ``Country/Region`` and whose remaining
            columns are the per-date values.

    Returns:
        2-D array with one row per date column and one column per row of ``df``.

    Raises:
        KeyError: if ``df`` has no ``Country/Region`` column.
    """
    if 'Country/Region' not in df.columns:
        raise KeyError('Country/Region')
    # Read the value columns directly. The previous version joined them to a
    # freshly built name frame by index, which padded the matrix with NaN rows
    # whenever `df` did not carry a default 0..n-1 index.
    return df.iloc[:, 1:].values.T

In [14]:
# Read the two global time-series files from path_time_series.
recovered_global = pd.read_csv(path_time_series+ recovered_global_name)
deaths_global = pd.read_csv(path_time_series + deaths_global_name)

In [15]:
# Countries whose rows are summed into a single row by pre_global (presumably the ones
# the source files list per province — verify against the CSVs).
# The misspelled "Demark" entry was removed: it matches no row, and pre_global turns a
# name without rows into an all-zero phantom country.
uniq_rec = ['China', 'Australia', 'Denmark', "France", 'Netherlands','United Kingdom']
uniq_deaths = ['China', 'Australia', 'Denmark', "France", 'Netherlands','United Kingdom','Canada']

# The raw frames are overwritten with their aggregated versions.
recovered_global = pre_global(recovered_global,uniq_rec)
deaths_global = pre_global(deaths_global,uniq_deaths)

RECOVERED GLOBAL

In [16]:

# Value matrix for the global recovered series: one row per date, one column per country.
# Rebinds dataset_rec / model_rec / X_rec / y_rec previously used for the Russian data.
dataset_rec = get_dataset_global(recovered_global) 

# n_steps and n_features are notebook-level names that this cell rebinds.
n_steps = 3
n_features = dataset_rec.shape[1]
model_rec = build_model_simple(n_steps,n_features)


# Windows of n_steps consecutive rows, each paired with the row that follows.
X_rec,y_rec = split_sequences(dataset_rec,n_steps)
# The fit result is stored but not read again in the cells shown ("hisoty" spelling kept).
hisoty_rec = model_rec.fit(X_rec, y_rec, epochs=EPOCHS, verbose=0)

DEATHS GLOBAL

In [17]:
# Value matrix for global deaths: one row per date, one column per country.
# Rebinds dataset_deaths / model_deaths / X_deaths / y_deaths used for the Russian data.
dataset_deaths = get_dataset_global(deaths_global) 

# n_steps and n_features are notebook-level names that this cell rebinds.
n_steps = 3
n_features = dataset_deaths.shape[1] 
model_deaths = build_model_simple(n_steps,n_features)
# Windows of n_steps consecutive rows, each paired with the row that follows.
X_deaths,y_deaths = split_sequences(dataset_deaths,n_steps)
# The fit result is stored but not read again in the cells shown ("hisoty" spelling kept).
hisoty_deaths = model_deaths.fit(X_deaths, y_deaths, epochs=EPOCHS, verbose=0)


SOLUTION

In [18]:
# Country names, in the same order as the columns of the recovered dataset_rec.
columns_global = recovered_global['Country/Region']
# The last column header of the frame, parsed as a date, is the final observed day.
last_date = pd.to_datetime(recovered_global.columns.values[-1:]).date[0]


In [19]:
# The recovered and deaths frames are aggregated with different country lists, and
# pre_global moves every aggregated country to the end, so their row orders differ.
# Each predictor is therefore labelled with the country names of its OWN frame (and gets
# the feature count of its OWN dataset); labelling deaths with columns_global attached
# the deaths predictions to the wrong countries.
predictioner_rec = Prediction(model_rec,last_date,columns_global)
df_pred_rec = predictioner_rec.predict(dataset_rec,dataset_rec.shape[1],n_steps)
predictioner_deaths = Prediction(model_deaths,last_date,deaths_global['Country/Region'])
df_pred_deaths = predictioner_deaths.predict(dataset_deaths,dataset_deaths.shape[1],n_steps)
# Submission looks every country up by name in both prediction frames.
submissioner = Submission(columns_global,predictioner_rec.predion_date)
subm_global = submissioner.submit(df_pred_rec,df_pred_deaths,mapper_global)

# Union Solution

In [20]:
# Stack the Russian-region and the country-level submissions into one frame.
solution_union = pd.concat([subm_ru,subm_global],axis=0)
# Drop every row that contains a NaN — presumably this is what removes regions for which
# the mapper produced no code; verify against mapper_ru / mapper_global.
solution_union = solution_union.dropna()
# Renumber the remaining rows 0..n-1.
solution_union.index = np.arange(solution_union.shape[0])

In [21]:

# Counts cannot be negative or fractional: negative predictions become 0, the rest are
# rounded with Python's built-in round().
solution_union.prediction_confirmed = solution_union.prediction_confirmed.apply(lambda x: round(x) if x>=0 else 0)
solution_union.prediction_deaths = solution_union.prediction_deaths.apply(lambda x: round(x) if x>=0 else 0)

In [22]:
# Sanity check: list the distinct region codes present in the final submission.
solution_union.region.unique()

array(['RU-ALT', 'RU-AMU', 'RU-ARK', 'RU-AST', 'RU-BEL', 'RU-BRY',
       'RU-VLA', 'RU-VGG', 'RU-VLG', 'RU-VOR', 'RU-YEV', 'RU-ZAB',
       'RU-IVA', 'RU-IRK', 'RU-KB', 'RU-KGD', 'RU-KLU', 'RU-KAM', 'RU-KC',
       'RU-KEM', 'RU-KIR', 'RU-KOS', 'RU-KDA', 'RU-KYA', 'RU-KGN',
       'RU-KRS', 'RU-LEN', 'RU-LIP', 'RU-MAG', 'RU-MOW', 'RU-MOS',
       'RU-MUR', 'RU-NEN', 'RU-NIZ', 'RU-NGR', 'RU-NVS', 'RU-OMS',
       'RU-ORE', 'RU-ORL', 'RU-PNZ', 'RU-PER', 'RU-PRI', 'RU-PSK',
       'RU-AD', 'RU-AL', 'RU-BA', 'RU-BU', 'RU-DA', 'RU-IN', 'RU-KL',
       'RU-KR', 'RU-KO', 'UA-43', 'RU-ME', 'RU-MO', 'RU-SA', 'RU-SE',
       'RU-TA', 'RU-TY', 'RU-KK', 'RU-CU', 'RU-ROS', 'RU-RYA', 'RU-SAM',
       'RU-SPE', 'RU-SAR', 'RU-SAK', 'RU-SVE', 'UA-40', 'RU-SMO',
       'RU-STA', 'RU-TAM', 'RU-TVE', 'RU-TOM', 'RU-TUL', 'RU-TYU',
       'RU-UD', 'RU-ULY', 'RU-KHA', 'RU-KHM', 'RU-CHE', 'RU-CE', 'RU-CHU',
       'RU-YAN', 'RU-YAR', 'AFG', 'ALB', 'DZA', 'AND', 'AGO', 'ATG',
       'ARG', 'ARM', 'AUT', 'AZE'

In [23]:
# Write the submission file.
# NOTE(review): to_csv also writes the row index as an unnamed first column by default —
# confirm the expected submission format wants it.
solution_union.to_csv('./data/solution.csv')