In [None]:
import numpy as np
import pandas as pd
from scipy.signal import savgol_filter
from scipy.interpolate import UnivariateSpline
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
import math
from fbprophet import Prophet
import matplotlib.pyplot as plt

In [None]:
def getStates():
    """Return the first 50 ``Province_State`` values from ``data/test.csv``.

    The CSV path is relative to the current working directory. Values are
    returned in file order as a NumPy array; if the file has fewer than 50
    rows, all of them are returned.
    """
    test_frame = pd.read_csv('data/test.csv')
    province_column = test_frame['Province_State']
    return province_column.iloc[:50].values

In [None]:
def isDown(state, ds):
    """Heuristically decide whether the curves in ``ds`` are levelling off.

    With ``n`` the number of rows in ``ds``, both conditions must hold:

    1. Confirmed cases: the series is smoothed with a Savitzky-Golay filter
       (window 31, polynomial order 2), shifted by its first raw value,
       divided by the raw value 14 rows from the end and multiplied by
       ``n - 14``. The last point of that rescaled curve must be more than
       7 below ``n``.
    2. Deaths: the series is rescaled so that its last value equals ``n``
       and fitted with a quartic smoothing spline (``s=10``). The mean first
       derivative over rows ``[-28:-14]`` must be below 1.5.

    Parameters
    ----------
    state : any
        Not used by the computation; kept so existing callers keep working.
    ds : pandas.DataFrame
        Rows for a single state with numeric ``Confirmed`` and ``Deaths``
        columns, one row per day in chronological order.

    Returns
    -------
    bool
        ``True`` when both conditions hold, ``False`` otherwise. (The
        function used to fall off the end and return ``None`` in the
        negative case.)

    Notes
    -----
    The row count used to be hard-coded as 142 (and 128 = 142 - 14); it is
    now taken from the data, which gives identical results for 142 rows.
    NOTE(review): ``Confirmed[-14]`` and ``Deaths[-1]`` are used as divisors
    and are assumed to be non-zero, and the window-31 filter presumably needs
    at least 31 rows - confirm against the data actually fed in.
    """
    confirmed = ds['Confirmed'].values
    deaths = ds['Deaths'].values

    n_days = len(confirmed)
    # Flat day index 0..n-1; the spline expects a one-dimensional abscissa.
    days = list(range(n_days))

    smooth_confirmed = savgol_filter(confirmed.reshape(-1), 31, 2)
    scale_smooth_confirmed = (
        (smooth_confirmed - confirmed[0]) / confirmed[-14] * (n_days - 14)
    )

    # Rescale deaths so the series ends at n: a steadily growing curve then
    # has a slope of about 1 per day.
    scale_deaths = deaths / deaths[-1] * n_days
    deaths_spl = UnivariateSpline(days, scale_deaths, s=10, k=4)
    deaths_spl_1d = deaths_spl.derivative(n=1)

    confirmed_lagging = scale_smooth_confirmed[-1] - n_days < -7
    # Short-circuit like the original: the derivative is only evaluated when
    # the confirmed-case condition already holds.
    return bool(
        confirmed_lagging
        and np.mean(deaths_spl_1d(days)[-28:-14]) < 1.5
    )

In [None]:
def distance(x, y):
    """Distance from the point ``(x, y)`` to the diagonal ``y = x``.

    Points strictly above the diagonal (``y > x``) yield the integer 0. For
    all other points the result is the Euclidean distance between ``(x, y)``
    and ``(m, m)``, where ``m`` is the mean of ``x`` and ``y``.
    """
    above_diagonal = y > x
    if above_diagonal:
        return 0
    midpoint = (y + x) / 2
    dx = x - midpoint
    dy = y - midpoint
    return math.sqrt(dx**2 + dy**2)

In [None]:
def all_distance(data):
    """Apply ``distance`` to every ``(index, value)`` pair of ``data``.

    Returns a list with one entry per element of ``data``: entry ``k`` is
    ``distance(k, data[k])``.
    """
    return [distance(position, data[position]) for position in range(len(data))]

In [None]:
# --- Tunable forecasting parameters ------------------------------------
window = 17    # Savitzky-Golay window length used to smooth each series
degree = 3     # polynomial order of that Savitzky-Golay filter
start = 133    # first day index of the recent window the ridge trend is fitted on
alpha = 6.5    # regularisation strength passed to Ridge for the trend fit

# Province/state names to forecast, as returned by getStates().
states = getStates()

In [35]:
# Forecast `feature` for every state and collect one (26, 1) array per state
# in `res`, in the same order as `states`.
feature = 'Deaths'
total = 0  # not updated anywhere in this cell
res = []

n_days = 142   # observed days per state that the index arithmetic below assumes
horizon = 26   # number of days to forecast, i.e. day indices 142..167

# Read the training data once instead of once per state; the file does not
# change while the loop runs.
train = pd.read_csv('data/train.csv')

# The forecast day indices are identical for every state and every model.
x_test = [[n_days + i] for i in range(horizon)]

for state in states:
    ds = train[train['Province_State'] == state]
    raw = ds[feature].values

    value = savgol_filter(raw.reshape(-1), window, degree)  # smoothed series

    # Rescale the raw series so that it ends at n_days, then measure how far
    # each point lies below the diagonal. `distances`, `diff` and `max_point`
    # are not consumed by the forecast below; they are kept as diagnostics.
    scale = raw / raw[-1] * n_days
    distances = all_distance(scale)
    diff = sum(distances)
    max_point = distances.index(max(distances))

    if scale[80] > 100:
        # Grew too fast and saturated: most of the final total was already
        # reached by day 80, so extrapolate a straight line through the last
        # 10 smoothed points.
        x = [[i] for i in range(n_days)]
        model = Pipeline([
            ("poly", PolynomialFeatures(degree=1)),
            # Step is labelled "lasso_reg" but is Ridge with alpha=0.
            ("lasso_reg", Ridge(alpha=0))
        ])
        model.fit(x[-10:], value[-10:])
        # Column vector, to match the shape produced by the other branch.
        y_hat = model.predict(x_test).reshape(-1, 1)
    else:
        # Default: ridge-regularised linear trend over the smoothed values
        # from day `start` onwards.
        y = np.array([value[start:]]).reshape(-1, 1)
        x = [[i + start] for i in range(n_days - start)]

        # Squared smoothing residual over the fit window, relative to the
        # final raw value (diagnostic only).
        diff = np.sum((raw[start:] - value[start:])**2 / raw[-1]**2)

        model = Ridge(alpha=alpha, fit_intercept=True)
        model.fit(x, y)

        y_hat = model.predict(x_test)
        # Run-over-rise of the forecast after rescaling days by n_days and
        # values by the last raw value: small positive means steep growth,
        # negative means the forecast is decreasing.
        slope = horizon / (y_hat[-1] - y_hat[0]) * raw[-1] / n_days

        if isDown(state, ds):
            if slope < 1.6:
                # The curves look like they are flattening but the linear
                # forecast is still steep: damp it with a log-squared term.
                sub = [[np.log(1 + i) ** 2] for i in range(horizon)]
                y_hat = y_hat - sub
        elif slope < 0:
            # Impossible slope (decreasing forecast): refit a quadratic on
            # the smoothed values from day 50 onwards.
            y = np.array([value[50:]]).reshape(-1, 1)
            x = [[i + 50] for i in range(n_days - 50)]

            model = Pipeline([
                ("poly", PolynomialFeatures(degree=2)),
                ("lasso_reg", Ridge(alpha=0))
            ])
            model.fit(x, y)

            y_hat = model.predict(x_test)

    res.append(y_hat)

In [36]:
# Interleave the per-state forecasts day by day: all states for the first
# forecast day, then all states for the second, and so on.
rerange = [
    res[state_idx][day_idx]
    for day_idx in range(len(res[0]))
    for state_idx in range(len(res))
]

# One row per (day, state) pair, written to "<feature>.csv" without the index.
df = pd.DataFrame(rerange)
df.to_csv("{}.csv".format(feature), index=False, sep=',')