In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns # for plotting
from scipy.stats import zscore # for standardizing data
from sklearn.impute import SimpleImputer # for imputing missing values
from sklearn.preprocessing import StandardScaler # for standardizing data
from sklearn.svm import SVR # for SVM regression

In [None]:
def checkMissingData(path='data.csv'):
    """Print a per-column summary of a CSV file so missing values can be spotted.

    Parameters
    ----------
    path : str, default 'data.csv'
        CSV file to read. The default keeps the original zero-argument call
        working.

    Returns
    -------
    None
        The summary is written to stdout by ``DataFrame.info``.
    """
    df = pd.read_csv(path)
    # DataFrame.info() prints its report itself and returns None; wrapping it
    # in print() would append a stray "None" line after the report.
    df.info()
    
# Print the column summary of the raw CSV before any imputation is applied.
checkMissingData()

In [None]:
def missingDataHandler(path='data.csv', columns=('Daily tests', 'Cases', 'Deaths')):
    """Load a CSV and fill missing values in the given columns with the column mean.

    Parameters
    ----------
    path : str, default 'data.csv'
        CSV file to read.
    columns : iterable of str, default ('Daily tests', 'Cases', 'Deaths')
        Numeric columns whose NaN entries are replaced by that column's mean
        (computed over the whole file, ignoring NaN).

    Returns
    -------
    pandas.DataFrame
        The loaded frame with the selected columns stored as float64 and
        mean-imputed. A column that is entirely NaN has no mean and is left
        as NaN.

    Side effects
    ------------
    Prints the ``DataFrame.info`` summary of the imputed frame to stdout.
    """
    df = pd.read_csv(path)
    cols = list(columns)
    # Cast to float64 so the output dtype is the same whether or not a column
    # actually contained missing values.
    numeric = df[cols].astype('float64')
    # fillna with a Series of means fills each column with its own mean.
    df[cols] = numeric.fillna(numeric.mean())
    # info() prints its own report and returns None, so it is not wrapped in
    # print() (that would emit an extra "None" line).
    df.info()
    return df
    
# Load the CSV with the selected columns mean-imputed; later cells start from this frame.
dataFrame = missingDataHandler()

In [None]:
def getGreeceData(df, entity='Greece', end_date='2021-01-01'):
    """Return the rows of one entity up to a cut-off date, as an independent copy.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with at least an 'Entity' and a 'Date' column.
    entity : str, default 'Greece'
        Value of 'Entity' to keep.
    end_date : str, default '2021-01-01'
        Upper bound for 'Date'. The bound is inclusive, so with the default
        the row dated 2021-01-01 itself is kept.
        NOTE(review): if the intended last day is 2020-12-31, pass
        ``end_date='2020-12-31'`` - confirm which cut-off is wanted.

    Returns
    -------
    pandas.DataFrame
        A copy of the matching rows (original index preserved). Returning a
        copy lets callers add or overwrite columns without triggering
        SettingWithCopyWarning or touching ``df``.
    """
    # The comparison uses whatever dtype 'Date' has; this assumes ISO
    # 'YYYY-MM-DD' strings (or datetimes), where ordering is chronological.
    mask = (df['Entity'] == entity) & (df['Date'] <= end_date)
    return df.loc[mask].copy()

# Restrict the imputed frame to the rows selected by getGreeceData.
dataFrame2 = getGreeceData(dataFrame)

In [None]:
def addPositivityPercent(df):
    """Return a copy of ``df`` with 'Daily Cases' and 'Positivity percent' appended.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with 'Cases', 'Daily tests' and 'Date' columns, in row order.
        It is not modified.

    Returns
    -------
    pandas.DataFrame
        A copy in which
        * 'Date' is converted with ``pd.to_datetime``;
        * 'Daily Cases' (appended first) is the absolute row-to-row difference
          of 'Cases';
        * 'Positivity percent' (appended last) is that difference divided by
          'Daily tests'. Despite the name the value is a ratio, not
          multiplied by 100.
        Missing values in both new columns (the first row, which has no
        predecessor, and any division that is undefined) are replaced by the
        mean of the remaining values of that column.
    """
    # Work on a copy: the input is typically a filtered slice of a larger
    # frame, and assigning columns to it in place raises
    # SettingWithCopyWarning and silently changes the caller's object.
    out = df.copy()
    out['Date'] = pd.to_datetime(out['Date'])

    daily_cases = out['Cases'].diff().abs()
    # A zero in 'Daily tests' yields +/-inf (or NaN for 0/0). Treat inf as
    # missing so it is mean-filled rather than poisoning the mean.
    positivity = (daily_cases / out['Daily tests']).replace([np.inf, -np.inf], np.nan)

    # The ratio is computed from the un-imputed differences; each column is
    # then filled with its own mean. Column order matters to callers that
    # select the target by position, so 'Positivity percent' is added last.
    out['Daily Cases'] = daily_cases.fillna(daily_cases.mean())
    out['Positivity percent'] = positivity.fillna(positivity.mean())
    return out

# Build the modelling frame: the filtered rows plus the derived
# 'Daily Cases' and 'Positivity percent' columns.
dataset = addPositivityPercent(dataFrame2)

# DataFrame.info() prints its own report and returns None, so it is called
# directly; wrapping it in print() would add a stray "None" line.
dataset.info()

In [None]:
# Features: every column from position 12 up to, but not including, the last.
# Target: the last column, shaped as a single-column 2-D array so it can be
# passed to a scaler in the next cell.
X = dataset.iloc[:, 12:-1].to_numpy()
y = dataset.iloc[:, -1].to_numpy().reshape(-1, 1)

In [None]:
# Standardise features and target with separate scalers so that predictions
# can later be mapped back to original units via sc_y.inverse_transform.
# NOTE: X and y are overwritten with their scaled versions, so re-running
# this cell on its own would scale already-scaled data.
sc_X = StandardScaler().fit(X)
sc_y = StandardScaler().fit(y)
X = sc_X.transform(X)
y = sc_y.transform(y)

In [None]:
# RBF-kernel support vector regression on the standardised data.
regressor = SVR(kernel = 'rbf')
# y is stored as an (n_samples, 1) column; SVR.fit expects a 1-D target and
# emits a DataConversionWarning for column vectors, so flatten it here.
regressor.fit(X, y.ravel())

In [None]:
# One hand-picked observation, in original (unscaled) units.
# NOTE(review): the four values are presumably in the column order of X - confirm.
sample = [[34044.0, 154796.0, 5742.0, 6.01e+2]]
# Scale the input, predict in scaled space, then map back to original units.
# reshape(-1, 1) gives one row per prediction, so this stays correct if more
# than one sample is supplied (reshape(1, -1) only worked for a single row).
sc_y.inverse_transform(regressor.predict(sc_X.transform(sample)).reshape(-1, 1))

In [None]:
# Observed vs. fitted target, one panel per feature.
# X has several columns while y has one, so a single plt.scatter(X, y) call
# fails ("x and y must be the same size"); each feature gets its own axis.
X_orig = sc_X.inverse_transform(X)
y_actual = sc_y.inverse_transform(y.reshape(-1, 1)).ravel()
y_fitted = sc_y.inverse_transform(regressor.predict(X).reshape(-1, 1)).ravel()
n_features = X_orig.shape[1]
feature_names = list(dataset.columns[12:-1])  # same slice that produced X

fig, axes = plt.subplots(1, n_features, figsize = (5 * n_features, 4),
                         sharey = True, squeeze = False)
for j, ax in enumerate(axes[0]):
    # Sort by the feature on the x-axis so the line runs left to right. The
    # fitted values depend on all features, so the line is not a smooth curve.
    order = np.argsort(X_orig[:, j])
    ax.scatter(X_orig[:, j], y_actual, color = 'red')
    ax.plot(X_orig[order, j], y_fitted[order], color = 'blue', linewidth = 1)
    ax.set_xlabel(feature_names[j])
axes[0, 0].set_ylabel('Positivity percent')
fig.suptitle('Positivity percent prediction (SVR)')
plt.show()

In [None]:
# Smooth response curves: for each feature, sweep it across its observed
# range while holding the other features at their mean, and plot the model's
# prediction over the observed points.
# The built-in min()/max() cannot reduce a 2-D array and a one-column grid
# cannot be fed to a scaler fitted on several features, so the grid is built
# per feature directly in scaled space.
GRID_POINTS = 200  # fixed resolution instead of a 0.1 step in original units
n_features = X.shape[1]
X_orig = sc_X.inverse_transform(X)
y_actual = sc_y.inverse_transform(y.reshape(-1, 1)).ravel()
feature_names = list(dataset.columns[12:-1])  # same slice that produced X

fig, axes = plt.subplots(1, n_features, figsize = (5 * n_features, 4),
                         sharey = True, squeeze = False)
for j, ax in enumerate(axes[0]):
    # X is standardised, so 0 in scaled space is each feature's mean.
    X_grid = np.zeros((GRID_POINTS, n_features))
    X_grid[:, j] = np.linspace(X[:, j].min(), X[:, j].max(), GRID_POINTS)
    y_grid = sc_y.inverse_transform(regressor.predict(X_grid).reshape(-1, 1)).ravel()
    ax.scatter(X_orig[:, j], y_actual, color = 'red')
    ax.plot(sc_X.inverse_transform(X_grid)[:, j], y_grid, color = 'blue')
    ax.set_xlabel(feature_names[j])
axes[0, 0].set_ylabel('Positivity percent')
fig.suptitle('Positivity percent prediction (SVR)')
plt.show()