# Previsões de casos para o dataset não filtrado com SVR

In [1]:
import pandas as pd 
import numpy as np 
import sys 
import pickle
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.metrics import r2_score, mean_absolute_error

In [2]:
### Importando base de dados
data_sf = pd.read_csv('Dataset_finalizado.csv').dropna()

In [3]:
X = data_sf.iloc[:, :].drop(columns=['last_available_confirmed']).values
Y = data_sf.iloc[:, 2].values

In [4]:
labelencoder = LabelEncoder()
X[:, 0] = labelencoder.fit_transform(X[:,0])
X[:, 1] = labelencoder.fit_transform(X[:,1])

In [5]:
onehotencorder = ColumnTransformer(transformers=[("OneHot", OneHotEncoder(), [1])], remainder='passthrough')
X = onehotencorder.fit_transform(X)

In [6]:
scaler_X = StandardScaler()
X = scaler_X.fit_transform(X)
scaler_Y = StandardScaler()
Y = scaler_Y.fit_transform(Y.reshape(-1,1))

In [7]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=0)

### Para o kernel rbf

In [8]:
regressor = SVR(kernel='rbf')
regressor.fit(X_train, Y_train)

SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [9]:
regressor.score(X_train, Y_train)

0.31698943953448133

In [10]:
prev = regressor.predict(X_test)

prev = scaler_Y.inverse_transform(prev)
Y_test = scaler_Y.inverse_transform(Y_test)

In [11]:
mae = mean_absolute_error(Y_test, prev)
score = r2_score(Y_test, prev)

print("Média do erro absoluto:", mae)
print("R² da previsão:", score)

Média do erro absoluto: 62.48164517968756
R² da previsão: 0.3364333966593115


### Para o kernel polynomial

In [12]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=0)

In [13]:
regressor = SVR(kernel='poly', degree=5)
regressor.fit(X_train, Y_train)

SVR(C=1.0, cache_size=200, coef0=0.0, degree=5, epsilon=0.1, gamma='scale',
    kernel='poly', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [14]:
regressor.score(X_train, Y_train)

0.9326862021139312

In [15]:
prev = regressor.predict(X_test)

prev = scaler_Y.inverse_transform(prev)
Y_test = scaler_Y.inverse_transform(Y_test)

In [16]:
mae = mean_absolute_error(Y_test, prev)
score = r2_score(Y_test, prev)

print("Média do erro absoluto:", mae)
print("R² da previsão:", score)

Média do erro absoluto: 52.270944930810195
R² da previsão: 0.9462021852426359


# Previsões de casos para o dataset filtrado com SVR

In [4]:
### Importando base de dados
data_cf = pd.read_csv('Dataset_finalizado_filtrado.csv').dropna()

In [18]:
X = data_cf.iloc[:, :].drop(columns=['last_available_confirmed']).values
Y = data_cf.iloc[:, 2].values

In [19]:
labelencoder = LabelEncoder()
X[:, 0] = labelencoder.fit_transform(X[:,0])
X[:, 1] = labelencoder.fit_transform(X[:, 1])

In [20]:
onehotencorder = ColumnTransformer(transformers=[("OneHot", OneHotEncoder(), [1])], remainder='passthrough')
X = onehotencorder.fit_transform(X).toarray()

In [21]:
scaler_X = StandardScaler()
X = scaler_X.fit_transform(X)
scaler_Y = StandardScaler()
Y = scaler_Y.fit_transform(Y.reshape(-1,1))

In [22]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=0)

### Com o kernel rbf

In [23]:
regressor = SVR(kernel='rbf')
regressor.fit(X_train, Y_train)

SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [24]:
regressor.score(X_train, Y_train)

0.4026576077722921

In [25]:
prev = regressor.predict(X_test)

prev = scaler_Y.inverse_transform(prev)
Y_test = scaler_Y.inverse_transform(Y_test)

In [26]:
mae = mean_absolute_error(Y_test, prev)
score = r2_score(Y_test, prev)

print("Média do erro absoluto:", mae)
print("R² da previsão:", score)

Média do erro absoluto: 57.89138887096914
R² da previsão: 0.42413440465129826


### Para o kernel polynomial

In [27]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=0)

In [28]:
regressor = SVR(kernel='poly', degree=20)
regressor.fit(X_train, Y_train)

SVR(C=1.0, cache_size=200, coef0=0.0, degree=20, epsilon=0.1, gamma='scale',
    kernel='poly', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [29]:
regressor.score(X_train, Y_train)

0.8953084228509776

In [30]:
prev = regressor.predict(X_test)

prev = scaler_Y.inverse_transform(prev)
Y_test = scaler_Y.inverse_transform(Y_test)

In [31]:
mae = mean_absolute_error(Y_test, prev)
score = r2_score(Y_test, prev)

print("Média do erro absoluto:", mae)
print("R² da previsão:", score)

Média do erro absoluto: 57.69417861903202
R² da previsão: 0.9070001773414764
