### Prezentarea setului de date de antrenare
Prezicerea numărului de cazuri confirmate de Covid19 într-o țară

In [1]:
import pandas as pd
import numpy as np
from plotly import graph_objects as go

In [2]:
# Load the weekly per-country table, keep only the rows of the selected country,
# index them by week number and divide every value by 1000.
tara = "Romania"
csv = pd.read_csv("./covid19_final.csv", index_col="Country")
date_tara = csv.loc[tara, :]
romania = date_tara.set_index("Week").div(1000)

In [3]:
# Show the last 8 weekly rows (values were divided by 1000 when the data was loaded).
romania.tail(8)

Unnamed: 0_level_0,Open,High,Low,Close
Week,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
44,4.875,4.875,1.194,4.729
45,5.719,5.719,1.987,3.697
46,4.424,4.424,1.509,2.745
47,3.0,3.0,1.551,2.877
48,3.174,3.174,1.313,2.676
49,2.752,2.797,1.319,2.797
50,3.048,3.048,1.331,2.676
51,2.815,3.382,1.634,3.382


### Prezentarea setului de date, pe săptămâni, a numărului de cazuri de COVID-19 în România
Pentru fiecare săptămână în parte s-au luat numărul de cazuri de la începutul și de la sfârșitul ei, precum și intervalul (minim–maxim) în care au variat aceste valori.
În final s-a obținut un grafic de tip „candlestick”, precum cele financiare, în care verdele arată că numărul de cazuri a crescut în acea săptămână, iar roșul că a scăzut.

S-a încercat ca, pe baza datelor dintr-o săptămână, să se prezică, folosind modele de regresie liniară, numărul de cazuri care se va înregistra la începutul săptămânii următoare. Numărul real de cazuri poate să oscileze într-un interval de aproximativ +/- rădăcina erorii medii pătratice (RMSE) a modelului.

In [4]:
# Empty summary table; each fitted model later adds one row with its first four
# coefficients, its intercept and its error metrics.
coloane_concluzii = [
    "Model_Regresie",
    "Theta_0",
    "Theta_1",
    "Theta_2",
    "Theta_3",
    "Intercept",
    "MSE",
    "Coef_Det",
]
concluzii = pd.DataFrame(columns=coloane_concluzii)

In [5]:
# Weekly candlestick chart: one candle per week, built from the
# Open/High/Low/Close columns and placed on the week index.
lumanari = go.Candlestick(
    x=romania.index,
    open=romania["Open"],
    high=romania["High"],
    low=romania["Low"],
    close=romania["Close"],
)
fig = go.Figure(data=[lumanari])
fig.show()

In [6]:
# Build the supervised data set: the features of sample i are the four values of
# week i (columns 0-3 are used as Open, High, Low, Close below) and the target is
# the first column (Open) of week i+1. The last week has no successor, so it
# only ever appears as a target.
X = romania.iloc[:-1].to_numpy()
y = romania.iloc[1:, [0]].to_numpy()  # column selected as a list -> y stays 2-D, (n_samples, 1)

# Week labels for the x axis. Without an explicit x the candlesticks are drawn at
# positions 0..n-1 while the target line uses week numbers, so the two traces do
# not line up. Each candle is placed on the week it describes and each target on
# the week it was observed in; both label arrays have exactly len(X) entries.
saptamani_intrare = romania.index[:-1]
saptamani_tinta = romania.index[1:]

fig = go.Figure()
fig.add_trace(go.Candlestick(
    x=saptamani_intrare,
    open=X[:,0],
    high=X[:,1],
    low=X[:,2],
    close=X[:,3]
))
fig.add_trace(go.Scatter(x=saptamani_tinta, y=y.flatten(), line=dict(color='green', width=2)))
fig.show()

In [None]:
## Multiple polynomial regression (degree 2, without interaction terms)
# y = epsilon +
#     theta(1,0)*X0 + theta(1,1)*X1 + theta(1,2)*X2 + theta(1,3)*X3 +
#     theta(2,0)*X0*X0 + theta(2,1)*X1*X1 + theta(2,2)*X2*X2 + theta(2,3)*X3*X3
# where epsilon is the intercept and X0..X3 are the four feature columns of X.

In [7]:
from sklearn.linear_model import LinearRegression, ElasticNetCV
from sklearn.metrics import mean_squared_error, r2_score

In [8]:
# Multiple polynomial regression (degree 2, no interaction terms): the design
# matrix holds the four original features followed by their squares.
X2 = np.power(X, 2)
Xnou = np.hstack((X, X2))

reg2 = LinearRegression()
reg2.fit(Xnou, y)
print('coeficienti',reg2.coef_)
print('np-shape(reg2.coef_)=',np.shape(reg2.coef_) )
print('Interceptie',reg2.intercept_)

# NOTE: the metrics are measured on the same samples the model was fitted on.
y_pred = reg2.predict(Xnou)
mse = mean_squared_error(y, y_pred)
r2 = r2_score(y, y_pred)
print('MSE: ', mse)
print('RMSE: ', np.sqrt(mse))
print('Coef determinare: ', r2)

# Record the model in the summary table. Only the four first-order coefficients
# are stored (the table has no columns for the squared terms).
# DataFrame.append was removed in pandas 2.0, so the row is added with pd.concat;
# an empty table is replaced directly to avoid concatenating an empty frame.
rand_concluzii = pd.DataFrame([{
    "Model_Regresie": "Regresie polinomiala multipla",
    "Theta_0": reg2.coef_[0][0],
    "Theta_1": reg2.coef_[0][1],
    "Theta_2": reg2.coef_[0][2],
    "Theta_3": reg2.coef_[0][3],
    "Intercept": reg2.intercept_[0],
    "MSE": mse,
    "Coef_Det": r2,
}], columns=concluzii.columns)
concluzii = rand_concluzii if concluzii.empty else pd.concat([concluzii, rand_concluzii], ignore_index=True)

coeficienti [[ 1.38088675 -1.79056495  0.95514765  1.02853021 -0.08344871  0.11444741
  -0.15316688  0.0161841 ]]
np-shape(reg2.coef_)= (1, 8)
Interceptie [0.172307]
MSE:  0.10883889763441772
RMSE:  0.32990740766829973
Coef determinare:  0.9862915114344368


In [9]:
# Evaluate the fitted regression equation by hand for the last training sample
# and display it next to the corresponding observed target.
ultimul_esantion = Xnou[-1]
predictie_manuala = (ultimul_esantion * reg2.coef_).sum() + reg2.intercept_
predictie_manuala[0], y[-1]

(3.0797930633927812, array([2.815]))

In [10]:
# Overlay the observed next-week values (green) and the model predictions
# (yellow) on the weekly candlesticks.
# Without an explicit x the candlesticks are drawn at positions 0..n-1 while the
# lines use week numbers, so the traces do not line up. Each candle is placed on
# the week it describes and each target/prediction on the following week; both
# label arrays have len(romania) - 1 entries, matching X, y and y_pred.
saptamani_intrare = romania.index[:-1]
saptamani_tinta = romania.index[1:]

fig = go.Figure()
fig.add_trace(go.Candlestick(
    x=saptamani_intrare,
    open=X[:,0],
    high=X[:,1],
    low=X[:,2],
    close=X[:,3]
))
fig.add_trace(go.Scatter(x=saptamani_tinta, y=y.flatten(), line=dict(color='green', width=2)))
fig.add_trace(go.Scatter(x=saptamani_tinta, y=y_pred.flatten(), line=dict(color='yellow', width=2)))
fig.show()

## Modelul de regresie liniara ElasticNet

In [11]:
# ElasticNet linear regression on the four original features; the
# regularisation settings are chosen by 5-fold cross-validation.
regElasticNet = ElasticNetCV(cv=5, random_state=0)
regElasticNet.fit(X, y.flatten())  # fitted on a 1-D target

print('coeficienti',regElasticNet.coef_)
print('np-shape(regElasticNet.coef_)=',np.shape(regElasticNet.coef_) )
print('Interceptie',regElasticNet.intercept_)

# NOTE: the metrics are measured on the same samples the model was fitted on.
y_pred = regElasticNet.predict(X)
mse = mean_squared_error(y, y_pred)
r2 = r2_score(y, y_pred)
print('MSE: ', mse)
print('RMSE: ', np.sqrt(mse))
print('Coef determinare: ', r2)

# Record the model in the summary table.
# DataFrame.append was removed in pandas 2.0, so the row is added with pd.concat;
# an empty table is replaced directly to avoid concatenating an empty frame.
rand_concluzii = pd.DataFrame([{
    "Model_Regresie": "Regresie ElasticNet",
    "Theta_0": regElasticNet.coef_[0],
    "Theta_1": regElasticNet.coef_[1],
    "Theta_2": regElasticNet.coef_[2],
    "Theta_3": regElasticNet.coef_[3],
    "Intercept": regElasticNet.intercept_,
    "MSE": mse,
    "Coef_Det": r2,
}], columns=concluzii.columns)
concluzii = rand_concluzii if concluzii.empty else pd.concat([concluzii, rand_concluzii], ignore_index=True)

coeficienti [0.09227376 0.05898774 0.03492392 0.96191399]
np-shape(regElasticNet.coef_)= (4,)
Interceptie 0.023508201965069198
MSE:  0.1550944954659301
RMSE:  0.39382038477703274
Coef determinare:  0.9804655213909098


In [12]:
# Overlay the observed next-week values (green) and the model predictions
# (yellow) on the weekly candlesticks.
# Without an explicit x the candlesticks are drawn at positions 0..n-1 while the
# lines use week numbers, so the traces do not line up. Each candle is placed on
# the week it describes and each target/prediction on the following week; both
# label arrays have len(romania) - 1 entries, matching X, y and y_pred.
saptamani_intrare = romania.index[:-1]
saptamani_tinta = romania.index[1:]

fig = go.Figure()
fig.add_trace(go.Candlestick(
    x=saptamani_intrare,
    open=X[:,0],
    high=X[:,1],
    low=X[:,2],
    close=X[:,3]
))
fig.add_trace(go.Scatter(x=saptamani_tinta, y=y.flatten(), line=dict(color='green', width=2)))
fig.add_trace(go.Scatter(x=saptamani_tinta, y=y_pred.flatten(), line=dict(color='yellow', width=2)))
fig.show()

## Concluzii

In [13]:
# Display the summary table: one row of coefficients and metrics per fitted model.
concluzii

Unnamed: 0,Model_Regresie,Theta_0,Theta_1,Theta_2,Theta_3,Intercept,MSE,Coef_Det
0,Regresie polinomiala multipla,1.380887,-1.790565,0.955148,1.02853,0.172307,0.108839,0.986292
1,Regresie ElasticNet,0.092274,0.058988,0.034924,0.961914,0.023508,0.155094,0.980466


- *Din tabelul de mai sus se poate observa că modelul de **regresie polinomială** este cel mai bun, cu o eroare medie pătratică de 0.108 și un coeficient de determinare de 0.986,
apoi **ElasticNet**, cu o eroare de 0.155 și un coeficient de determinare de 0.9804.*
- *Cu cât **eroarea medie pătratică** este mai mică, iar **coeficientul de determinare** mai mare, cu atât **modelul de regresie este mai sigur** de folosit pentru preziceri imediate. Ambele valori au fost calculate pe datele de antrenare, deci pot fi optimiste față de performanța pe date noi.*