In [4]:
""" Import required packages """

import numpy as np
import statsmodels.api as sm
import pandas as pd
import matplotlib.pyplot as plt
import scipy

In [None]:
""" Create dataset as Pandas dataframe """

# Raw observations: regressor values and the matching responses.
X = [2, 4, 7, 12, 23, 56, 98, 123, 200]
y = [23, 32, 46, 39, 90, 155, 230, 295, 410]

# Assemble both columns in a single constructor call (column order: X, then y).
df = pd.DataFrame({'X': X, 'y': y})
print(df)

# Report the frame's dimensions.
n_rows, n_cols = df.shape
print('The dataframe has {} lines and {} columns'.format(n_rows, n_cols))

In [6]:
""" Compute linear OLS model  """

# Design matrix: the regressor column plus an intercept column added by statsmodels.
design = sm.add_constant(df['X'])

# Ordinary least squares of y on [const, X]; `mod` is the unfitted model, `res` the fitted results.
mod = sm.OLS(endog=df['y'], exog=design)
res = mod.fit()

In [None]:
# Ratio of the model mean square to the residual mean square (the regression F-ratio).
f_statistic = res.mse_model / res.mse_resid
print(f_statistic)

In [None]:
# Print the statsmodels regression report for the fitted model.
print(res.summary())

# In-sample predictions with 95% bounds (alpha=0.05): `mean_ci_*` bound the fitted mean,
# `obs_ci_*` bound a new observation.
predictions = res.get_prediction()
prediction_summary = predictions.summary_frame(alpha=0.05)
print(prediction_summary)

# Explicit figure/axes so the plot is self-contained and every curve is identified.
fig, ax = plt.subplots()
ax.plot(X, y, 'bo', label='observations')
ax.plot(X, prediction_summary['mean'], 'b-', lw=2, label='fitted mean')
ax.plot(X, prediction_summary['obs_ci_lower'], 'r--', lw=1, label='95% interval (new observation)')
ax.plot(X, prediction_summary['obs_ci_upper'], 'r--', lw=1)
ax.plot(X, prediction_summary['mean_ci_lower'], 'b--', lw=1, label='95% interval (mean)')
ax.plot(X, prediction_summary['mean_ci_upper'], 'b--', lw=1)
ax.set_xlabel('X')
ax.set_ylabel('y')
ax.set_title('OLS fit with 95% intervals')
ax.legend()
plt.show()

In [None]:
# Regressor values at which the fitted model is evaluated.
data_test = pd.DataFrame({'X': [0, 110]})
print(data_test)

# Prepend the intercept column. has_constant='add' forces the column of ones even when
# the inputs themselves are constant (e.g. a single point or identical values); the
# default 'skip' would silently omit it and the design matrix would no longer match
# the fitted parameters.
data_test = sm.add_constant(data_test['X'], has_constant='add')
print(data_test)

# Point predictions: design matrix times the fitted coefficients.
test = mod.predict(res.params, data_test)
print(test)

In [None]:
# Ingredients for the hand-computed interval: the model's normalized covariance
# of the parameters and the prediction design matrix as a plain NumPy array.
inv_xtx, X_pred = mod.normalized_cov_params, data_test.to_numpy()
for matrix in (inv_xtx, X_pred):
    print(matrix)

# Show the container type of the converted design matrix.
type(X_pred)

In [None]:
# Variance of the prediction error at each new point x0:
#     res.scale * (1 + x0 . inv_xtx . x0')
n_pred = data_test.shape[0]
leverage = [
    np.dot(np.dot(X_pred[k, :], inv_xtx), np.transpose(X_pred[k, :]))
    for k in range(n_pred)
]
var_err = res.scale * (1 + np.array(leverage, dtype=float))
print(var_err)

In [12]:
# n observations, p regressors (presumably excluding the intercept, hence the extra -1 below).
n, p = len(df), 1
resid_dof = n - p - 1

# 97.5% quantile of Student's t, i.e. the multiplier of a two-sided 95% interval.
qt = scipy.stats.t.ppf(0.975, df=resid_dof)

In [None]:
# Half-width of the interval: t-quantile times the standard deviation of the prediction error.
half_width = qt * np.sqrt(var_err)

# Lower bound of the interval around each point prediction.
yb = test - half_width
print(yb)

# Upper bound of the interval around each point prediction.
yh = test + half_width
print(yh)

In [None]:
# Distance from each point prediction down to its lower bound (used as the error-bar length).
err = np.subtract(test, yb)
print(err)

In [None]:
# Same fit and interval curves as before, now with the new-point predictions overlaid.
fig, ax = plt.subplots()
ax.plot(X, y, 'bo', label='observations')
ax.plot(X, prediction_summary['mean'], 'b-', lw=2, label='fitted mean')
ax.plot(X, prediction_summary['obs_ci_lower'], 'r--', lw=1, label='95% interval (new observation)')
ax.plot(X, prediction_summary['obs_ci_upper'], 'r--', lw=1)
ax.plot(X, prediction_summary['mean_ci_lower'], 'b--', lw=1, label='95% interval (mean)')
ax.plot(X, prediction_summary['mean_ci_upper'], 'b--', lw=1)

# Predictions at the new X values, drawn as markers only.
ax.plot(data_test['X'], test, 'ro', label='predictions at new X')

# fmt='none' draws only the error bars; with the default format errorbar would also
# join the prediction points with a line.
ax.errorbar(data_test['X'], test, yerr=err, fmt='none', ecolor='r', capsize=4)

ax.set_xlabel('X')
ax.set_ylabel('y')
ax.set_title('OLS fit with hand-computed intervals at new points')
ax.legend()
plt.show()

In [None]:
""" Simplified version with a function for prediction """

import numpy as np
import pandas as pd
import statsmodels.api as sm

# Example data
X = [2, 4, 7, 12, 23, 56, 98, 123, 200]
y = [23, 32, 46, 39, 90, 155, 230, 295, 410]

# Build the model: add an intercept column to the regressors, then fit by OLS.
# Note: this rebinds X from the plain list above to the augmented design matrix,
# and rebinds `res` to the results of this fit.
X = sm.add_constant(X)
model = sm.OLS(endog=y, exog=X)
res = model.fit()

# Fonction pour prédire avec erreur
def predict_with_error(new_x, X, y):
    """Predict the response at ``new_x`` together with a 95% interval.

    The model is fitted from the supplied ``X`` and ``y`` rather than read from
    a module-level results object, so the result depends only on the arguments.

    Parameters
    ----------
    new_x : scalar
        Regressor value at which to predict.
    X : array-like
        Design matrix used for the fit. It must contain exactly two columns,
        the constant first and the single regressor second, because the new
        row is built as ``[1, new_x]``.
    y : array-like
        Observed responses, one per row of ``X``.

    Returns
    -------
    tuple
        ``(mean, obs_ci_lower, obs_ci_upper)``: the point prediction and the
        lower and upper 95% bounds for a new observation.
    """
    fitted = sm.OLS(y, X).fit()

    # The constant (1) is added manually so the row matches the columns of X.
    new_x_with_const = np.array([[1, new_x]])

    # alpha=0.05 requests 95% bounds.
    pred_summary = fitted.get_prediction(new_x_with_const).summary_frame(alpha=0.05)

    # Positional access: independent of whatever index labels the summary frame carries.
    first_row = pred_summary.iloc[0]
    return first_row['mean'], first_row['obs_ci_lower'], first_row['obs_ci_upper']

# Example prediction for a new regressor value
new_x = 110
prediction, lower_bound, upper_bound = predict_with_error(new_x=new_x, X=X, y=y)

# Display the results, one line per quantity
for report_line in (
    f"Prédiction: {prediction}",
    f"Limite inférieure: {lower_bound}",
    f"Limite supérieure: {upper_bound}",
):
    print(report_line)
