In [57]:
import pandas as pd
import os
import joblib
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [58]:
# Global configuration: file names, the auxiliary folder that holds them,
# and the base directory (the current working directory at execution time).
INPUT_FILENAME = 'dataset_modelo.xlsx'
OUTPUT_FILENAME = 'model_weights.pkl'
AUX_FOLDER = 'aux_file'
PATH = os.getcwd()

In [59]:
# Load the dataset from the Excel file located in the auxiliary folder
dataset_path = os.path.join(PATH, AUX_FOLDER, INPUT_FILENAME)
df = pd.read_excel(dataset_path)

In [60]:
# Separate the target ('Precio') from the predictors, discarding the date
# columns, and then split both into training and test subsets.
y = df['Precio']
X = df.drop(columns=['Precio', 'Año', 'Mes', 'Dia'])

# 80/20 split with a fixed seed so the partition is reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [61]:
# Instantiate a multiple linear regression model and fit it on the training split
reg = LinearRegression()
reg.fit(X=X_train, y=y_train)

In [62]:
# Predict the target values for the held-out test set
y_pred = reg.predict(X=X_test)

In [63]:
# Evaluation metrics computed on the test split: mean squared error and R^2
r2 = r2_score(y_true=y_test, y_pred=y_pred)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)

# Report both metrics, one per line
print(f"Mean Squared Error: {mse}", f"R-squared: {r2}", sep="\n")

Mean Squared Error: 19367263.025156688
R-squared: 0.9990904889706307


In [64]:
# Extract the fitted parameters (intercept and coefficients) from the model
intercept = reg.intercept_
coefficients = reg.coef_

# Persist them with joblib as a two-element list: [coefficients, intercept]
weights_path = os.path.join(PATH, AUX_FOLDER, OUTPUT_FILENAME)
joblib.dump([coefficients, intercept], weights_path)

['c:\\Users\\TALIGENT\\test\\Papermill\\Ejemplo 1\\aux_file\\model_weights.pkl']

In [65]:
# Display the fitted coefficients taken from reg.coef_
coefficients

array([129958.87819936,  20819.50405692, -57172.28825388])

In [66]:
# Display the fitted intercept taken from reg.intercept_
intercept

323721.0984833294