In [1]:
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
import xgboost as xgb
from scipy import stats
from sklearn import set_config
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.linear_model import LinearRegression, Ridge, RidgeCV, Lasso, LassoCV
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from statsmodels.genmod.generalized_linear_model import GLM
from yellowbrick.regressor import AlphaSelection

# Notebook-wide display and logging configuration.

# NOTE(review): this silences every warning category for the whole session,
# which also hides convergence and deprecation warnings from the models below.
warnings.simplefilter('ignore')

# Show scikit-learn estimators as plain text instead of HTML diagrams.
set_config(display='text')

# Apply seaborn's default theme to subsequent matplotlib figures.
sns.set()

# Print floats with three digits after the decimal point in pandas output.
pd.set_option('display.precision', 3)

In [2]:
# Load the preprocessed train/test splits from CSV.
# Both files share the same layout: comma-separated, column names on the first row.
csv_options = dict(header=0, delimiter=',')
df_train = pd.read_csv("../processed_train.csv", **csv_options)
df_test = pd.read_csv("../processed_test.csv", **csv_options)

## Escalado

In [3]:
# Separate the predictors from the target column ('close_price').
y_train = df_train['close_price']
X_train = df_train.drop(columns=['close_price'])
X_test = df_test.drop(columns=['close_price'])

# Min-max scale the predictors. The scaler learns its ranges from the training
# split only and then applies the same transformation to the test split.
scaler = MinMaxScaler()

# The scaler returns bare arrays, so wrap them back into DataFrames that carry
# the original column names.
X_train_scaled = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns)
X_test_scaled = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

## Regresión lineal

In [4]:
# Baseline: fit a linear regression on the min-max-scaled training features.
# The fit call is left as the cell's last expression so the estimator is displayed.
model = LinearRegression()
model.fit(X=X_train_scaled, y=y_train)

LinearRegression()

In [5]:
# Score the linear model on the same rows it was fitted on.
# NOTE(review): these are in-sample metrics only; this cell does not evaluate
# the model on X_test_scaled.
train_predictions = model.predict(X_train_scaled)

# Mean squared error on the training set
mse = mean_squared_error(y_train, train_predictions)
print(f"Mean Squared Error (MSE): {mse}")

# Mean absolute error on the training set
mae = mean_absolute_error(y_train, train_predictions)
print(f"Mean Absolute Error (MAE): {mae}")

# R^2 from the predictions already computed above. For a regressor this is the
# same value model.score(X, y) returns, without running predict a second time,
# and it matches how the other model cells compute R^2.
r2 = r2_score(y_train, train_predictions)
print(f"R^2: {r2}")

Mean Squared Error (MSE): 83691327126.04489
Mean Absolute Error (MAE): 135267.3246466616
R^2: 0.44003922381215654


## Random Forest

In [12]:
# RandomForestRegressor and the metric functions are imported in the notebook's
# top import cell, so this cell no longer re-imports them.

# Build and fit a 100-tree random forest; random_state is fixed so reruns
# produce the same model.
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train_scaled, y_train)

# Predict on the TRAINING set (the same rows the model was fitted on).
# NOTE(review): the metrics below are therefore in-sample; this cell does not
# score the model on X_test_scaled.
train_predictions_rf = rf_model.predict(X_train_scaled)

# Training-set error metrics
mse_train_rf = mean_squared_error(y_train, train_predictions_rf)
mae_train_rf = mean_absolute_error(y_train, train_predictions_rf)
r2_train_rf = r2_score(y_train, train_predictions_rf)

print(f"Random Forest - Training MSE: {mse_train_rf}")
print(f"Random Forest - Training MAE: {mae_train_rf}")
print(f"Random Forest - Training R^2: {r2_train_rf}")

Random Forest - Training MSE: 9753988094.990732
Random Forest - Training MAE: 32533.98651975131
Random Forest - Training R^2: 0.9347381511064812


## Gradient Boosting

In [None]:
# GradientBoostingRegressor is imported in the notebook's top import cell, so
# this cell no longer re-imports it.

# Build and fit a gradient boosting model with 100 boosting stages;
# random_state is fixed so reruns produce the same model.
gb_model = GradientBoostingRegressor(n_estimators=100, random_state=42)
gb_model.fit(X_train_scaled, y_train)

# Predict on the TRAINING set (the same rows the model was fitted on).
# NOTE(review): the metrics below are therefore in-sample; this cell does not
# score the model on X_test_scaled.
train_predictions_gb = gb_model.predict(X_train_scaled)

# Training-set error metrics
mse_train_gb = mean_squared_error(y_train, train_predictions_gb)
mae_train_gb = mean_absolute_error(y_train, train_predictions_gb)
r2_train_gb = r2_score(y_train, train_predictions_gb)

print(f"Gradient Boosting - Training MSE: {mse_train_gb}")
print(f"Gradient Boosting - Training MAE: {mae_train_gb}")
print(f"Gradient Boosting - Training R^2: {r2_train_gb}")

## SVM Regressor

_TODO: sección pendiente; todavía no se ha entrenado ningún modelo SVM._

## XGBoost

In [15]:
# xgboost (as xgb) and the metric functions are imported in the notebook's top
# import cell. The previous in-cell import list omitted mean_absolute_error even
# though this cell uses it.

# Configure the XGBoost regressor: 500 trees, learning rate 0.3, depth up to 8;
# random_state is fixed so reruns produce the same model.
xgb_model = xgb.XGBRegressor(n_estimators=500, learning_rate=0.3, max_depth=8, random_state=42)

# Train the model.
# NOTE(review): the original comment carried a bare "0.991" here, presumably a
# previously observed score; confirm or drop.
xgb_model.fit(X_train_scaled, y_train)

# Predict on the TRAINING set (the same rows the model was fitted on).
# NOTE(review): the metrics below are therefore in-sample; this cell does not
# score the model on X_test_scaled.
train_predictions_xgb = xgb_model.predict(X_train_scaled)

# Training-set error metrics
mse_train_xgb = mean_squared_error(y_train, train_predictions_xgb)
mae_train_xgb = mean_absolute_error(y_train, train_predictions_xgb)
r2_train_xgb = r2_score(y_train, train_predictions_xgb)

print(f"XGBoost - Training MSE: {mse_train_xgb}")
print(f"XGBoost - Training MAE: {mae_train_xgb}")
print(f"XGBoost - Training R^2: {r2_train_xgb}")

XGBoost - Training MSE: 2404595404.7893167
XGBoost - Training MAE: 27035.049089267213
XGBoost - Training R^2: 0.9839113662607398
