# In [9]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.api as sm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import dash
from dash import dcc, html
import plotly.express as px

# Load the shipment lines from the Excel workbook
file_path = "C:/Users/jadur/OneDrive - UCompensar/Semestre 3/Programacion de Datos II/Base Retal + Lineas.xlsx"
df = pd.read_excel(file_path, sheet_name="Lineas")

# Select the predictor columns and the regression target
X = df[['Cantidad embarcada', 'Cantidad Unidades', 'Cobre', 'Aluminio']]
y = df['Peso Neto']

# Fill missing values with each column's median
# NOTE(review): the medians are computed over all rows (train and test), and
# the target itself is imputed; dropping rows with a missing 'Peso Neto' may
# be more appropriate -- confirm with the data owner.
X = X.fillna(X.median())
y = y.fillna(y.median())

# Split into train and test (80% - 20%) BEFORE scaling so that the scaler
# never sees test-set statistics (avoids data leakage). The same random_state
# on the same number of rows selects the same rows as splitting scaled data.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize: fit on the training rows only, then apply to both splits
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Full scaled matrix, kept in case a later cell still references X_scaled
X_scaled = scaler.transform(X)

# Add the intercept column for OLS. has_constant='add' forces the column to be
# added; the default ('skip') silently omits it when a split happens to contain
# a constant non-zero column, which would break predict() with a shape mismatch.
X_train_ols = sm.add_constant(X_train, has_constant='add')
X_test_ols = sm.add_constant(X_test, has_constant='add')

# Fit the OLS regression and predict on the held-out rows
modelo_ols = sm.OLS(y_train, X_train_ols).fit()
y_pred_ols = modelo_ols.predict(X_test_ols)

# Evaluate the model on the test split
r2 = r2_score(y_test, y_pred_ols)
mae = mean_absolute_error(y_test, y_pred_ols)
rmse = np.sqrt(mean_squared_error(y_test, y_pred_ols))

# Build the Dash application: metrics, correlation heatmap,
# predicted-vs-actual scatter plot and target histogram
app = dash.Dash(__name__)
app.layout = html.Div([
    html.H1("📊 Evaluación del Modelo", style={'textAlign': 'center'}),

    html.P("Este dashboard presenta la evaluación de un modelo de regresión aplicado a datos de embarques."),

    html.H2("🔹 Métricas del Modelo"),
    html.P(f"✔️ Coeficiente de determinación (R²): {r2:.4f}"),
    html.P(f"✔️ Error absoluto medio (MAE): {mae:.4f}"),
    html.P(f"✔️ Raíz del error cuadrático medio (RMSE): {rmse:.4f}"),

    html.H2("🔹 Matriz de Correlación"),
    dcc.Graph(
        id='heatmap',
        figure=px.imshow(X.corr(), text_auto=True, title="Matriz de Correlación")
    ),

    html.H2("🔹 Comparación: Predicción vs. Valor Real"),
    dcc.Graph(
        id='scatter_plot',
        figure=px.scatter(x=y_test, y=y_pred_ols, labels={'x': "Peso Neto Real", 'y': "Predicción"},
                          title="Predicción vs. Real", trendline="ols")
    ),

    html.H2("🔹 Distribución del Peso Neto"),
    dcc.Graph(
        id='histogram',
        figure=px.histogram(df, x='Peso Neto', nbins=30, title="Histograma de Peso Neto", marginal="box")
    )
])

# Start the Dash development server. app.run replaces app.run_server, which
# current Dash releases reject with ObsoleteAttributeException.
if __name__ == '__main__':
    app.run(host='127.0.0.1', port=8050, debug=True)


# Recorded notebook output from calling app.run_server:
# ObsoleteAttributeException: app.run_server has been replaced by app.run

# In [None]:
from sklearn.linear_model import Ridge, Lasso
from sklearn.model_selection import GridSearchCV

# Candidate regularization strengths to try
param_grid = {'alpha': [0.001, 0.01, 0.1, 1, 10, 100]}

# Ridge regression (L2 penalty), tuned with 5-fold CV on R²
ridge = GridSearchCV(Ridge(), param_grid, cv=5, scoring='r2')
ridge.fit(X_train, y_train)

# Lasso regression (L1 penalty), tuned with 5-fold CV on R²
lasso = GridSearchCV(Lasso(), param_grid, cv=5, scoring='r2')
lasso.fit(X_train, y_train)

# Report the best alpha and its mean cross-validated R²
print(f"🏆 Mejor parámetro para Ridge: {ridge.best_params_} con R²={ridge.best_score_:.4f}")
print(f"🏆 Mejor parámetro para Lasso: {lasso.best_params_} con R²={lasso.best_score_:.4f}")

# GridSearchCV (refit=True by default) has already refitted the best
# configuration on the full training set, so reuse that estimator instead of
# training an identical model a second time.
ridge_best = ridge.best_estimator_
lasso_best = lasso.best_estimator_

# Predict on the held-out test split
y_pred_ridge = ridge_best.predict(X_test)
y_pred_lasso = lasso_best.predict(X_test)

# Compute test-set R² for both models
r2_ridge = r2_score(y_test, y_pred_ridge)
r2_lasso = r2_score(y_test, y_pred_lasso)

print(f"📊 R² Ridge en test: {r2_ridge:.4f}")
print(f"📊 R² Lasso en test: {r2_lasso:.4f}")
