In [None]:
import pandas as pd
import joblib
import json
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
import os

In [None]:
# 1. Make sure the output folder for saved models and results exists
os.makedirs(name="models_test/saved", exist_ok=True)

In [None]:
# 2. Read the Seattle dataset (path is relative to the notebook's folder)
df = pd.read_csv(filepath_or_buffer="../Data/Data_Seattle.csv")

In [None]:
# Summary statistics of the numeric columns.
# `describe` is a method: without the call parentheses the cell only shows
# the bound-method repr instead of the statistics table.
df.describe()

In [None]:
# Show the dtype pandas inferred for each column of the loaded frame
df.dtypes

In [None]:
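# 3. Process the 'date' column -- earlier approach, left disabled:
#    it converted 'date' into the number of days elapsed since the earliest date.
# df["date"] = pd.to_datetime(df['date'])
# df["date"] = (df["date"] - df["date"].min()) / pd.Timedelta(days=1)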

In [None]:
# 3. Process the 'date' column: parse it, expand it into year / month / day
#    features, then discard the original column.
# NOTE(review): errors="coerce" turns unparsable dates into NaT, which would
# surface as NaN in the three derived columns -- confirm the data has none.
df = (
    df.assign(
        date=lambda frame: pd.to_datetime(
            frame["date"], format="%Y%m%dT%H%M%S", errors="coerce"
        )
    )
    .assign(
        year=lambda frame: frame["date"].dt.year,
        month=lambda frame: frame["date"].dt.month,
        day=lambda frame: frame["date"].dt.day,
    )
    .drop(columns="date")
)

In [None]:
# 4. Separate the target ('price') from the feature matrix (every other column)
y = df["price"]
X = df.drop(columns=["price"])

In [None]:
# Standardise the features using statistics learned from the training rows only.
# Fitting the scaler on the whole matrix would leak the test rows' mean and
# variance into training and make the test metrics optimistic.
# train_test_split picks rows from the sample count and the seed alone, so
# calling it here with the SAME test_size / random_state as the real split
# performed afterwards selects exactly the rows that will end up in X_train.
# Keep these two arguments in sync with that split.
# NOTE(review): the cross-validation folds inside the grid search still share
# this scaler; wrapping scaler + SVR in a Pipeline would isolate them as well.
stdscal = StandardScaler()
stdscal.fit(train_test_split(X, test_size=0.2, random_state=42)[0])
X = stdscal.transform(X)

In [None]:
# 5. Hold out 20% of the rows as the test set (fixed seed keeps the split reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

In [None]:
# Hyper-parameter search space for the support vector regressor
param_grid = {
    'C': [0.1, 0.5, 1, 5, 10],
    'kernel': ('linear', 'rbf', 'poly'),
    'epsilon': [0.1, 0.2, 0.35, 0.5],
}
svr = SVR()

# Exhaustive search over the grid, ranked by negative mean squared error;
# training scores are recorded too so over-fitting can be inspected afterwards.
grid = GridSearchCV(
    estimator=svr,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    return_train_score=True,
    verbose=3,
)

In [None]:
# Run the grid search on the training split only
grid.fit(X=X_train, y=y_train)

In [None]:
# Keep the winning estimator and its hyper-parameters, then show the latter
mejor_modelo = grid.best_estimator_
mejores_parametros = grid.best_params_
print(mejores_parametros)

In [None]:
# Predict on the held-out test set with the best estimator
y_pred = mejor_modelo.predict(X_test)

# Score the predictions: mean squared error and coefficient of determination
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2_best = r2_score(y_true=y_test, y_pred=y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R2 Score: {r2_best}")

In [None]:
# 10. Persist the best estimator found by the grid search
joblib.dump(mejor_modelo, "models_test/saved/svm_gridsearch_date.pkl")

# The model was trained on standardised features, so the fitted scaler is
# saved next to it; without it the pickled model cannot be applied to raw inputs.
joblib.dump(stdscal, "models_test/saved/svm_gridsearch_date_scaler.pkl")

# 11. Persist the hyper-parameters and the test metrics
# `mse` holds the mean squared error, so take its square root before
# storing it under the "RMSE" key (previously the raw MSE was written there).
rmse = float(mse) ** 0.5

results = {
    "model": "SVM_GridSearch",
    "params": grid.best_params_,
    "metrics": {
        "RMSE": rmse,
        "R²": r2_best
    }
}

with open("models_test/saved/svm_gridsearch_results_date.json", "w") as f:
    json.dump(results, f, indent=4)