# 🤖 Data Modeling with Support Vector Machine

In [1]:
import os
import sys

import numpy as np
import pandas as pd
from sklearn.metrics import make_scorer, mean_absolute_error, accuracy_score, r2_score
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVR

# Put the project root on the import path so the local `utils` package can be imported.
# '..' is resolved against the current working directory (normally the notebook's
# folder), i.e. one level up; adjust it if the notebook lives somewhere else.
PROJECT_ROOT = os.path.abspath('..')
if PROJECT_ROOT not in sys.path:  # re-running this cell must not pile up duplicate entries
    sys.path.append(PROJECT_ROOT)

from utils.data_training_utils import train_test_model_with_hyperparameter_tuning

In [2]:
# Load the cleaned dataset (date-stamped snapshot) from the working-data folder.
cleaned_data_csv = "../data/working_data/data_cleaned_20250901.csv"
df = pd.read_csv(cleaned_data_csv)

In [3]:
# Cast every text (object-dtype) column of `df` to the pandas category dtype.
text_columns = df.select_dtypes(include="object").columns
for column in text_columns:
    df[column] = df[column].astype("category")

# Work on a copy so `df` keeps its categorical columns while `df_numeric`
# receives integer codes in their place.
df_numeric = df.copy()
encoders = {}  # column name -> the LabelEncoder fitted on that column

# Replace each categorical column with integer labels. Values are converted to
# str before fitting (presumably so missing values become an ordinary label
# instead of breaking the encoder — TODO confirm).
# NOTE(review): LabelEncoder yields arbitrary integer codes; a distance-based
# model such as SVR will treat them as ordered magnitudes — confirm this is
# acceptable or consider one-hot encoding.
for column in df_numeric.select_dtypes(include="category").columns:
    encoder = LabelEncoder()
    df_numeric[column] = encoder.fit_transform(df_numeric[column].astype(str))
    encoders[column] = encoder

In [4]:
# Hyperparameter search space for SVR:
# 3 kernels x 5 C values x 5 epsilon values x 2 gamma settings = 150 combinations.
# NOTE(review): per the scikit-learn docs gamma is only used by the 'rbf', 'poly'
# and 'sigmoid' kernels, so each 'linear' candidate is evaluated once per gamma
# value with no effect on the model.
param_grid = dict(
    kernel=['rbf', 'linear', 'poly'],
    C=[0.01, 0.1, 1, 10, 100],
    epsilon=[0.001, 0.01, 0.1, 0.2, 0.5],
    gamma=['scale', 'auto'],
)


# Base model: an SVR estimator created with its default settings. This is the
# estimator passed, together with `param_grid`, to every tuning call further down.
model = SVR()

### SVM Model for rental prices

In [None]:
# Rows whose `operation` code equals 0 form the rental subset.
# NOTE(review): 0 presumably corresponds to "rent" — confirm against how
# `operation` is encoded.
rent_rows = df_numeric['operation'] == 0
df_numeric_rent = df_numeric[rent_rows]

# Hyperparameter search on the rental subset. `scale_data` is not passed, so the
# helper's default applies. `operation` is constant within this subset and is
# handed to the helper via `drop_features`.
(
    best_rent_num_model,
    best_rent_num_params,
    mae_train,
    r2_train,
    mae_test,
    r2_test,
) = train_test_model_with_hyperparameter_tuning(
    model,
    param_grid,
    df_numeric_rent,
    drop_features=['operation'],
)

Fitting 5 folds for each of 150 candidates, totalling 750 fits


In [None]:
# Repeat the rental search, this time asking the helper to scale the data
# (scale_data=True). The result names are the same as in the unscaled rental
# cell, so running this cell overwrites those values.
rent_scaled_results = train_test_model_with_hyperparameter_tuning(
    model,
    param_grid,
    df_numeric_rent,
    drop_features=['operation'],
    scale_data=True,
)
best_rent_num_model, best_rent_num_params, mae_train, r2_train, mae_test, r2_test = rent_scaled_results

Fitting 5 folds for each of 36 candidates, totalling 180 fits
Best model found: SVR(C=0.1, epsilon=0.01)
Best hyperparameters: {'C': 0.1, 'epsilon': 0.01, 'gamma': 'scale', 'kernel': 'rbf'}
Best training MAE: 993.2746602763855
Best training R2: -0.10309051743728528
·········································
MAE test: 905.4214829285932
R2 test: -0.06809740107571449


### SVM Model for sales prices

In [None]:
# Rows whose `operation` code equals 1 form the sales subset.
# NOTE(review): 1 presumably corresponds to "sale" — confirm against how
# `operation` is encoded.
sale_rows = df_numeric['operation'] == 1
df_numeric_sale = df_numeric[sale_rows]

# Hyperparameter search on the sales subset. `scale_data` is not passed, so the
# helper's default applies. The metric names (mae_train, r2_train, mae_test,
# r2_test) are shared with the rental cells and get overwritten here.
(
    best_sale_num_model,
    best_sale_num_params,
    mae_train,
    r2_train,
    mae_test,
    r2_test,
) = train_test_model_with_hyperparameter_tuning(
    model,
    param_grid,
    df_numeric_sale,
    drop_features=['operation'],
)

Fitting 5 folds for each of 36 candidates, totalling 180 fits
Best model found: SVR(C=0.1, epsilon=0.01, gamma='auto')
Best hyperparameters: {'C': 0.1, 'epsilon': 0.01, 'gamma': 'auto', 'kernel': 'rbf'}
Best training MAE: 385970.948106062
Best training R2: -0.11253651022827
·········································
MAE test: 385528.39053111343
R2 test: -0.10721745824359163


In [None]:
# Rebuild the sales subset (rows with `operation` == 1) so this cell does not
# rely on the previous sales cell having been run.
sale_rows = df_numeric['operation'] == 1
df_numeric_sale = df_numeric[sale_rows]

# Repeat the sales search with scale_data=True. The result names are the same as
# in the unscaled sales cell, so running this cell overwrites those values.
sale_scaled_results = train_test_model_with_hyperparameter_tuning(
    model,
    param_grid,
    df_numeric_sale,
    drop_features=['operation'],
    scale_data=True,
)
best_sale_num_model, best_sale_num_params, mae_train, r2_train, mae_test, r2_test = sale_scaled_results

Fitting 5 folds for each of 36 candidates, totalling 180 fits
Best model found: SVR(C=0.1, epsilon=0.01)
Best hyperparameters: {'C': 0.1, 'epsilon': 0.01, 'gamma': 'scale', 'kernel': 'rbf'}
Best training MAE: 385965.2036682187
Best training R2: -0.11251340270902684
·········································
MAE test: 385521.1458263802
R2 test: -0.10718799802442414
