# In [None]:
import numpy as np
import pandas as pd
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV, KFold, cross_val_score
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
def save_results(model_name, rmsle_cv, rmsle_train, mae_train,
                 results_path='model_comparison_results.csv'):
    """Record one model's metrics in the model-comparison CSV file.

    The file holds one row per model. If a row for ``model_name`` is already
    present it is replaced; rows for other models are kept. The file is
    created when it does not exist yet (or exists but is completely empty).

    Args:
        model_name: Label stored in the ``Model`` column; also the key used
            to replace an earlier row for the same model.
        rmsle_cv: Value stored in the ``CV_RMSLE_Mean`` column.
        rmsle_train: Value stored in the ``Train_RMSLE`` column.
        mae_train: Value stored in the ``Train_MAE`` column.
        results_path: CSV file to read and rewrite. Defaults to
            ``'model_comparison_results.csv'`` in the working directory.

    Raises:
        KeyError: If the existing file has no ``Model`` column.
    """
    new_result = pd.DataFrame({
        'Model': [model_name],
        'CV_RMSLE_Mean': [rmsle_cv],
        'Train_RMSLE': [rmsle_train],
        'Train_MAE': [mae_train],
    })

    try:
        previous = pd.read_csv(results_path)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # No file yet, or a zero-byte file left by an interrupted run:
        # start from an empty table with the expected columns.
        previous = new_result.iloc[0:0]

    # Drop any stale row for this model so each model appears exactly once.
    kept = previous[previous['Model'] != model_name]

    if kept.empty:
        # Nothing to merge with. Concatenating onto an empty frame would turn
        # the numeric columns into object dtype (and is deprecated in recent
        # pandas), so write the new row directly while still preserving any
        # extra columns the existing file declared.
        column_order = list(dict.fromkeys([*kept.columns, *new_result.columns]))
        results_df = new_result.reindex(columns=column_order)
    else:
        results_df = pd.concat([kept, new_result], ignore_index=True)

    results_df.to_csv(results_path, index=False)
# Load the preprocessed feature tables, the training target array, and the
# test-set Ids that label the exported predictions further down.
try:
    X_train = pd.read_csv('X_train_processed.csv')
    X_test = pd.read_csv('X_test_processed.csv')
    y_train = np.load('y_train_processed.npy')
    # Only the Id column is used, so avoid parsing the rest of test.csv.
    X_test_ID = pd.read_csv('test.csv', usecols=['Id'])['Id']
except FileNotFoundError:
    print("Error: Processed data files not found. Ensure 'main_data_preprocessing.ipynb' was run first.")
    # `exit()` is an interactive helper installed by the `site` module and,
    # called without an argument, ends the process with status 0. Raise
    # SystemExit with a non-zero status so the failure is reported as one.
    raise SystemExit(1)
def rmsle(y, y_pred):
    """Return the square root of the mean squared error of ``y_pred`` vs ``y``.

    No logarithm is applied here, so the value is an RMSLE only when both
    arguments are already on a log scale.
    NOTE(review): the script appears to pass log-transformed prices - confirm
    against the preprocessing step.
    """
    mse = mean_squared_error(y, y_pred)
    return np.sqrt(mse)
print("--- Implementing Support Vector Regressor (SVR) ---")
# Seeded, shuffled 5-fold split so the fold assignment is reproducible.
kfolds = KFold(n_splits=5, shuffle=True, random_state=42)

svr = SVR(kernel='rbf')
param_grid = {
    'C': [10, 20, 30],
    'gamma': [0.001, 0.01],
    'epsilon': [0.01, 0.1, 0.2]
}

# Exhaustive search over the grid, scored by negated MSE on each fold.
grid_search = GridSearchCV(estimator=svr, param_grid=param_grid,
                           scoring='neg_mean_squared_error', cv=kfolds,
                           n_jobs=-1, verbose=1)
grid_search.fit(X_train, y_train)

# GridSearchCV defaults to refit=True, so best_estimator_ is already fitted on
# the full training set; fitting it a second time would only repeat that work.
best_svr = grid_search.best_estimator_
print(f"\nBest Hyperparameters found: {grid_search.best_params_}")
y_train_pred = best_svr.predict(X_train)

# The search already scored the winning parameters on every fold of `kfolds`
# with this same metric, so read those per-fold scores from cv_results_
# instead of running a second round of cross-validation.
best_idx = grid_search.best_index_
cv_scores = np.array([
    grid_search.cv_results_[f'split{fold}_test_score'][best_idx]
    for fold in range(grid_search.n_splits_)
])
# Scores are negated MSE; flip the sign before taking the root.
cv_rmse_scores = np.sqrt(-cv_scores)
final_cv_rmsle = cv_rmse_scores.mean()
final_train_rmsle = rmsle(y_train, y_train_pred)
final_train_mae = mean_absolute_error(y_train, y_train_pred)

print("\n--- Evaluation on Training Data (Log-Transformed) ---")
print(f"SVR CV Mean RMSE: {final_cv_rmsle:.4f} (Std: {cv_rmse_scores.std():.4f})")
print(f"SVR Training RMSLE: {final_train_rmsle:.4f}")
print(f"SVR Training MAE: {final_train_mae:.4f}")


# Actual vs. predicted targets on the training set; the dashed diagonal marks
# perfect predictions.
plt.figure(figsize=(10, 6))
plt.scatter(y_train, y_train_pred, c='orange', marker='o', alpha=0.5)
plt.plot([y_train.min(), y_train.max()], [y_train.min(), y_train.max()], 'k--', lw=2)
plt.title('Actual vs. Predicted Log(SalePrice) (SVR)')
plt.xlabel('Actual Log(SalePrice)')
plt.ylabel('Predicted Log(SalePrice)')
plt.show()


# Predict on the test set and undo the target transform with expm1.
# NOTE(review): this presumes the target was produced with log1p - confirm
# against the preprocessing notebook.
y_test_pred_log = best_svr.predict(X_test)
y_test_pred_price = np.expm1(y_test_pred_log)
svr_results = pd.DataFrame({'Id': X_test_ID, 'SalePrice_Predicted': y_test_pred_price})
svr_results.to_csv('svr_predictions.csv', index=False)

# Add (or replace) the SVR row in the shared model-comparison file.
save_results(
    model_name='SVR',
    rmsle_cv=final_cv_rmsle,
    rmsle_train=final_train_rmsle,
    mae_train=final_train_mae
)

print("\nSVR Model implementation complete. Results saved.")