# Support Vector Regression (SVR) Model for the Wine Quality Dataset

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
from random import shuffle
from sklearn.svm import SVR


In [2]:
# Read the target first, then the feature matrix; the two reads are independent.
# `.squeeze()` collapses any length-1 axis of the frame that was read
# (presumably a single-column target file yielding a Series -- confirm
# against the preprocessing step that writes it).
y = pd.read_csv(filepath_or_buffer="../preprocessing/y.csv").squeeze()
# NOTE(review): the file name suggests the features were scaled upstream;
# no scaling is applied in this notebook.
Xs = pd.read_csv(filepath_or_buffer="../preprocessing/X_scaled.csv")

In [3]:
# Accumulator for the per-fold mean squared errors
mse_scores = list()

# Number of cross-validation folds
k = 5

# Shuffled k-fold splitter; the fixed random_state keeps the fold
# assignment identical from one run to the next.
kfold = KFold(
    n_splits=k,
    shuffle=True,
    random_state=42,
)


In [4]:
# Cross-validate an SVR: fit on each training split from `kfold`, predict on
# the held-out split, and record the fold's mean squared error.

# Start from an empty score list every time this cell runs. Without this,
# re-executing the cell appends a second set of fold scores to the previous
# ones and the summary below is computed over stale duplicates.
mse_scores = []

for fold, (train_index, test_index) in enumerate(kfold.split(Xs)):
    print(f"\n--- Fold {fold + 1}---")

    # The splitter yields integer positions, hence positional `.iloc` indexing.
    X_train, X_test = Xs.iloc[train_index], Xs.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # A new estimator per fold so nothing fitted on one fold carries over.
    # Only C and epsilon are set; all other hyperparameters use the
    # library defaults.
    svr = SVR(C=1.0, epsilon=0.2)
    svr.fit(X_train, y_train)

    # Predict on the held-out split
    y_pred = svr.predict(X_test)

    # Score this fold
    mse = mean_squared_error(y_test, y_pred)
    print(f"MSE: {mse}")

    mse_scores.append(mse)


# Summary across folds: mean and (population) standard deviation of the MSEs.
print("\n=== Cross-Validation Results ===")
print(f"Average MSE: {np.mean(mse_scores):.4f} ± {np.std(mse_scores):.4f}")


--- Fold 1---
MSE: 0.45722694935935826

--- Fold 2---
MSE: 0.4633330747075883

--- Fold 3---
MSE: 0.47650355334914346

--- Fold 4---
MSE: 0.49721546324581856

--- Fold 5---
MSE: 0.4987887421486239

=== Cross-Validation Results ===
Average MSE: 0.4786 ± 0.0170
