In [3]:
from sklearn.svm import SVR
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score
from sklearn import datasets

In [5]:
# Load the scikit-learn diabetes dataset and keep its combined DataFrame
# (the frame carries a "target" column alongside the feature columns).
diabetes_bunch = datasets.load_diabetes(as_frame=True)
df = diabetes_bunch.frame

In [6]:
# Feature matrix: every column except the regression target.
X = df.drop(columns=["target"])
# Regression target as a Series.
y = df["target"]

In [7]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
split_parts = train_test_split(X, y, test_size=0.3, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [8]:
# Standardise the target. The scaler is fitted on the training target only and
# then applied to both splits, so no test-set statistics leak into training.
y_scaler = StandardScaler()

# StandardScaler expects 2-D input, so view each target as a single column.
y_train_column = y_train.to_numpy().reshape(-1, 1)
y_test_column = y_test.to_numpy().reshape(-1, 1)

y_scaler.fit(y_train_column)
# Flatten back to 1-D, which is the shape used for fitting and scoring below.
y_train_scaled = y_scaler.transform(y_train_column).ravel()
y_test_scaled = y_scaler.transform(y_test_column).ravel()

In [23]:
# Baseline: SVR with its default settings, trained on the standardised target.
model = SVR()

model.fit(X=X_train, y=y_train_scaled)

In [24]:
# Predictions (in standardised-target units) for the test split, then the train split.
y_test_pred_scaled, y_train_pred_sclaed = [
    model.predict(features) for features in (X_test, X_train)
]

In [28]:
# R^2 of the current model on each split, computed against the standardised targets.
test_r2 = r2_score(y_test_scaled, y_test_pred_scaled)
train_r2 = r2_score(y_train_scaled, y_train_pred_sclaed)

print("r2 score for test:", test_r2)
print("r2 score for train", train_r2)

r2 score for test: 0.48844443151651884
r2 score for train 0.6596361676267712


In [29]:
# Linear-kernel SVR: fit on the standardised target and report R^2 on both splits.
model = SVR(kernel="linear")

model.fit(X_train, y_train_scaled)

# Predict each split exactly once (the test split used to be predicted twice).
y_test_pred_scaled = model.predict(X_test)
y_train_pred_sclaed = model.predict(X_train)
# Legacy alias: same values as before, kept bound in case another cell reads it.
y_pred_scaled = y_test_pred_scaled

print("r2 score for test:",r2_score(y_test_scaled, y_test_pred_scaled))
print("r2 score for train", r2_score(y_train_scaled, y_train_pred_sclaed))

r2 score for test: 0.4433761323833776
r2 score for train 0.45191229982475245


In [30]:
# Sigmoid-kernel SVR: fit on the standardised target and report R^2 on both splits.
# NOTE(review): the recorded run shows strongly negative R^2 on both splits for
# this kernel with default settings, i.e. worse than predicting the mean.
model = SVR(kernel="sigmoid")

model.fit(X_train, y_train_scaled)

# Predict each split exactly once (the test split used to be predicted twice).
y_test_pred_scaled = model.predict(X_test)
y_train_pred_sclaed = model.predict(X_train)
# Legacy alias: same values as before, kept bound in case another cell reads it.
y_pred_scaled = y_test_pred_scaled

print("r2 score for test:",r2_score(y_test_scaled, y_test_pred_scaled))
print("r2 score for train", r2_score(y_train_scaled, y_train_pred_sclaed))

r2 score for test: -15.316808189576822
r2 score for train -19.721193440731305


In [31]:
# Polynomial-kernel SVR: fit on the standardised target and report R^2 on both splits.
model = SVR(kernel="poly")

model.fit(X_train, y_train_scaled)

# Predict each split exactly once (the test split used to be predicted twice).
y_test_pred_scaled = model.predict(X_test)
y_train_pred_sclaed = model.predict(X_train)
# Legacy alias: same values as before, kept bound in case another cell reads it.
y_pred_scaled = y_test_pred_scaled

print("r2 score for test:",r2_score(y_test_scaled, y_test_pred_scaled))
print("r2 score for train", r2_score(y_train_scaled, y_train_pred_sclaed))

r2 score for test: 0.24203771038107802
r2 score for train 0.579092083431054


# Hyperparameter tuning using GridSearchCV

In [33]:
from sklearn.model_selection import GridSearchCV

# Search space for the SVR grid search: every combination of the values below
# is evaluated (6 values of C x 2 kernels x 5 values of epsilon).
param_grid = dict(
    C=[1, 2, 5, 10, 50, 100],
    kernel=["rbf", "linear"],
    epsilon=[0.01, 0.1, 0.2, 0.3, 0.5],
)

In [36]:
# Base estimator whose hyperparameters are searched over.
svr = SVR()

# Exhaustive search over param_grid, scored by R^2 with 5-fold cross-validation
# on the training split only.
grid_search = GridSearchCV(
    estimator=svr,
    param_grid=param_grid,
    scoring="r2",
    cv=5,
)
grid_search.fit(X_train, y_train_scaled)

In [38]:
# Hyperparameter combination with the best mean cross-validated score.
best_params = grid_search.best_params_
print("Best pras :", best_params)

Best pras : {'C': 10, 'epsilon': 0.1, 'kernel': 'linear'}


In [42]:
# Use the winning estimator straight from the grid search instead of re-typing its
# hyperparameters by hand: hand-copied values silently go stale if the grid, the
# data or the seed changes. With GridSearchCV's default refit=True,
# best_estimator_ has already been refitted on the full training split using
# best_params_, so no extra fit call is needed.
best_model = grid_search.best_estimator_

# Predictions are in standardised-target units, matching the scaled targets below.
y_test_pred_scaled = best_model.predict(X_test)
y_train_pred_sclaed = best_model.predict(X_train)
print("r2 score for test:",r2_score(y_test_scaled, y_test_pred_scaled))
print("r2 score for train", r2_score(y_train_scaled, y_train_pred_sclaed))

r2 score for test: 0.47444183250401084
r2 score for train 0.5151066486918875
