In [1]:
import numpy as np
import pandas as pd

# scikit-learn imports used in this example
from sklearn import svm, datasets
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

In [2]:
# Load the bundled iris dataset and wrap its feature matrix in a DataFrame
# whose columns are labelled with the dataset's feature names.
iris = datasets.load_iris()
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
# Preview the first three rows.
df.head(n=3)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2


In [3]:
# Attach the integer class codes from iris.target as a 'flower' column.
df = df.assign(flower=iris.target)
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),flower
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [4]:
# Replace the integer class codes with the species names.
# The names are looked up from iris.target (vectorized NumPy indexing) rather
# than by transforming the existing df['flower'] column, so this cell is
# idempotent: re-running it yields the same column instead of trying to index
# iris.target_names with strings that a previous run already wrote.
df['flower'] = iris.target_names[iris.target]
df.head(3)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),flower
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa


In [5]:
# Hold out 30% of the samples as a test set. The split is stratified on the
# class labels and seeded so it is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.3,
    stratify=iris.target,
    random_state=42,
)

In [6]:
# Approach 1: fit a single RBF-kernel SVM on the training split and report
# its accuracy on the held-out test split.
model = svm.SVC(C=30, kernel='rbf', gamma='auto').fit(X_train, y_train)
test_score = model.score(X_test, y_test)
print("Approach 1 - test score:", test_score)

Approach 1 - test score: 0.9111111111111111


In [7]:
# Approach 2: 5-fold cross-validation of a linear and an RBF SVM at C=10.
# cross_val_score is already imported in the notebook's import cell, so it is
# not re-imported here (imports are kept in one place at the top).
# Note: the folds are drawn from the full dataset (iris.data / iris.target),
# not from the X_train split created earlier.
cv_scores_linear = cross_val_score(svm.SVC(kernel='linear', C=10, gamma='auto'), iris.data, iris.target, cv=5)
cv_scores_rbf = cross_val_score(svm.SVC(kernel='rbf', C=10, gamma='auto'), iris.data, iris.target, cv=5)
print("linear cv scores:", cv_scores_linear)
print("rbf cv scores:", cv_scores_rbf)


linear cv scores: [1.         1.         0.9        0.96666667 1.        ]
rbf cv scores: [0.96666667 1.         0.96666667 0.96666667 1.        ]


In [8]:
# Approach 3: hand-rolled grid search. For every (kernel, C) pair, run 5-fold
# cross-validation on the full dataset and keep the mean fold score under the
# key "<kernel>_<C>".
kernels = ['rbf', 'linear']
C_values = [1, 10, 20]
avg_scores = {
    f"{kernel}_{c}": np.mean(
        cross_val_score(svm.SVC(kernel=kernel, C=c, gamma='auto'), iris.data, iris.target, cv=5)
    )
    for kernel in kernels
    for c in C_values
}
print("Average CV scores (kernel_C):", avg_scores)

Average CV scores (kernel_C): {'rbf_1': np.float64(0.9800000000000001), 'rbf_10': np.float64(0.9800000000000001), 'rbf_20': np.float64(0.9666666666666668), 'linear_1': np.float64(0.9800000000000001), 'linear_10': np.float64(0.9733333333333334), 'linear_20': np.float64(0.9666666666666666)}


In [9]:
# Approach 4: let GridSearchCV do the same exhaustive search over C and kernel
# with 5-fold cross-validation on the full dataset.
param_grid = dict(C=[1, 10, 20], kernel=['rbf', 'linear'])
clf = GridSearchCV(
    estimator=svm.SVC(gamma='auto'),
    param_grid=param_grid,
    cv=5,
    return_train_score=False,
)
clf.fit(iris.data, iris.target)

# cv_results_ holds one row per parameter combination; show only the columns
# needed to compare them.
cv_results_df = pd.DataFrame(clf.cv_results_)
summary_columns = ['param_C', 'param_kernel', 'mean_test_score']
print(cv_results_df[summary_columns])
print("GridSearch best_params:", clf.best_params_)
print("GridSearch best_score:", clf.best_score_)


   param_C param_kernel  mean_test_score
0        1          rbf         0.980000
1        1       linear         0.980000
2       10          rbf         0.980000
3       10       linear         0.973333
4       20          rbf         0.966667
5       20       linear         0.966667
GridSearch best_params: {'C': 1, 'kernel': 'rbf'}
GridSearch best_score: 0.9800000000000001


In [10]:
# Approach 5: randomized search. Instead of trying every combination, sample
# n_iter=2 parameter settings from the same candidates; random_state fixes
# which settings are drawn.
rs = RandomizedSearchCV(
    estimator=svm.SVC(gamma='auto'),
    param_distributions={'C': [1, 10, 20], 'kernel': ['rbf', 'linear']},
    n_iter=2,
    cv=5,
    return_train_score=False,
    random_state=42,
)
rs.fit(iris.data, iris.target)
rs_results_df = pd.DataFrame(rs.cv_results_)
rs_summary_columns = ['param_C', 'param_kernel', 'mean_test_score']
print(rs_results_df[rs_summary_columns])

   param_C param_kernel  mean_test_score
0        1          rbf             0.98
1        1       linear             0.98


In [11]:
# Candidate estimators to compare. Each entry pairs an unfitted estimator
# ('model') with the hyper-parameter grid to search for it ('params').
model_params = {
    'svm': {
        'model': svm.SVC(gamma='auto'),
        'params': {'C': [1, 10, 20], 'kernel': ['rbf', 'linear']}
    },
    'random_forest': {
        # Seeded so the forest (and therefore its CV score) is reproducible.
        'model': RandomForestClassifier(random_state=42),
        'params': {'n_estimators': [1, 5, 10]}
    },
    'logistic_regression': {
        # multi_class is intentionally not passed: 'auto' is already the
        # effective default, and passing it explicitly is deprecated in
        # recent scikit-learn releases (it emits a FutureWarning).
        'model': LogisticRegression(solver='liblinear', random_state=42),
        'params': {'C': [1, 5, 10]}
    }
}

In [12]:
# Run a 5-fold grid search for every candidate in model_params (on the full
# dataset), keeping each fitted search keyed by the candidate's name.
fitted_searches = {
    model_name: GridSearchCV(spec['model'], spec['params'], cv=5, return_train_score=False).fit(
        iris.data, iris.target
    )
    for model_name, spec in model_params.items()
}

# One summary record per candidate: its best mean CV score and the parameters
# that achieved it.
scores = [
    {
        'model': model_name,
        'best_score': search.best_score_,
        'best_params': search.best_params_,
    }
    for model_name, search in fitted_searches.items()
]

scores_df = pd.DataFrame(scores, columns=['model', 'best_score', 'best_params'])
print(scores_df)

                 model  best_score                best_params
0                  svm    0.980000  {'C': 1, 'kernel': 'rbf'}
1        random_forest    0.966667       {'n_estimators': 10}
2  logistic_regression    0.966667                   {'C': 5}


