In [1]:
# Generate the synthetic "moons" dataset: 10,000 noisy samples with a fixed
# seed so the same points are produced on every run.
from sklearn.datasets import make_moons

x, y = make_moons(10000, noise=0.5, random_state=0)

In [2]:
# Hold out 20% of the samples as a test set; the seed keeps the split reproducible.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

### SVM

In [3]:
from sklearn.svm import SVC

# RBF-kernel support vector classifier with C=3, trained on the training split.
# fit() returns the estimator itself, so the construction and fit can be chained.
svc_model = SVC(C=3, kernel='rbf').fit(X_train, y_train)

# Bare name as the last expression so the cell still displays the fitted estimator.
svc_model

### Evaluation

In [4]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Predict labels for the held-out test split with the fitted SVC.
svc_pred = svc_model.predict(X_test)

# Each entry pairs the printed label with the metric function that produces
# its value; all of them take (true labels, predicted labels).
for label, metric in (
    ("Accuracy score :", accuracy_score),
    ("Precision : ", precision_score),
    ("Recall : ", recall_score),
    ("F1 score : ", f1_score),
):
    print(label, metric(y_test, svc_pred))

# The confusion matrix is printed on its own lines below its heading.
print("Confusion matrix :")
print(confusion_matrix(y_test, svc_pred))

Accuracy score : 0.8335
Precision :  0.8198636806231743
Recall :  0.8505050505050505
F1 score :  0.8349033217649975
Confusion matrix :
[[825 185]
 [148 842]]


In [5]:
# Cross-validation is another way to evaluate the model: instead of a single
# train/test split, the score is computed over 5 folds of the training data.
from sklearn.model_selection import cross_val_score
import numpy as np

# 'f1_macro' computes the F1 score for each class separately and then takes
# their unweighted average, so every class counts equally regardless of how
# many samples it has (which matters most when classes are imbalanced).
scores = cross_val_score(
    estimator=svc_model,
    X=X_train,
    y=y_train,
    scoring='f1_macro',
    cv=5,
)

# Summarize the per-fold scores with their mean.
mean_f1 = np.mean(scores)

print("5 fold cross validation")
print(f'Mean F1 Score: {mean_f1:.2f}')

5 fold cross validation
Mean F1 Score: 0.82


### Parameter tuning

In [6]:
from sklearn.model_selection import GridSearchCV

# Hyperparameter candidates for the SVC: four values of the regularization
# parameter C crossed with two settings of the RBF kernel coefficient gamma.
param_grid = dict(
    C=[0.1, 1, 3, 10],
    gamma=['scale', 'auto'],
)

# Search every combination in the grid with 5-fold cross-validation,
# ranking candidates by macro-averaged F1.
grid_search = GridSearchCV(
    svc_model,
    param_grid,
    cv=5,
    scoring='f1_macro',
)
grid_search.fit(X_train, y_train)

# Collect the results of the search: the winning hyperparameters, the best
# F1 score achieved during cross-validation, and the best estimator.
best_params = grid_search.best_params_
best_f1_score = grid_search.best_score_
best_model = grid_search.best_estimator_

print("Best Hyperparameters:", best_params)
print("Best F1 Score:", best_f1_score)
print(best_model)

Best Hyperparameters: {'C': 10, 'gamma': 'scale'}
Best F1 Score: 0.8242345834595174
SVC(C=10)


SVC references:
- https://stackabuse.com/implementing-svm-and-kernel-svm-with-pythons-scikit-learn/
- https://www.tutorialspoint.com/machine_learning_with_python/machine_learning_with_python_implementing_svm_in_python.htm

### KNN

In [3]:
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours classifier with its default hyperparameters, trained on
# the training split. fit() returns the estimator, so the calls can be chained.
knn = KNeighborsClassifier().fit(X_train, y_train)

# Bare name as the last expression so the cell still displays the fitted estimator.
knn

### Evaluation

In [5]:
from sklearn.metrics import accuracy_score, classification_report

# Predict labels for the held-out test split with the fitted KNN model.
knn_pred = knn.predict(X_test)

print("Accuracy score :", accuracy_score(y_true=y_test, y_pred=knn_pred))

# sep="\n" puts the heading and the per-class report on separate lines.
print(
    "classification_report",
    classification_report(y_true=y_test, y_pred=knn_pred),
    sep="\n",
)

Accuracy score : 0.7925
classification_report
              precision    recall  f1-score   support

           0       0.80      0.78      0.79      1010
           1       0.78      0.80      0.79       990

    accuracy                           0.79      2000
   macro avg       0.79      0.79      0.79      2000
weighted avg       0.79      0.79      0.79      2000



### Parameter tuning

In [7]:
# accuracy_score is imported here as well so this cell does not depend on the
# KNN evaluation cell having been executed first.
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV

# NOTE: param_grid, grid_search, best_params and best_model are the same names
# used by the SVC tuning cell; running this cell overwrites those SVC results.

# Define the hyperparameter grid to search: candidate values of n_neighbors.
param_grid = {
    'n_neighbors': [3, 5, 7, 9, 11],
}

# Create a GridSearchCV object: 5-fold cross-validation, ranked by accuracy.
grid_search = GridSearchCV(estimator=knn, param_grid=param_grid, scoring='accuracy', cv=5)

# Fit the GridSearchCV object to the training data only; the test split is
# not touched until the final check below.
grid_search.fit(X_train, y_train)

# Get the best hyperparameters found by the search.
best_params = grid_search.best_params_
print("Best Number of Neighbors:", best_params['n_neighbors'])

# Get the best trained model.
best_model = grid_search.best_estimator_
print(best_model)

# Make predictions on the held-out test data using the best model.
y_pred = best_model.predict(X_test)

# Calculate test-set accuracy of the tuned model.
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy with Best Model: {accuracy:.2f}')


Best Number of Neighbors: 11
KNeighborsClassifier(n_neighbors=11)
Accuracy with Best Model: 0.82
