<a href="https://colab.research.google.com/github/Meenusj/ML/blob/main/validation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.svm import SVC
from sklearn.metrics import f1_score
import numpy as np

# Compare two ways of estimating SVM performance on the scikit-learn
# "Breast Cancer Wisconsin (Diagnostic)" dataset:
#   1. the mean macro-F1 over 10-fold cross-validation, and
#   2. the macro-F1 on a single hold-out split at several train:test ratios.
breast_cancer_data = load_breast_cancer()
X, y = breast_cancer_data.data, breast_cancer_data.target

# NOTE(review): the features are passed to SVC unscaled. SVC with its default
# kernel is sensitive to feature scale, so wrapping it in a pipeline with a
# scaler would likely be a fairer evaluation -- not done here because it
# would change every score reported below.
svm_classifier = SVC()

# cross_val_score fits clones of the estimator, so `svm_classifier` itself is
# still unfitted after this call. The result is one macro-F1 value per fold.
f1_scores_10fold = cross_val_score(svm_classifier, X, y, cv=10, scoring='f1_macro')

# (train fraction, test fraction) pairs to evaluate with a single split each.
train_test_splits = [(0.8, 0.2), (0.7, 0.3), (0.6, 0.4)]
f1_scores_train_test = []
split_labels = []

for train_size, test_size in train_test_splits:
    # A fixed random_state keeps every split reproducible across runs.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=42
    )
    # Re-fitting discards the previous fit, so each ratio is scored on a
    # model trained only on that ratio's training portion.
    svm_classifier.fit(X_train, y_train)
    y_pred = svm_classifier.predict(X_test)
    f1_scores_train_test.append(f1_score(y_test, y_pred, average='macro'))
    # Build the row label from the ratio itself so the table can never drift
    # out of sync with `train_test_splits`. round() guards against float
    # artefacts such as 0.7 * 100 == 70.00000000000001.
    split_labels.append(f"{round(train_size * 100)}:{round(test_size * 100)} Split")

# Comparison table: cross-validated mean first, then one row per split ratio.
comparison_data = {
    'Method': ['10-Fold Validation'] + split_labels,
    'F1 Score': [np.mean(f1_scores_10fold)] + f1_scores_train_test,
}

comparison_table = pd.DataFrame(comparison_data)
print(comparison_table)

               Method  F1 Score
0  10-Fold Validation  0.904060
1         80:20 Split  0.942230
2         70:30 Split  0.927945
3         60:40 Split  0.935203
