In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from sklearn.svm import SVC

# SVM kernel comparison on the Ex3 data: load and clean the CSV, compare three
# kernels with 5-fold cross-validation, then fit a linear SVM on a 75/25
# hold-out split and report its F1 score.

# Load dataset and forward-fill missing values (if any).
# DataFrame.ffill() replaces the deprecated fillna(method='ffill') form and
# returns a new frame instead of mutating in place.
# NOTE: a missing value in the very first row has nothing to copy from and
# stays NaN.
data = pd.read_csv('Ex3.csv').ffill()

# Encoding categorical variables
le = LabelEncoder()
data['Gender'] = le.fit_transform(data['Gender'])

# Separate the input features from the target column
X = data.drop('Purchase', axis=1)
y = data['Purchase']

# Define SVM classifiers
classifiers = {
    'Linear SVM': SVC(kernel='linear', random_state=0),
    'Polynomial SVM': SVC(kernel='poly', degree=3, random_state=0),
    'RBF SVM': SVC(kernel='rbf', random_state=0)
}

# Evaluate each classifier using 5-fold cross-validation.
# The scaler is placed in a pipeline with the classifier so that it is
# re-fitted on the training folds of every split; scaling X once up front
# would leak the held-out fold's mean/variance into training.
for clf_name, clf in classifiers.items():
    model = make_pipeline(StandardScaler(), clf)
    scores = cross_val_score(model, X, y, cv=5, scoring='accuracy')
    print(f"{clf_name} Cross-Validation Accuracy: {scores.mean():.2f} (+/- {scores.std() * 2:.2f})")

# Hold out 25% of the rows for testing *before* any scaling is fitted
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

# Feature scaling: learn mean/variance from the training rows only, then apply
# the same transform to the test rows
sc = StandardScaler()
X_train = sc.fit_transform(X_train_raw)
X_test = sc.transform(X_test_raw)
# Kept so cells that still reference X_scaled keep working; it now uses the
# training-split statistics rather than statistics of the full dataset.
X_scaled = sc.transform(X)

# Train the selected SVM model (linear kernel) on the training split
classifier = SVC(kernel='linear', random_state=0)
classifier.fit(X_train, y_train)

# Make Predictions
y_pred = classifier.predict(X_test)

# Performance Evaluation
# precision/recall/F1 are called with scikit-learn's default averaging, so this
# presumably expects a two-class Purchase target -- TODO confirm against Ex3.csv.
cm = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)


# Only the F1 score is reported; the other metrics stay available as variables
print(f"Performance Score: {f1:.2f}")


Linear SVM Cross-Validation Accuracy: 0.80 (+/- 0.33)
Polynomial SVM Cross-Validation Accuracy: 0.80 (+/- 0.53)
RBF SVM Cross-Validation Accuracy: 0.80 (+/- 0.53)
Performance Score: 1.00


In [1]:
import pandas as pd
import numpy as np

# Use the breast cancer dataset that ships with scikit-learn
from sklearn.datasets import load_breast_cancer

# Load the dataset with as_frame=True so its tables come back as pandas objects
cancer = load_breast_cancer(as_frame=True)

# Pull out the dataset's `frame` attribute as the working DataFrame
df = cancer['frame']

# Report the dimensions of the full frame
full_shape = df.shape
print('Original Dataframe shape :', full_shape)

# Keep only the columns named in the dataset's feature_names as model inputs
feature_columns = cancer.feature_names
X = df[feature_columns]
print('Inputs Dataframe shape   :', X.shape)


Original Dataframe shape : (569, 31)
Inputs Dataframe shape   : (569, 30)


In [None]:
%%writefile Ex4.csv
Age,Income,Gender,TargetVariable
25,50000,Male,0
45,64000,Female,1
35,58000,Female,0
50,72000,Male,1
23,48000,Male,0
31,52000,Female,0
46,60000,Female,1
29,55000,Male,0
52,75000,Male,1
48,68000,Female,1
36,59000,Male,0
28,53000,Female,0
27,52000,Female,0
44,61000,Male,1
33,57000,Female,1
