A1

In [2]:
import pandas as pd
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer


data = {
    'Gender': ['Male', 'Female', 'Male', 'Female'],
    'Customer Type': ['Regular', 'VIP', 'Regular', 'VIP'],
    'Type of Travel': ['Business', 'Business', 'Personal', 'Business'],
    'Class': ['Economy', 'Business', 'Economy', 'Business'],
    'satisfaction': ['Satisfied', 'Dissatisfied', 'Satisfied', 'Dissatisfied'],
    'Feature1': [10, 20, 15, 25],
    'Feature2': [30, 40, 35, 45]
}

df = pd.DataFrame(data)


label_encoder = LabelEncoder()
df['Gender'] = label_encoder.fit_transform(df['Gender'])
df['Customer Type'] = label_encoder.fit_transform(df['Customer Type'])
df['Type of Travel'] = label_encoder.fit_transform(df['Type of Travel'])
df['Class'] = label_encoder.fit_transform(df['Class'])
df['satisfaction'] = label_encoder.fit_transform(df['satisfaction'])

X = df.drop('satisfaction', axis=1)
y = df['satisfaction']

class_1 = 0
class_2 = 1

X_binary = X[(y == class_1) | (y == class_2)]
y_binary = y[(y == class_1) | (y == class_2)]

X_train, X_test, y_train, y_test = train_test_split(X_binary, y_binary, test_size=0.2, random_state=42)

imputer = SimpleImputer(strategy='mean')
X_train_imputed = imputer.fit_transform(X_train)

clf = svm.SVC()
clf.fit(X_train_imputed, y_train)

support_vectors = clf.support_vectors_

print("Support Vectors:")
print(support_vectors)


Support Vectors:
[[ 0.  1.  0.  0. 25. 45.]
 [ 1.  0.  1.  1. 15. 35.]]


A2

In [9]:
import pandas as pd
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer

columns_to_drop = ['Unnamed: 0', 'id']
df = df.drop(columns_to_drop, axis=1, errors='ignore')

label_encoder = LabelEncoder()
for column in ['Gender', 'Customer Type', 'Type of Travel', 'Class', 'satisfaction']:
    df[column] = label_encoder.fit_transform(df[column])

X = df.drop('satisfaction', axis=1)
y = df['satisfaction']

class_1 = 0
class_2 = 1

X_binary = X[(y == class_1) | (y == class_2)]
y_binary = y[(y == class_1) | (y == class_2)]

X_train, X_test, y_train, y_test = train_test_split(X_binary, y_binary, test_size=0.2, random_state=42)

imputer = SimpleImputer(strategy='mean')
X_train_imputed = imputer.fit_transform(X_train)
X_test_imputed = imputer.transform(X_test)

clf = svm.SVC()

clf.fit(X_train_imputed, y_train)

accuracy = clf.score(X_test_imputed, y_test)
print(f"Accuracy on the test set: {accuracy:.2%}")

test_vector = X_test_imputed[0]
predicted_class = clf.predict([test_vector])

print(f"Predicted class for the test vector: {predicted_class}")

Accuracy on the test set: 66.88%
Predicted class for the test vector: [0]


A3

In [3]:
import pandas as pd
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer

df = pd.read_csv('test(2).csv')

columns_to_drop = ['Unnamed: 0', 'id']
df = df.drop(columns_to_drop, axis=1, errors='ignore')

label_encoder = LabelEncoder()
for column in ['Gender', 'Customer Type', 'Type of Travel', 'Class', 'satisfaction']:
    df[column] = label_encoder.fit_transform(df[column])

X = df.drop('satisfaction', axis=1)
y = df['satisfaction']

class_1 = 0
class_2 = 1

X_binary = X[(y == class_1) | (y == class_2)]
y_binary = y[(y == class_1) | (y == class_2)]

X_train, X_test, y_train, y_test = train_test_split(X_binary, y_binary, test_size=0.2, random_state=42)

imputer = SimpleImputer(strategy='mean')
X_train_imputed = imputer.fit_transform(X_train)
X_test_imputed = imputer.transform(X_test)

clf = svm.SVC()

clf.fit(X_train_imputed, y_train)

predicted_labels = clf.predict(X_test_imputed)

accuracy = sum(predicted_labels == y_test) / len(y_test)
print(f"Accuracy on the test set: {accuracy:.2%}")

result_df = pd.DataFrame({'Actual Label': y_test, 'Predicted Label': predicted_labels})
print(result_df)

Accuracy on the test set: 66.57%
       Actual Label  Predicted Label
13047             0                0
14975             1                1
12642             0                0
23011             1                1
13308             0                0
...             ...              ...
11106             1                1
10256             1                0
20216             1                1
7029              1                1
17124             1                0

[5196 rows x 2 columns]


A4

In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.svm import SVC, LinearSVC
from sklearn.metrics import accuracy_score

df = pd.read_csv('test (2).csv')

columns_to_drop = ['Unnamed: 0', 'id']
df = df.drop(columns_to_drop, axis=1, errors='ignore')

label_encoder = LabelEncoder()
for column in ['Gender', 'Customer Type', 'Type of Travel', 'Class', 'satisfaction']:
    df[column] = label_encoder.fit_transform(df[column])

X = df.drop('satisfaction', axis=1)
y = df['satisfaction']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

imputer = SimpleImputer(strategy='mean')
X_train_imputed = imputer.fit_transform(X_train)
X_test_imputed = imputer.transform(X_test)

kernel_functions = ['linear', 'poly', 'rbf', 'sigmoid']

for kernel in kernel_functions:
    if kernel == 'linear':

        clf = LinearSVC()
    else:
        clf = SVC(kernel=kernel)

    clf.fit(X_train_imputed, y_train)

    predicted_labels = clf.predict(X_test_imputed)

    accuracy = accuracy_score(y_test, predicted_labels)
    print(f"Accuracy with {kernel} kernel: {accuracy:.2%}")



Accuracy with linear kernel: 61.45%
Accuracy with poly kernel: 66.94%
Accuracy with rbf kernel: 66.57%
Accuracy with sigmoid kernel: 41.78%
