In [3]:
import os

import joblib as jb
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC

#### Load train dataset

In [3]:
# Read the MNIST training CSV into a DataFrame.
# The file location is configurable: set the MNIST_DATA_DIR environment variable
# to the folder holding mnist_train.csv to run this notebook on another machine.
# When the variable is unset (or empty) the original workstation path is used,
# so existing runs behave exactly as before.
data_dir = os.environ.get("MNIST_DATA_DIR")
if data_dir:
    train_csv_path = os.path.join(data_dir, "mnist_train.csv")
else:
    train_csv_path = "C:\\Users\\student\\Desktop\\week5\\mnist_train.csv"
train_dataset = pd.read_csv(train_csv_path)

#### Preprocessing

In [4]:
# Data-quality check on the training frame:
#   - total number of missing cells across all columns
#   - number of rows that exactly repeat an earlier row
null_count = train_dataset.isna().sum().sum()
duplicate_count = train_dataset.duplicated().sum()
print(f"Number of Null values: {null_count}")
print(f"No of Duplicate records: {duplicate_count}")

0

#### Separating features and labels

In [6]:
# Split the training frame into the feature columns and the "label" target column.
train_data = train_dataset.drop(columns="label")
train_label = train_dataset["label"]
# Rename the feature columns to p1..pN. N is taken from the frame itself rather
# than hard-coded, so the cell does not fail on inputs of a different width.
train_data.columns = [f"p{i+1}" for i in range(train_data.shape[1])]

#### Scaling

In [8]:
# Learn the per-feature min/max from the training features only, then apply
# that scaling to the same training features. The fitted `scaler` is kept so
# the identical transformation can be reused on other data later.
scaler = MinMaxScaler()
scaler.fit(train_data)
train_data_scaled = scaler.transform(train_data)

#### Training model

In [10]:
# Support-vector classifier with the library defaults written out explicitly
# (these match the values shown in the estimator summary printed by this cell).
model = SVC(C=1.0, kernel="rbf", gamma="scale")
# Fit on the scaled training features; leaving the call as the last expression
# keeps the fitted-estimator summary as the cell output.
model.fit(X=train_data_scaled, y=train_label)

0,1,2
,C,1.0
,kernel,'rbf'
,degree,3
,gamma,'scale'
,coef0,0.0
,shrinking,True
,probability,False
,tol,0.001
,cache_size,200
,class_weight,


#### Loading the test dataset

In [11]:
# Read the MNIST test CSV into a DataFrame.
# The file location is configurable: set the MNIST_DATA_DIR environment variable
# to the folder holding mnist_test.csv to run this notebook on another machine.
# When the variable is unset (or empty) the original workstation path is used,
# so existing runs behave exactly as before.
data_dir = os.environ.get("MNIST_DATA_DIR")
if data_dir:
    test_csv_path = os.path.join(data_dir, "mnist_test.csv")
else:
    test_csv_path = "C:\\Users\\student\\Desktop\\week5\\mnist_test.csv"
test_dataset = pd.read_csv(test_csv_path)

#### Preparing the test data the same way as the training data

In [12]:
# Same data-quality report as for the training frame: missing cells and duplicated rows.
print(f"Number of Null values: {test_dataset.isnull().sum().sum()}")
print(f"No of Duplicate records: {test_dataset.duplicated().sum()}")

# Split the test frame into the feature columns and the "label" target column.
test_data = test_dataset.drop(columns="label")
test_label = test_dataset["label"]
# Rename the feature columns to p1..pN, with N taken from the frame instead of a
# hard-coded 784.
test_data.columns = [f"p{i+1}" for i in range(test_data.shape[1])]

# Reuse the already-fitted scaler (transform only, no re-fit) so the test
# features are scaled with the statistics learned from the training data.
test_data_scaled = scaler.transform(test_data)

0

#### Model prediction and evaluation

In [1]:
# Evaluate the trained classifier on the held-out test set.
# NOTE: this cell needs `model`, `test_data_scaled` and `test_label` from the
# cells above; running it in a fresh kernel without them raises NameError.
pred_label = model.predict(test_data_scaled)

# Overall accuracy plus recall / precision / F1 aggregated across the digit
# classes with average="weighted".
acc = accuracy_score(test_label, pred_label)
rec = recall_score(test_label, pred_label, average="weighted")
pre = precision_score(test_label, pred_label, average="weighted")
f1 = f1_score(test_label, pred_label, average="weighted")

# Report every metric as a percentage with the same two-decimal format
# (previously two were unrounded and one carried a stray leading space).
print(f"Accuracy: {acc*100:.2f}%")
print(f"Recall: {rec*100:.2f}%")
print(f"Precision: {pre*100:.2f}%")
print(f"F1_score: {f1*100:.2f}%")

# Pin the class order so the matrix is always 10x10 and its rows/columns line
# up with the tick labels, even if a digit never occurs in the true or
# predicted labels.
digit_labels = list(range(10))
cm = confusion_matrix(test_label, pred_label, labels=digit_labels)

# Draw on an explicit Axes rather than relying on pyplot's implicit current axes.
fig, ax = plt.subplots()
sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=digit_labels,
    yticklabels=digit_labels,
    ax=ax,
)
ax.set_xlabel("Predicted digits")
ax.set_ylabel("Actual digits")
plt.show()

NameError: name 'model' is not defined

#### Saving the model and scaler

In [18]:
# Persist the fitted classifier and the fitted scaler with joblib, one file each,
# written to the current working directory. Both are needed together at
# inference time: new inputs must be scaled before being passed to the model.
for artifact, target_name in ((model, "model.pkl"), (scaler, "scaler.pkl")):
    with open(target_name, 'wb') as sink:
        jb.dump(artifact, sink)