In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

bankdata = pd.read_csv("bill_authentication.csv")

In [14]:
# Exploratory data analysis (minimal)
print(f"Shape: {bankdata.shape}") # Refers to the dimensions of the data
print(bankdata.head())

Shape: (1372, 5)
   Variance  Skewness  Curtosis  Entropy  Class
0   3.62160    8.6661   -2.8073 -0.44699      0
1   4.54590    8.1674   -2.4586 -1.46210      0
2   3.86600   -2.6383    1.9242  0.10645      0
3   3.45660    9.5228   -4.0112 -3.59440      0
4   0.32924   -4.4552    4.5718 -0.98880      0


In [15]:
print("\nMEANS: \n")
print(bankdata.mean())

print("\nSTANDARD DEVIATIONS: \n")
print(bankdata.std())

print("\nMEDIANS: \n")
print(bankdata.median())

print("\nDESCRIPTIVE STATISTICS: \n")
print(bankdata.describe())


MEANS: 

Variance    0.433735
Skewness    1.922353
Curtosis    1.397627
Entropy    -1.191657
Class       0.444606
dtype: float64

STANDARD DEVIATIONS: 

Variance    2.842763
Skewness    5.869047
Curtosis    4.310030
Entropy     2.101013
Class       0.497103
dtype: float64

MEDIANS: 

Variance    0.49618
Skewness    2.31965
Curtosis    0.61663
Entropy    -0.58665
Class       0.00000
dtype: float64

DESCRIPTIVE STATISTICS: 

          Variance     Skewness     Curtosis      Entropy        Class
count  1372.000000  1372.000000  1372.000000  1372.000000  1372.000000
mean      0.433735     1.922353     1.397627    -1.191657     0.444606
std       2.842763     5.869047     4.310030     2.101013     0.497103
min      -7.042100   -13.773100    -5.286100    -8.548200     0.000000
25%      -1.773000    -1.708200    -1.574975    -2.413450     0.000000
50%       0.496180     2.319650     0.616630    -0.586650     0.000000
75%       2.821475     6.814625     3.179250     0.394810     1.000000
max 

In [27]:
# Data Preprocessing
# 1. Divide the data into attributes and labels
# 2. Divide the data into training and testing sets

# Attributes and Labels
# 1. Decide X and Y variables
x = bankdata.drop("Class", axis=1) # All columns except class
y = bankdata['Class'] # Only class column

# Split training and testing sets
from sklearn.model_selection import train_test_split
RAND_SEED = 1234 # Set the seed for generating training and testing sets
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.20,random_state=RAND_SEED)

# Train the model
from sklearn.svm import SVC
svclassifier = SVC(kernel="linear") # Support Vector Classifier (SVC)
svclassifier.fit(x_train, y_train) # Train model using SVC

# Make predictions
y_pred = svclassifier.predict(x_test) # Create predictions on y-axis
print(y_pred)

# Evaluate the model
print("\nCONFUSION MATRIX") # Create confusion matrix
from sklearn.metrics import classification_report, confusion_matrix
cm = confusion_matrix(y_test, y_pred) # Store results in a confusion matrix
print(cm)
print("\nBank Note Prediction Results: ")
print(classification_report(y_test,y_pred))

print("\nThere are", cm[1,0], "classification errors\n")

[0 0 1 1 1 1 0 0 0 0 1 0 1 0 0 1 0 1 1 1 0 0 0 1 1 1 1 0 1 1 0 1 1 1 1 0 0
 0 0 0 0 1 0 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 1 1 1 1 1
 1 1 0 0 1 0 1 1 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 1 0 1 1 1 1 0 0 0 1 0
 0 1 0 1 0 0 0 1 0 0 0 0 0 1 0 1 1 0 1 1 0 0 1 0 1 0 1 1 0 0 0 0 1 0 0 1 0
 0 1 0 0 1 1 1 0 1 0 1 1 1 1 0 1 0 0 1 0 0 0 1 0 1 1 0 1 1 1 0 0 0 1 0 0 0
 1 1 0 1 0 0 1 0 0 0 0 0 1 0 1 1 1 0 0 1 0 1 0 0 0 0 0 0 1 1 0 1 1 1 0 1 1
 0 0 1 0 0 0 0 1 1 0 1 1 1 0 0 0 1 1 0 1 0 0 0 1 1 0 0 1 0 1 1 0 0 1 1 0 0
 1 1 1 1 1 0 0 1 0 0 1 1 0 0 1 0]

CONFUSION MATRIX
[[155   5]
 [  1 114]]

Bank Note Prediction Results: 
              precision    recall  f1-score   support

           0       0.99      0.97      0.98       160
           1       0.96      0.99      0.97       115

    accuracy                           0.98       275
   macro avg       0.98      0.98      0.98       275
weighted avg       0.98      0.98      0.98       275


There are 1 classification errors

