In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the banknote measurements into a DataFrame.
# NOTE(review): "/content/..." looks like a Colab working directory — adjust the path when running elsewhere.
data = pd.read_csv("/content/banknotes.csv")

In [None]:
data.head()

Unnamed: 0,conterfeit,Length,Left,Right,Bottom,Top,Diagonal
0,0,214.8,131.0,131.1,9.0,9.7,141.0
1,0,214.6,129.7,129.7,8.1,9.5,141.7
2,0,214.8,129.7,129.7,8.7,9.6,142.2
3,0,214.8,129.7,129.6,7.5,10.4,142.0
4,0,215.0,129.6,129.7,10.4,7.7,141.8


In [None]:

import sklearn.utils

# Shuffle the rows with a fixed seed so that a fresh "Restart & Run All"
# reproduces the same row order, then rebuild a clean 0..n-1 index.
data_df = sklearn.utils.shuffle(data, random_state=1)
data_df = data_df.reset_index(drop=True)
data_df.shape

(200, 7)

In [None]:
# Feature matrix / label vector taken from the shuffled frame.
# NOTE(review): X and y are reassigned from `data_out` after the outlier filter
# further down, so these two assignments do not feed the models.
X = data_df.drop(columns='conterfeit')
y = data_df['conterfeit']

### Removing Outliers from the columns

In [None]:
# Measurement columns that take part in the outlier screen
columns = ['Length', 'Left', 'Right','Bottom','Top','Diagonal']
measurements = data[columns]

# Per-column quartiles and inter-quartile range
Q1 = measurements.quantile(0.25)
Q3 = measurements.quantile(0.75)
IQR = Q3 - Q1

# A value is an outlier when it lies more than 1.5 * IQR outside the quartiles
lower_fence = Q1 - 1.5 * IQR
upper_fence = Q3 + 1.5 * IQR
is_outlier = (measurements < lower_fence) | (measurements > upper_fence)

# Keep only the rows in which no measurement was flagged
data_out = data[~is_outlier.any(axis=1)]

# Report how many rows survived the filter
print("Original data shape:", data.shape)
print("Filtered data shape:", data_out.shape)


Original data shape: (200, 7)
Filtered data shape: (192, 7)


In [None]:
data_out.isnull().sum()

conterfeit    0
Length        0
Left          0
Right         0
Bottom        0
Top           0
Diagonal      0
dtype: int64

In [None]:
# Separate the outlier-free frame into the label vector and the feature matrix.
target_col = 'conterfeit'  # label column, spelled exactly as in the loaded data
y = data_out[target_col]
X = data_out.drop(columns=target_col)

data_out

### Data Preprocessing

In [None]:
# Split the data into training and testing sets
# (30% of the rows are held out; random_state=1 makes the split repeatable).
# NOTE(review): the split is not stratified — consider stratify=y so both
# splits keep the same class ratio.
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)
# Class counts of the training labels, to check the balance before resampling
y_train.value_counts()

1    70
0    64
Name: conterfeit, dtype: int64

In [None]:
import imblearn
from imblearn.over_sampling import SMOTE
# Resample the training split with SMOTE (seeded for repeatability).
# x_test / y_test are not passed in, so the test split stays untouched.
smote = SMOTE(random_state=1)
x_train_balanced, y_balanced = smote.fit_resample(x_train, y_train)

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Fit the min-max scaler on the balanced training features only, and wrap the
# scaled array back into a DataFrame that keeps the original column names.
scaler = MinMaxScaler()
normalised_train_df = pd.DataFrame(
    scaler.fit_transform(x_train_balanced),
    columns=x_train_balanced.columns,
)

In [None]:
# Re-index the test features from 0 and scale them with the scaler that was
# fitted on the training data (transform only — nothing is re-fitted here).
x_test = x_test.reset_index(drop=True)
normalised_test_df = pd.DataFrame(scaler.transform(x_test), columns=x_test.columns)

### Model Training and Evaluation

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import recall_score, accuracy_score, precision_score, f1_score, confusion_matrix


# Build models
# The tree-based models and the SVM are seeded so that repeated runs give the
# same fitted models; the dictionary keys become the column names of the
# results table.
models = {
    'LogisticRegression': LogisticRegression(),
    'DecisionTreeClassifier': DecisionTreeClassifier(random_state=42),
    'Svm': SVC(kernel='linear', C=1.0, random_state=42),
    'RandomForestClassifier': RandomForestClassifier(n_estimators=100, random_state=42),
    'GradientBoostingClassifier': GradientBoostingClassifier(
        n_estimators=100, learning_rate=1.0, max_depth=1, random_state=42
    ),
    'KNeighborClassifier': KNeighborsClassifier(n_neighbors=5),
    'Gaussian Naive_Bayes': GaussianNB(),
}

In [None]:
def train(model, normalised_train_df, y_balanced):
    """Fit ``model`` on the given training data and hand the same object back.

    Parameters
    ----------
    model
        Any object exposing ``fit(features, labels)``.
    normalised_train_df
        Training features passed to ``fit`` as the first argument.
    y_balanced
        Training labels passed to ``fit`` as the second argument.

    Returns
    -------
    The ``model`` argument itself (fitted in place); whatever ``fit`` returns
    is ignored.
    """
    model.fit(normalised_train_df, y_balanced)
    return model

In [None]:
#Cross Validation
# Resampling and scaling run inside the pipeline, so in every fold SMOTE and
# the scaler are fitted on that fold's training part only. Cross-validating the
# already balanced and scaled frame would let synthetic samples and scaling
# statistics derived from the validation rows leak into training and inflate
# the scores.
from sklearn.model_selection import cross_val_score
from imblearn.pipeline import make_pipeline

log_reg = LogisticRegression()
cv_pipeline = make_pipeline(SMOTE(random_state=1), MinMaxScaler(), log_reg)
scores = cross_val_score(cv_pipeline, x_train, y_train, cv=5, scoring='f1_macro')
scores

array([1.       , 0.9642401, 1.       , 1.       , 1.       ])

In [None]:
def score(model, normalised_test_df, y_test):
    """Print and return accuracy, precision and recall of ``model`` on test data.

    Parameters
    ----------
    model
        Fitted object exposing ``predict(features)``.
    normalised_test_df
        Test features handed to ``model.predict``.
    y_test
        True labels the predictions are compared against.

    Returns
    -------
    list
        ``[accuracy, precision, recall]``, in that order. Precision and recall
        are computed with the metric functions' default arguments.
    """
    y_pred = model.predict(normalised_test_df)

    # Order matters: callers store this list against the rows
    # Accuracy / Precision / Recall.
    metrics = [
        accuracy_score(y_test, y_pred),
        precision_score(y_test, y_pred),
        recall_score(y_test, y_pred),
    ]

    for label, value in zip(('Accuracy:', 'Precision:', ' Recall:'), metrics):
        print(label, value)
    print()
    return metrics

In [None]:
# Results table: one row per metric; the model loop adds one column per classifier.
scoring1 = pd.DataFrame({'Metric': ['Accuracy', 'Precision', 'Recall']})

In [None]:
# Fit each candidate on the balanced, scaled training data and store its
# test-set metrics as a new column of `scoring1`.
for name, func in models.items():
    banner = (f'Performance of {name} on Test:', '==' * 24, 'Test set:', '**' * 8)
    print(*banner, sep='\n')

    model = train(func, normalised_train_df, y_balanced)
    results = score(model, normalised_test_df, y_test)
    scoring1[name] = results

Performance of LogisticRegression on Test:
Test set:
****************
Accuracy: 1.0
Precision: 1.0
 Recall: 1.0

Performance of DecisionTreeClassifier on Test:
Test set:
****************
Accuracy: 1.0
Precision: 1.0
 Recall: 1.0

Performance of Svm on Test:
Test set:
****************
Accuracy: 1.0
Precision: 1.0
 Recall: 1.0

Performance of RandomForestClassifier on Test:
Test set:
****************
Accuracy: 1.0
Precision: 1.0
 Recall: 1.0

Performance of GradientBoostingClassifier on Test:
Test set:
****************
Accuracy: 0.9827586206896551
Precision: 1.0
 Recall: 0.9629629629629629

Performance of KNeighborClassifier on Test:
Test set:
****************
Accuracy: 1.0
Precision: 1.0
 Recall: 1.0

Performance of Gaussian Naive_Bayes on Test:
Test set:
****************
Accuracy: 1.0
Precision: 1.0
 Recall: 1.0



In [None]:
scoring1

Unnamed: 0,Metric,LogisticRegression,DecisionTreeClassifier,Svm,RandomForestClassifier,GradientBoostingClassifier,KNeighborClassifier,Gaussian Naive_Bayes
0,Accuracy,1.0,1.0,1.0,1.0,0.982759,1.0,1.0
1,Precision,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,Recall,1.0,1.0,1.0,1.0,0.962963,1.0,1.0


**After evaluating the models, the RandomForestClassifier was our preferred model.**

**RandomForestClassifier**
Evaluation Metric       Score
Accuracy                1.0
Precision               1.0
Recall                  1.0