# **Import Libraries**

In [31]:
import numpy as np
import pandas as pd
import pickle
from sklearn.preprocessing import StandardScaler
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from prettytable import PrettyTable

# **Load Data**

In [6]:
Data = pd.read_csv('BankNote_Authentication.csv')
print(Data)

    variance  skewness  curtosis   entropy  class
0    3.62160  8.666100  -2.80730 -0.446990      0
1    4.54590  8.167400  -2.45860 -1.462100      0
2    3.86600 -2.638300   1.92420  0.106450      0
3    3.45660  9.522800  -4.01120 -3.594400      0
4    0.32924 -4.455200   4.57180 -0.988800      0
..       ...       ...       ...       ...    ...
95  -4.29320  3.341900   0.77258 -0.997850      1
96  -3.02650 -0.062088   0.68604 -0.055186      1
97  -1.70150 -0.010356  -0.99337 -0.531040      1
98  -0.64326  2.474800  -2.94520 -1.027600      1
99  -0.86339  1.934800  -2.37290 -1.089700      1

[100 rows x 5 columns]


# **Understand the Data**

In [22]:
print("Columns:", Data.columns)
print("\nNo of instances in data:", Data['variance'].count())
print("\nShape:", Data.shape)
print("\nData Types:", Data.dtypes)
print()
print("\nMissing values", Data.info())

Columns: Index(['variance', 'skewness', 'curtosis', 'entropy', 'class'], dtype='object')

No of instances in data: 100

Shape: (100, 5)

Data Types: variance    float64
skewness    float64
curtosis    float64
entropy     float64
class         int64
dtype: object

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   variance  100 non-null    float64
 1   skewness  100 non-null    float64
 2   curtosis  100 non-null    float64
 3   entropy   100 non-null    float64
 4   class     100 non-null    int64  
dtypes: float64(4), int64(1)
memory usage: 4.0 KB

Missing values None


# **Preprocess the Data**


Data is already Preprocessed so No Preprocessing needs to be Performed.


# **Feature Extraction**

Feature extraction is already done so no further feature extraction is needed.

In [24]:
feature_cols = ['variance', 'skewness', 'curtosis', 'entropy']
target_col = ['class']

X = Data[feature_cols]
y = Data[target_col]

print("Features:",feature_cols)
print("Target:",target_col)
print("\nClass distribution:\n", y.value_counts())

Features: ['variance', 'skewness', 'curtosis', 'entropy']
Target: ['class']

Class distribution:
 class
0        50
1        50
Name: count, dtype: int64


# **Label Encoding**

No label encoding is needed because dataset has numeric values not categorical

# **Training Phase(Train Test Split(80-20))**

In [25]:
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
    shuffle=False)

**Save trainig and test data into csv files**

In [26]:
X_train.to_csv('X_train.csv', index=False, header=True)
X_test.to_csv('X_test.csv', index=False, header=True)

y_train.to_csv('y_train.csv', index=False, header=True)
y_test.to_csv('y_test.csv', index=False, header=True)

In [30]:
print("Training Data")
df = pd.read_csv('X_train.csv')
print(df)
print("\n")
df = pd.read_csv('y_train.csv')
print(df)

Training Data
    variance  skewness  curtosis  entropy
0    3.62160    8.6661  -2.80730 -0.44699
1    4.54590    8.1674  -2.45860 -1.46210
2    3.86600   -2.6383   1.92420  0.10645
3    3.45660    9.5228  -4.01120 -3.59440
4    0.32924   -4.4552   4.57180 -0.98880
..       ...       ...       ...      ...
75  -2.66850  -10.4519   9.11390 -1.73230
76  -0.47465   -4.3496   1.99010  0.75170
77   1.05520    1.1857  -2.64110  0.11033
78   1.16440    3.8095  -4.94080 -4.09090
79  -4.47790    7.3708  -0.31218 -6.77540

[80 rows x 4 columns]


    class
0       0
1       0
2       0
3       0
4       0
..    ...
75      1
76      1
77      1
78      1
79      1

[80 rows x 1 columns]


# **Support Vector Classification**

**Scaling**

In [32]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [33]:
pickle.dump(scaler, open('scaler_banknote.pkl', 'wb'))
print("Scaler saved as scaler_banknote.pkl")

Scaler saved as scaler_banknote.pkl


In [34]:
svc_model = svm.SVC(kernel="rbf", gamma='auto', random_state=0)
svc_model.fit(X_train_scaled, y_train.values.ravel())
print(svc_model)

SVC(gamma='auto', random_state=0)


**Save the trained model**

In [35]:
pickle.dump(svc_model, open('svc_trained_banknote.pkl', 'wb'))
print("Model saved as svc_trained_banknote.pkl")

Model saved as svc_trained_banknote.pkl


# **Evaluation of the model**

In [40]:
y_pred = svc_model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)

print("Accuracy:", accuracy)
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))

Accuracy: 0.95

Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       1.00      0.95      0.97        20

    accuracy                           0.95        20
   macro avg       0.50      0.47      0.49        20
weighted avg       1.00      0.95      0.97        20


Confusion Matrix:
 [[ 0  0]
 [ 1 19]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


**Pretty summary**

In [42]:
summary = PrettyTable()
summary.field_names = ["Metric", "Value"]
summary.add_row(["Test Accuracy", accuracy])
summary.add_row(["Test Samples", len(y_test)])
print("\nSummary:\n", summary)


Summary:
 +---------------+-------+
|     Metric    | Value |
+---------------+-------+
| Test Accuracy |  0.95 |
|  Test Samples |   20  |
+---------------+-------+


# **Application Phase**

**Taking input and storing it into dataframe**

In [44]:
variance_input = float(input("\nEnter Variance value: "))
skewness_input = float(input("Enter Skewness value: "))
curtosis_input = float(input("Enter Curtosis value: "))
entropy_input = float(input("Enter Entropy value: "))


Enter Variance value: 3.6216
Enter Skewness value: 8.6661
Enter Curtosis value: -2.8073
Enter Entropy value: -0.44699


In [46]:
user_input = pd.DataFrame({
    'variance': [variance_input],
    'skewness': [skewness_input],
    'curtosis': [curtosis_input],
    'entropy': [entropy_input]
})

print("User input Feature vectors:\n")
print( user_input)

User input Feature vectors:

   variance  skewness  curtosis  entropy
0    3.6216    8.6661   -2.8073 -0.44699


**Load the Saved Scaler and Model**

In [47]:
scaler = pickle.load(open('scaler_banknote.pkl', 'rb'))
loaded_model = pickle.load(open('svc_trained_banknote.pkl', 'rb'))

**Scale the Input and Predict**

In [51]:
user_scaled = scaler.transform(user_input)

prediction = loaded_model.predict(user_scaled)

label_map = {0: "AUTHENTIC", 1: "FAKE"}
final_prediction = label_map[int(prediction[0])]


In [52]:
pretty_table = PrettyTable()
pretty_table.add_column("       ** Prediction **       ", [final_prediction])

print(pretty_table)

+--------------------------------+
|        ** Prediction **        |
+--------------------------------+
|           AUTHENTIC            |
+--------------------------------+


# **Conclusion**



- The analysis shows that banknote authentication can be effectively determined using four statistical features: variance, skewness, curtosis, and entropy. The dataset used was already clean and preprocessed, requiring no additional feature engineering or label encoding.

- Using a Support Vector Classifier (SVM), the model achieved 95% accuracy on the test set, successfully distinguishing between authentic and fake notes.

- Despite the relatively small dataset size, the study highlights how machine learning techniques can provide reliable and automated solutions for fraud detection.This project demonstrates that AI-based systems can assist banks and financial institutions in real-world scenarios by enhancing the speed and reliability of banknote authentication.