In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the dataset
dataset_path = "C:\\Users\\msi\\Downloads\\BankNotesDataset.csv"
df = pd.read_csv(dataset_path)

# Take a subset of the data 
subset_df = df.sample(n=12700, random_state=42)


# Splitting the 'Denomination' column at the first underscore only
subset_df['Denomination'], subset_df['Orientation'] = zip(*subset_df['Denomination'].apply(lambda x: x.split('_', 1) if '_' in x else (x, 'Unknown')))

# Separate features and labels
X = subset_df.iloc[:, :-3]  # Features
y_currency = subset_df['Currency']  # Currency labels
y_denomination = subset_df['Denomination']  # Denomination labels
y_orientation = subset_df['Orientation']  # Orientation labels

X_train_currency, X_test_currency, y_currency_train, y_currency_test = train_test_split(X, y_currency, test_size=0.2, random_state=42)
X_train_denomination, X_test_denomination, y_denomination_train, y_denomination_test = train_test_split(X, y_denomination, test_size=0.2, random_state=42)
X_train_orientation, X_test_orientation, y_orientation_train, y_orientation_test = train_test_split(X, y_orientation, test_size=0.2, random_state=42)


scaler = StandardScaler()
X_train_currency_scaled = scaler.fit_transform(X_train_currency)
X_test_currency_scaled = scaler.transform(X_test_currency)

X_train_denomination_scaled = scaler.fit_transform(X_train_denomination)
X_test_denomination_scaled = scaler.transform(X_test_denomination)

X_train_orientation_scaled = scaler.fit_transform(X_train_orientation)
X_test_orientation_scaled = scaler.transform(X_test_orientation)

print(subset_df.info())


<class 'pandas.core.frame.DataFrame'>
Index: 12700 entries, 5716 to 10604
Columns: 260 entries, Unnamed: 0 to Orientation
dtypes: float64(256), int64(1), object(3)
memory usage: 25.3+ MB
None


In [9]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier

# Define parameter grids for SVM and MLP classifiers
svm_param_grid = {'C': [0.1, 1, 10]}
mlp_param_grid = {'hidden_layer_sizes': [(50,), (100,), (150,)], 'alpha': [0.0001, 0.001, 0.01]}

# Grid search for SVM classifiers
svm_currency_grid = GridSearchCV(SVC(kernel='linear'), param_grid=svm_param_grid, cv=3)
svm_denomination_grid = GridSearchCV(SVC(kernel='linear'), param_grid=svm_param_grid, cv=3)
svm_orientation_grid = GridSearchCV(SVC(kernel='linear'), param_grid=svm_param_grid, cv=3)

svm_currency_grid.fit(X_train_currency_scaled, y_currency_train)
svm_denomination_grid.fit(X_train_denomination_scaled, y_denomination_train)
svm_orientation_grid.fit(X_train_orientation_scaled, y_orientation_train)

# Grid search for MLP classifiers
mlp_currency_grid = GridSearchCV(MLPClassifier(max_iter=1000, tol=0.01), param_grid=mlp_param_grid, cv=3)
mlp_denomination_grid = GridSearchCV(MLPClassifier(max_iter=1000, tol=0.01), param_grid=mlp_param_grid, cv=3)
mlp_orientation_grid = GridSearchCV(MLPClassifier(max_iter=1000, tol=0.01), param_grid=mlp_param_grid, cv=3)

mlp_currency_grid.fit(X_train_currency_scaled, y_currency_train)
mlp_denomination_grid.fit(X_train_denomination_scaled, y_denomination_train)
mlp_orientation_grid.fit(X_train_orientation_scaled, y_orientation_train)

# Best hyperparameters for SVM classifiers
print("Best hyperparameters for SVM Currency:", svm_currency_grid.best_params_)
print("Best hyperparameters for SVM Denomination:", svm_denomination_grid.best_params_)
print("Best hyperparameters for SVM Orientation:", svm_orientation_grid.best_params_)

# Best hyperparameters for MLP classifiers
print("Best hyperparameters for MLP Currency:", mlp_currency_grid.best_params_)
print("Best hyperparameters for MLP Denomination:", mlp_denomination_grid.best_params_)
print("Best hyperparameters for MLP Orientation:", mlp_orientation_grid.best_params_)

# Evaluate SVM classifiers
svm_currency_accuracy = svm_currency_grid.score(X_test_currency_scaled, y_currency_test)
svm_denomination_accuracy = svm_denomination_grid.score(X_test_denomination_scaled, y_denomination_test)
svm_orientation_accuracy = svm_orientation_grid.score(X_test_orientation_scaled, y_orientation_test)

print("SVM Accuracy for Currency:", svm_currency_accuracy)
print("SVM Accuracy for Denomination:", svm_denomination_accuracy)
print("SVM Accuracy for Orientation:", svm_orientation_accuracy)

# Evaluate MLP classifiers
mlp_currency_accuracy = mlp_currency_grid.score(X_test_currency_scaled, y_currency_test)
mlp_denomination_accuracy = mlp_denomination_grid.score(X_test_denomination_scaled, y_denomination_test)
mlp_orientation_accuracy = mlp_orientation_grid.score(X_test_orientation_scaled, y_orientation_test)

print("MLP Accuracy for Currency:", mlp_currency_accuracy)
print("MLP Accuracy for Denomination:", mlp_denomination_accuracy)
print("MLP Accuracy for Orientation:", mlp_orientation_accuracy)


Best hyperparameters for SVM Currency: {'C': 0.1}
Best hyperparameters for SVM Denomination: {'C': 0.1}
Best hyperparameters for SVM Orientation: {'C': 0.1}
Best hyperparameters for MLP Currency: {'alpha': 0.01, 'hidden_layer_sizes': (150,)}
Best hyperparameters for MLP Denomination: {'alpha': 0.01, 'hidden_layer_sizes': (150,)}
Best hyperparameters for MLP Orientation: {'alpha': 0.001, 'hidden_layer_sizes': (150,)}
SVM Accuracy for Currency: 0.9775590551181103
SVM Accuracy for Denomination: 0.8842519685039371
SVM Accuracy for Orientation: 0.9062992125984252
MLP Accuracy for Currency: 0.9759842519685039
MLP Accuracy for Denomination: 0.934251968503937
MLP Accuracy for Orientation: 0.9413385826771653


In [4]:
from sklearn.decomposition import PCA

# Initialize PCA with desired number of components
pca = PCA(n_components=200) 

# Fit PCA on the scaled training data for currency
pca.fit(X_train_currency_scaled)

# Transform both training and testing data using PCA
X_train_currency_pca = pca.transform(X_train_currency_scaled)
X_test_currency_pca = pca.transform(X_test_currency_scaled)

# Fit PCA on the scaled training data for denomination
pca.fit(X_train_denomination_scaled)

# Transform both training and testing data using PCA
X_train_denomination_pca = pca.transform(X_train_denomination_scaled)
X_test_denomination_pca = pca.transform(X_test_denomination_scaled)

# Fit PCA on the scaled training data for orientation
pca.fit(X_train_orientation_scaled)

# Transform both training and testing data using PCA
X_train_orientation_pca = pca.transform(X_train_orientation_scaled)
X_test_orientation_pca = pca.transform(X_test_orientation_scaled)


In [10]:
from sklearn.model_selection import GridSearchCV

# Define parameter grids for SVM and MLP classifiers
svm_param_grid = {'C': [0.1, 1, 10]}
mlp_param_grid = {'hidden_layer_sizes': [(50,), (100,), (150,)], 'alpha': [0.0001, 0.001, 0.01]}

# Grid search for SVM classifier on the PCA-transformed data for currency
svm_currency_grid = GridSearchCV(SVC(kernel='linear'), param_grid=svm_param_grid, cv=3)
svm_currency_grid.fit(X_train_currency_pca, y_currency_train)

# Grid search for MLP classifier on the PCA-transformed data for currency
mlp_currency_grid = GridSearchCV(MLPClassifier(max_iter=1000, tol=0.01), param_grid=mlp_param_grid, cv=3)
mlp_currency_grid.fit(X_train_currency_pca, y_currency_train)

# Best hyperparameters for SVM classifier
print("Best hyperparameters for SVM Currency after PCA:", svm_currency_grid.best_params_)

# Best hyperparameters for MLP classifier
print("Best hyperparameters for MLP Currency after PCA:", mlp_currency_grid.best_params_)

# Evaluate SVM classifier
svm_currency_accuracy = svm_currency_grid.score(X_test_currency_pca, y_currency_test)
print("SVM Accuracy for Currency after PCA:", svm_currency_accuracy)

# Evaluate MLP classifier
mlp_currency_accuracy = mlp_currency_grid.score(X_test_currency_pca, y_currency_test)
print("MLP Accuracy for Currency after PCA:", mlp_currency_accuracy)


Best hyperparameters for SVM Currency after PCA: {'C': 0.1}
Best hyperparameters for MLP Currency after PCA: {'alpha': 0.001, 'hidden_layer_sizes': (150,)}
SVM Accuracy for Currency after PCA: 0.9751968503937007
MLP Accuracy for Currency after PCA: 0.9779527559055118


In [8]:
from sklearn.model_selection import GridSearchCV

# Define parameter grids for SVM and MLP classifiers
svm_param_grid = {'C': [0.1, 1, 10]}
mlp_param_grid = {'hidden_layer_sizes': [(50,), (100,), (150,)], 'alpha': [0.0001, 0.001, 0.01]}

# Grid search for SVM classifier on the PCA-transformed data for denomination
svm_denomination_grid = GridSearchCV(SVC(kernel='linear'), param_grid=svm_param_grid, cv=3)
svm_denomination_grid.fit(X_train_denomination_pca, y_denomination_train)

# Grid search for MLP classifier on the PCA-transformed data for denomination
mlp_denomination_grid = GridSearchCV(MLPClassifier(max_iter=1000, tol=0.01), param_grid=mlp_param_grid, cv=3)
mlp_denomination_grid.fit(X_train_denomination_pca, y_denomination_train)

# Best hyperparameters for SVM classifier
print("Best hyperparameters for SVM Denomination after PCA:", svm_denomination_grid.best_params_)

# Best hyperparameters for MLP classifier
print("Best hyperparameters for MLP Denomination after PCA:", mlp_denomination_grid.best_params_)

# Evaluate SVM classifier
svm_denomination_accuracy = svm_denomination_grid.score(X_test_denomination_pca, y_denomination_test)
print("SVM Accuracy for Denomination after PCA:", svm_denomination_accuracy)

# Evaluate MLP classifier
mlp_denomination_accuracy = mlp_denomination_grid.score(X_test_denomination_pca, y_denomination_test)
print("MLP Accuracy for Denomination after PCA:", mlp_denomination_accuracy)


Best hyperparameters for SVM Denomination after PCA: {'C': 0.1}
Best hyperparameters for MLP Denomination after PCA: {'alpha': 0.001, 'hidden_layer_sizes': (150,)}
SVM Accuracy for Denomination after PCA: 0.8755905511811024
MLP Accuracy for Denomination after PCA: 0.9385826771653544


In [9]:
# Grid search for SVM classifier on the PCA-transformed data for orientation
svm_orientation_grid = GridSearchCV(SVC(kernel='linear'), param_grid=svm_param_grid, cv=3)
svm_orientation_grid.fit(X_train_orientation_pca, y_orientation_train)

# Grid search for MLP classifier on the PCA-transformed data for orientation
mlp_orientation_grid = GridSearchCV(MLPClassifier(max_iter=1000, tol=0.01), param_grid=mlp_param_grid, cv=3)
mlp_orientation_grid.fit(X_train_orientation_pca, y_orientation_train)

# Best hyperparameters for SVM classifier
print("Best hyperparameters for SVM Orientation after PCA:", svm_orientation_grid.best_params_)

# Best hyperparameters for MLP classifier
print("Best hyperparameters for MLP Orientation after PCA:", mlp_orientation_grid.best_params_)

# Evaluate SVM classifier
svm_orientation_accuracy = svm_orientation_grid.score(X_test_orientation_pca, y_orientation_test)
print("SVM Accuracy for Orientation after PCA:", svm_orientation_accuracy)

# Evaluate MLP classifier
mlp_orientation_accuracy = mlp_orientation_grid.score(X_test_orientation_pca, y_orientation_test)
print("MLP Accuracy for Orientation after PCA:", mlp_orientation_accuracy)


Best hyperparameters for SVM Orientation after PCA: {'C': 0.1}
Best hyperparameters for MLP Orientation after PCA: {'alpha': 0.001, 'hidden_layer_sizes': (150,)}
SVM Accuracy for Orientation after PCA: 0.9035433070866141
MLP Accuracy for Orientation after PCA: 0.9385826771653544


Option 2


In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Load the dataset
dataset_path = "C:\\Users\\msi\\Downloads\\BankNotesDataset.csv"
df = pd.read_csv(dataset_path)

# Take a random subset of 12,700 samples
df_subset = df.sample(n=12700, random_state=42)

# Split the 'Denomination' column at the first underscore only
df_subset['Denomination'], df_subset['Orientation'] = zip(*df_subset['Denomination'].apply(lambda x: x.split('_', 1) if '_' in x else (x, 'Unknown')))

# Merge currency, denomination, and orientation into a new combined label
df_subset['combined_label'] = df_subset['Currency'] + '_' + df_subset['Denomination'].astype(str) + '_' + df_subset['Orientation'].astype(str)

# Drop the individual label columns
df_subset.drop(['Currency', 'Denomination', 'Orientation'], axis=1, inplace=True)

# Separate features and combined label
X = df_subset.iloc[:, :-1]  # Features
y = df_subset['combined_label']  # Combined label

# Standardize numerical features and split the data into training and testing sets
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Display the first few rows of the preprocessed features
print(pd.DataFrame(X_train, columns=X.columns).head())


   Unnamed: 0       v_0       v_1       v_2       v_3       v_4       v_5  \
0    1.043680 -0.832476 -0.845620 -0.001116  0.035986  0.418565 -0.669739   
1   -0.983174 -0.832476 -0.845620 -1.015671 -0.088848  0.622374 -0.669739   
2    0.093789  1.489929  0.346792 -0.828336  0.603328  0.247659 -0.669739   
3   -0.755312 -0.832476  0.647362 -0.723153 -0.345735  1.016838  2.789939   
4    0.171234  1.064169 -0.436569 -0.170665 -0.982764 -0.958141  0.268282   

        v_6       v_7       v_8  ...     v_246     v_247     v_248     v_249  \
0 -0.627793 -0.708573  0.801463  ...  0.158470  2.064332 -0.424497 -0.981267   
1 -0.713254  0.672900 -0.533858  ... -0.911672 -0.752726  0.394915  1.603563   
2 -0.713254 -0.606803 -0.874762  ... -0.522613  0.339103  0.597836 -0.762828   
3  2.193294  0.394470 -0.612061  ...  2.001860 -0.752726  0.428857 -0.274922   
4  0.409763  1.031207 -0.874762  ...  3.858942  1.367292  1.375137 -0.981267   

      v_250     v_251     v_252     v_253     v_254     

In [3]:
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV

# Define the parameter grid for SVM
svm_param_grid = {
    'C': [0.1, 1, 10],  # Regularization parameter
    'kernel': ['linear']  # Wrap 'linear' in a list
}

# Define the parameter grid for MLP
mlp_param_grid = {'hidden_layer_sizes': [(50,), (100,), (150,)], 'alpha': [0.0001, 0.001, 0.01]}

# Perform grid search for SVM
svm_grid_search = GridSearchCV(SVC(), svm_param_grid, cv=3)
svm_grid_search.fit(X_train, y_train)

# Get the best SVM model
best_svm_classifier = svm_grid_search.best_estimator_

# Evaluate the best SVM model
svm_accuracy = best_svm_classifier.score(X_test, y_test)
print("Best SVM Accuracy:", svm_accuracy)
print("Best SVM Parameters:", svm_grid_search.best_params_)

# Perform grid search for MLP
mlp_grid_search = GridSearchCV(MLPClassifier(max_iter=1000, tol=0.01), mlp_param_grid, cv=3)
mlp_grid_search.fit(X_train, y_train)

# Get the best MLP model
best_mlp_classifier = mlp_grid_search.best_estimator_

# Evaluate the best MLP model
mlp_accuracy = best_mlp_classifier.score(X_test, y_test)
print("Best MLP Accuracy:", mlp_accuracy)
print("Best MLP Parameters:", mlp_grid_search.best_params_)




Best SVM Accuracy: 0.934251968503937
Best SVM Parameters: {'C': 1, 'kernel': 'linear'}




Best MLP Accuracy: 0.9358267716535433
Best MLP Parameters: {'alpha': 0.001, 'hidden_layer_sizes': (150,)}


In [5]:
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV


# Apply PCA to the standardized training features
pca = PCA(n_components=200)  
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

# Train SVM classifier on PCA-transformed data
svm_classifier_pca = SVC(kernel='linear')
svm_classifier_pca.fit(X_train_pca, y_train)

# Define the parameter grid for SVM
svm_param_grid = {
    'C': [0.1, 1, 10],  # Regularization parameter
    'kernel': ['linear']
}

# Perform grid search for SVM
svm_grid_search = GridSearchCV(SVC(), svm_param_grid, cv=3)
svm_grid_search.fit(X_train_pca, y_train)

# Get the best SVM model
best_svm_classifier_pca = svm_grid_search.best_estimator_

# Evaluate the best SVM model
svm_accuracy_pca = best_svm_classifier_pca.score(X_test_pca, y_test)
print("Best SVM Accuracy with PCA:", svm_accuracy_pca)
print("Best SVM Parameters with PCA:", svm_grid_search.best_params_)

# Define the parameter grid for MLP
mlp_param_grid = {'hidden_layer_sizes': [(50,), (100,), (150,)], 'alpha': [0.0001, 0.001, 0.01]}

# Perform grid search for MLP
mlp_grid_search = GridSearchCV(MLPClassifier(max_iter=1000, tol=0.01), mlp_param_grid, cv=3)
mlp_grid_search.fit(X_train_pca, y_train)

# Get the best MLP model
best_mlp_classifier_pca = mlp_grid_search.best_estimator_

# Evaluate the best MLP model
mlp_accuracy_pca = best_mlp_classifier_pca.score(X_test_pca, y_test)
print("Best MLP Accuracy with PCA:", mlp_accuracy_pca)
print("Best MLP Parameters with PCA:", mlp_grid_search.best_params_)




Best SVM Accuracy with PCA: 0.9366141732283465
Best SVM Parameters with PCA: {'C': 0.1, 'kernel': 'linear'}




Best MLP Accuracy with PCA: 0.9362204724409449
Best MLP Parameters with PCA: {'alpha': 0.01, 'hidden_layer_sizes': (150,)}
