In [1]:
# Test code to implement ML attack on PUF via CRPsets

In [2]:
# Include Needed Libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
import pandas as pd
%matplotlib inline

In [18]:
# Pull data
datafile = "CRPSets/CRPSets.xls"
df = pd.read_excel(datafile)

# Convert data to numpy array
numpy_array = df.to_numpy()

#DEBUG
#print(numpy_array.shape)

# Split into our training and testing sets
X = numpy_array[:, :-1] # Takes the first 64 columns as training data
t = numpy_array[:,-1] # takes the last column as test data

# Some code to convert 0's to -1s

# Split our data so we have training and validation sets
# X_train is training data with t_train being the true values of this data
# X_test is validation data with t_test being the true values
X_train, X_test, t_train, t_test = train_test_split(X, t, 
                                                   test_size=0.3,
                                                   stratify=t,
                                                   random_state=1)
print(X_train.shape)
print(X_test.shape)


TypeError: only integer scalar arrays can be converted to a scalar index

In [4]:
#**************************************
#********MODEL 1
#**************************************

# For Pipeline
# 1. SGD - Stochastic Gradient Descent, iterates until finding optimal solution
# 2. loss is our algorithm. Log_loss = Logistic Regression | perceptron = perceptron | hinge = SVM
# 3. Penalty is our regularization term, with Alpha being the amount of regularization

# Create and train a Logistic Regression pipeline based on the data (log_Loss is LR)
pipeLR = Pipeline([('scaler', StandardScaler()), # normalization (substracts the mean and divides by std)
                ('log_reg', SGDClassifier(loss='log_loss', learning_rate='optimal'))
 ])

pipeLR.fit(X_train, t_train)


# WE can use GridSearchCV as an Exhaustive search to find best parameters for each model
params = {
    'log_reg__eta0': [0.001, 0.01, 0.01],
    'log_reg__alpha':[1, 100, 0.5]
}

# GridSearchCV does Exhaustive search over specified parameters for a model
best_param_pipeLR = GridSearchCV(pipeLR, params, cv=10)


# Fit data using the model with best parameters
best_param_pipeLR.fit(X_train, t_train)

# Predict
y_train_LR = best_param_pipeLR.predict(X_train)
y_test_LR = best_param_pipeLR.predict(X_test)

# Commented out, but this is code to print performance
print('BEST PARAMETERS')
print(best_param_pipeLR.best_params_)
print('TRAINING SET PERFORMANCE:')
print(classification_report(t_train, y_train_LR),'\n\n')
print('TEST SET PERFORMANCE:')
print(classification_report(t_test, y_test_LR))
print('TRAINING SET CONFUSION MATRIX')
print(confusion_matrix(t_train, y_train_LR),'\n\n')
print('TEST SET CONFUSION MATRIX')
print(confusion_matrix(t_test, y_test_LR),'\n\n')
print('Accuracy Score')
print(accuracy_score(t_test, y_test_LR),'\n\n')



BEST PARAMETERS
{'log_reg__alpha': 0.5, 'log_reg__eta0': 0.01}
TRAINING SET PERFORMANCE:
              precision    recall  f1-score   support

           0       0.55      0.99      0.70      4546
           1       0.70      0.03      0.06      3853

    accuracy                           0.55      8399
   macro avg       0.62      0.51      0.38      8399
weighted avg       0.62      0.55      0.41      8399
 


TEST SET PERFORMANCE:
              precision    recall  f1-score   support

           0       0.54      0.98      0.70      1948
           1       0.53      0.03      0.05      1652

    accuracy                           0.54      3600
   macro avg       0.54      0.50      0.37      3600
weighted avg       0.54      0.54      0.40      3600

TRAINING SET CONFUSION MATRIX
[[4490   56]
 [3724  129]] 


TEST SET CONFUSION MATRIX
[[1910   38]
 [1609   43]] 


Accuracy Score
0.5425 




In [5]:
#**************************************
# MODEL 2: Perceptron
#**************************************
# Create a Pipeline to scale data, conduct MLP. Fit for our data
pipeMLP = Pipeline([('scaler', StandardScaler()), # normalization (substracts the mean and divides by std)
                ('log_reg', SGDClassifier(loss='perceptron',penalty='l1',learning_rate='optimal'))])

pipeMLP.fit(X_train, t_train)


# WE can use GridSearchCV as an Exhaustive search to find best parameters for each model
params = {
    'log_reg__eta0': [0.001, 0.01, 0.1],
    'log_reg__alpha':[1,10]
}

# GridSearchCV does Exhaustive search over specified parameters for a model
best_param_pipeMLP = GridSearchCV(pipeMLP, params, cv=10)


# Fit data using the model with best parameters
best_param_pipeMLP.fit(X_train, t_train)

# Predict
y_train_MLP = best_param_pipeMLP.predict(X_train)
y_test_MLP = best_param_pipeMLP.predict(X_test)

# Commented out, but this is code to print performance
print('BEST PARAMETERS')
print(best_param_pipeMLP.best_params_)
print('TRAINING SET PERFORMANCE:')
print(classification_report(t_train, y_train_MLP),'\n\n')
print('TEST SET PERFORMANCE:')
print(classification_report(t_test, y_test_MLP))
print('TRAINING SET CONFUSION MATRIX')
print(confusion_matrix(t_train, y_train_MLP),'\n\n')
print('TEST SET CONFUSION MATRIX')
print(confusion_matrix(t_test, y_test_MLP),'\n\n')
print('Accuracy Score')
print(accuracy_score(t_test, y_test_MLP),'\n\n')

BEST PARAMETERS
{'log_reg__alpha': 10, 'log_reg__eta0': 0.1}
TRAINING SET PERFORMANCE:
              precision    recall  f1-score   support

           0       0.54      1.00      0.70      4546
           1       0.00      0.00      0.00      3853

    accuracy                           0.54      8399
   macro avg       0.27      0.50      0.35      8399
weighted avg       0.29      0.54      0.38      8399
 


TEST SET PERFORMANCE:
              precision    recall  f1-score   support

           0       0.54      1.00      0.70      1948
           1       0.00      0.00      0.00      1652

    accuracy                           0.54      3600
   macro avg       0.27      0.50      0.35      3600
weighted avg       0.29      0.54      0.38      3600

TRAINING SET CONFUSION MATRIX
[[4546    0]
 [3853    0]] 


TEST SET CONFUSION MATRIX
[[1948    0]
 [1652    0]] 


Accuracy Score
0.5411111111111111 




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [6]:
#**************************************
# Model 3: SVM
#**************************************
# Create a Pipeline to scale data, conduct Support Vector Machine. Fit for our data
pipeSVC = Pipeline([('scaler', StandardScaler()), # normalization (substracts the mean and divides by std)
                ('log_reg', SGDClassifier(loss='hinge',penalty='l1',learning_rate='optimal'))])

pipeSVC.fit(X_train, t_train)


# WE can use GridSearchCV as an Exhaustive search to find best parameters for each model
params = {
    'log_reg__eta0': [0.001, 0.01, 0.1],
    'log_reg__alpha':[1,10]
}

# GridSearchCV does Exhaustive search over specified parameters for a model
best_param_pipeSVC = GridSearchCV(pipeSVC, params, cv=10)


# Fit data using the model with best parameters
best_param_pipeSVC.fit(X_train, t_train)

# Predict
y_train_SVC = best_param_pipeSVC.predict(X_train)
y_test_SVC = best_param_pipeSVC.predict(X_test)

# Commented out, but this is code to print performance
print('BEST PARAMETERS')
print(best_param_pipeSVC.best_params_)
print('TRAINING SET PERFORMANCE:')
print(classification_report(t_train, y_train_SVC),'\n\n')
print('TEST SET PERFORMANCE:')
print(classification_report(t_test, y_test_SVC))
print('TRAINING SET CONFUSION MATRIX')
print(confusion_matrix(t_train, y_train_SVC),'\n\n')
print('TEST SET CONFUSION MATRIX')
print(confusion_matrix(t_test, y_test_SVC),'\n\n')
print('Accuracy Score')
print(accuracy_score(t_test, y_test_SVC),'\n\n')

BEST PARAMETERS
{'log_reg__alpha': 1, 'log_reg__eta0': 0.1}
TRAINING SET PERFORMANCE:
              precision    recall  f1-score   support

           0       0.54      1.00      0.70      4546
           1       0.00      0.00      0.00      3853

    accuracy                           0.54      8399
   macro avg       0.27      0.50      0.35      8399
weighted avg       0.29      0.54      0.38      8399
 


TEST SET PERFORMANCE:
              precision    recall  f1-score   support

           0       0.54      1.00      0.70      1948
           1       0.00      0.00      0.00      1652

    accuracy                           0.54      3600
   macro avg       0.27      0.50      0.35      3600
weighted avg       0.29      0.54      0.38      3600

TRAINING SET CONFUSION MATRIX
[[4546    0]
 [3853    0]] 


TEST SET CONFUSION MATRIX
[[1948    0]
 [1652    0]] 


Accuracy Score
0.5411111111111111 




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
