In [10]:
import numpy as np
import pandas as pd
import math
from arbiter_puf import ArbiterPuf
from xor_puf import XorPuf
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, cross_val_score


### Simulate arbiter PUF and attack it with machine learning

In [11]:
def modify_data_to_train(data):
    """
    Modifies the dataset into train and test, 
    only challenges in test data, and adds the entire CRP in training data. 
    """
    training_data = []
    test_data = []
    for i in data:
        test_data.append(i.pop()) # i is a list with first n bits of challenge and last bit representing response
        training_data.append(i)
    return np.asarray(training_data), np.asarray(test_data)

In [12]:
list_of_number_of_bits = [32, 64, 128]
list_of_data_size = [10000, 50000, 100000]

"""
Run Logistic Regression on Arbiter Puf for different number of stages and number of CRPs
"""
print('================== Arbiter PUF ========================')
for data_size in list_of_data_size:
    print('Number of CRPs: ', data_size)
    for bit in list_of_number_of_bits:
        print('Number of Stages: ', bit)
        dataset = ArbiterPuf(bit).calculate_responses(data_size)
        X,Y = modify_data_to_train(dataset)
        X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
        LGR_Classifier = LogisticRegression()
        LGR_Classifier.fit(X_train, Y_train)
        scores = cross_val_score(LGR_Classifier, X_train, Y_train, cv=10, scoring="accuracy")
        y_pred = LGR_Classifier.predict(X_test)
        print ("Cross Validation Mean Score: ", '{}%'.format(np.round(scores.mean(), 3) * 100))  
        print("Accuracy : ", accuracy_score(Y_test, y_pred))
        print(classification_report(Y_test, y_pred))
        print('=======================================================')


Number of CRPs:  10000
Number of Stages:  32
Cross Validation Mean Score:  51.1%
Accuracy :  0.5045
              precision    recall  f1-score   support

           0       0.50      0.56      0.53       982
           1       0.52      0.45      0.48      1018

    accuracy                           0.50      2000
   macro avg       0.51      0.51      0.50      2000
weighted avg       0.51      0.50      0.50      2000

Number of Stages:  64
Cross Validation Mean Score:  57.099999999999994%
Accuracy :  0.564
              precision    recall  f1-score   support

           0       0.56      0.57      0.56       987
           1       0.57      0.56      0.56      1013

    accuracy                           0.56      2000
   macro avg       0.56      0.56      0.56      2000
weighted avg       0.56      0.56      0.56      2000

Number of Stages:  128
Cross Validation Mean Score:  50.6%
Accuracy :  0.5125
              precision    recall  f1-score   support

           0       0.49

### Details about Phi

The Phi vector here, contains the phi value calculated at the end of each stage of the PUF and is appended to a list. For a 32 bit PUF, the phi vector contains 32 values, with each value iteratively being calculated ie. [ Phi(32 bits), Phi(31 bits), Phi(30 bits) .. Phi(1 bit)]

### Details about Feature Vector 

This Phi vector provides pertinent information to the classifier, and when this feature is added to the dataset, 
the performance can be observed to increase considerably. 

In [13]:
def modify_data(data): # adding phi to the dataset when training
    training_data = []
    test_data = []
    for i in data:
        test_data.append(i.pop())
        phi = [ArbiterPuf.calculate_phi(i[j:]) for j in range(len(i))]
        training_data.append(phi)   
    return np.asarray(training_data), np.asarray(test_data)


### What is the way you build the delay vector?

The delay vector is formed by multiplying the phi vector along with a randomly generated bit vector of the same size. 

In [14]:
list_of_number_of_bits = [32, 64, 128]
# data_size = 100000
list_of_data_size = [10000, 50000, 100000]
print('================== Arbiter PUF with Phi ========================')
for data_size in list_of_data_size:
    print('Number of CRPs: ', data_size)
    for bit in list_of_number_of_bits:
        print('Number of Stages: ', bit)
        dataset = ArbiterPuf(bit).calculate_responses(data_size)
        X,Y = modify_data(dataset)
        X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
        LGR_Classifier = LogisticRegression()
        LGR_Classifier.fit(X_train, Y_train)
        scores = cross_val_score(LGR_Classifier, X_train, Y_train, cv=10, scoring="accuracy")
        y_pred = LGR_Classifier.predict(X_test)
        print ("Cross Validation Mean Score: ", '{}%'.format(np.round(scores.mean(), 3) * 100))  
        print("Accuracy : ", accuracy_score(Y_test, y_pred))
        print(classification_report(Y_test, y_pred))
        print('=======================================================')


Number of CRPs:  10000
Number of Stages:  32
Cross Validation Mean Score:  99.6%
Accuracy :  0.998
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       988
           1       1.00      1.00      1.00      1012

    accuracy                           1.00      2000
   macro avg       1.00      1.00      1.00      2000
weighted avg       1.00      1.00      1.00      2000

Number of Stages:  64
Cross Validation Mean Score:  99.2%
Accuracy :  0.993
              precision    recall  f1-score   support

           0       0.99      0.99      0.99      1029
           1       0.99      0.99      0.99       971

    accuracy                           0.99      2000
   macro avg       0.99      0.99      0.99      2000
weighted avg       0.99      0.99      0.99      2000

Number of Stages:  128
Cross Validation Mean Score:  98.9%
Accuracy :  0.9865
              precision    recall  f1-score   support

           0       0.98      0.99     

## What is the importance of the challenge vector and feature vector?

The challenge vector here contains the list of the bits which form the challenge, and it along with the response vector form the basis of relationship that Logistic Regression utilizes to classify them. The feature vector here is the phi vector, which establishes a strong direct relationship between the response and the challenge pair. 

## Report on security

The Arbiter Puf is clearly quite susceptible to machine learning attacks. For the security to increase, the number of stages have to be considerably increased which is easily circumvented by proportionally increasing the size of the dataset.

## Improve your Arbiter PUF by transforming it into XOR arbiter PUF.

In [15]:
list_of_number_of_bits = [5, 10, 15, 20]
# data_size = 100000
list_of_data_size = [10000, 50000, 100000]
number_of_pufs = [3, 4, 5]
for data_size in list_of_data_size:
    print('Number of CRPs: ', data_size)
    for j in number_of_pufs:
        print(f"================== Xor PUF {j} Pufs ========================")
        for bit in list_of_number_of_bits:
            print('Number of Stages: ', bit)
            # Create xor puf of 'j' arbiter pufs each of size 'bit'
            dataset = XorPuf(bit, j).calculate_responses(data_size)  
            # calculate_responses c
            X,Y = modify_data(dataset)
            X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
            LGR_Classifier = LogisticRegression()
            LGR_Classifier.fit(X_train, Y_train)
            scores = cross_val_score(LGR_Classifier, X_train, Y_train, cv=10, scoring="accuracy")
            y_pred = LGR_Classifier.predict(X_test)
            print ("Cross Validation Mean Score: ", '{}%'.format(np.round(scores.mean(), 3) * 100))  
            print("Accuracy : ", accuracy_score(Y_test, y_pred))
            print(classification_report(Y_test, y_pred))
            print('=======================================================')


Number of CRPs:  10000
Number of Stages:  5
Cross Validation Mean Score:  75.3%
Accuracy :  0.758
              precision    recall  f1-score   support

           0       0.77      0.76      0.76      1029
           1       0.75      0.75      0.75       971

    accuracy                           0.76      2000
   macro avg       0.76      0.76      0.76      2000
weighted avg       0.76      0.76      0.76      2000

Number of Stages:  10
Cross Validation Mean Score:  56.99999999999999%
Accuracy :  0.572
              precision    recall  f1-score   support

           0       0.57      0.58      0.57       991
           1       0.58      0.56      0.57      1009

    accuracy                           0.57      2000
   macro avg       0.57      0.57      0.57      2000
weighted avg       0.57      0.57      0.57      2000

Number of Stages:  15
Cross Validation Mean Score:  61.199999999999996%
Accuracy :  0.6235
              precision    recall  f1-score   support

           0 

  _warn_prf(average, modifier, msg_start, len(result))


Cross Validation Mean Score:  51.300000000000004%
Accuracy :  0.512
              precision    recall  f1-score   support

           0       0.50      0.32      0.39       981
           1       0.52      0.69      0.59      1019

    accuracy                           0.51      2000
   macro avg       0.51      0.51      0.49      2000
weighted avg       0.51      0.51      0.49      2000

Number of Stages:  15
Cross Validation Mean Score:  52.1%
Accuracy :  0.511
              precision    recall  f1-score   support

           0       0.52      0.83      0.64      1038
           1       0.48      0.17      0.25       962

    accuracy                           0.51      2000
   macro avg       0.50      0.50      0.44      2000
weighted avg       0.50      0.51      0.45      2000

Number of Stages:  20
Cross Validation Mean Score:  50.5%
Accuracy :  0.499
              precision    recall  f1-score   support

           0       0.51      0.68      0.58      1021
           1     

  _warn_prf(average, modifier, msg_start, len(result))


Cross Validation Mean Score:  55.400000000000006%
Accuracy :  0.5646
              precision    recall  f1-score   support

           0       0.56      1.00      0.72      5646
           1       0.00      0.00      0.00      4354

    accuracy                           0.56     10000
   macro avg       0.28      0.50      0.36     10000
weighted avg       0.32      0.56      0.41     10000

Number of Stages:  15


  _warn_prf(average, modifier, msg_start, len(result))


Cross Validation Mean Score:  52.2%
Accuracy :  0.5234
              precision    recall  f1-score   support

           0       0.52      0.96      0.68      5208
           1       0.53      0.04      0.08      4792

    accuracy                           0.52     10000
   macro avg       0.53      0.50      0.38     10000
weighted avg       0.53      0.52      0.39     10000

Number of Stages:  20
Cross Validation Mean Score:  51.800000000000004%
Accuracy :  0.5095
              precision    recall  f1-score   support

           0       0.49      0.05      0.09      4894
           1       0.51      0.95      0.66      5106

    accuracy                           0.51     10000
   macro avg       0.50      0.50      0.38     10000
weighted avg       0.50      0.51      0.38     10000

Number of Stages:  5
Cross Validation Mean Score:  75.2%
Accuracy :  0.7533
              precision    recall  f1-score   support

           0       0.75      0.76      0.76      5002
           1   

  _warn_prf(average, modifier, msg_start, len(result))


Cross Validation Mean Score:  53.2%
Accuracy :  0.53215
              precision    recall  f1-score   support

           0       0.00      0.00      0.00      9357
           1       0.53      1.00      0.69     10643

    accuracy                           0.53     20000
   macro avg       0.27      0.50      0.35     20000
weighted avg       0.28      0.53      0.37     20000

Number of Stages:  20


  _warn_prf(average, modifier, msg_start, len(result))


Cross Validation Mean Score:  51.9%
Accuracy :  0.5168
              precision    recall  f1-score   support

           0       0.52      1.00      0.68     10339
           1       0.45      0.00      0.00      9661

    accuracy                           0.52     20000
   macro avg       0.49      0.50      0.34     20000
weighted avg       0.49      0.52      0.35     20000

Number of Stages:  5
Cross Validation Mean Score:  74.6%
Accuracy :  0.74945
              precision    recall  f1-score   support

           0       0.75      0.75      0.75     10058
           1       0.75      0.74      0.75      9942

    accuracy                           0.75     20000
   macro avg       0.75      0.75      0.75     20000
weighted avg       0.75      0.75      0.75     20000

Number of Stages:  10
Cross Validation Mean Score:  57.99999999999999%
Accuracy :  0.57415
              precision    recall  f1-score   support

           0       0.57      0.58      0.58      9959
           1  

## Report on security. Why is this PUF more secure?

## What is the influence of the number of stages, number of PUFs, number of CRPs?

Increasing the number of stages and number of PUFs directly correlate to the security of the PUF being increased. Although this can be circumvented by utilizing a larger number of CRPs, this number exponentially increases with the increase in number of stages and pufs.

## Report on what would be the best trade-off between security and implementation cost and why

Depending upon the usecase and the susceptibility of the device that the PUF is being implemented on, utilizing an xor arbiter PUF with multiple pufs of smaller lengths has higher impacts on security than one with fewer pufs of slightly larger lengths. 