Log entry 02/11/2021:

Currently learnt from stochastic gradient descent: the next parameter update used for learning is data-order dependent. Train both the target model and the surrogate model on the data set for one epoch and store each datapoint in a local variable; this must be done so that all datapoints have been seen. Once all datapoints have been seen, each one can be assigned a value (its loss on the surrogate model) that serves as an estimate of the target model's loss on that datapoint. Using this value, the datapoints sent to train the target model can be reordered in different ways to degrade the model's accuracy.

csv obtained from: https://gist.github.com/netj/8836201

In [76]:
import numpy as np
import pandas as pd
from sklearn.linear_model import SGDRegressor
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import scikitplot as skplt

# Ordering strategies that can be passed as attack()'s attackType argument.
HIGH_LOW = 'highlow'
LOW_HIGH = 'lowhigh'
OSC_IN = 'oscillatingin'
OSC_OUT = 'oscillatingout'
# Batch-handling modes; presumably meant for attack()'s batchType argument,
# but not referenced anywhere in the visible code yet.
BATCH_REORDER = 'reorder'
BATCH_RESHUFFLE = 'reshuffle'
# Number of leading rows of the CSV held out as the evaluation set.
N_TESTDATA_SAMPLES = 10

df = pd.read_csv('iris.csv')

# Regression task: predict sepal length from the three other measurements.
featureFrame = df[['petal.length', 'petal.width', 'sepal.width']]
labelSeries = df['sepal.length']

# The first N_TESTDATA_SAMPLES rows are the test set, the remainder is used for training.
testData, trainingData = featureFrame[:N_TESTDATA_SAMPLES], featureFrame[N_TESTDATA_SAMPLES:]
testLabels, trainingLabels = labelSeries[:N_TESTDATA_SAMPLES], labelSeries[N_TESTDATA_SAMPLES:]

# The control and target models share one configuration (squared-error loss,
# elastic-net penalty, constant learning rate, no shuffling) so that any
# difference between them comes from the order of the training data alone.
# The surrogate uses a Huber loss with otherwise default settings.
# NOTE: only partial_fit is used below, which performs a single pass per call;
# per the scikit-learn documentation max_iter does not affect partial_fit.
sharedSgdSettings = dict(
    loss = 'squared_error',
    penalty = 'elasticnet',
    fit_intercept = True,
    max_iter = 100,
    tol = 0.001,
    shuffle = False,
    learning_rate = 'constant',
)
controlModel = SGDRegressor(**sharedSgdSettings)
targetModel = SGDRegressor(**sharedSgdSettings)
surrogateModel = SGDRegressor(loss = 'huber', max_iter = 90, tol = 0.001, shuffle = False)

# One initial epoch over the training data in its natural (non-malicious) order.
for model in (controlModel, surrogateModel, targetModel):
    model.partial_fit(X = trainingData, y = trainingLabels)

def leastSquares(x, y):
    """Return the squared difference between ``x`` and ``y``.

    Works on anything that supports subtraction and ``** 2`` (plain numbers
    as well as array-like objects, where the operation is element-wise).
    """
    residual = x - y
    return residual ** 2

#===== ATTACK =====
def attack(target, surrogate, attackType, batchType):
    """Run one epoch of a data-ordering attack against ``target``.

    Every row of the global ``trainingData`` is scored with the squared
    error of ``surrogate`` on it (used as a stand-in for the target's own
    loss). The rows are then sorted by that score, rearranged according to
    ``attackType``, and the reordered epoch is fed to both ``target`` and
    ``surrogate`` through ``partial_fit``. The global ``controlModel`` is
    advanced by one epoch of the untouched ``trainingData`` so that it stays
    comparable with the attacked model.

    Args:
        target: Model under attack; must provide ``partial_fit``.
        surrogate: Model whose losses drive the ordering; must provide
            ``predict`` and ``partial_fit``.
        attackType: ``HIGH_LOW`` feeds points in descending loss order.
            ``OSC_OUT`` takes the ascending order and reverses each half of
            it. Any other value (``LOW_HIGH``, and also ``OSC_IN``, which is
            not implemented yet) feeds points in ascending loss order.
        batchType: Currently unused; kept so existing callers keep working.

    Returns:
        None. The models are updated in place.
    """
    labelValues = trainingLabels.to_numpy()

    # Score the whole epoch with a single vectorised predict instead of one
    # predict call per row.
    losses = np.asarray(leastSquares(surrogate.predict(trainingData), labelValues))

    # Positions of the samples in ascending order of loss. A stable sort keeps
    # tied samples in their original dataset order.
    order = np.argsort(losses, kind = 'stable')

    if attackType == HIGH_LOW:
        order = order[::-1]
    elif attackType == OSC_OUT:
        # Reverse each half of the *sample* ordering. Working on the index
        # array guarantees features and labels are permuted identically
        # (previously the halves were taken over the three feature columns,
        # which swapped columns and left the rows out of step with the labels).
        half = len(order) // 2
        order = np.concatenate((order[:half][::-1], order[half:][::-1]))

    # Fresh 0..n-1 index, same column names as the training frame.
    reorderedData = trainingData.iloc[order].reset_index(drop = True)
    reorderedLabels = labelValues[order]

    controlModel.partial_fit(X = trainingData, y = trainingLabels)
    # Train the models that were passed in rather than the module-level
    # globals, so the function honours its own parameters.
    target.partial_fit(X = reorderedData, y = reorderedLabels)
    surrogate.partial_fit(X = reorderedData, y = reorderedLabels)


# Run 1000 attack epochs using the low -> high loss ordering.
for i in range(1000):
    attack(targetModel, surrogateModel, LOW_HIGH, None)

# Predictions of the attacked and the control model on the held-out rows.
targetPredict = targetModel.predict(testData).tolist()
controlPredict = controlModel.predict(testData).tolist()
testLabelsList = testLabels.tolist()

# For every test row: relative error of each model (measured against that
# model's own prediction), then the absolute gap between the two errors.
errorChange = []
for actual, controlGuess, targetGuess in zip(testLabelsList, controlPredict, targetPredict):
    controlError = abs((actual - controlGuess) / controlGuess)
    targetError = abs((actual - targetGuess) / targetGuess)
    errorChange.append(abs(targetError - controlError))

# Mean gap expressed as a percentage, rounded to two decimals.
averageChangePercent = round(np.mean(errorChange) * 100, 2)
print("Average change in approximation error from target model to prediction model:  " + str(averageChangePercent) + "%\n")


# TODO: need to add the oscillation attacks (last recorded result: 3.19)

# pca = PCA(n_components = 2)
# axes = pca.fit_transform(trainingData)
# np.reshape(axes, (2, 140))

# print(axes)
# plt.scatter(axes, axes)
# plt.show()

Average change in approximation error from target model to prediction model:  3.19%

