# Inject Attacks

In [None]:
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
from sklearn.utils import shuffle as reset
import random



def Distance(x, y):
    """Return the Euclidean distance between the points ``x`` and ``y``.

    Coordinates are paired with ``zip``, so when the two points have
    different lengths the surplus coordinates of the longer one are ignored.
    """
    squared_total = 0
    for coord_x, coord_y in zip(x, y):
        squared_total += (coord_x - coord_y) ** 2
    return math.sqrt(squared_total)

def partition(points, k, centroids):
    """Assign every point to its nearest centroid.

    Only the first ``k`` centroids are considered as candidates, while the
    returned list always holds one (possibly empty) cluster per entry of
    ``centroids``.  When several centroids are equally close, the one with
    the lowest index wins.

    Returns a list of clusters; cluster ``i`` contains the points whose
    nearest centroid is ``centroids[i]``, in their original order.
    """
    clusters = [[] for _ in centroids]
    for point in points:
        distances = [Distance(point, centroids[idx]) for idx in range(k)]
        # ``index`` returns the first occurrence, i.e. the lowest index
        # among equally distant centroids.
        nearest = distances.index(min(distances))
        clusters[nearest].append(point)
    return clusters

def mean(points):
    """Return the coordinate-wise average of ``points`` as a tuple of floats.

    Coordinates are grouped with ``zip``, so only the dimensions shared by
    every point are averaged.  An empty ``points`` yields an empty tuple.
    """
    count = len(points)
    averages = []
    for column in zip(*points):
        averages.append(float(sum(column)) / count)
    return tuple(averages)

def deviation_sum(num_k, partition, centroids):
    """Return the summed squared deviation of point-to-centroid distances.

    For each of the first ``num_k`` clusters, the distance of every point to
    the cluster's centroid is computed, and the squared differences between
    those distances and their per-cluster average are accumulated.

    Args:
        num_k: number of clusters to evaluate.
        partition: list of clusters; ``partition[i]`` holds the points
            assigned to ``centroids[i]``.
        centroids: list of centroid coordinates.

    Returns:
        The accumulated squared deviation over all evaluated clusters
        (``0`` when every cluster is empty).
    """
    total = 0
    for i in range(num_k):
        distances = [Distance(point, centroids[i]) for point in partition[i]]
        if not distances:
            # An empty cluster has no spread and no average to compare to.
            continue
        # Average over the points of THIS cluster (the previous code divided
        # by the number of centroids, which is not an average at all).
        avg = sum(distances) / len(distances)
        total += sum((d - avg) ** 2 for d in distances)
    return total

def det_k(points):
    """Pick a cluster count for ``points`` with a largest-drop heuristic.

    k-means is trained for k = 1 .. 10 (capped at the number of points,
    because the initial centroids are sampled without replacement).  For
    each k the deviation sum of the resulting clustering is recorded, and
    the k that follows the largest drop between consecutive deviation sums
    is returned.

    Args:
        points: list of points (sequences of numbers).

    Returns:
        The selected number of clusters (``1`` when only one point exists).

    Raises:
        ValueError: if ``points`` is empty.
    """
    if not points:
        raise ValueError("det_k() requires at least one point")

    max_k = min(10, len(points))
    if max_k < 2:
        # A single point cannot be split any further.
        return 1

    deviation_list = []
    for num_k in range(1, max_k + 1):
        initial_centroids = random.sample(points, num_k)
        # Score the clustering against the centroids it converged to, not
        # against the random starting centroids.
        clusters, centroids = kMeans_training(points, num_k, initial_centroids)
        deviation_list.append(deviation_sum(num_k, clusters, centroids))

    max_gap = deviation_list[0] - deviation_list[1]
    max_index = 1
    for i in range(1, len(deviation_list) - 1):
        gap = deviation_list[i] - deviation_list[i + 1]
        if gap > max_gap:
            # Track the running maximum; without this update the last gap
            # exceeding the first one would win instead of the largest.
            max_gap = gap
            max_index = i + 1
    # deviation_list[j] belongs to k = j + 1.
    return max_index + 1

def kMeans_training(points, k, initialCentroids):
    """Run k-means until the assignment of points to clusters stops changing.

    Args:
        points: list of points (sequences of numbers).
        k: number of centroids considered when assigning points.
        initialCentroids: starting centroid coordinates.

    Returns:
        A ``(partition, centroids)`` pair: the final list of clusters and the
        centroids those clusters were assigned to.
    """
    # NOTE: ``newCentroids`` was a module-level global in the original code;
    # the declaration is kept so anything reading it keeps working.
    global newCentroids
    # Start from the initial centroids so the name is always bound, even if
    # the loop below never executes (e.g. no centroids at all).
    newCentroids = list(initialCentroids)
    oldPartition = []
    newPartition = partition(points, k, newCentroids)

    while oldPartition != newPartition:
        oldPartition = newPartition
        # An empty cluster keeps its previous centroid.  Averaging it would
        # produce ``()``, which is at distance 0 from every point and makes
        # the assignment collapse or oscillate forever.
        newCentroids = [
            mean(cluster) if cluster else previous
            for cluster, previous in zip(oldPartition, newCentroids)
        ]
        newPartition = partition(points, k, newCentroids)

    return newPartition, newCentroids

def kMeans_testing(points, k, centroids, lower_whisker_list, upper_whisker_list):
    """Assign points to trained centroids and flag distance outliers.

    Args:
        points: list of points to classify.
        k: unused; kept so existing callers do not break.
        centroids: trained centroid coordinates.
        lower_whisker_list: per-cluster lower distance bound.
        upper_whisker_list: per-cluster upper distance bound.

    Returns:
        A ``(partition, outlier)`` pair.  ``partition[i]`` holds the points
        nearest to ``centroids[i]``; ``outlier`` lists, cluster by cluster,
        the points whose distance to their centroid lies outside that
        cluster's whisker bounds.
    """
    clusters = [[] for _ in centroids]
    cluster_distances = [[] for _ in centroids]
    for point in points:
        nearest_index = 0
        nearest_distance = Distance(point, centroids[0])
        for index in range(1, len(centroids)):
            candidate = Distance(point, centroids[index])
            if candidate < nearest_distance:
                # Remember the best distance as well as its index; the
                # original compared every centroid against centroid 0 only.
                nearest_index = index
                nearest_distance = candidate
        clusters[nearest_index].append(point)
        cluster_distances[nearest_index].append(nearest_distance)

    outlier = []
    for index, cluster in enumerate(clusters):
        lower = lower_whisker_list[index]
        upper = upper_whisker_list[index]
        for point, distance in zip(cluster, cluster_distances[index]):
            if distance < lower or distance > upper:
                outlier.append(point)
    return clusters, outlier

def dis_list(partition, centroids):
    """Return, per centroid, the distances of its cluster's points to it.

    Entry ``i`` of the result lists ``Distance(p, centroids[i])`` for every
    point ``p`` in ``partition[i]``, in cluster order.
    """
    return [
        [Distance(point, centroids[idx]) for point in partition[idx]]
        for idx in range(len(centroids))
    ]

def whisker(dis_lists):
    """Compute per-cluster lower and upper whisker bounds on distances.

    For each list of distances the lower and upper quartiles are read at
    positions ``n // 4`` and ``3 * n // 4`` of the sorted list.  The spread
    is twice the inter-quartile range, and the whiskers lie 1.5 spreads
    below the lower quartile and above the upper quartile.

    Each inner list is sorted in place, as in the original implementation.

    An empty list (a cluster with no training points) gets the bounds
    ``(0.0, 0.0)`` instead of raising ``IndexError``; with those bounds any
    point at a non-zero distance from that centroid is out of range.

    Args:
        dis_lists: list of per-cluster distance lists.

    Returns:
        ``(lower_whisker_list, upper_whisker_list)``, aligned with
        ``dis_lists``.
    """
    lower_whisker_list = []
    upper_whisker_list = []
    for distances in dis_lists:
        if not distances:
            # Keep the output aligned with the clusters even when one of
            # them has no distances to take quartiles from.
            lower_whisker_list.append(0.0)
            upper_whisker_list.append(0.0)
            continue
        distances.sort()
        count = len(distances)
        lower_quartile = distances[count // 4]
        upper_quartile = distances[(3 * count) // 4]
        spread = 2 * (upper_quartile - lower_quartile)
        lower_whisker_list.append(lower_quartile - 1.5 * spread)
        upper_whisker_list.append(upper_quartile + 1.5 * spread)
    return lower_whisker_list, upper_whisker_list

# cite: https://blog.csdn.net/laugh12321/article/details/99689071
def train_test_split(data, test_size, shuffle_state=True, random_state=None):
    """Split ``data`` into a training part and a testing part.

    The first ``int(len(data) * test_size)`` rows (after optional shuffling)
    become the test set and the remaining rows the training set.  Both parts
    get a fresh index starting at 0.

    Args:
        data: the table to split.
        test_size: fraction of rows placed in the test set.
        shuffle_state: shuffle the rows before splitting when true.
        random_state: forwarded to the shuffle call.

    Returns:
        ``(train, test)``.
    """
    rows = reset(data, random_state=random_state) if shuffle_state else data
    cut = int(len(rows) * test_size)

    test = rows[:cut].reset_index(drop=True)
    train = rows[cut:].reset_index(drop=True)
    return train, test
def points_list(dataset):
    """Extract measurement points and their timestamps from ``dataset``.

    Reads the ``CO2_value``, ``Tem_value`` and ``time`` columns row by row.

    Returns:
        ``(points, times)`` where ``points`` is a list of
        ``[CO2_value, Tem_value]`` pairs converted to ``float`` and ``times``
        holds the matching ``time`` values, both in row order.
    """
    def to_point(CO2_value, Tem_value):
        return [float(CO2_value), float(Tem_value)]

    coordinates = []
    timestamps = []
    for _, record in dataset.iterrows():
        measurement = record[['CO2_value', 'Tem_value']].values.tolist()
        coordinates.append(to_point(*measurement))
        timestamps.append(*record[['time']].values.tolist())
    return coordinates, timestamps


def _plot_clusters(clusters, centroids, colors, outliers=()):
    """Scatter-plot each cluster with its centroid, plus optional outliers."""
    plt.figure()
    for i, cluster in enumerate(clusters):
        if cluster:
            # Cycle through the palette so more clusters than colours does
            # not raise IndexError; skip empty clusters because scatter()
            # needs at least x and y.
            plt.scatter(*zip(*cluster), c=colors[i % len(colors)])
        if centroids[i]:
            plt.scatter(*zip(centroids[i]), c='black')
    if outliers:
        plt.scatter(*zip(*outliers), c='red')
    plt.xlabel('CO2_value')
    plt.ylabel('Tem_value')
    plt.axis([200, 1000, 20, 30])
    plt.show()


if __name__ == '__main__':
    # Load the measurements and hold out 20% of the rows for testing.
    data = pd.read_csv(r'C:\Users\raj\Desktop\ABC.csv')
    (training, testing) = train_test_split(data, 0.2)

    # points_list() already returns every row as a [CO2, temperature] pair;
    # the rows must not be appended a second time, otherwise every point is
    # duplicated and the point lists no longer line up with the time lists.
    points_list_training, time_list_training = points_list(training)
    points_list_testing, time_list_testing = points_list(testing)

    # Inject one synthetic attack sample into the test set.
    points_list_testing.append([900, 21])
    time_list_testing.append('inject timing ')

    # Train: choose k, cluster, and derive the per-cluster distance bounds.
    k = det_k(points_list_training)
    partition_training, centroids_training = kMeans_training(
        points_list_training, k, random.sample(points_list_training, k))
    lower_whisker_list, upper_whisker_list = whisker(
        dis_list(partition_training, centroids_training))
    for lower_whisker, upper_whisker in zip(lower_whisker_list, upper_whisker_list):
        print(lower_whisker, upper_whisker)

    color_list = ['blue', 'green', 'yellow', 'gray', 'purple', 'cornsilk',
                  'blueviolet', 'darkolivegreen', 'forestgreen']
    _plot_clusters(partition_training, centroids_training, color_list)

    # Test: assign the held-out points and highlight the outliers in red.
    partition_testing, outlier_list = kMeans_testing(
        points_list_testing, k, centroids_training,
        lower_whisker_list, upper_whisker_list)
    _plot_clusters(partition_testing, centroids_training, color_list, outlier_list)
