# Проект по Размити множества и приложения

In [None]:
import pandas as pd
import numpy as np
import os.path
from sklearn.model_selection import train_test_split
from sklearn import preprocessing

## Параметри:

In [None]:
# When True, the dataset-loading cell re-reads the raw batch files and rewrites
# the cached array even if the cache file already exists.
reloadData = False

## База данни

In [None]:
# Build (or load) the merged dataset array `df`.
#
# The ten raw batch files are split on spaces and colons, and only every second
# column (0, 2, 4, ...) is kept. The merged result is cached as a .npy file so
# later runs can skip the slow regex-based parsing.
# NOTE(review): presumably each raw line is "label idx:value idx:value ...", so
# the column slicing drops the feature indices - confirm against the data files.
GAS_DATA_DIR = 'Datasets/Gas'
GAS_CACHE_FILE = f'{GAS_DATA_DIR}/GasArrayDF.npy'
GAS_BATCH_COUNT = 10

if reloadData or not os.path.isfile(GAS_CACHE_FILE):
    batches = []
    for batch_no in range(1, GAS_BATCH_COUNT + 1):
        raw_batch = pd.read_csv(
            f'{GAS_DATA_DIR}/batch{batch_no}.dat',
            sep=' |:',  # regex separator, hence the python parsing engine
            header=None,
            engine='python',
        )
        batches.append(np.array(raw_batch)[:, ::2])
    df = np.vstack(batches)
    with open(GAS_CACHE_FILE, 'wb') as f:
        np.save(f, df)
else:
    with open(GAS_CACHE_FILE, 'rb') as f:
        df = np.load(f)

In [None]:
# Rescale every column except column 0 in place, using scikit-learn's
# normalize() with its default arguments. Column 0 is left untouched; it is the
# column later used as the classification target.
# NOTE(review): the defaults are presumably a per-row L2 normalisation - confirm
# against the scikit-learn documentation.
df[:,1:] = preprocessing.normalize(df[:,1:])

## Създаване на модел

In [None]:
from collections import Counter

class KNNClassifier:
    """Brute-force k-nearest-neighbours classifier with majority voting.

    Parameters
    ----------
    k : int
        Number of closest training samples that vote on each prediction.
    verbose : bool
        When truthy, print the closest neighbour's label and the vote counts
        for every classified row.
    """

    # Maximum number of (label, votes) pairs reported with each prediction.
    _MAX_REPORTED_VOTES = 6

    def __init__(self, k=1, verbose=False):
        self.k = k
        self.verbose = verbose

    def fit(self, X_train, y_train):
        """Store the training samples and count the distinct labels.

        No model is built; every prediction scans the stored samples.
        """
        self.X_train = X_train
        self.y_train = y_train
        self.num_classes = len(set(self.y_train))

    def predict(self, X_test):
        """Classify every row of ``X_test``.

        Returns a list holding one ``predict_strategy`` result per row, in
        input order. A progress dot is printed roughly every tenth of the
        input.
        """
        # Clamp to 1 so inputs with fewer than 10 rows do not cause a
        # modulo-by-zero.
        progress_step = max(1, len(X_test) // 10)
        y_pred = []
        for i, row in enumerate(X_test):
            y_pred.append(self.predict_strategy(row))
            if i % progress_step == 0:
                print('.', end=' ')
        return y_pred

    def predict_strategy(self, predict_row):
        """Classify a single row by majority vote among its k nearest neighbours.

        Returns
        -------
        tuple
            ``(label, votes)`` where ``votes`` is a list of ``(label, count)``
            pairs ordered by descending count and ``label`` is the first
            entry's label.

        Raises
        ------
        IndexError
            If no neighbour is available (empty training data or ``k == 0``).
        """
        scored = [
            (neighbour_label, self.distance(predict_row, neighbour))
            for neighbour, neighbour_label in zip(self.X_train, self.y_train)
        ]
        # sorted() is stable, so equally distant neighbours keep training order.
        nearest_k = sorted(scored, key=lambda pair: pair[1])[:self.k]

        # Labels are counted in order of increasing distance; Counter breaks
        # count ties by first occurrence, so ties favour the closer neighbour.
        votes = Counter(label for label, _ in nearest_k).most_common(
            self._MAX_REPORTED_VOTES
        )
        if self.verbose:
            print(nearest_k[0][0], votes)
        return votes[0][0], votes

    def distance(self, a, b):
        """Return ``np.linalg.norm(a - b)`` (Euclidean distance for 1-D vectors)."""
        return np.linalg.norm(a - b)

In [None]:
class FuzzyKNNClassifier(KNNClassifier):
    """Fuzzy k-nearest-neighbours classifier.

    Each of the k nearest neighbours contributes to the membership of its own
    class with a weight proportional to ``1 / distance ** (2 / (m - 1))``.
    Memberships are normalised to sum to 1 over the k neighbours and the class
    with the highest membership is returned as the crisp answer.

    ``fit``, ``predict`` and ``distance`` are inherited from ``KNNClassifier``.

    Parameters
    ----------
    k : int
        Number of nearest neighbours taken into account.
    m : float
        Fuzzifier, must be greater than 1. Values close to 1 make the nearest
        neighbours dominate; larger values flatten the weights.
    verbose : bool
        When truthy, print intermediate values for every classified row.

    Raises
    ------
    ValueError
        If ``m`` is not greater than 1 (the exponent ``2 / (m - 1)`` would be
        undefined or negative).
    """

    def __init__(self, k=1, m=2, verbose=False):
        super().__init__(k, verbose)
        if m <= 1:
            raise ValueError(f'm must be greater than 1, got {m}')
        self.m = m

    def predict_strategy(self, row):
        """Compute fuzzy class memberships for ``row`` and defuzzify them.

        Returns
        -------
        tuple
            ``(label, memberships)`` where ``memberships`` maps every class in
            ``1..num_classes`` to its membership value and ``label`` is the
            class with the largest membership (lowest class number on ties).

        Raises
        ------
        ValueError
            If there is no neighbour to use (empty training data or ``k == 0``).
        """
        # find the k nearest neighbours
        distances_to_all = [
            (self.distance(row, neighbour), label)
            for neighbour, label in zip(self.X_train, self.y_train)
        ]
        # The list must be ordered by distance before truncating; otherwise the
        # "nearest" k are merely the first k training samples.
        nearest_k = sorted(distances_to_all, key=lambda pair: pair[0])[:self.k]
        if not nearest_k:
            raise ValueError('no neighbours available: empty training data or k == 0')
        if self.verbose: print(nearest_k[0], nearest_k[-1])

        # compute membership values of row for each class
        # NOTE: classes are assumed to be labelled 1..num_classes, so labels
        # outside that range contribute to the denominator only.
        weights = self._neighbour_weights([dist for dist, _ in nearest_k])
        denominator = sum(weights)
        if self.verbose: print('denom:\n', denominator)
        result = {}
        for cl in range(1, self.num_classes + 1):
            class_membership_sum = sum(
                weight
                for weight, (_, label) in zip(weights, nearest_k)
                if label == cl
            )
            result[cl] = class_membership_sum / denominator
        if self.verbose: print('result:\n', result)

        # defuzzify answer
        defuzzified_result = max(result, key=result.get)
        if self.verbose: print('defuzzyfied:\n', defuzzified_result)

        return defuzzified_result, result

    def _neighbour_weights(self, distances):
        """Return inverse-distance weights scaled so the closest neighbour gets 1.

        Scaling by the smallest distance leaves the normalised memberships
        unchanged but keeps every weight in [0, 1], so tiny distances cannot
        overflow. Exact matches (distance 0) take all the weight instead of
        producing a division by zero.
        """
        exponent = 2 / (self.m - 1)
        closest = min(distances)
        if closest == 0:
            return [1.0 if dist == 0 else 0.0 for dist in distances]
        return [(closest / dist) ** exponent for dist in distances]


## Класифициране

In [None]:
# Split the rows into training and test parts: column 0 is the target, all
# remaining columns are the features. 10% of the rows are held out for testing
# and random_state is fixed so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(df[:, 1:], df[:, 0], test_size=0.1, random_state=1)

In [None]:
# Evaluate the crisp k-NN classifier for several neighbourhood sizes. For each
# k, one report file is written: the accuracy on the first line, followed by
# one line per misclassified test row.
os.makedirs('output', exist_ok=True)  # open() does not create missing directories

for k in [100, 200, 250]:
    clf = KNNClassifier(k=k, verbose=True)

    print(f'Computing for k={k}: ', end='')
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # Every prediction is indexed as (label, details); only the label is scored.
    # Despite its name, correct_percentage is a fraction in [0, 1]; the name is
    # kept because later cells read it.
    predicted_labels = np.array([pred[0] for pred in y_pred])
    correct_percentage = np.mean(predicted_labels == y_test)
    wrong_answers = [
        (n, ':', actual, '!=', pred[0], ' full: ', pred[1])
        for n, (actual, pred) in enumerate(zip(y_test, y_pred))
        if actual != pred[0]
    ]

    print(' Saving results -> ', end='')
    with open(f'output/normal_results_k={k}_rand=1.txt', 'w') as f:
        # The newline keeps the accuracy from fusing with the first
        # misclassification line.
        f.write(str(correct_percentage) + '\n')
        f.write('\n'.join(str(x) for x in wrong_answers))
    print('Saved results ')


In [None]:
# Re-derive the summary metrics from whatever `y_pred` currently holds.
# Each prediction is indexed as (label, details): position 0 is compared with
# the true label, position 1 is only carried along for reporting.
correct_percentage = sum(
    np.array([prediction[0] for prediction in y_pred]) == y_test
) / len(y_test)
wrong_answers = [
    (row_no, ':', actual, '!=', prediction[0], ' full: ', prediction[1])
    for row_no, (actual, prediction) in enumerate(zip(y_test, y_pred))
    if actual != prediction[0]
]

In [None]:
# Show the fraction of correct predictions first, then every misclassified
# row on its own line.
print(correct_percentage)
for wrong_answer in wrong_answers:
    print(wrong_answer)

In [None]:
# Evaluate the fuzzy k-NN classifier over a grid of neighbourhood sizes (k) and
# fuzzifier values (m). For each combination, one report file is written: the
# accuracy on the first line, followed by one line per misclassified test row.
os.makedirs('output', exist_ok=True)  # open() does not create missing directories

for k in [1250, 1500, 2000]:
    for m in [1.1, 1.2, 1.4]:
        clf = FuzzyKNNClassifier(k=k, m=m, verbose=False)

        print(f'Computing for k={k} and m={m}: ', end='')
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)

        # Every prediction is indexed as (label, details); only the label is
        # scored. Despite its name, fuzzy_correct_percentage is a fraction in
        # [0, 1].
        predicted_labels = np.array([pred[0] for pred in y_pred])
        fuzzy_correct_percentage = np.mean(predicted_labels == y_test)
        fuzzy_wrong_answers = [
            (n, ':', actual, '!=', pred[0], ' full: ', pred[1])
            for n, (actual, pred) in enumerate(zip(y_test, y_pred))
            if actual != pred[0]
        ]

        print(' Saving results -> ', end='')
        with open(f'output/fuzzy_results_k={k}_m={m}_rand=1.txt', 'w') as f:
            # The newline keeps the accuracy from fusing with the first
            # misclassification line.
            f.write(str(fuzzy_correct_percentage) + '\n')
            f.write('\n'.join(str(x) for x in fuzzy_wrong_answers))
        print('Saved results ')
