# $Imports:$

In [439]:
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt

from sklearn.model_selection import cross_validate
from sklearn.model_selection import cross_val_predict

from matplotlib import style
from sklearn.metrics import f1_score
style.use('ggplot')


from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import RadiusNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier


from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression

from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

from sklearn.model_selection import train_test_split

<hr>

# $Functions$ $used$ $in$ $module:$


### *Cross Validation*

#####  Model validation technique for assessing how the results of a statistical analysis will generalize to an independent data set.


#####  Helps ensure that neither the model nor its hyperparameters are overfitted


##### Gives every sample the chance to be tested on at some point, which reduces bias

<img src="https://upload.wikimedia.org/wikipedia/commons/c/c7/LOOCV.gif">

In [390]:
def cross_validate_train(clf, x, y, k_folds = 5, return_all = True, print_all = True):
    """Cross-validate ``clf`` on ``(x, y)`` and summarise timings and scores.

    Parameters
    ----------
    clf : estimator handed unchanged to ``cross_validate``.
    x, y : features and targets handed unchanged to ``cross_validate``.
    k_folds : forwarded as ``cv``. Usually an int fold count, but any value
        ``cross_validate`` accepts for ``cv`` works because the averages are
        taken over the folds actually returned.
    return_all : when True, return the three averages; otherwise return None.
    print_all : when True, print the per-fold values next to their averages.

    Returns
    -------
    tuple or None
        ``(avrg_fit_time, avrg_score_time, avrg_score)`` when ``return_all``
        is True.
    """
    cv_results = cross_validate(clf, x, y, cv=k_folds)

    # Average over the returned per-fold arrays instead of dividing by
    # ``k_folds``: identical for an int fold count, and it does not break when
    # ``k_folds`` is a splitter object or None.
    avrg_fit_time = np.mean(cv_results['fit_time'])
    avrg_score_time = np.mean(cv_results['score_time'])
    avrg_score = np.mean(cv_results['test_score'])

    if print_all:
        print("fit_times:   "+str(cv_results['fit_time'])+", avrg fit time: "+str(avrg_fit_time))
        print("score_times: "+str(cv_results['score_time'])+", avrg score time: "+str(avrg_score_time))
        print("test_scores: "+str(cv_results['test_score'])+", avrg acc: "+str(avrg_score))
    if return_all:
        return avrg_fit_time, avrg_score_time,  avrg_score
    

In [391]:
def calculateDistance(x1, x2):
    """Return the Euclidean distance between two points.

    Both arguments are converted with ``np.asarray`` first, so plain lists
    work and pandas Series are compared by position rather than being aligned
    on their index labels (alignment on mismatched labels would yield NaN).

    Parameters
    ----------
    x1, x2 : array-like of matching (or broadcastable) shape.

    Returns
    -------
    The value of ``np.linalg.norm`` applied to the element-wise difference.
    """
    difference = np.asarray(x1) - np.asarray(x2)
    distance = np.linalg.norm(difference)
    return distance

<hr>
<hr>

# $Data$  $manipulation$

<hr>

### Train-Test Split:


In [393]:
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

<hr>

# *PCA:*

<hr>

### PCA from Scratch:


In [394]:
def featureNormalize(X):
    """Standardise every column of ``X`` to zero mean and unit variance.

    Parameters
    ----------
    X : array-like with samples along axis 0 and features along axis 1.

    Returns
    -------
    tuple
        ``(normalized_X, mu, sigma)`` where ``mu`` and ``sigma`` are the
        per-column mean and standard deviation (``np.std`` with its default
        ``ddof``). ``sigma`` is returned as computed, so a constant column
        reports 0 there.
    """
    mu = np.mean(X,axis=0)
    sigma = np.std(X,axis=0)

    # A constant column has sigma == 0; dividing by it would fill the column
    # with NaN (0/0). Divide such columns by 1 instead so they stay at 0.
    safe_sigma = np.where(sigma == 0, 1.0, sigma)
    normalized_X = (X-mu)/safe_sigma

    return (normalized_X, mu, sigma)

In [408]:
def from_scratch_pca(X):
    """Compute principal directions of ``X`` via SVD of ``X.T @ X / n_rows``.

    The matrix ``X.T @ X / n_rows`` is the covariance of ``X`` only when every
    column already has zero mean; this function does not centre the data
    itself. Because that matrix is symmetric, its SVD yields the eigenvectors
    (columns of ``u``) and eigenvalues (``s``, in descending order).

    Returns
    -------
    tuple
        ``(u, s)`` as returned by ``np.linalg.svd``.
    """
    n_rows = X.shape[0]
    covariance = (X.T @ X) / n_rows

    # The third SVD factor is not needed for the projection.
    eigen_vectors, eigen_values, _unused = np.linalg.svd(covariance)
    return eigen_vectors, eigen_values

In [432]:
def projectData(X, U, K):
    """Project ``X`` onto the first ``K`` columns of ``U``, with the sign flipped.

    ``X`` is negated before the matrix product, so every projected coordinate
    has the opposite sign of the plain ``X @ U[:, :K]`` projection.
    """
    leading_components = U[:, :K]
    negated_input = -1 * X
    return negated_input @ leading_components


<hr>

### Testing against PCA from Library:


In [433]:
# Toy 6x4 matrix used to compare the from-scratch PCA with scikit-learn's.
x = np.array([
    [-1, -1, -1, -1],
    [-2, -1, -1, -1],
    [-3, -2, -1, -2],
    [1, 1, 2, 2],
    [2, 1, 4, 2],
    [3, 2, 2, 5],
])
x

array([[-1, -1, -1, -1],
       [-2, -1, -1, -1],
       [-3, -2, -1, -2],
       [ 1,  1,  2,  2],
       [ 2,  1,  4,  2],
       [ 3,  2,  2,  5]])

In [444]:
# Reference result from scikit-learn: standardise x, then keep three principal components.
pca = PCA(n_components=3)

scaler = StandardScaler()
x_pca_normed = scaler.fit_transform(x)

pca.fit_transform(x_pca_normed)

# The same two steps written as a single pipeline (kept for reference, not run):
# pipeline = Pipeline([('scaling', StandardScaler()), ('pca', PCA(n_components=3))])
# pipeline.fit_transform(x)

array([[-1.42367278, -0.2405279 ,  0.30244267],
       [-1.66181164, -0.19605803, -0.06708039],
       [-2.47143094,  0.19848579, -0.17244311],
       [ 1.1246693 ,  0.08069594, -0.07043853],
       [ 1.84340642,  0.90678962,  0.06348409],
       [ 2.58883964, -0.74938541, -0.05596473]])

In [445]:
# Same toy matrix through the from-scratch steps: normalise, decompose, project onto 3 components.
X_norm, mu, sigma = featureNormalize(x)
test_u, test_s = from_scratch_pca(X_norm)
z_test = projectData(X_norm, test_u, 3)
# Displayed so it can be compared with the scikit-learn output above.
z_test

array([[-1.42367278, -0.2405279 ,  0.30244267],
       [-1.66181164, -0.19605803, -0.06708039],
       [-2.47143094,  0.19848579, -0.17244311],
       [ 1.1246693 ,  0.08069594, -0.07043853],
       [ 1.84340642,  0.90678962,  0.06348409],
       [ 2.58883964, -0.74938541, -0.05596473]])

<hr>
<hr>

# $Machine Learning  Models$:

<hr>
<hr>

## Built from Scratch Models:

<hr>


In [347]:
def MinimumDistanceClassifier(test_point, training_features, labels):
    """Classify ``test_point`` by the nearest class centroid.

    Parameters
    ----------
    test_point : array-like holding the N feature values of one sample.
    training_features : (M, N) array-like, M training samples of N features.
    labels : length-M array-like; entry ``j`` (by position) is the class of
        training row ``j``.

    Returns
    -------
    The label whose centroid (per-feature mean of that class's training rows)
    has the smallest Euclidean distance to ``test_point``. Ties go to the class
    that appears first in ``labels``. Returns -1 when no distance can be
    compared (no labels at all, or every distance is NaN).
    """
    point = np.asarray(test_point, dtype=float).ravel()
    features = np.asarray(training_features, dtype=float)
    label_values = np.asarray(labels).ravel()

    # pd.unique keeps first-appearance order, which fixes the tie-breaking order.
    unique_labels = pd.unique(label_values)
    if len(unique_labels) == 0:
        return -1

    # One centroid per class, in the same order as unique_labels.
    centroids = np.array([
        features[label_values == u_label].mean(axis=0) for u_label in unique_labels
    ])
    distances = np.linalg.norm(centroids - point, axis=1)

    # No finite sentinel is used, so arbitrarily large distances still compare
    # correctly; NaN distances are ignored like in a "minimum > d" scan.
    if np.all(np.isnan(distances)):
        return -1
    return unique_labels[np.nanargmin(distances)]

In [348]:
def NearestNeighbor(test_point, training_features, labels):
    """Classify ``test_point`` with the label of its single nearest training sample.

    Parameters
    ----------
    test_point : array-like holding the N feature values of one sample.
    training_features : (M, N) array-like, M training samples of N features.
    labels : length-M array-like; entry ``j`` (by position) is the class of
        training row ``j``.

    Returns
    -------
    The label of the training row with the smallest Euclidean distance to
    ``test_point`` (the first such row on ties). Returns -1 when there are no
    training rows or every distance is NaN.
    """
    point = np.asarray(test_point, dtype=float).ravel()
    features = np.asarray(training_features, dtype=float)
    # Plain array => positional lookup, independent of any pandas index labels.
    label_values = np.asarray(labels).ravel()

    if features.shape[0] == 0:
        return -1

    # All distances in one vectorised call instead of two norm calls per row.
    distances = np.linalg.norm(features - point, axis=1)
    if np.all(np.isnan(distances)):
        return -1
    return label_values[np.nanargmin(distances)]


In [349]:
def KNN(test_point, training_features, k, labels):
    """Classify ``test_point`` by majority vote among its ``k`` nearest training samples.

    Parameters
    ----------
    test_point : array-like holding the N feature values of one sample.
    training_features : (M, N) array-like, M training samples of N features.
    k : number of nearest neighbours that vote; if ``k`` exceeds M, all rows vote.
    labels : length-M array-like; entry ``j`` (by position) is the class of
        training row ``j``.

    Returns
    -------
    The label with the most votes among the ``k`` nearest rows. When several
    labels share the top count, the one appearing first in ``labels`` wins.

    Raises
    ------
    ValueError
        If ``labels`` is empty (there is nothing to vote on).
    """
    point = np.asarray(test_point, dtype=float).ravel()
    features = np.asarray(training_features, dtype=float)
    # Plain array => positional lookup, independent of any pandas index labels.
    label_values = np.asarray(labels).ravel()

    distances = np.linalg.norm(features - point, axis=1)
    # Stable sort: rows at equal distance keep their training order, so the
    # selected neighbours are deterministic.
    nearest = np.argsort(distances, kind="stable")[:k]
    neighbour_labels = label_values[nearest]

    # Dict insertion order follows first appearance in ``labels``; ``max``
    # returns the first key holding the highest count.
    counts = {
        u_label: np.count_nonzero(neighbour_labels == u_label)
        for u_label in pd.unique(label_values)
    }
    classification = max(counts, key=counts.get)
    return classification


In [237]:
# class SVM(object):
#     def __init__(self,visualization=True):
#         self.visualization = visualization
#         self.colors = {1:'r',-1:'b'}
#         if self.visualization:
#             self.fig = plt.figure()
#             self.ax = self.fig.add_subplot(1,1,1)
    
#     def fit(self,data):
#         #train with data
#         self.data = data
#         # { |\w\|:{w,b}}
#         opt_dict = {}
        
#         transforms = [[1,1],[-1,1],[-1,-1],[1,-1]]
        
#         all_data = np.array([])
#         for yi in self.data:
#             all_data = np.append(all_data,self.data[yi])
                    
#         self.max_feature_value = max(all_data)         
#         self.min_feature_value = min(all_data)
#         all_data = None
        
#         #with smaller steps our margins and db will be more precise
#         step_sizes = [self.max_feature_value * 0.1,
#                       self.max_feature_value * 0.01,
#                       #point of expense
#                       self.max_feature_value * 0.001,]
        
#         #extremly expensise
#         b_range_multiple = 5
#         #we dont need to take as small step as w
#         b_multiple = 5
        
#         latest_optimum = self.max_feature_value*10
        
#         """
#         objective is to satisfy yi(x.w)+b>=1 for all training dataset such that ||w|| is minimum
#         for this we will start with random w, and try to satisfy it with making b bigger and bigger
#         """
#         #making step smaller and smaller to get precise value
#         for step in step_sizes:
#             w = np.array([latest_optimum,latest_optimum])
            
#             #we can do this because convex
#             optimized = False
#             while not optimized:
#                 for b in np.arange(-1*self.max_feature_value*b_range_multiple,
#                                    self.max_feature_value*b_range_multiple,
#                                    step*b_multiple):
#                     for transformation in transforms:
#                         w_t = w*transformation
#                         found_option = True
                        
#                         #weakest link in SVM fundamentally
#                         #SMO attempts to fix this a bit
#                         # ti(xi.w+b) >=1
#                         for i in self.data:
#                             for xi in self.data[i]:
#                                 yi=i
#                                 if not yi*(np.dot(w_t,xi)+b)>=1:
#                                     found_option=False
#                         if found_option:
#                             """
#                             all points in dataset satisfy y(w.x)+b>=1 for this cuurent w_t, b
#                             then put w,b in dict with ||w|| as key
#                             """
#                             opt_dict[np.linalg.norm(w_t)]=[w_t,b]
                
#                 #after w[0] or w[1]<0 then values of w starts repeating itself because of transformation
#                 #Think about it, it is easy
#                 #print(w,len(opt_dict)) Try printing to understand
#                 if w[0]<0:
#                     optimized=True
#                     print("optimized a step")
#                 else:
#                     w = w-step
                    
#             # sorting ||w|| to put the smallest ||w|| at poition 0 
#             norms = sorted([n for n in opt_dict])
#             #optimal values of w,b
#             opt_choice = opt_dict[norms[0]]

#             self.w=opt_choice[0]
#             self.b=opt_choice[1]
            
#             #start with new latest_optimum (initial values for w)
#             latest_optimum = opt_choice[0][0]+step*2
    
#     def predict(self,features):
#         #sign(x.w+b)
#         classification = np.sign(np.dot(np.array(features),self.w)+self.b)
#         if classification!=0 and self.visualization:
#             self.ax.scatter(features[0],features[1],s=200,marker='*',c=self.colors[classification])
#         return (classification,np.dot(np.array(features),self.w)+self.b)
    
#     def visualize(self):
#         [[self.ax.scatter(x[0],x[1],s=100,c=self.colors[i]) for x in data_dict[i]] for i in data_dict]
        
#         # hyperplane = x.w+b (actually its a line)
#         # v = x0.w0+x1.w1+b -> x1 = (v-w[0].x[0]-b)/w1
#         #psv = 1     psv line ->  x.w+b = 1a small value of b we will increase it later
#         #nsv = -1    nsv line ->  x.w+b = -1
#         # dec = 0    db line  ->  x.w+b = 0
#         def hyperplane(x,w,b,v):
#             #returns a x2 value on line when given x1
#             return (-w[0]*x-b+v)/w[1]
       
#         hyp_x_min= self.min_feature_value*0.9
#         hyp_x_max = self.max_feature_value*1.1
        
#         # (w.x+b)=1
#         # positive support vector hyperplane
#         pav1 = hyperplane(hyp_x_min,self.w,self.b,1)
#         pav2 = hyperplane(hyp_x_max,self.w,self.b,1)
#         self.ax.plot([hyp_x_min,hyp_x_max],[pav1,pav2],'k')
        
#         # (w.x+b)=-1
#         # negative support vector hyperplane
#         nav1 = hyperplane(hyp_x_min,self.w,self.b,-1)
#         nav2 = hyperplane(hyp_x_max,self.w,self.b,-1)
#         self.ax.plot([hyp_x_min,hyp_x_max],[nav1,nav2],'k')
        
#         # (w.x+b)=0
#         # db support vector hyperplane
#         db1 = hyperplane(hyp_x_min,self.w,self.b,0)
#         db2 = hyperplane(hyp_x_max,self.w,self.b,0)
#         self.ax.plot([hyp_x_min,hyp_x_max],[db1,db2],'y--')

In [238]:
# data_dict = {-1:np.array([[1,7,10],[2,8,11],[3,8,9]]),1:np.array([[5,1,-10],[6,-1,-9],[7,3,-11]])}

In [239]:
# svm = SVM() # Linear Kernel
# svm.fit(data=data_dict)
# # svm.visualize()

In [240]:
# svm.predict([3,8])

In [241]:
# svm.predict([6,6.5])

<hr>
<hr>

# $Testing$ $Models:$

<hr>

### Reading Dummy Data

In [242]:
# Folder holding the dummy "points with classes" CSV files.
# NOTE(review): absolute, machine-specific path; change it here before running elsewhere.
DATA_DIR = r"D:\Uni\GP\Machine_Learning_Module\Data\points_with_classes"

# Training file has no header row: column 0 is taken as the label, the rest as features.
train_data = pd.read_csv(DATA_DIR + r"\data1.csv", header=None)
y_train = train_data.iloc[:, 0]
x_train = train_data.iloc[:, 1:]

# Test features and their true labels come from two separate header-less files.
x_test = pd.read_csv(DATA_DIR + r"\test_data.csv", header=None)
y_test = pd.read_csv(DATA_DIR + r"\test_data_true.csv", header=None)


In [243]:
# Peek at the first rows of the training features, then of the training labels.
print(x_train.head())
print(y_train.head())

          1          2
0  3.272089   0.726774
1 -6.937777  -6.449828
2  6.056029   0.487195
3 -2.347665  -3.819760
4 -1.148770  12.649768
0    3.0
1    1.0
2    3.0
3    1.0
4    2.0
Name: 0, dtype: float64


In [244]:
# Peek at the first rows of the test features, then of the true test labels.
print(x_test.head())
print(y_test.head())

           0         1
0  10.701414  3.872536
1  -3.818318 -5.009778
2  -3.570719  9.960362
3   4.943090 -0.015394
4   4.260826 -0.613494
     0
0  3.0
1  1.0
2  2.0
3  3.0
4  3.0


<hr>

### Testing Built from Scratch Models:

<hr>

#### Minimum Distance Classifier:

In [270]:
# Predict a class for every test row with the from-scratch minimum-distance classifier.
classifications = [
    MinimumDistanceClassifier(x_test.iloc[row, :], x_train, y_train)
    for row in range(len(y_test))
]

In [271]:
# Convert predictions and ground truth to arrays, then display the weighted F1 score.
classifications = np.asarray(classifications)
# NOTE: this rebinds the notebook-global y_test to an ndarray for every later cell.
y_test = np.asarray(y_test)
f1_score(y_test,classifications,average='weighted')

0.9601486567986924


<hr>

#### Nearest Neighbor Classifier:

In [297]:
# Classify every test row with the from-scratch nearest-neighbour model and print the accuracy.
# Flatten the ground truth first so the comparison works whether y_test is
# still the loaded DataFrame or was already converted to an array by an earlier cell.
y_true = np.asarray(y_test).ravel()

classifications = [
    NearestNeighbor(x_test.iloc[i, :], x_train, y_train)
    for i in range(len(y_true))
]

count = len(classifications)
sum_correct = int(np.sum(np.asarray(classifications) == y_true))
print(sum_correct / count)

0.9533333333333334


<hr>

#### K-Nearest Neighbor Classifier:

In [294]:
# Predict a class for every test row with the from-scratch KNN, using 15 neighbours.
classifications = [
    KNN(x_test.iloc[row, :], x_train, 15, y_train)
    for row in range(len(y_test))
]

In [295]:
# Convert predictions and ground truth to arrays, then display the weighted F1 score.
classifications = np.asarray(classifications)
# NOTE: this rebinds the notebook-global y_test to an ndarray for every later cell.
y_test = np.asarray(y_test)
f1_score(y_test,classifications,average='weighted')

0.9644732852324301

<hr>

### Testing Library Models:

In [251]:
# scikit-learn 1-nearest-neighbour, for comparison with the from-scratch NearestNeighbor score.
neigh = KNeighborsClassifier(n_neighbors=1)
neigh.fit(x_train, y_train)
f1_score(y_test,neigh.predict(x_test),average='weighted')

0.9535259648413822

In [296]:
# scikit-learn KNN with 15 neighbours, for comparison with the from-scratch KNN score.
neigh = KNeighborsClassifier(n_neighbors=15)
neigh.fit(x_train, y_train)
f1_score(y_test,neigh.predict(x_test),average='weighted')


0.9644732852324301

In [253]:
# Support vector classifier with gamma=0.1 and C=0.2, scored with weighted F1 on the test data.
clf = SVC(gamma=0.1,C=0.2)
clf.fit(x_train, y_train)
f1_score(y_test,clf.predict(x_test),average='weighted')

0.9644732852324301

In [328]:
# Multi-layer perceptron with a fixed seed, scored with weighted F1 on the test data.
# The model is fitted exactly once; constructing it with ".fit(...)" and then
# calling fit again only repeated the same seeded training run.
clf = MLPClassifier(random_state=1, max_iter=200)
clf.fit(x_train, y_train)
f1_score(y_test,clf.predict(x_test),average='weighted')

0.9578488063406323

In [327]:
# Random forest of 1000 unrestricted-depth trees with a fixed seed, scored with weighted F1.
clf = RandomForestClassifier(n_estimators =1000,max_depth=None, random_state=0)
clf.fit(x_train, y_train)
f1_score(y_test,clf.predict(x_test),average='weighted')

0.9513037731314588

In [342]:
# Logistic regression with a fixed seed, scored with weighted F1 on the test data.
# The deprecated multi_class argument is dropped: with the default lbfgs solver
# a multiclass target is already fitted as a multinomial model.
clf = LogisticRegression(random_state=1)
clf.fit(x_train, y_train)
f1_score(y_test,clf.predict(x_test),average='weighted')

0.960106365638179

<hr>

### Stacking Classifiers:

In [339]:
# Base learners for the stack: KNN (15 neighbours) and an SVC; the random forest is left disabled.
estimators = [
#      ('rf', RandomForestClassifier(n_estimators =1000,max_depth=None, random_state=0)),
     ('knn', KNeighborsClassifier(n_neighbors=15)),
     ('svm',SVC(gamma=0.1,C=0.2))
 ]
# A logistic regression combines the base learners' outputs into the final prediction.
clf = StackingClassifier(estimators=estimators, final_estimator=LogisticRegression())
clf.fit(x_train, y_train)
f1_score(y_test,clf.predict(x_test),average='weighted')

0.9644732852324301

<hr>

### Majority Vote Classifier:

In [346]:
# Hard-voting ensemble of logistic regression, KNN (15 neighbours) and an SVC.
# The deprecated multi_class argument is dropped: with the default lbfgs solver
# a multiclass target is already fitted as a multinomial model.
clf1 = LogisticRegression(random_state=1)
# clf2 = RandomForestClassifier(n_estimators=1000, random_state=1)
clf3 = KNeighborsClassifier(n_neighbors=15)
clf4 = SVC(gamma=0.1,C=0.2)

# Estimator names now describe the models they label ('knn' and 'svm'
# instead of the misleading 'gnb' and 'rof').
eclf1 = VotingClassifier(estimators=[('lr', clf1), ('knn', clf3), ('svm', clf4)], voting='hard')
eclf1 = eclf1.fit(x_train, y_train)
f1_score(y_test,eclf1.predict(x_test),average='weighted')

0.9644732852324301