# In [None]:
import numpy as np
import pandas as pd
import math
import time
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt
import random
from sklearn.neighbors import NearestNeighbors
# Load the four CSV files used by the experiments at the bottom of the file.
# There, each "*1" frame is passed as the data-set and each "*2" frame as the data-test.
hananya1=pd.read_csv('Hananya1.csv')
hananya2=pd.read_csv('Hananya2.csv') 
hashmal1=pd.read_csv('Hashmal1.csv')
hashmal2=pd.read_csv('Hashmal2.csv')







class KDTreeNode:
    """One node of a KD-tree.

    A node built with only ``data`` has no children and no split information;
    a node may additionally carry two children, the column it splits on and
    the threshold used for that split.
    """

    def __init__(self, data, left=None, right=None, split_dim=None, split_val=None):
        # Payload of the node.
        self.data = data
        # Children (None when absent).
        self.left, self.right = left, right
        # Column index and threshold of the split (None when absent).
        self.split_dim, self.split_val = split_dim, split_val

class RandomizedKDTree:
    """KD-tree whose split column is drawn at random at every level.

    Leaves hold at most ``leaf_size`` rows (as a float64 array); internal
    nodes hold the split column and the threshold value.
    """

    def __init__(self, leaf_size):
        self.root = None
        self.leaf_size = leaf_size

    def build_kd_tree(self, arr):
        """Build the tree from a 2-D NumPy array and store it in ``self.root``."""
        self.root = self._randomized_kd_tree(arr)

    def _randomized_kd_tree(self, arr):
        """Recursively split ``arr`` and return the root node of the sub-tree.

        Every row of ``arr`` ends up in exactly one leaf: the rows are sorted
        by the chosen column and cut in the middle, the lower half going left
        and the upper half (including the middle row) going right.
        """
        # A single row can never be split further, whatever leaf_size is;
        # without the max() a leaf_size below 1 would recurse forever.
        if len(arr) <= max(self.leaf_size, 1):
            return KDTreeNode(arr.astype(np.float64))

        n, m = arr.shape
        # Random split column in [4, m-2]: the first four columns and the
        # last column are never used for splitting.
        split_dim = np.random.randint(4, m - 1)
        points_sorted = arr[np.argsort(arr[:, split_dim])]
        median_idx = n // 2
        # Use the value of the middle row as threshold so that the threshold
        # matches the actual cut: left values <= split_val <= right values.
        split_val = points_sorted[median_idx, split_dim]

        left = self._randomized_kd_tree(points_sorted[:median_idx])
        # The middle row is kept in the right half so that no point is lost.
        right = self._randomized_kd_tree(points_sorted[median_idx:])

        return KDTreeNode(split_val, left, right, split_dim, split_val)

    def construct_tree_from_dataframe(self, dataframe):
        """Convert ``dataframe`` to a NumPy array and build the tree from it."""
        self.build_kd_tree(dataframe.to_numpy())

    def find_leaf_node(self, point):
        """Return the leaf that ``point`` falls into.

        At every internal node the point goes left when its value in the
        node's split column is strictly below the threshold, right otherwise.
        """
        node = self.root
        while node.left is not None or node.right is not None:
            if point[node.split_dim] < node.split_val:
                child = node.left
            else:
                child = node.right
            if child is None:
                # The wanted side does not exist: stop at the current node.
                break
            node = child
        return node
    
class ANN:
    """Approximate nearest-neighbour search with randomized KD-trees or LSH.

    algorithm -- 'RKDT' or 'LSH'
    L         -- number of trees / tables
    leaf_size -- maximum number of points a leaf can hold (RKDT)
    num_cuts  -- number of dimensions used to split the data (LSH)

    Every method reads the features from columns 4..131 of a row, the
    coordinates from columns 0 and 1 and the row number from column 132.
    """

    def __init__(self, algorithm, L, leaf_size=None, num_cuts=None):
        self.algo = algorithm
        if self.algo == 'RKDT':
            self.L = L
            self.leaf_size = leaf_size
            self.trees = []
        if self.algo == 'LSH':
            self.L = L
            self.num_cuts = num_cuts
            self.tables = []

    def fit(self, X, y=None):
        """Store the data and build the L trees / tables.

        X -- DataFrame with the features (a copy is stored, X is not changed)
        y -- optional DataFrame with the labels, only needed by predict()
        """
        self.target = y
        # Keep the row number of every point as the last column "index".
        # predict() reads the labels positionally (iloc), so the stored value
        # must be the position of the row, not its original index label:
        # after pd.concat the labels repeat and would point at wrong rows.
        self.data = X.reset_index(drop=True)
        self.data["index"] = np.arange(len(self.data))
        if self.algo == 'RKDT':
            for _ in range(self.L):
                tree = RandomizedKDTree(self.leaf_size)
                tree.construct_tree_from_dataframe(self.data)
                self.trees.append(tree)
        if self.algo == 'LSH':
            for _ in range(self.L):
                self.tables.append(LSH(self.num_cuts, self.data))

    def find_k_neighbores(self, query_point, k):
        """Return up to k tuples ((y, x), distance, index), nearest first.

        The candidates are collected from the leaf (RKDT) or the cut (LSH) of
        the query point in every tree / table. A set is used so that a point
        found by several trees / tables is only kept once.
        Returns None when the algorithm is neither 'RKDT' nor 'LSH'.
        """
        set_tuples = set()
        query_point = query_point.to_numpy().astype(float)

        if self.algo == 'RKDT':
            for tree in self.trees:
                leaf = tree.find_leaf_node(query_point)
                self.find_closeset(set_tuples, leaf.data, query_point, k)
        elif self.algo == 'LSH':
            for table in self.tables:
                cut = table.find_table(query_point)
                self.find_closeset(set_tuples, cut, query_point, k)
            # Rare case: the cut of the query is empty in every table.
            # Fall back to the exact KNN search.
            if not set_tuples:
                set_tuples.update(self._exact_neighbors(query_point[4:132], k))
        else:
            return None

        return sorted(set_tuples, key=lambda x: x[1])[:k]

    def _exact_neighbors(self, features, k):
        """Exact KNN fallback: list of ((y, x), distance, index) tuples, nearest first."""
        knn = KNN()
        knn.fit(self.data)
        record = pd.DataFrame(np.asarray(features, dtype=float).reshape(1, -1))
        targets, distances = knn.kneighbors(record, k)
        found = []
        # Iterate over what was actually returned, which may be fewer than k.
        for pos in range(len(distances)):
            # self.data has a 0..n-1 index, so the label of a returned row
            # is also its position.
            row = int(targets.index[pos])
            coordinates = (self.data.iloc[row, 0], self.data.iloc[row, 1])
            found.append((coordinates, distances[pos], self.data["index"].iloc[row]))
        found.sort(key=lambda x: x[1])
        return found

    def find_closeset(self, set_tuples, group, query_point, k):
        """Add the (at most k) points of ``group`` nearest to the query to ``set_tuples``.

        group -- 2-D array with the points of a leaf / cut, or None for an
                 empty cut, in which case nothing is added
        """
        if group is None or len(group) == 0:
            return
        group = np.asarray(group)

        # Euclidean distance on the feature columns only.
        distances = np.linalg.norm(group[:, 4:132] - query_point[4:132], axis=1)
        n = len(distances)
        take = min(k, n)
        if take <= 0:
            return
        # If k is bigger than the number of points, all the points are taken;
        # kth is capped at n-1 because argpartition needs a valid position.
        top_k_indices = np.argpartition(distances, min(k, n - 1))[:take]
        for i in top_k_indices:
            coordinates = (group[i][0], group[i][1])
            set_tuples.add((coordinates, distances[i], group[i][132]))

    def predict(self, query_point, k):
        """Classify the query point by the majority label of its k nearest neighbours.

        Returns 'hananya' when strictly more neighbours carry that label,
        otherwise 'hashmal' (also on a tie or when no neighbour was found).
        """
        votes = {'hananya': 0, 'hashmal': 0}
        for cur_tuple in self.find_k_neighbores(query_point, k):
            # cur_tuple[2] is the row number of the neighbour, which gives
            # access to its label in the target frame.
            label = self.target.iloc[int(cur_tuple[2]), 2]
            if label in votes:
                votes[label] += 1
        if votes['hananya'] > votes['hashmal']:
            return 'hananya'
        return 'hashmal'

    def ratio(self, query_point):
        """Return (nearest distance, nearest coordinates or None, ratio).

        The ratio is the nearest distance divided by the second nearest one.
        The coordinates are only returned when the ratio is below 0.8,
        otherwise None is returned in their place.
        """
        top2_closeset = self.find_k_neighbores(query_point, 2)

        # The leaves / cuts of the query may hold fewer than two points in
        # total; in that case use the exact KNN search instead.
        if len(top2_closeset) < 2:
            top2_closeset = self._exact_neighbors(query_point.iloc[4:132], 2)
        if len(top2_closeset) < 2:
            raise ValueError("ratio() needs at least two points in the data-set")

        nearest, second = sorted(top2_closeset[:2], key=lambda x: x[1])
        nearest_dis = nearest[1]
        second_dis = second[1]
        # Equal distances give ratio 1; two zero distances are treated the
        # same way instead of dividing by zero.
        ratio = nearest_dis / second_dis if second_dis > 0 else 1.0

        if ratio < 0.8:
            return nearest_dis, nearest[0], ratio
        return nearest_dis, None, ratio

    


class KNN:
    """Exact (brute-force) k-nearest-neighbours search."""

    def __init__(self):
        pass

    def fit(self, X, y=None):
        """Store the features (columns 4..131) and the target columns of X.

        When X has exactly 134 columns, column 132 is kept in the target next
        to columns 0 and 1; otherwise only columns 0 and 1 are kept.
        ``y`` is accepted but not used.
        """
        self.data = X.iloc[:, 4:132]

        if X.shape[1] == 134:
            self.target = X.iloc[:, [0, 1, 132]]
        else:
            self.target = X.iloc[:, [0, 1]]

    def kneighbors(self, new_record, k=1):
        """Return the k rows nearest to ``new_record``, nearest first.

        Returns a tuple (targets, distances): a DataFrame with the target
        columns of the selected rows and a NumPy array with their distances.
        When k is bigger than the number of stored rows, all rows are returned.
        """
        distances = np.linalg.norm(self.data.values - new_record.values, axis=1)
        n = len(distances)
        take = min(k, n)
        if take <= 0:
            top_k_indices = np.empty(0, dtype=int)
        else:
            # kth must be a valid position, hence the cap at n-1
            # (argpartition raises for kth == n).
            top_k_indices = np.argpartition(distances, min(k, n - 1))[:take]
            # argpartition does not order the selected part; sort it so the
            # nearest row comes first.
            top_k_indices = top_k_indices[np.argsort(distances[top_k_indices], kind="stable")]
        top_k_distances = distances[top_k_indices]
        top_k_targets = self.target.iloc[top_k_indices]

        return top_k_targets, top_k_distances



#Instead of re-running knn.kneighbors, which always gives the same answer, we run it once for every test point and keep the results in the global list knnlist

def knn_time(data_set,data_test,k=1):
    """Run the exact KNN search for every row of ``data_test`` and time it.

    The result of every search is appended to the global list ``knnlist``.
    Returns the tuple ("knn", elapsed seconds); fitting is not timed.
    """
    model = KNN()
    model.fit(data_set)

    began = time.time()
    row = 0
    n_rows = len(data_test)
    while row < n_rows:
        # Only the feature columns of the test row are searched for.
        features = data_test.iloc[row, 4:132]
        knnlist.append(model.kneighbors(features, k))
        row += 1
    elapsed = time.time() - began
    return ("knn", elapsed)





#Calculates the mean relative error of a given ANN over the data-test by comparing its nearest-neighbour distances to the KNN results kept in knnlist

def error_check(ann,data_test):
    """Return the mean relative distance error of ``ann`` over ``data_test``.

    For every test row the distance of the nearest neighbour found by ``ann``
    is divided by the exact distance kept in the global ``knnlist`` (same row
    order), and 1 is subtracted; the mean of these values is returned.
    """
    error = 0.0
    for i in range(len(data_test)):
        # knnlist[i][1] is a one-element array; .item() extracts the scalar
        # (float() on an array with ndim > 0 is deprecated in NumPy).
        current_knn_dis = np.asarray(knnlist[i][1]).item()
        current_ann_dis = ann.find_k_neighbores(data_test.iloc[i, :], 1)[0][1]
        error += float(current_ann_dis / current_knn_dis - 1)
    return error / len(data_test)



#Tries a grid of (L,N) values for RKDT or LSH, appends ((L,N), error, run-time) for each pair to error_ln_time_list and returns the entry with the least error
def generate_ln(algo,data_set,data_test):
    """Evaluate a fixed grid of (L, N) pairs for the given algorithm.

    For every pair an ANN is fitted on ``data_set`` and its error on
    ``data_test`` is measured and timed; ((L, N), error, seconds) is appended
    to the global ``error_ln_time_list``. Returns the entry of that list with
    the smallest error, or None when ``algo`` is neither 'RKDT' nor 'LSH'.
    """
    # Grid and name of the size parameter for each algorithm.
    if algo == 'RKDT':
        l_values, n_values, size_keyword = range(1, 4, 2), range(5, 150, 35), 'leaf_size'
    elif algo == 'LSH':
        l_values, n_values, size_keyword = range(3, 6, 2), range(5, 26, 5), 'num_cuts'
    else:
        return None

    for l in l_values:
        for n in n_values:
            ann = ANN(algo, L=l, **{size_keyword: n})
            ann.fit(data_set)
            # Only the queries are timed, not the construction.
            began = time.time()
            error = error_check(ann, data_test)
            elapsed = time.time() - began
            error_ln_time_list.append(((l, n), error, elapsed))

    return min(error_ln_time_list, key=lambda entry: entry[1])
    


#Builds a new ANN from the given (L,N) pair (for example the best pair returned by generate_ln)
#Provides the top k (10 by default) points from the data-test which have the least ratio, and their nearest point from the data-set
def topk_ratio(data_set,data_test,tuple_ln,k=10,algo='LSH'):
    """Return the k test points with the smallest distance ratio.

    tuple_ln -- the (L, N) pair used to build the ANN; N is the number of
                cuts for 'LSH' and the leaf size for any other algorithm
    Returns a list of (nearest coordinates in the data-set or None,
    (column 0, column 1) of the test row), ordered by increasing ratio.
    """
    size_keyword = 'num_cuts' if algo == 'LSH' else 'leaf_size'
    ann = ANN(algo, L=tuple_ln[0], **{size_keyword: tuple_ln[1]})
    ann.fit(data_set)

    scored = []
    for row in range(len(data_test)):
        nearest_dis, nearest_cordinates, ratio = ann.ratio(data_test.iloc[row, :])
        test_cordinates = (data_test.iloc[row, 0], data_test.iloc[row, 1])
        scored.append((nearest_dis, nearest_cordinates, ratio, test_cordinates))

    # Smallest ratio first; the sort is stable, like sorted().
    scored.sort(key=lambda entry: entry[2])
    return [(entry[1], entry[3]) for entry in scored[:k]]

#Use topk_ratio method to get the "best" points and mark them on the pictures

def mark_picture(algorithm, data_set, data_test,optimal_pair,image1, image2):
    """Mark the 10 best matches of topk_ratio on the two pictures and save them.

    Every match is drawn as a filled circle with the same random colour on
    both pictures: the data-set point on ``image1`` and the data-test point
    on ``image2``. Matches without coordinates (None) are skipped.
    The marked pictures are written to the working directory.
    """
    points_list = topk_ratio(data_set, data_test, optimal_pair, 10, algo=algorithm)
    radius = 4

    def circle_box(point):
        # point is (y, x); ellipse() expects (x0, y0, x1, y1).
        return (point[1] - radius, point[0] - radius, point[1] + radius, point[0] + radius)

    # The context managers close the image files when done.
    with Image.open(image1) as img1, Image.open(image2) as img2:
        draw1 = ImageDraw.Draw(img1)
        draw2 = ImageDraw.Draw(img2)
        for set_point, test_point in points_list:
            # topk_ratio yields None as data-set point when the ratio test
            # rejected the match; there is nothing to draw then.
            if set_point is None:
                continue
            color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
            draw1.ellipse(circle_box(set_point), fill=color)
            draw2.ellipse(circle_box(test_point), fill=color)
        # NOTE(review): the output names are chosen by the size of the
        # data-set (more than 4000 rows -> hashmal) - confirm this holds for the data.
        if len(data_set) > 4000:
            img1.save("marked_hashmal1.jpg")
            img2.save("marked_hashmal2.jpg")
        else:
            img1.save("marked_hananya1.jpg")
            img2.save("marked_hananya2.jpg")
        
    



#Plot that represent the run-time per parameter
#IMPORTANT-in order to use the plot methods We need to run first the generate_ln method which create the list we use here.
def runtime_plot():
    """Bar plot of the run-time of KNN and of every evaluated (L, N) pair.

    Reads the globals ``knn_tuple`` and ``error_ln_time_list``.
    """
    # First bar is the KNN baseline, followed by one bar per (L, N) pair.
    labels = [knn_tuple[0]] + [entry[0] for entry in error_ln_time_list]
    seconds = [knn_tuple[1]] + [entry[2] for entry in error_ln_time_list]
    positions = range(len(labels))

    plt.figure(figsize=(8, 6))
    plt.bar(positions, seconds, width=0.6)
    plt.xticks(positions, labels)
    plt.xlabel('KNN&ANN Algorithms', fontsize=10)
    plt.ylabel('Run-Time(Seconds)', fontsize=10)
    plt.title('Comparation of run-time between KNN and ANN with different Hyper-Parameters (L,leaf size/num cuts - according to the chosen algorithm) ', fontsize=10)

    plt.show()


#Plot that represent the error rate per parameter
#IMPORTANT-in order to use the plot methods We need to run first the generate_ln method which create the list we use here.
def accuracy_plot():
    """Bar plot of the error of every evaluated (L, N) pair.

    Reads the global ``error_ln_time_list``.
    """
    labels = [entry[0] for entry in error_ln_time_list]
    errors = [entry[1] for entry in error_ln_time_list]
    positions = range(len(labels))

    plt.figure(figsize=(8, 6))
    plt.bar(positions, errors, width=0.6)
    plt.xticks(positions, labels)
    plt.xlabel('ANN Algorithms', fontsize=10)
    plt.ylabel('Error rate', fontsize=10)
    plt.title('Comparation of error rates between ANN with different Hyper-Parameters (L,leaf size/num cuts - according to the chosen algorithm) ', fontsize=10)

    plt.show()
    

#Leaf sizes tried by grid_search for the kd_tree NearestNeighbors model of sklearn
leaf_size_list = [1,5,35,65, 95,125]

#By given query point and a ANN algorithm from sklearn library the method calculate the distance from the nearest point
def get_nearest_point(new_point,nbrs):
    """Return the distances reported by ``nbrs.kneighbors`` for ``new_point``.

    The point is reshaped to a single row before it is passed to the model;
    the indices returned by the model are discarded.
    """
    single_row = np.array(new_point).reshape(1, -1)
    distances, _ = nbrs.kneighbors(single_row)
    return distances

#Calculate the error for the sklearn model
def error_cal(nbrs,data_test):
    """Return the mean relative distance error of the model ``nbrs`` over ``data_test``.

    For every test row the nearest distance reported by ``nbrs`` is divided
    by the exact distance kept in the global ``knnlist`` (same row order),
    and 1 is subtracted; the mean of these values is returned.
    """
    error = 0.0
    for i in range(len(data_test)):
        # knnlist[i][1] is a one-element array; .item() extracts the scalar
        # (float() on an array with ndim > 0 is deprecated in NumPy).
        current_knn_dis = np.asarray(knnlist[i][1]).item()
        features = data_test.iloc[i, 4:132].values.reshape(1, -1)
        current_nbrs = get_nearest_point(features, nbrs)[0][0]
        error += float(current_nbrs / current_knn_dis - 1)
    return error / len(data_test)

#Examine the error and run time of the ANN algorithm of sklearn with different parameters and return the one with the least error.
#If the errors equal the method return the one with runs faster
def grid_search(data_set,data_test):
    """Try every leaf size of ``leaf_size_list`` with sklearn's kd_tree model.

    Returns the tuple (error, run-time, leaf size) with the smallest error;
    between equal errors the one with the smaller run-time wins. An empty
    tuple is returned when ``leaf_size_list`` is empty.
    """
    features = data_set.iloc[:, 4:132]
    best = ()
    for leaf_size in leaf_size_list:
        model = NearestNeighbors(n_neighbors=1,leaf_size=leaf_size, algorithm='kd_tree',metric='minkowski',p=2, metric_params=None, n_jobs=None)
        model.fit(features.values)

        # Only the queries are timed, not the fitting.
        began = time.time()
        error = error_cal(model, data_test)
        elapsed = time.time() - began

        candidate = (error, elapsed, leaf_size)
        is_better = (
            not best
            or candidate[0] < best[0]
            or (candidate[0] == best[0] and candidate[1] < best[1])
        )
        if is_better:
            best = candidate
    return best



#Comparation of error rate between sklearn ANN and My ANN with the best hyper-parameter(L,Leaf size)
def sklear_accuracy_comp(data_set,data_test,optimal_pair):
    """Bar plot comparing the error of sklearn's best model with the own ANN.

    optimal_pair -- ((L, N), error, run-time) of the own ANN
    """
    # Run the grid search once: a second run is timed separately and may pick
    # another leaf size, so the label and the bar could then disagree.
    sklearn_best = grid_search(data_set, data_test)
    x_list = ['sklearn with \n leaf size: ' + str(sklearn_best[2]), optimal_pair[0]]
    y_list = [sklearn_best[0], optimal_pair[1]]

    plt.figure(figsize=(5, 5))
    plt.bar(range(len(x_list)), y_list, width=0.3)
    plt.xticks(range(len(x_list)), x_list)
    plt.xlabel('Sklearn and ANN algorithms', fontsize=10)
    plt.ylabel('Error rate', fontsize=10)
    plt.title('Comparation of error rate between sklearn ANN and My ANN with the best hyper-parameter(L,Leaf size) ', fontsize=10)

    plt.show()
#Comparation of run time between sklearn ANN and My ANN with the best hyper-parameter(L,Leaf size)

def sklearn_runtime_comp(data_set,data_test,optimal_pair):
    """Bar plot comparing the run-time of sklearn's best model with the own ANN.

    optimal_pair -- ((L, N), error, run-time) of the own ANN
    """
    # Run the grid search once: a second run is timed separately and may pick
    # another leaf size, so the label and the bar could then disagree.
    sklearn_best = grid_search(data_set, data_test)
    x_list = ['sklearn with \n leaf size: ' + str(sklearn_best[2]), optimal_pair[0]]
    y_list = [sklearn_best[1], optimal_pair[2]]

    plt.figure(figsize=(5, 5))
    plt.bar(range(len(x_list)), y_list, width=0.3)
    plt.xticks(range(len(x_list)), x_list)
    plt.xlabel('Sklearn and ANN algorithms', fontsize=10)
    plt.ylabel('Run time (seconds)', fontsize=10)
    plt.title('Comparation of run time between sklearn ANN and My ANN with the best hyper-parameter(L,Leaf size) ', fontsize=10)

    plt.show()
    
#Add a label column to the data-set and data-test which posses a string with the name of the picture they belong to
#Combine the 2 data-sets to one.(same for data-tests)
def combine(df1,df2):
    """Return df1 and df2 stacked, with a 'Label' column naming their picture.

    The rows of df1 get the label 'hananya' and the rows of df2 'hashmal'.
    The arguments are not modified; the original index labels are kept.
    """
    # Work on copies so that the frames of the caller do not get a new column.
    first = df1.copy()
    second = df2.copy()
    first['Label'] = 'hananya'
    second['Label'] = 'hashmal'
    return pd.concat([first, second])
    

#For each point in the combined data-test, the method associate a label according to the majority of the nearest points in the data-set
def classification(algo,L,N,k,data_set1,data_set2,data_test1,data_test2):
    """Return the rate of test points whose picture is predicted correctly.

    The two data-sets (and the two data-tests) are labelled and combined;
    every point of the combined data-test is classified by the majority
    label of its k nearest neighbours in the combined data-set.

    algo -- 'RKDT' (N is the leaf size) or 'LSH' (N is the number of cuts)
    Raises ValueError for any other algorithm.
    """
    if algo == 'RKDT':
        ann = ANN('RKDT', L=L, leaf_size=N)
    elif algo == 'LSH':
        ann = ANN('LSH', L=L, num_cuts=N)
    else:
        raise ValueError("algo must be 'RKDT' or 'LSH', got " + repr(algo))

    # After the concatenation the index labels of both frames repeat. The ANN
    # identifies a neighbour by its row label and reads the label column by
    # position, so the rows are renumbered 0..n-1 to make the two agree.
    combined = combine(data_set1.copy(), data_set2.copy()).reset_index(drop=True)
    combined2 = combine(data_test1.copy(), data_test2.copy())
    ann.fit(combined.iloc[:, 0:132], combined.iloc[:, [0, 1, 132]])

    correct = 0
    for i in range(len(combined2)):
        # Column 132 is the 'Label' column added by combine().
        if ann.predict(combined2.iloc[i, 0:132], k) == combined2.iloc[i, 132]:
            correct += 1

    return correct / len(combined2)

#num_cuts=the number of dimensions we use to split the data(LSH)
#cuts_dict=Dictionary whose key is a hash-vector and whose value holds the points which have this hash-vector
#list_features=contains tuples of (split dimension,random value)
class LSH:
    """One hash table: points are grouped by a binary hash-vector.

    Each of the ``num_cuts`` characters of the hash-vector is decided by one
    (split dimension, random value) pair kept in ``list_features``.
    """

    def __init__(self,num_cuts,data):
        self.num_cuts = num_cuts
        self.cuts_dict = {}
        self.data = data
        self.list_features = []
        self.build_LSH()

    def build_LSH(self):
        """Draw the random cuts and fill ``cuts_dict`` with the points of ``data``.

        Note that a 'Hash Vector' column is written into ``data`` itself.
        """
        # Placeholder column; the real hash-vectors are written to the array copy.
        self.data['Hash Vector'] = '0' * self.num_cuts
        arr = self.data.to_numpy()
        _, m = arr.shape
        hash_col = m - 1

        for _ in range(self.num_cuts):
            # A random dimension in [4, m-3] and a random value between 0 and
            # the biggest value of this dimension.
            split_dim = np.random.randint(4, m - 2)
            max_value = np.max(arr[:, split_dim])
            random_value = np.random.uniform(low=0.0, high=max_value)
            self.list_features.append((split_dim, random_value))

        # Hash every point and add it to the list of its hash-vector.
        for point in arr:
            key = self.point_hash(point)
            point[hash_col] = key
            self.cuts_dict.setdefault(key, []).append(point)

        # Turn every list of points into one float64 matrix.
        for key in list(self.cuts_dict):
            stacked = np.stack(self.cuts_dict[key])
            self.cuts_dict[key] = stacked.astype(np.float64)

    def point_hash(self,point):
        """Return the hash-vector of ``point`` as a string of '0' and '1'.

        A character is '0' when the value of the point in the split dimension
        is smaller than the random value of the cut, otherwise '1'.
        """
        return ''.join(
            '0' if point[split_dim] < random_value else '1'
            for split_dim, random_value in self.list_features
        )

    def find_table(self,query):
        """Return the points sharing the hash-vector of ``query``, or None if there are none."""
        return self.cuts_dict.get(self.point_hash(query))
    
print('#----------------------For hananyas data-set------------------------#')

#Part B.3:
#knnlist is a global list filled by knn_time: for each point in the data-test it keeps
#the KNN result (target rows, distances) of its nearest point in the data-set
knnlist=[]
#knn_time returns ("knn", run-time of the KNN searches) and fills knnlist as described above

knn_tuple=knn_time(hananya1,hananya2)
print('Created a KNN list complited in '+str(knn_tuple[1])+' seconds')

print('#----ALL OF THIS IS FOR RKDT----------#')
#----ALL OF THIS IS FOR RKDT----------#
#Part C.4:
#Global list filled by generate_ln with ((L,N), error, run-time) for every tried pair
error_ln_time_list=[]
#Tries a grid of (L,N) values for RKDT and returns the entry with the least error
optimal_pair=generate_ln('RKDT',hananya1,hananya2)
print('The error and run-time of every pair of (L,N): ')
print(error_ln_time_list)
print('The pair with the optimal error: '+str(optimal_pair))


#Part C.5:
#optimal_pair[0] is the (L,N) pair itself
mark_picture('RKDT',hananya1,hananya2,optimal_pair[0],'Hananya1.JPG','Hananya2.JPG')
print('marked images created succesfully')
#Part C.6:
#Both plots read error_ln_time_list (and knn_tuple), so they must run after generate_ln
runtime_plot()
accuracy_plot()

#Part D:
#Comparison with the kd_tree model of sklearn

sklearn_runtime_comp(hananya1,hananya2,optimal_pair)
sklear_accuracy_comp(hananya1,hananya2,optimal_pair)

#Part E:
#Classification between the two pictures with RKDT (L=5, leaf size=150, k=115), timed
start=time.time()
prediction_rate=classification('RKDT',5,150,115,hananya1,hashmal1,hananya2,hashmal2)
print('Classifiaction prediction rate:'+str(prediction_rate))
end=time.time()
print(end-start)




#----ALL OF THIS IS FOR LSH----------#
print('#----ALL OF THIS IS FOR LSH----------#')
#Part C.4:
#Global list filled by generate_ln with ((L,N), error, run-time) for every tried pair;
#it is reset here so that the RKDT results are not mixed in
error_ln_time_list=[]
#Tries a grid of (L,N) values for LSH and returns the entry with the least error
optimal_pair=generate_ln('LSH',hananya1,hananya2)
print('The error and run-time of every pair of (L,N): ')
print(error_ln_time_list)
print('The pair with the optimal error: '+str(optimal_pair))


#Part C.5:
#optimal_pair[0] is the (L,N) pair itself
mark_picture('LSH',hananya1,hananya2,optimal_pair[0],'Hananya1.JPG','Hananya2.JPG')
print('marked images created succesfully')
#Part C.6:
#Both plots read error_ln_time_list (and knn_tuple), so they must run after generate_ln
runtime_plot()
accuracy_plot()

#Part D:
#Comparison with the kd_tree model of sklearn

sklearn_runtime_comp(hananya1,hananya2,optimal_pair)
sklear_accuracy_comp(hananya1,hananya2,optimal_pair)

#Part E:
#Classification between the two pictures with LSH (L=5, number of cuts=20, k=70), timed
start=time.time()
prediction_rate=classification('LSH',5,20,70,hananya1,hashmal1,hananya2,hashmal2)

print('Classifiaction prediction rate:'+str(prediction_rate))
end=time.time()
print(end-start)
    
    
print('#----------------------For Hashmals data-set------------------------#')
#Part B.3:
#knnlist is reset and filled again by knn_time: for each point in the data-test it keeps
#the KNN result (target rows, distances) of its nearest point in the data-set
knnlist=[]
#knn_time returns ("knn", run-time of the KNN searches) and fills knnlist as described above
knn_tuple=knn_time(hashmal1,hashmal2)
print('Created a KNN list complited in '+str(knn_tuple[1])+' seconds')

print('#----ALL OF THIS IS FOR RKDT----------#')
#----ALL OF THIS IS FOR RKDT----------#
#Part C.4:
#Global list filled by generate_ln with ((L,N), error, run-time) for every tried pair
error_ln_time_list=[]
#Tries a grid of (L,N) values for RKDT and returns the entry with the least error
optimal_pair=generate_ln('RKDT',hashmal1,hashmal2)
print('The error and run-time of every pair of (L,N): ')
print(error_ln_time_list)
print('The pair with the optimal error: '+str(optimal_pair))


#Part C.5:
#optimal_pair[0] is the (L,N) pair itself
mark_picture('RKDT',hashmal1,hashmal2,optimal_pair[0],'Hashmal1.jpg','Hashmal2.jpg')
print('marked images created succesfully')
#Part C.6:
#Both plots read error_ln_time_list (and knn_tuple), so they must run after generate_ln
runtime_plot()
accuracy_plot()

#Part D:
#Comparison with the kd_tree model of sklearn

sklearn_runtime_comp(hashmal1,hashmal2,optimal_pair)
sklear_accuracy_comp(hashmal1,hashmal2,optimal_pair)






#----ALL OF THIS IS FOR LSH----------#
print('#----ALL OF THIS IS FOR LSH----------#')
#Part C.4:
#Global list filled by generate_ln with ((L,N), error, run-time) for every tried pair;
#it is reset here so that the RKDT results are not mixed in
error_ln_time_list=[]
#Tries a grid of (L,N) values for LSH and returns the entry with the least error
optimal_pair=generate_ln('LSH',hashmal1,hashmal2)
print('The error and run-time of every pair of (L,N): ')
print(error_ln_time_list)
print('The pair with the optimal error: '+str(optimal_pair))


#Part C.5:
#optimal_pair[0] is the (L,N) pair itself
mark_picture('LSH',hashmal1,hashmal2,optimal_pair[0],'Hashmal1.jpg','Hashmal2.jpg')
print('marked images created succesfully')
#Part C.6:
#Both plots read error_ln_time_list (and knn_tuple), so they must run after generate_ln
runtime_plot()
accuracy_plot()

#Part D:
#Comparison with the kd_tree model of sklearn

sklearn_runtime_comp(hashmal1,hashmal2,optimal_pair)
sklear_accuracy_comp(hashmal1,hashmal2,optimal_pair)

 

    
