In [12]:
import numpy as np

In [13]:
def fit(X_train,Y_train):                       # creating a dictionary 
    result = {}
    class_values = set(Y_train)
    for current_class in class_values:
        result[current_class]={}
        result["total_data"] = len(Y_train)
        current_class_rows = (Y_train == current_class)
        X_train_current = X_train[current_class_rows]
        Y_train_current = Y_train[current_class_rows]
        num_features = X_train.shape[1]
        result[current_class]["total_count"] = len(Y_train_current)
        for j in range (1,num_features+1):                                           # j = current_feature
            result[current_class][j]={}
            feature_values = set(X_train[:,j-1])
            for current_feature_value in feature_values:
                result[current_class][j][current_feature_value] = (X_train_current[:,j-1] == current_feature_value).sum() 
        
        return result
            
        

In [14]:
def probability(dictionary,x,current_class):       # finding probability for each class 
    output = np.log(dictionary[current_class]["total_count"]) - np.log(dictionary["total_data"])
    num_features = len(dictionary[current_class].keys())-1
    for j in range(1,num_features+1):
        xj = x[j-1]
        count_current_class_with_value_xj = dictionary[current_class][j][xj]+1    # laplace correction
        count_current_class = dictionary[current_class]["total_count"] + len(dictionary[current_class][j].keys())    #laplace correction
        current_xj_probability =  np.log(count_current_class_with_value_xj) - np.log(count_current_class)
        output = output + current_xj_probability
    return output
        
        

In [15]:
def predictSinglePoint(dictionary,x):                            # iterating through the classes 
    classes = dictionary.keys()
    best_p = -1000
    best_class = -1
    first_run = True
    for current_class in classes:
        if(current_class == "total_data"):
            continue
        current_p = probability(dictionary,x,current_class)
        if(first_run or current_p>best_p):
            best_p = current_p
            best_class = current_class
        first_run = False
    return best_class 
            
            
       
   
    

In [16]:
def predict(dictionary,X_test):                                  # creating a Y_pred array and appending the result to it by iterating through X_test 
    Y_pred = []
    for x in X_test:
        x_class = predictSinglePoint(dictionary,x)
        Y_pred.append(x_class)
    return Y_pred
   
        
        
    
    
     
    

In [17]:
def makeLabelled(column):
    second_limit = column.mean()
    print(second_limit)
    first_limit = 0.5*second_limit
    third_limit = 1.5*second_limit
    for j in range(0,len(column)):
        
        if(column[j]<first_limit):
            column[j] = 0
            
        elif(column[j]<second_limit):
            column[j] = 1
            
        elif(column[j]<third_limit):
            column[j] = 2
            
        else:
            column[j] = 3

    return column
    
    
        
   

In [18]:
from sklearn import datasets
iris = datasets.load_iris()
X = iris.data 
Y = iris.target


Y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [19]:
for i in range(0,X.shape[-1]):
    X[:,i] = makeLabelled(X[:,i])
    
    


5.843333333333334
3.0573333333333337
3.7580000000000005
1.1993333333333336


In [20]:
from sklearn import model_selection
X_train,X_test,Y_train,Y_test = model_selection.train_test_split(X,Y,test_size = 0.25,random_state = 0)


In [21]:
dictionary = fit(X_train,Y_train)


In [22]:
dictionary

{0: {'total_count': 37,
  1: {1.0: 37, 2.0: 0},
  2: {1.0: 6, 2.0: 31},
  3: {0.0: 37, 1.0: 0, 2.0: 0, 3.0: 0},
  4: {0.0: 36, 1.0: 1, 2.0: 0, 3.0: 0}},
 'total_data': 112}

In [23]:
Y_pred = predict(dictionary,X_test)

In [None]:
from sklearn.metrics import classification_report,confusion_matrix
print(classification_report(Y_test,Y_pred,zero_division='warn'))
print(confusion_matrix(Y_test,Y_pred))