In [10]:
import pandas as pd
import numpy as np
from scipy.stats import norm

In [11]:
# Toy "play tennis" dataset: four categorical weather features plus a Yes/No label.
data = dict(
    Outlook=['Sunny', 'Sunny', 'Overcast', 'Rain', 'Rain', 'Rain', 'Overcast', 'Overcast', 'Sunny', 'Rain'],
    Temperature=['Hot', 'Hot', 'Hot', 'Mild', 'Cool', 'Cool', 'Cool', 'Mild', 'Cool', 'Mild'],
    Humidity=['High', 'High', 'High', 'High', 'Normal', 'Normal', 'Normal', 'High', 'Normal', 'Normal'],
    Wind=['Weak', 'Strong', 'Weak', 'Weak', 'Weak', 'Strong', 'Strong', 'Weak', 'Weak', 'Weak'],
    PlayTennis=['No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'No', 'Yes', 'Yes'],
)
df = pd.DataFrame(data)

# Feature matrix (all four weather columns) and the target column.
X = df.loc[:, ['Outlook', 'Temperature', 'Humidity', 'Wind']]
y = df.loc[:, 'PlayTennis']

Unnamed: 0,Outlook,Temperature,Humidity,Wind,PlayTennis
0,Sunny,Hot,High,Weak,No
1,Sunny,Hot,High,Strong,No
2,Overcast,Hot,High,Weak,Yes
3,Rain,Mild,High,Weak,Yes
4,Rain,Cool,Normal,Weak,Yes
5,Rain,Cool,Normal,Strong,No
6,Overcast,Cool,Normal,Strong,Yes
7,Overcast,Mild,High,Weak,No
8,Sunny,Cool,Normal,Weak,Yes
9,Rain,Mild,Normal,Weak,Yes


In [12]:
# Select the four weather feature columns and the PlayTennis target from `df`.
X = df[['Outlook', 'Temperature', 'Humidity', 'Wind']]
y = df['PlayTennis']

In [13]:
class NaiveClassifier:
    """Naive Bayes scorer for a mix of categorical and continuous features.

    Attributes:
        prior: ``{class_label: relative frequency of the label in y}``.
        feature_types: ``{feature: 'categorical' | 'continuous'}``.
        conditional: per-feature likelihood tables.
            * categorical feature: ``conditional[feature][value][class_label]``
              is the dict ``{value: P(value | class_label)}`` covering every
              value observed for that class (the same distribution is stored
              under each ``value`` key).
            * continuous feature: ``conditional[feature][class_label]`` is
              ``{'mean': ..., 'std': ...}`` of the feature within the class.
        unseen_prob: likelihood used for a categorical value that was not
            observed for a class (or not observed in training at all).
    """

    # Lower bound for the per-class standard deviation so norm.pdf never
    # receives a zero (or NaN) scale.
    _MIN_STD = 1e-6

    def __init__(self, unseen_prob: float = 0.1):
        """Create an unfitted classifier.

        Args:
            unseen_prob: fallback likelihood for categorical values that have
                no estimate for a class. Defaults to 0.1.
        """
        self.prior = {}
        self.conditional = {}
        self.feature_types = {}
        self.unseen_prob = unseen_prob

    def fit(self, X: pd.DataFrame, y: pd.Series):
        """Estimate class priors and per-feature likelihoods.

        Args:
            X: one row per sample, one column per feature. Numeric
                (non-boolean) columns are modelled as Gaussians per class;
                every other column is treated as categorical.
            y: class label per sample, aligned with ``X`` by index.
        """
        self.prior = y.value_counts(normalize=True).to_dict()
        # Start from a clean slate so refitting on different columns does not
        # keep tables from a previous fit.
        self.conditional = {}
        self.feature_types = {}
        class_labels = y.unique()

        for feature in X.columns:
            column = X[feature]
            # Any numeric dtype (int32, uint8, float16, ...) counts as
            # continuous; booleans stay categorical.
            is_continuous = (
                pd.api.types.is_numeric_dtype(column)
                and not pd.api.types.is_bool_dtype(column)
            )
            self.feature_types[feature] = 'continuous' if is_continuous else 'categorical'
            self.conditional[feature] = {}

            if not is_continuous:
                # The value distribution depends only on the class, so compute
                # it once per class instead of once per (value, class) pair.
                per_class = {
                    class_label: column[y == class_label].value_counts(normalize=True).to_dict()
                    for class_label in class_labels
                }
                for item in column.unique():
                    # Each entry gets its own copy so tables are not aliased.
                    self.conditional[feature][item] = {
                        class_label: dict(per_class[class_label])
                        for class_label in class_labels
                    }
            else:
                for class_label in class_labels:
                    class_data = column[y == class_label]
                    std = class_data.std()
                    # std is NaN for a single-sample class and 0 for constant
                    # data; both would break norm.pdf, so clamp them.
                    if pd.isna(std) or std < self._MIN_STD:
                        std = self._MIN_STD
                    self.conditional[feature][class_label] = {
                        'mean': class_data.mean(),
                        'std': std
                    }

    def predict(self, x):
        """Score every class for one sample.

        Args:
            x: mapping ``{feature: value}``; every feature must have been
                present in the data passed to ``fit``.

        Returns:
            ``{class_label: prior * product of feature likelihoods}``. The
            scores are not normalized to sum to 1.

        Raises:
            KeyError: if ``x`` contains a feature that was not fitted.
        """
        probabilities = {}
        for class_label, prior in self.prior.items():
            prob = prior
            for feature, value in x.items():
                if self.feature_types[feature] == 'categorical':
                    # A value never seen in training has no table at all; fall
                    # back to unseen_prob instead of raising KeyError.
                    class_tables = self.conditional[feature].get(value)
                    if class_tables is None:
                        prob *= self.unseen_prob
                    else:
                        prob *= class_tables[class_label].get(value, self.unseen_prob)
                else:  # continuous feature
                    stats = self.conditional[feature][class_label]
                    prob *= norm.pdf(value, stats['mean'], stats['std'])

            probabilities[class_label] = prob

        return probabilities

                

In [14]:
# Distinct Outlook values present in the feature matrix.
X['Outlook'].unique()

array(['Sunny', 'Overcast', 'Rain'], dtype=object)

In [15]:
# Select the four weather feature columns and the PlayTennis target from `df`.
X = df[['Outlook', 'Temperature', 'Humidity', 'Wind']]
y = df['PlayTennis']

In [16]:
# Fit the classifier on the current X / y (priors and per-feature tables are stored on `clf`).
clf = NaiveClassifier()
clf.fit(X, y)


In [17]:
# A single query sample, given as a feature -> value mapping.
X_test = {
    'Outlook': 'Sunny',
    'Temperature': 'Cool',
    'Humidity': 'High',
    'Wind': 'Strong'
}
# Returns one score per class (prior times feature likelihoods); scores are not normalized.
clf.predict(X_test)

{'Yes': 0.002777777777777777, 'No': 0.018750000000000003}

# Multi-class

In [18]:
# Multi-class toy dataset: four categorical features and a four-valued label
# ("On Time", "Late", "Very Late", "Cancelled"); 20 rows in total.
weather_class_features = dict(
    Day=[
        "Weekday", "Weekday", "Weekday", "Holiday", "Saturday",
        "Weekday", "Holiday", "Sunday", "Weekday", "Weekday",
        "Saturday", "Weekday", "Weekday", "Weekday", "Weekday",
        "Saturday", "Weekday", "Holiday", "Weekday", "Weekday",
    ],
    Season=[
        "Spring", "Winter", "Winter", "Winter", "Summer",
        "Autumn", "Summer", "Summer", "Winter", "Summer",
        "Spring", "Summer", "Winter", "Summer", "Winter",
        "Autumn", "Autumn", "Spring", "Spring", "Spring",
    ],
    Fog=[
        "None", "None", "None", "High", "Normal",
        "Normal", "High", "Normal", "High", "None",
        "High", "High", "Normal", "High", "Normal",
        "High", "None", "Normal", "Normal", "Normal",
    ],
    Rain=[
        "None", "Slight", "None", "Slight", "None",
        "None", "Slight", "None", "Heavy", "Slight",
        "Heavy", "Slight", "None", "None", "Heavy",
        "Slight", "Heavy", "Slight", "None", "Heavy",
    ],
    Class=[
        "On Time", "On Time", "On Time", "Late", "On Time",
        "Very Late", "On Time", "On Time", "Very Late", "On Time",
        "Cancelled", "On Time", "Late", "On Time", "Very Late",
        "On Time", "On Time", "On Time", "On Time", "On Time",
    ],
)
df_2 = pd.DataFrame(weather_class_features)

In [19]:
# Rebind X / y to the multi-class dataset: four feature columns and the Class label.
X = df_2[['Day', 'Season', 'Fog', 'Rain']]
y = df_2['Class']

In [20]:
# Separate classifier instance for the multi-class data, fitted on the current X / y.
clf_2 = NaiveClassifier()
clf_2.fit(X, y)


In [21]:
# Class priors: relative frequency of each label in the training target.
clf_2.prior

{'On Time': 0.7, 'Very Late': 0.15, 'Late': 0.1, 'Cancelled': 0.05}

In [22]:
# Query sample for the multi-class model, as a feature -> value mapping.
X_test = {
    'Day': 'Weekday', 
    'Season': 'Winter',
    'Fog': 'High', 
    'Rain': 'Heavy'

}

In [23]:
# One score per class for the query sample; the scores are not normalized to sum to 1.
clf_2.predict(X_test)

{'On Time': 0.0026239067055393583,
 'Very Late': 0.022222222222222216,
 'Late': 0.0025000000000000005,
 'Cancelled': 0.0005000000000000001}

# Iris

In [24]:
# Small continuous-feature dataset: one measured length per sample,
# six samples of class 0 followed by six samples of class 1.
data = {
    "Length": [
        1.4, 1.0, 1.3, 1.9, 2.0, 1.8,
        3.0, 3.8, 4.1, 3.9, 4.2, 3.4,
    ],
    "Class": [0] * 6 + [1] * 6,
}
df_iris = pd.DataFrame(data)

In [25]:
# Rebind X / y to the continuous dataset. The double brackets keep X a
# one-column DataFrame, which is what NaiveClassifier.fit iterates over.
X = df_iris[['Length']]
y = df_iris['Class']
X

Unnamed: 0,Length
0,1.4
1,1.0
2,1.3
3,1.9
4,2.0
5,1.8
6,3.0
7,3.8
8,4.1
9,3.9


In [26]:
# Fit a classifier on the single numeric Length feature.
clf_iris = NaiveClassifier()
clf_iris.fit(X, y)

In [27]:
# Fitted mean and standard deviation of Length within class 1.
clf_iris.conditional['Length'][1]

{'mean': np.float64(3.733333333333333), 'std': np.float64(0.4546060565661952)}

In [28]:
# Query sample for the continuous model: a single Length measurement.
X_test = {
    "Length" : 3.4
}

In [29]:
# Per-class score: prior times the normal density of the value under that class's mean/std.
clf_iris.predict(X_test)

{0: np.float64(9.68913769105268e-06), 1: np.float64(0.3353505510396091)}

In [33]:
# Rebuild the "play tennis" dataset (four categorical features plus a Yes/No label)
# so that data / df / X / y point at it again.
data = dict(
    Outlook=['Sunny', 'Sunny', 'Overcast', 'Rain', 'Rain', 'Rain', 'Overcast', 'Overcast', 'Sunny', 'Rain'],
    Temperature=['Hot', 'Hot', 'Hot', 'Mild', 'Cool', 'Cool', 'Cool', 'Mild', 'Cool', 'Mild'],
    Humidity=['High', 'High', 'High', 'High', 'Normal', 'Normal', 'Normal', 'High', 'Normal', 'Normal'],
    Wind=['Weak', 'Strong', 'Weak', 'Weak', 'Weak', 'Strong', 'Strong', 'Weak', 'Weak', 'Weak'],
    PlayTennis=['No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'No', 'Yes', 'Yes'],
)
df = pd.DataFrame(data)

# Feature matrix (all four weather columns) and the target column.
X = df.loc[:, ['Outlook', 'Temperature', 'Humidity', 'Wind']]
y = df.loc[:, 'PlayTennis']

In [34]:
# Fresh classifier fitted on the rebuilt play-tennis X / y.
clstmp = NaiveClassifier()
clstmp.fit(X, y)

In [35]:
# Query sample as a feature -> value mapping.
X_test = {
    'Outlook': 'Sunny', 
    'Temperature': 'Cool',
    "Humidity": 'High', 
    'Wind': 'Strong', 
}
# Returns one score per class; scores are not normalized.
clstmp.predict(X_test)

{'Yes': 0.002777777777777777, 'No': 0.018750000000000003}

In [42]:
# Inspect the stored table for Outlook='Sunny' and class 'No'. The entry holds the
# whole Outlook distribution for class 'No', not a single probability.
clstmp.conditional['Outlook']['Sunny']['No']

{'Sunny': 0.5, 'Rain': 0.25, 'Overcast': 0.25}