In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math

In [3]:
from sklearn.model_selection import train_test_split

# Fixed seed so the train/test partition is identical on every
# Restart & Run All; change it to draw a different split.
RANDOM_STATE = 42

# Load the data and discard the 'Unnamed: 0' column. A chained drop is used
# instead of inplace=True so the cell builds `df` in a single expression.
df = pd.read_csv('Salary_dataset.csv').drop(columns='Unnamed: 0')

# Single predictor column and the target column.
X = df['YearsExperience']
y = df['Salary']

# train_test_split keeps its default test size; only the seed is pinned.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=RANDOM_STATE)

In [None]:
class GaussianNB():
    """Gaussian naive Bayes classifier.

    Every feature is modelled, per class, as an independent normal
    distribution whose mean and standard deviation are estimated in ``fit``.
    ``predict`` returns, for each sample, the class with the largest
    log-posterior score.

    Attributes set by ``fit``:
        classes: sorted unique labels found in ``y_train``.
        class_probs: prior of each class (fraction of training rows).
        feature_probs: ``feature_probs[i][j]`` is the ``(mean, std)`` of
            feature ``j`` over the training rows of class ``i``.
    """

    # Stand-in for a zero standard deviation (a feature that is constant
    # within a class), so the density stays defined.
    _MIN_STD = 1e-9

    @staticmethod
    def _as_2d(X):
        """Return ``X`` as a float array of shape (n_samples, n_features).

        Accepts lists, NumPy arrays, pandas Series and DataFrames. A 1-D
        input is interpreted as many samples of a single feature.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim == 1:
            X = X.reshape(-1, 1)
        return X

    def fit(self, X_train, y_train):
        """Estimate class priors and per-class feature means / stds.

        Args:
            X_train: training features, 1-D (single feature) or 2-D
                (n_samples, n_features); array-like or pandas object.
            y_train: class label of each training row.
        """
        X_train = self._as_2d(X_train)
        # Plain array so the boolean mask below indexes rows positionally,
        # independent of any pandas index.
        y_train = np.asarray(y_train)

        self.classes = np.unique(y_train)
        self.class_probs = np.zeros(len(self.classes))
        self.feature_probs = []

        for i, cls in enumerate(self.classes):
            X_cls = X_train[y_train == cls]
            self.class_probs[i] = len(X_cls) / len(X_train)
            self.feature_probs.append(
                [(np.mean(X_cls[:, j]), np.std(X_cls[:, j]))
                 for j in range(X_train.shape[1])]
            )

    def _gaussian_prob(self, X_train, mean, std):
        """Normal density of ``X_train`` for the given ``mean`` and ``std``."""
        return np.exp(-((X_train - mean)**2) / (2 * std**2)) / (np.sqrt(2 * np.pi) * std)

    def _gaussian_log_prob(self, x, mean, std):
        """Log of the normal density, computed without exponentiating.

        Taking ``np.log`` of ``_gaussian_prob`` yields ``-inf`` once the
        density underflows to 0; this closed form stays finite.
        """
        return -0.5 * np.log(2 * np.pi) - np.log(std) - ((x - mean) ** 2) / (2 * std ** 2)

    def predict(self, X_test):
        """Predict a class label for every row of ``X_test``.

        Args:
            X_test: samples to classify, in the same layout accepted by
                ``fit`` (1-D means one feature per sample).

        Returns:
            list with one predicted label per sample.
        """
        X_test = self._as_2d(X_test)
        preds = []
        for x in X_test:
            class_scores = []
            for i in range(len(self.classes)):
                log_likelihood = 0.0
                for j, feature in enumerate(x):
                    mean, std = self.feature_probs[i][j]
                    if std == 0:  # feature was constant within this class
                        std = self._MIN_STD
                    log_likelihood += self._gaussian_log_prob(feature, mean, std)
                # log-posterior up to a constant: log-likelihood + log-prior
                class_scores.append(log_likelihood + np.log(self.class_probs[i]))
            preds.append(self.classes[np.argmax(class_scores)])
        return preds

In [None]:
class MultinomialNB():
    """Multinomial naive Bayes classifier with additive smoothing.

    Features are treated as non-negative counts. For class ``c`` the
    smoothed probability of feature ``j`` is
    ``(N_cj + alpha) / (N_c + alpha * n_features)``, where ``N_cj`` is the
    summed count of feature ``j`` over the training rows of class ``c`` and
    ``N_c`` is the sum of ``N_cj`` over all features.

    Attributes set by ``fit``:
        classes: sorted unique labels found in ``y_train``.
        class_counts: number of training rows per class.
        feature_counts: ``feature_counts[i, j]`` is ``N_cj`` for class ``i``.
        class_probs: smoothed class priors.
    """

    def __init__(self, alpha=1.0):
        """Store the additive smoothing strength ``alpha``."""
        # With alpha == 0 a feature never seen in a class has probability 0,
        # whose log is -inf.
        self.alpha = alpha

    @staticmethod
    def _as_2d(X):
        """Return ``X`` as a float array of shape (n_samples, n_features).

        Accepts lists, NumPy arrays, pandas Series and DataFrames. A 1-D
        input is interpreted as many samples of a single feature.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim == 1:
            X = X.reshape(-1, 1)
        return X

    def fit(self, X_train, y_train):
        """Accumulate per-class row counts, feature counts and priors.

        Args:
            X_train: training count features, 1-D (single feature) or 2-D
                (n_samples, n_features); array-like or pandas object.
            y_train: class label of each training row.
        """
        X_train = self._as_2d(X_train)
        # Plain array so the boolean mask below indexes rows positionally,
        # independent of any pandas index.
        y_train = np.asarray(y_train)

        self.classes = np.unique(y_train)
        n_classes = len(self.classes)
        self.class_counts = np.zeros(n_classes)
        self.feature_counts = np.zeros((n_classes, X_train.shape[1]))

        for i, cls in enumerate(self.classes):
            X_cls = X_train[y_train == cls]
            self.class_counts[i] = len(X_cls)
            self.feature_counts[i] = np.sum(X_cls, axis=0)

        # Smoothed class priors.
        self.class_probs = (self.class_counts + self.alpha) / (len(y_train) + self.alpha * n_classes)

    def _multinomial_prob(self, X_train, class_index):
        """Log-likelihood (up to a constant) of one sample under a class.

        Args:
            X_train: 1-D count vector of a single sample.
            class_index: position of the class in ``self.classes``.

        Returns:
            ``sum_j x_j * log(theta_cj)`` with the smoothed ``theta_cj``.
        """
        counts = self.feature_counts[class_index]
        # The number of features comes from the fitted counts, not from the
        # sample: a single row is 1-D and has no second axis.
        n_features = counts.shape[0]
        # Normalise by the total feature count of the class so the smoothed
        # probabilities sum to 1 over the features.
        log_theta = np.log((counts + self.alpha) / (counts.sum() + self.alpha * n_features))
        return np.sum(log_theta * X_train)

    def predict(self, X_test):
        """Predict a class label for every row of ``X_test``.

        Args:
            X_test: samples to classify, in the same layout accepted by
                ``fit``.

        Returns:
            list with one predicted label per sample.
        """
        X_test = self._as_2d(X_test)
        preds = []
        for x in X_test:
            class_scores = []
            for i in range(len(self.classes)):
                log_prior = np.log(self.class_probs[i])
                log_likelihood = self._multinomial_prob(x, i)
                class_scores.append(log_prior + log_likelihood)
            preds.append(self.classes[np.argmax(class_scores)])
        return preds