In [89]:
import pandas as pd
from ucimlrepo import fetch_ucirepo
from typing import List, Dict
import numpy as np

In [217]:
# Fetch dataset id 53 through ucimlrepo.
# NOTE(review): id 53 is presumably the UCI Iris dataset (variable names and the
# 'Iris-*' class labels in the notebook output suggest so) — confirm.
iris_dataset = fetch_ucirepo(id=53)
# Feature table of the fetched dataset.
X_iris = iris_dataset.data.features
# Only the 'class' column of the targets is used as the label.
y_iris = iris_dataset.data.targets['class']

In [144]:
# Toy dataset with two features and a binary label; every row is [x1, x2, class].
dataset = np.array([
    [3.393533211, 2.331273381, 0],
    [3.110073483, 1.781539638, 0],
    [1.343808831, 3.368360954, 0],
    [3.582294042, 4.67917911, 0],
    [2.280362439, 2.866990263, 0],
    [7.423436942, 4.696522875, 1],
    [5.745051997, 3.533989803, 1],
    [9.172168622, 2.511101045, 1],
    [7.792783481, 3.424088941, 1],
    [7.939820817, 0.791637231, 1],
])
dataset_df = pd.DataFrame(data=dataset, columns=['x1', 'x2', 'class'])
# The label is the last column; everything before it is a feature.
y_train = dataset_df['class']
X_train = dataset_df.drop(columns=['class'])

In [201]:
def get_parameters(X_train: pd.DataFrame, y_train: pd.Series) -> Dict[object, Dict[str, object]]:
    """Estimates the class prior and the per-feature mean and standard deviation of each class.

    Args:
        X_train (pd.DataFrame): Feature table, one column per feature.
        y_train (pd.Series): Class label of every row of ``X_train``.

    Returns:
        Dict[object, Dict[str, object]]: One entry per distinct label in ``y_train``.
        Each entry maps ``'apriori'`` to the fraction of samples with that label and
        every feature name to ``{'mean': ..., 'std': ...}`` computed over the rows of
        that class. ``std`` is whatever ``pd.Series.std`` returns (sample standard
        deviation with its default ``ddof``).
    """
    parameters = {}
    n_samples = len(y_train)
    for class_ in np.unique(y_train):
        # Build the class mask and the filtered rows once per class instead of
        # recomputing them for every feature statistic.
        class_mask = y_train == class_
        class_rows = X_train[class_mask]

        class_params = {'apriori': int(class_mask.sum())/n_samples}
        for feature in X_train.columns:
            # NOTE: a feature literally named 'apriori' would overwrite the prior entry.
            values = class_rows[feature]
            class_params[feature] = {'mean': values.mean(), 'std': values.std()}

        parameters[class_] = class_params

    return parameters

In [204]:
# Estimate the per-class prior and per-feature mean/std from X_train / y_train,
# then display the resulting nested dict.
parameters = get_parameters(X_train=X_train, y_train=y_train)
parameters

{0.0: {'apriori': 0.5,
  'x1': {'mean': 2.7420144012, 'std': 0.9265683289298018},
  'x2': {'mean': 3.0054686692, 'std': 1.1073295894898725}},
 1.0: {'apriori': 0.5,
  'x1': {'mean': 7.6146523718, 'std': 1.2344321550313704},
  'x2': {'mean': 2.9914679790000003, 'std': 1.4541931384601618}}}

In [205]:
def _gauss_likelihood(feature: pd.Series, mean: float, std: float) -> pd.Series:
    """Calculates the likelihood of a feature given a mean and a standard deviation.

    Args:
        feature (pd.Series): Feature to calculate the likelihood.
        mean (float): Mean of the values of the feature per class.
        std (float): Standard deviation of the values of the feature per class.

    Returns:
        pd.Series: Likelihood of the feature given the mean and the standard deviation.
    """
    exponent = np.exp(-1/2*((feature - mean)/std)**2)
    function = 1/(std*np.sqrt(2*np.pi))*exponent
    return function

In [211]:
def fit(X_train: pd.DataFrame, y_train: pd.Series) -> Dict[object, pd.Series]:
    """Scores every training sample under each class with a Gaussian naive Bayes model.

    The per-class priors, means and standard deviations are estimated with
    ``get_parameters``. For each class the score of a sample is the class prior
    multiplied by the product of the Gaussian likelihoods of its features.

    Args:
        X_train (pd.DataFrame): Feature table, one column per feature.
        y_train (pd.Series): Class label of every row of ``X_train``.

    Returns:
        Dict[object, pd.Series]: Maps each class label to a series holding, for every
        row of ``X_train``, ``prior * prod(likelihoods)``. These scores are NOT
        normalized across classes, so they are proportional to the posterior rather
        than probabilities that sum to 1.
    """
    parameters = get_parameters(X_train=X_train, y_train=y_train)
    probabilities = {}
    # get_parameters already has one entry per distinct label, so iterate over it
    # instead of recomputing np.unique(y_train).
    for class_, class_params in parameters.items():
        # Starts as the scalar 1 and becomes a Series after the first feature.
        likelihood = 1
        for feat in X_train.columns:
            feat_params = class_params[feat]
            likelihood *= _gauss_likelihood(X_train[feat], feat_params['mean'], feat_params['std'])
        # NOTE: a plain product of densities can get extremely small when there are
        # many features; summing logs would be the usual remedy if that becomes an issue.
        probabilities[class_] = likelihood*class_params['apriori']
    return probabilities
        

In [219]:
# Score every Iris sample under each class: for each class, fit returns
# prior * product of Gaussian feature likelihoods (unnormalized).
probabilities = fit(X_train=X_iris, y_train=y_iris)

In [222]:
# One column per class, one row per sample. The values are unnormalized scores,
# so a row does not sum to 1 and individual entries can exceed 1.
pd.DataFrame(probabilities)

Unnamed: 0,Iris-setosa,Iris-versicolor,Iris-virginica
0,2.736783e+00,8.322426e-18,6.008423e-25
1,1.519728e+00,4.716457e-17,1.032493e-24
2,1.157609e+00,2.753295e-18,8.422280e-26
3,1.105612e+00,3.332872e-17,9.435992e-25
4,2.589006e+00,2.702798e-18,2.360722e-25
...,...,...,...
145,5.228938e-186,1.046795e-07,1.322454e-01
146,2.237519e-146,1.294745e-03,4.543724e-02
147,2.742285e-163,9.585593e-05,2.178377e-01
148,2.727127e-194,1.866789e-08,5.516259e-02


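P(class=0|X1,X2) ∝ P(X1|class=0) * P(X2|class=0) * P(class=0)

P(class=1|X1,X2) ∝ P(X1|class=1) * P(X2|class=1) * P(class=1)

The right-hand sides are unnormalized scores (naive Bayes assumes the features are conditionally independent given the class). Dividing each score by the sum of the scores over all classes, which equals P(X1,X2), gives the actual posterior probabilities.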