In [5]:
import numpy as np
import pandas as pd
import seaborn as sns

In [8]:
class GaussianNaiveBayes:
    """Gaussian Naive Bayes classifier.

    Each feature is modelled, independently per class, by a normal
    distribution whose mean and standard deviation are estimated in `fit`.
    `predict` returns, for every sample, the class with the highest posterior.

    Attributes set by `fit`:
        classes:    sorted unique labels found in `y`, shape (n_classes,)
        priors:     relative frequency of each class, shape (n_classes,)
        X_cls_mean: per-class feature means, shape (n_classes, n_features)
        X_stds:     per-class feature standard deviations (floored at
                    `_MIN_STD`), shape (n_classes, n_features)
    """

    # Lower bound for the estimated standard deviations. A feature that is
    # constant within a class has std == 0, which would otherwise produce
    # division by zero (NaN/inf densities) at prediction time.
    _MIN_STD = 1e-9

    def fit(self, X, y):
        """Estimate class priors and per-class feature means / std deviations.

        Parameters:
            X: array-like of shape (n_samples, n_features), numeric.
            y: array-like of shape (n_samples,), class labels. Labels may be
               any values `np.unique` can sort; they need not be 0..n-1.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)

        self.classes, cls_counts = np.unique(y, return_counts=True)
        self.priors = cls_counts / len(y)

        # Iterate over the labels actually present in `y`, not over
        # range(n_classes), so that arbitrary label values select the right rows.
        self.X_cls_mean = np.array([X[y == c].mean(axis=0) for c in self.classes])
        stds = np.array([X[y == c].std(axis=0) for c in self.classes])
        self.X_stds = np.maximum(stds, self._MIN_STD)

    def pdf(self, x, mean, std):
        """Return the Gaussian probability density of `x` for `mean` and `std`.

        Arguments are combined with NumPy broadcasting.
        """
        return (1 / (np.sqrt(2 * np.pi) * std)) * np.exp(-0.5 * ((x - mean) / std) ** 2)

    def _log_pdf(self, x, mean, std):
        """Return the natural log of the Gaussian density (see `pdf`)."""
        z = (x - mean) / std
        return -0.5 * z ** 2 - np.log(std) - 0.5 * np.log(2 * np.pi)

    def predict(self, X):
        """Predict a class label for every row of `X`.

        Parameters:
            X: array-like of shape (n_samples, n_features), numeric.

        Returns:
            Array of shape (n_samples,) holding elements of `self.classes`.
        """
        X = np.asarray(X, dtype=float)

        # (n_samples, 1, n_features) against (n_classes, n_features)
        # broadcasts to (n_samples, n_classes, n_features).
        log_pdfs = self._log_pdf(X[:, np.newaxis, :], self.X_cls_mean, self.X_stds)

        # Sum log-densities instead of multiplying densities: the product of
        # many small values underflows to 0 and makes every class tie.
        # The evidence term of Bayes' formula is the same for all classes and
        # is therefore omitted.
        log_posteriors = np.log(self.priors) + log_pdfs.sum(axis=2)

        # Map the winning column index back to the original label.
        return self.classes[np.argmax(log_posteriors, axis=1)]

In [9]:
from sklearn.model_selection import train_test_split

# Load the dataset; `price_range` is the target, every other column is a feature.
df = pd.read_csv('Datasets/AIS2.csv')
y = df['price_range']
X = df.drop(columns='price_range')

# NumPy versions of the features and the target.
X_np, y_np = np.array(X), np.array(y)

# Split the data into training and test sets.
test_size = 0.2
X_train, X_test, y_train, y_test = train_test_split(
    X_np,
    y_np,
    test_size=test_size,
    random_state=42,
)

In [10]:
from sklearn.metrics import accuracy_score

# Fit the classifier on the training split, then label the test split.
gn = GaussianNaiveBayes()
gn.fit(X_train, y_train)
predictions = gn.predict(X_test)

# Last expression of the cell: accuracy of the predictions on the test split.
accuracy_score(y_true=y_test, y_pred=predictions)

0.7975