In [1]:
from embeddedML import NaiveBayesFast
from embeddedML import Preprocessing
from embeddedML import Metrics

In [2]:
import pandas as pd
import numpy as np

In [3]:
# Instantiate the embeddedML helpers used in the rest of the notebook:
# the Naive Bayes classifier, the preprocessing utilities and the metrics.
model = NaiveBayesFast()
preprocesser = Preprocessing()
metrics = Metrics()

In [4]:
# Read the raw dataset from the CSV file, then encode any categorical
# columns as indicator variables (dropping the first level of each).
data = pd.read_csv("diabetes2.csv")
data_dummies = pd.get_dummies(data, drop_first=True)

In [5]:
# Separate the encoded frame into the feature matrix (every column except
# "Outcome") and the target (the "Outcome" column), both cast to float.
# The target is selected through a column mask, so it stays two-dimensional.
feature_columns = data_dummies.columns != "Outcome"
target_columns = data_dummies.columns == "Outcome"
X = data_dummies.loc[:, feature_columns].values.astype(float)
y = data_dummies.loc[:, target_columns].values.astype(float)

In [6]:
# Partition the features and labels into training and validation subsets with
# the preprocessor's train_val_split (train_rate=0.8, shuffling enabled).
# NOTE(review): no random seed is set in the visible cells, so the split
# presumably differs between runs — confirm how train_val_split shuffles.
X_train, X_val, y_train, y_val = preprocesser.train_val_split(
    X,
    y,
    train_rate=0.8,
    is_shuffle=True,
)

In [8]:
# Pass each (features, labels) pair through the preprocessor's type_function,
# which is intended to reduce the memory consumption of the datasets.
X_train, y_train = preprocesser.type_function(X_train, y_train)
X_val, y_val = preprocesser.type_function(X_val, y_val)

In [9]:
# The train and validation feature matrices are normalized with the
# standard_scaler function of the preprocessor object.
# NOTE(review): the validation set is scaled in its own separate call; if
# standard_scaler derives its statistics from the array it receives, the two
# sets end up on different scales — confirm, and prefer reusing the training
# statistics for the validation set if the library allows it.
X_train = preprocesser.standard_scaler(X_train, fast=True)
X_val = preprocesser.standard_scaler(X_val, fast=True)

In [13]:
# The training labels are flattened from 2D to 1D before being passed to the model.
y_train = y_train.flatten()

In [14]:
# Fit the NaiveBayesFast model on the training features and labels.
# (Prediction is performed in a later cell.)
model.train(X_train, y_train)

In [15]:
# The validation labels are flattened from 2D to 1D so they can be compared
# with the predictions in the metrics cell.
y_val = y_val.flatten()

In [16]:
# Predict labels for the validation features with the trained model.
y_pred = model.predict(X_val)

In [17]:
# Report the validation metrics, evaluating and printing one at a time.
# Accuracy is printed with two decimals and a percent sign; the remaining
# scores are printed with four decimals.
val_accuracy = metrics.accuracy(y_val, y_pred)
print(f"Accuracy : {val_accuracy:.2f}%")

val_precision = metrics.precision(y_val, y_pred)
print(f"Precision: {val_precision:.4f}")

val_recall = metrics.recall(y_val, y_pred)
print(f"Recall   : {val_recall:.4f}")

val_f1 = metrics.f1_score(y_val, y_pred)
print(f"F1 Score : {val_f1:.4f}")

Accuracy : 75.97%
Precision: 0.6032
Recall   : 0.7600
F1 Score : 0.6726
