In [78]:
import pandas as pd
import numpy as np
from sklearn.naive_bayes import BernoulliNB,GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder,LabelEncoder,OrdinalEncoder
from sklearn.metrics import accuracy_score,classification_report
from ucimlrepo import fetch_ucirepo
from sklearn.compose import make_column_transformer,make_column_selector
from sklearn.pipeline import Pipeline

<h1 style = 'color:orange'>Discrete Naive Bayes </h1>

In [11]:
# Car Evaluation data set (UCI repository id=19): one-hot encode the feature
# columns and fit a Bernoulli Naive Bayes classifier on the binary indicators.
car_evaluation = fetch_ucirepo(id=19)

X = car_evaluation.data.features
# `data.targets` comes back as a single-column DataFrame. Take that column as
# a 1-D Series so scikit-learn does not emit a DataConversionWarning
# ("y = column_or_1d(y, warn=True)") when fitting.
y = car_evaluation.data.targets.iloc[:, 0]

# Split before encoding so the encoder only learns categories from the
# training rows.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=25
)

# drop='first' removes one indicator per feature; set_output keeps the
# generated column names by returning a DataFrame.
ohe = OneHotEncoder(sparse_output=False, drop='first').set_output(transform='pandas')
X_train_ohe = ohe.fit_transform(X_train)
X_test_ohe = ohe.transform(X_test)

nb = BernoulliNB()
nb.fit(X_train_ohe, y_train)

y_pred = nb.predict(X_test_ohe)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

         acc       0.56      0.46      0.51       115
        good       0.36      0.19      0.25        21
       unacc       0.84      0.94      0.89       363
       vgood       0.60      0.15      0.24        20

    accuracy                           0.78       519
   macro avg       0.59      0.44      0.47       519
weighted avg       0.75      0.78      0.75       519



  y = column_or_1d(y, warn=True)


<h1 style = 'color:orange'>Cancer Analysis</h1>

In [38]:
# Cancer recurrence data: one-hot encode the object-typed feature columns and
# fit a Bernoulli Naive Bayes classifier.
cancer = pd.read_csv('../Cases/Cancer/Cancer.csv')

# 'Class' is the target and 'subjid' is excluded from the features.
X = cancer.drop(['Class', 'subjid'], axis=1)
y = cancer['Class']

# DataFrame.info() writes its summary to stdout and returns None, so call it
# directly; wrapping it in print() appends a stray "None" to the output.
X.info()
# NOTE(review): the recorded summary shows missing values in 'node-caps' and
# 'breast-quad'. They are passed to the encoder as-is — confirm that treating
# them as their own category is intended rather than imputing.

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Fit the encoder on the training rows only, then reuse it for the test rows.
ohe = OneHotEncoder(drop='first', sparse_output=False).set_output(transform='pandas')
X_train_ohe = ohe.fit_transform(X_train)
X_test_ohe = ohe.transform(X_test)

nb = BernoulliNB()
nb.fit(X_train_ohe, y_train)

y_pred = nb.predict(X_test_ohe)
print(classification_report(y_test, y_pred))

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 286 entries, 0 to 285
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   age          286 non-null    object
 1   menopause    286 non-null    object
 2   tumor-size   286 non-null    object
 3   inv-nodes    286 non-null    object
 4   node-caps    278 non-null    object
 5   deg-malig    286 non-null    object
 6   breast       286 non-null    object
 7   breast-quad  285 non-null    object
 8   irradiat     286 non-null    object
dtypes: object(9)
memory usage: 20.2+ KB
None
                      precision    recall  f1-score   support

no-recurrence-events       0.76      0.88      0.82        60
   recurrence-events       0.56      0.35      0.43        26

            accuracy                           0.72        86
           macro avg       0.66      0.61      0.62        86
        weighted avg       0.70      0.72      0.70        86



<h1 style = 'color:orange'>Gaussian Naive Bayes</h1>

In [35]:
# Sonar data: the features are passed to Gaussian Naive Bayes without any
# encoding or scaling step.
sonar = pd.read_csv('../Cases/Sonar/Sonar.csv')

# Show the class balance. A bare expression in the middle of a cell is
# evaluated and discarded, so it has to be printed to appear in the output.
print(sonar['Class'].value_counts())

X = sonar.drop("Class", axis=1)
y = sonar['Class']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=25, stratify=y
)

nb = GaussianNB()
nb.fit(X_train, y_train)

y_pred = nb.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           M       0.75      0.80      0.77        45
           R       0.75      0.69      0.72        39

    accuracy                           0.75        84
   macro avg       0.75      0.75      0.75        84
weighted avg       0.75      0.75      0.75        84



<h1>HR Analytics</h1>

In [84]:
# HR analytics: predict the 'left' column with Gaussian Naive Bayes, encoding
# the two string columns inside a pipeline so the encoders are fitted on the
# training rows only.
hr = pd.read_csv('../Cases/HRAnalytics/HR_comma_sep.csv')

X = hr.drop('left', axis=1)
y = hr['left']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=25, stratify=y
)

# Request dense one-hot output explicitly. GaussianNB does not accept sparse
# input, and with the default sparse encoder the ColumnTransformer only
# returns a dense array when the combined density exceeds its sparse_threshold.
ohe = OneHotEncoder(sparse_output=False)
# 'salary' is ordered, so map it to 0/1/2 in the order low < medium < high.
oe = OrdinalEncoder(categories=[['low', 'medium', 'high']])

# Columns not named here are passed through unchanged.
column_transform = make_column_transformer(
    (ohe, ['Department']),
    (oe, ['salary']),
    remainder='passthrough',
    verbose_feature_names_out=False,
)
nb = GaussianNB()

pipe = Pipeline([('column transform', column_transform), ('NB', nb)])
pipe.fit(X_train, y_train)

y_pred = pipe.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.90      0.70      0.79      3429
           1       0.44      0.76      0.56      1070

    accuracy                           0.72      4499
   macro avg       0.67      0.73      0.68      4499
weighted avg       0.80      0.72      0.74      4499

