<a href="https://colab.research.google.com/github/SANTHOSH-SR8245/AIML/blob/main/naive_bayes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB,ComplementNB
from sklearn.metrics import accuracy_score, classification_report

# Load dataset
df = pd.read_csv("/content/kidney_disease_updated.csv")

# Check target column name (assuming "classification" or "target")
print(df.columns)

# Let's assume target column is "classification"
X = df.drop(columns=["classification"])
y = df["classification"]

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# -------------------------------
# 1. Gaussian Naive Bayes (with scaling)
# -------------------------------
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

gnb = GaussianNB()
gnb.fit(X_train_scaled, y_train)
y_pred_gnb = gnb.predict(X_test_scaled)

print("GaussianNB Accuracy:", accuracy_score(y_test, y_pred_gnb))
print(classification_report(y_test, y_pred_gnb))

Index(['id', 'age', 'bp', 'sg', 'al', 'su', 'rbc', 'pc', 'pcc', 'ba', 'bgr',
       'bu', 'sc', 'sod', 'pot', 'hemo', 'pcv', 'wc', 'rc', 'htn', 'dm', 'cad',
       'appet', 'pe', 'ane', 'classification'],
      dtype='object')
GaussianNB Accuracy: 0.95
              precision    recall  f1-score   support

           0       1.00      0.92      0.96        76
           1       0.88      1.00      0.94        44

    accuracy                           0.95       120
   macro avg       0.94      0.96      0.95       120
weighted avg       0.96      0.95      0.95       120



In [3]:
# 2. Multinomial Naive Bayes (needs non-negative data)
# -------------------------------
# Shift data to positive values for multinomial
X_train_pos = X_train - X_train.min().min()
X_test_pos = X_test - X_train.min().min()

mnb = MultinomialNB()
mnb.fit(X_train_pos, y_train)
y_pred_mnb = mnb.predict(X_test_pos)

print("MultinomialNB Accuracy:", accuracy_score(y_test, y_pred_mnb))
print(classification_report(y_test, y_pred_mnb))


MultinomialNB Accuracy: 0.9416666666666667
              precision    recall  f1-score   support

           0       1.00      0.91      0.95        76
           1       0.86      1.00      0.93        44

    accuracy                           0.94       120
   macro avg       0.93      0.95      0.94       120
weighted avg       0.95      0.94      0.94       120



In [5]:
cnb = ComplementNB()
cnb.fit(X_train_pos, y_train)
y_pred_cnb = cnb.predict(X_test_pos)

print("\nComplementNB Accuracy:", accuracy_score(y_test, y_pred_cnb))
print(classification_report(y_test, y_pred_cnb))


ComplementNB Accuracy: 0.9416666666666667
              precision    recall  f1-score   support

           0       1.00      0.91      0.95        76
           1       0.86      1.00      0.93        44

    accuracy                           0.94       120
   macro avg       0.93      0.95      0.94       120
weighted avg       0.95      0.94      0.94       120



In [6]:
# -----------------------------
# 1. Import libraries
# -----------------------------
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import CategoricalNB
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.impute import SimpleImputer

# Load dataset
df = pd.read_csv("/content/kidney_disease_updated.csv")
# -----------------------------
# 2. List of categorical columns
# -----------------------------
cat_columns = ['cad', 'appet', 'pe', 'ane', 'htn', 'dm']  # features
target_col = 'classification'  # target

# -----------------------------
# 3. Handle missing values and encode
# -----------------------------
# Use mode for imputation (most frequent value)
imputer = SimpleImputer(strategy='most_frequent')
df[cat_columns + [target_col]] = imputer.fit_transform(df[cat_columns + [target_col]])

# Encode all categorical columns
label_encoders = {}
for col in cat_columns + [target_col]:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le  # save encoder for later decoding

# -----------------------------
# 4. Split dataset into train/test
# -----------------------------
X = df[cat_columns]
y = df[target_col]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# -----------------------------
# 5. Train Categorical Naive Bayes
# -----------------------------
model = CategoricalNB()
model.fit(X_train, y_train)

# -----------------------------
# 6. Make predictions
# -----------------------------
y_pred = model.predict(X_test)

# -----------------------------
# 7. Evaluate model
# -----------------------------
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.9

Confusion Matrix:
 [[55 10]
 [ 0 35]]

Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.85      0.92        65
           1       0.78      1.00      0.88        35

    accuracy                           0.90       100
   macro avg       0.89      0.92      0.90       100
weighted avg       0.92      0.90      0.90       100



In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import BernoulliNB
from sklearn.metrics import accuracy_score, classification_report

# -------------------------------
# 1. Load dataset
# -------------------------------
df = pd.read_csv("/content/kidney_disease_updated.csv")

# Assume target column is "classification"
X = df.drop(columns=["classification"])
y = df["classification"]

# Laplace smoothing alpha can be tuned (try 0.5 or 1.0)
bnb = BernoulliNB(alpha=0.5)
bnb.fit(X_train, y_train)
y_pred_bnb = bnb.predict(X_test)

# -------------------------------
# 5. Evaluation
# -------------------------------
print("BernoulliNB Accuracy:", accuracy_score(y_test, y_pred_bnb))
print("\nClassification Report:\n", classification_report(y_test, y_pred_bnb))

BernoulliNB Accuracy: 0.9

Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.85      0.92        65
           1       0.78      1.00      0.88        35

    accuracy                           0.90       100
   macro avg       0.89      0.92      0.90       100
weighted avg       0.92      0.90      0.90       100

