#### Objective: To understand the Naive bayes algorithm and different types of NB for classification task

In [1]:
import pandas as pd
import numpy as np

In [6]:
dataset = pd.read_csv("Iris.csv")
print("First few rows of the dataset:")
print(dataset.head())

First few rows of the dataset:
   Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm      Species
0   1            5.1           3.5            1.4           0.2  Iris-setosa
1   2            4.9           3.0            1.4           0.2  Iris-setosa
2   3            4.7           3.2            1.3           0.2  Iris-setosa
3   4            4.6           3.1            1.5           0.2  Iris-setosa
4   5            5.0           3.6            1.4           0.2  Iris-setosa


In [5]:
print("\nStatistical summary of the dataset:")
print(dataset.describe())


Statistical summary of the dataset:
               Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm
count  150.000000     150.000000    150.000000     150.000000    150.000000
mean    75.500000       5.843333      3.054000       3.758667      1.198667
std     43.445368       0.828066      0.433594       1.764420      0.763161
min      1.000000       4.300000      2.000000       1.000000      0.100000
25%     38.250000       5.100000      2.800000       1.600000      0.300000
50%     75.500000       5.800000      3.000000       4.350000      1.300000
75%    112.750000       6.400000      3.300000       5.100000      1.800000
max    150.000000       7.900000      4.400000       6.900000      2.500000


In [27]:
print(dataset.isna().sum())

Id               0
SepalLengthCm    0
SepalWidthCm     0
PetalLengthCm    0
PetalWidthCm     0
Species          0
dtype: int64


In [7]:
print("\nShape of the dataset:")
print(dataset.shape)


Shape of the dataset:
(150, 6)


In [10]:
X = dataset.drop(columns=['Species'])  # Features (inputs)
Y = dataset['Species']  # Target variable (output)


In [11]:
print("Shape of features (X):", X.shape)
print("Shape of target variable (Y):", Y.shape)

Shape of features (X): (150, 5)
Shape of target variable (Y): (150,)


In [13]:
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
Y_encoded = label_encoder.fit_transform(Y)

In [15]:
from sklearn.model_selection import train_test_split
ratios = [(0.6, 0.4), (0.5, 0.5), (0.7, 0.3)]  # Ratios for splitting
for ratio in ratios:
    train_ratio, test_ratio = ratio
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y_encoded, test_size=test_ratio, random_state=42)
    
    # Print the shapes of training and testing sets
    print(f"\nFor ratio {train_ratio}-{test_ratio}:")
    print("Shape of X_train:", X_train.shape)
    print("Shape of X_test:", X_test.shape)
    print("Shape of Y_train:", Y_train.shape)
    print("Shape of Y_test:", Y_test.shape)


For ratio 0.6-0.4:
Shape of X_train: (90, 5)
Shape of X_test: (60, 5)
Shape of Y_train: (90,)
Shape of Y_test: (60,)

For ratio 0.5-0.5:
Shape of X_train: (75, 5)
Shape of X_test: (75, 5)
Shape of Y_train: (75,)
Shape of Y_test: (75,)

For ratio 0.7-0.3:
Shape of X_train: (105, 5)
Shape of X_test: (45, 5)
Shape of Y_train: (105,)
Shape of Y_test: (45,)


In [26]:
ratios = [(0.6, 0.4), (0.5, 0.5), (0.7, 0.3)]  # Ratios for splitting
random_seeds = [42, 123, 456]  # Random seed values for shuffling

for ratio in ratios:
    train_ratio, test_ratio = ratio
    print(f"\nFor ratio {train_ratio}-{test_ratio}:")
    
    for seed in random_seeds:
        X_train, X_test, Y_train, Y_test = train_test_split(X, Y_encoded, test_size=test_ratio, random_state=seed)
        
        # Print the shapes of training and testing sets
        print(f"Random seed {seed}:")
        print("Shape of X_train:", X_train.shape)
        print("Shape of X_test:", X_test.shape)
        print("Shape of Y_train:", Y_train.shape)
        print("Shape of Y_test:", Y_test.shape)


For ratio 0.6-0.4:
Random seed 42:
Shape of X_train: (90, 5)
Shape of X_test: (60, 5)
Shape of Y_train: (90,)
Shape of Y_test: (60,)
Random seed 123:
Shape of X_train: (90, 5)
Shape of X_test: (60, 5)
Shape of Y_train: (90,)
Shape of Y_test: (60,)
Random seed 456:
Shape of X_train: (90, 5)
Shape of X_test: (60, 5)
Shape of Y_train: (90,)
Shape of Y_test: (60,)

For ratio 0.5-0.5:
Random seed 42:
Shape of X_train: (75, 5)
Shape of X_test: (75, 5)
Shape of Y_train: (75,)
Shape of Y_test: (75,)
Random seed 123:
Shape of X_train: (75, 5)
Shape of X_test: (75, 5)
Shape of Y_train: (75,)
Shape of Y_test: (75,)
Random seed 456:
Shape of X_train: (75, 5)
Shape of X_test: (75, 5)
Shape of Y_train: (75,)
Shape of Y_test: (75,)

For ratio 0.7-0.3:
Random seed 42:
Shape of X_train: (105, 5)
Shape of X_test: (45, 5)
Shape of Y_train: (105,)
Shape of Y_test: (45,)
Random seed 123:
Shape of X_train: (105, 5)
Shape of X_test: (45, 5)
Shape of Y_train: (105,)
Shape of Y_test: (45,)
Random seed 456:
Sh

In [20]:
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Initialize Gaussian Naive Bayes classifier
naive_bayes_classifier = GaussianNB()

# Train the classifier on the training data
naive_bayes_classifier.fit(X_train, Y_train)

# Predict the labels for the test data
Y_pred = naive_bayes_classifier.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(Y_test, Y_pred)
print("Accuracy:", accuracy)

Accuracy: 1.0


In [21]:
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, precision_score, recall_score

# Calculate confusion matrix
conf_matrix = confusion_matrix(Y_test, Y_pred)
print("Confusion Matrix:")
print(conf_matrix)

# Calculate accuracy rate
accuracy = accuracy_score(Y_test, Y_pred)
print("\nAccuracy:", accuracy)

# Calculate F1 score
f1 = f1_score(Y_test, Y_pred, average='weighted')
print("F1 Score:", f1)

# Calculate precision
precision = precision_score(Y_test, Y_pred, average='weighted')
print("Precision:", precision)

# Calculate recall
recall = recall_score(Y_test, Y_pred, average='weighted')
print("Recall:", recall)

Confusion Matrix:
[[12  0  0]
 [ 0 20  0]
 [ 0  0 13]]

Accuracy: 1.0
F1 Score: 1.0
Precision: 1.0
Recall: 1.0
