# Multinomial Naive Bayes

In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score

# Class labels used in the "target" column:
#   Hockey - class 0
#   Space  - class 1

# Load the dataset (sentences drawn from the hockey and space newsgroups)
df = pd.read_csv("newsgroups20.csv")
X, y = df["sentences"], df["target"]

# Split the raw text BEFORE vectorizing. Fitting the vectorizer on the full
# corpus would let the test documents shape the vocabulary (data leakage),
# so the held-out score would no longer reflect truly unseen text.
X_train_text, X_test_text, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# How CountVectorizer encodes text: columns follow the alphabetically sorted
# vocabulary. With the vocabulary ["is", "that", "was"]:
#   "that is is" -> [2, 1, 0]
#   "that was"   -> [0, 1, 1]

# Learn the vocabulary from the training text only, then reuse it for the test text
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(X_train_text)
X_test = vectorizer.transform(X_test_text)

# Keep the name X_vec defined: the full corpus encoded with the train-only vocabulary
X_vec = vectorizer.transform(X)

# Create and train the Multinomial Naive Bayes model
model = MultinomialNB()
model.fit(X_train, y_train)

# Predict labels for the held-out sentences
y_pred = model.predict(X_test)

# Evaluate the model on the held-out set
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.4f}')

Accuracy: 0.9983


In [None]:
# Classify sentences the model has never seen
new_data = [
    "The hockey game was intense and exciting.",
    "The spacecraft successfully launched to Mars.",
]

# Encode with the already-fitted vectorizer (transform only, no re-fitting)
# so the columns line up with the features the model was trained on.
new_data_vec = vectorizer.transform(new_data)

predictions = model.predict(new_data_vec)
print("\nPredictions:", predictions)


Predictions: [0 1]


# Gaussian Naive Bayes

In [None]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# The Iris dataset contains measurements of three kinds of iris flower.
# Features: sepal length, sepal width, petal length and petal width
# Target labels:
#   0 - setosa
#   1 - versicolor
#   2 - virginica

# Load the dataset and pull out the feature matrix and label vector
iris = load_iris()
X = iris.data
y = iris.target

# Hold out 30% of the samples for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Build the Gaussian Naive Bayes classifier and fit it (fit returns the estimator)
model = GaussianNB().fit(X_train, y_train)

# Predict on the held-out samples and report the fraction classified correctly
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.4f}')

Accuracy: 0.9778


In [None]:
# Classify two new flowers; each row is one sample of four measurements
new_data = [
    [5.1, 3.5, 1.4, 0.2],
    [6.7, 3.1, 4.7, 1.5],
]

predictions = model.predict(new_data)
print("\nPredictions:", predictions)


Predictions: [0 1]
