# Q1: Bayes' theorem — posterior probabilities from priors and likelihoods

In [3]:
# Prior probabilities of the two student groups: hostelers (H) and the remaining group (D).
H, D = 0.60, 0.40
# Probability of scoring an A grade within each group (A_H pairs with H, A_S with D).
A_H, A_S = 0.30, 0.20

# Law of total probability: overall chance of an A grade across both groups.
hosteler_and_A = A_H * H
A = hosteler_and_A + A_S * D

# Bayes' rule: P(hosteler | A) = P(A | hosteler) * P(hosteler) / P(A).
H_A = hosteler_and_A / A
print(f"Probability that the student is a hosteler given that they scored an A grade: {H_A:.4f}")

Probability that the student is a hosteler given that they scored an A grade: 0.6923


In [5]:
# Prior probability of having the disease (D) and of not having it (N_D).
D, N_D = 0.01, 0.99
# Test behaviour: P(positive | disease) and P(positive | no disease).
Pos_D, Pos_N_D = 0.99, 0.02

# Law of total probability: overall chance of a positive result.
positive_and_disease = Pos_D * D
Pos = positive_and_disease + Pos_N_D * N_D

# Bayes' rule: P(disease | positive).
P = positive_and_disease / Pos

print(f"Probability of having the disease given a positive test result: {P:.4f}")


Probability of having the disease given a positive test result: 0.3333


# Q2: Naive Bayes classifier for categorical features (buyers.csv)

In [30]:
import pandas as pd
import numpy as np

# Read the buyers table from the working directory and preview its first rows.
buyers_csv = 'buyers.csv'
df = pd.read_csv(buyers_csv)

print(df.head())

       age  income student credit_rating buys_computer
0     <=30    high      no          fair            no
1     <=30    high      no     excellent            no
2  31...40    high      no          fair            no
3      >40  medium      no          fair           yes
4      >40     low     yes          fair           yes


In [35]:
class NaiveBayesClassifier:
    """Naive Bayes classifier for categorical features stored in a pandas DataFrame.

    Class priors and per-feature conditional probabilities are estimated from
    relative frequencies in the training data.  A row is scored for each class
    as ``prior * product(P(value | class))`` and assigned to the class with the
    highest score (ties go to the class that comes first in ``classes``).

    Parameters
    ----------
    alpha : float, default 0.0
        Additive (Laplace) smoothing constant.  With the default ``0.0`` the
        estimates are plain relative frequencies, so any value that never
        occurs together with a class drives that class's score to zero.  A
        positive value (``1.0`` is add-one smoothing) avoids this.

    Attributes
    ----------
    classes : numpy.ndarray
        Distinct labels found by ``fit``.
    class_prob : dict
        ``{class: prior probability}``.
    feature_prob : dict
        ``{class: {feature: {value: P(value | class)}}}``.
    class_count : dict
        ``{class: number of training rows}``.
    """

    def __init__(self, alpha=0.0):
        if alpha < 0:
            raise ValueError("alpha must be non-negative")
        self.alpha = alpha
        self.class_prob = {}
        self.feature_prob = {}
        self.classes = []
        # Rows per class; needed in predict() to score values unseen in training.
        self.class_count = {}

    def fit(self, X, y):
        """Estimate priors and conditional probabilities from training data.

        Parameters
        ----------
        X : pandas.DataFrame
            One column per categorical feature.
        y : pandas.Series or array-like
            Labels aligned with the rows of ``X`` (used as a boolean mask on ``X``).
        """
        self.classes = np.unique(y)
        self.class_prob = {cls: np.mean(y == cls) for cls in self.classes}

        # Distinct values are collected over the whole training set so every
        # class gets an entry (possibly zero) for every observed value.
        feature_values = {feature: np.unique(X[feature]) for feature in X.columns}

        self.class_count = {}
        self.feature_prob = {cls: {} for cls in self.classes}
        for cls in self.classes:
            cls_data = X[y == cls]
            n_cls = len(cls_data)
            self.class_count[cls] = n_cls
            for feature, values in feature_values.items():
                # With alpha == 0 this reduces to count / n_cls, the plain
                # relative frequency.
                denominator = n_cls + self.alpha * len(values)
                column = cls_data[feature]
                self.feature_prob[cls][feature] = {
                    value: ((column == value).sum() + self.alpha) / denominator
                    for value in values
                }

    def predict(self, X, verbose=True):
        """Predict a class label for every row of ``X``.

        Parameters
        ----------
        X : pandas.DataFrame
            Rows to classify; every column must have been present in ``fit``.
        verbose : bool, default True
            When true, print the unnormalised score of each class for each row
            (one line per class, in ``classes`` order).  Pass ``False`` to
            predict silently.

        Returns
        -------
        numpy.ndarray
            Predicted label for each row, in row order.
        """
        predictions = []
        for _, row in X.iterrows():
            class_probs = {}
            for cls in self.classes:
                prob = self.class_prob[cls]
                for feature in X.columns:
                    table = self.feature_prob[cls][feature]
                    value = row[feature]
                    if value in table:
                        prob *= table[value]
                    else:
                        # Value never seen in training: it carries only the
                        # smoothing mass, which is exactly zero when alpha == 0.
                        prob *= self.alpha / (
                            self.class_count[cls] + self.alpha * len(table)
                        )
                class_probs[cls] = prob
                if verbose:
                    print(prob)
            predictions.append(max(class_probs, key=class_probs.get))
        return np.array(predictions)

In [36]:
# Reload the buyers table and separate the predictors from the target column.
target_column = 'buys_computer'
df = pd.read_csv('buyers.csv')
y = df[target_column]
X = df.drop(columns=[target_column])

# Train the categorical Naive Bayes model on the full table.
nb_classifier = NaiveBayesClassifier()
nb_classifier.fit(X, y)

# A single query record, written row-wise (one dict per row).
test_data = pd.DataFrame([{
    'age': '<=30',
    'income': 'medium',
    'student': 'yes',
    'credit_rating': 'fair',
}])

predictions = nb_classifier.predict(test_data)
print("Predictions:", predictions)


0.0098876953125
0.0439453125
Predictions: ['yes']


# Q3: Naive Bayes text classifier with Laplace smoothing (text_data.csv)

In [108]:
import pandas as pd
import numpy as np
from collections import Counter

# Read the sentence table; the code below uses its 'Text' and 'Tag' columns.
data = pd.read_csv("text_data.csv")
# pd.read_csv already returns a DataFrame, so this only re-wraps it.
# NOTE(review): whether `df` shares storage with `data` (so that columns added to
# `df` also appear on `data`) depends on the pandas version — confirm before
# simplifying this to a plain alias.
df = pd.DataFrame(data)

def preprocess(text):
    """Lower-case ``text`` and split it on whitespace into a list of word tokens."""
    return text.lower().split()


# NOTE: this class has the same name as the categorical classifier defined
# earlier in the notebook and replaces it once this cell has run.
class NaiveBayesClassifier:
    """Multinomial Naive Bayes text classifier with add-one (Laplace) smoothing.

    Documents are tokenised with ``preprocess``.  For every class the model
    stores ``P(word | class) = (count + 1) / (words_in_class + vocab_size)`` and
    scores a text as ``log P(class) + sum(log P(word | class))``.

    Attributes
    ----------
    class_probs : dict
        ``{class: prior probability}``.
    word_probs : dict
        ``{class: {word: smoothed P(word | class)}}`` for every vocabulary word.
    vocab : set
        Distinct tokens seen by the most recent ``fit``.
    class_counts : collections.Counter
        Number of training documents per class.
    total_docs : int
        Number of training documents.
    total_words : dict
        ``{class: total token count}``.
    a : list
        Exponentiated scores from the most recent ``predict`` call, in the
        iteration order of ``class_probs``.
    """

    def __init__(self):
        self.word_probs = {}
        self.class_probs = {}
        self.vocab = set()
        self.class_counts = {}
        self.total_docs = 0
        self.total_words = {}
        self.a = []

    def fit(self, X, y):
        """Learn class priors and smoothed word probabilities.

        Parameters
        ----------
        X : iterable of str
            Training documents.
        y : sequence
            Class label of each document, in the same order as ``X``.
        """
        class_counts = Counter(y)
        self.total_docs = len(y)
        self.class_counts = class_counts
        self.class_probs = {
            cls: count / self.total_docs for cls, count in class_counts.items()
        }

        # Start from an empty vocabulary on every fit; otherwise words from a
        # previous fit would inflate vocab_size and distort the smoothing.
        self.vocab = set()
        word_counts = {cls: Counter() for cls in class_counts}
        total_words = {cls: 0 for cls in class_counts}

        for text, label in zip(X, y):
            words = preprocess(text)
            word_counts[label].update(words)
            self.vocab.update(words)
            total_words[label] += len(words)

        self.total_words = total_words
        vocab_size = len(self.vocab)

        # Add-one smoothing: every vocabulary word gets a non-zero probability
        # in every class, even if it never occurred there.
        self.word_probs = {
            cls: {
                word: (counts[word] + 1) / (total_words[cls] + vocab_size)
                for word in self.vocab
            }
            for cls, counts in word_counts.items()
        }

    def predict(self, text):
        """Return the most probable class for ``text``.

        Also stores the exponentiated per-class scores in ``self.a``.

        Raises
        ------
        ValueError
            If the classifier has not been fitted (no classes are known).
        """
        if not self.class_probs:
            raise ValueError("predict() called before fit(): no classes have been learned")

        words = preprocess(text)
        vocab_size = len(self.vocab)

        scores = {}
        for cls, prior in self.class_probs.items():
            known = self.word_probs[cls]
            denominator = self.total_words[cls] + vocab_size
            # Work in log space so long texts do not underflow to zero.
            log_prob = np.log(prior)
            for word in words:
                if word in known:
                    word_prob = known[word]
                else:
                    # Out-of-vocabulary word: treated as a zero-count word.
                    word_prob = 1 / denominator
                log_prob += np.log(word_prob)
            scores[cls] = log_prob

        self.a = [np.exp(score) for score in scores.values()]

        return max(scores, key=scores.get)

# Train the text classifier on every sentence/tag pair in the frame.
nb_classifier = NaiveBayesClassifier()
nb_classifier.fit(X=df['Text'], y=df['Tag'])

# Classify the training sentences themselves and keep the result next to the true tags.
df['Predicted'] = df['Text'].apply(nb_classifier.predict)

# Classify one additional sentence and report the outcome.
new_sentence = "A very close game"
predicted_tag = nb_classifier.predict(new_sentence)
print(f"The sentence '{new_sentence}' is classified as: {predicted_tag}")

# `a` holds the exponentiated per-class scores of the most recent predict() call.
print(f"Probabilities: {nb_classifier.a}")


The sentence 'A very close game' is classified as: Sports
Probabilities: [2.7648000000000044e-05, 5.717532455930328e-06]
