<a href="https://colab.research.google.com/github/PRUTU29/ML_LAB/blob/main/BANANA/jupyter.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import numpy as np
import seaborn as sb
import pandas as pd
from sklearn.utils import resample
# STEP 1: DATA LOADING & DISCRETE CONVERSION
# Fetch the "tips" dataset through seaborn.
tips = sb.load_dataset('tips')

# Encode each attribute as a 0/1 (Bernoulli) indicator column; 'target'
# marks smokers and is the label the classifier learns to predict.
df = pd.DataFrame({
    'dinner': (tips['time'] == 'Dinner').astype(int),
    'weekend': tips['day'].isin(['Sat', 'Sun']).astype(int),
    'no_of_ppl': (tips['size'] > 2).astype(int),
    'is_female': (tips['sex'] == 'Female').astype(int),
    'target': (tips['smoker'] == 'Yes').astype(int),
})

# STEP 2: HANDLING IMBALANCE (PARTIAL OVERSAMPLING)
df_majority = df.loc[df['target'] == 0]  # Non-smokers
df_minority = df.loc[df['target'] == 1]  # Smokers

# Draw smokers with replacement until there are 130 of them; the fixed
# random_state keeps the draw reproducible.
df_minority_upsampled = resample(df_minority, replace=True, n_samples=130, random_state=6)

# Stack the untouched majority rows on top of the resampled minority rows.
df_balanced = pd.concat([df_majority, df_minority_upsampled])

# Split the balanced frame into a feature matrix and a label vector.
feature_frame = df_balanced.drop(columns='target')
X = feature_frame.values
y = df_balanced['target'].values
feature_names = feature_frame.columns.tolist()

print("---- Training Data Info ----")
print(f"Non-Smokers: {df_majority.shape[0]}")
print(f"Smokers: {df_minority_upsampled.shape[0]} (Upsampled)")
print(f"Total: {df_balanced.shape[0]}")
print(f"Features: {feature_names} (0/1)")
print("-" * 30)
print()


# STEP 3: PARAMETER ESTIMATION (Bernoulli Naive Bayes)


def estimate_bernoulli_params(X, y, factor=1.0):
    """
    Estimate Bernoulli Naive Bayes parameters with additive smoothing.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Binary (0/1) feature matrix. Lists and other array-likes are
        converted with ``np.asarray``.
    y : array-like of shape (n_samples,)
        Class label for each row of ``X``.
    factor : float, default=1.0
        Additive smoothing constant. ``1.0`` is Laplace smoothing; ``0``
        gives the unsmoothed maximum-likelihood estimate, which can yield
        probabilities of exactly 0 or 1.

    Returns
    -------
    class_priors : dict
        Maps each class label to its relative frequency in ``y``.
    feature_probs : dict
        Maps each class label to an array holding the smoothed estimate of
        P(feature = 1 | class) for every feature.
    """
    # Coerce once so that plain Python lists work as well as ndarrays.
    X = np.asarray(X)
    y = np.asarray(y)
    n_samples = X.shape[0]

    class_priors = {}
    feature_probs = {}

    for c in np.unique(y):
        X_class = X[y == c]
        class_count = X_class.shape[0]

        # Prior P(Y=c): fraction of the samples that belong to class c.
        class_priors[c] = class_count / n_samples

        # Number of rows of class c in which each feature equals 1.
        feature_counts = np.sum(X_class, axis=0)

        # A binary feature has two outcomes, hence 2 * factor in the
        # denominator to keep the smoothed estimate a valid probability.
        feature_probs[c] = (feature_counts + factor) / (class_count + 2 * factor)

    return class_priors, feature_probs


# Train model: estimate the class priors and the per-class feature
# probabilities from the balanced data, with a smoothing factor of 1.
priors, feature_probs = estimate_bernoulli_params(X, y, factor=1)

# STEP 4: PREDICTION FUNCTION

def predict_bernoulli(X_new, priors, feature_probs):
    """
    Classify one binary sample with the log-linear discriminant
    g(x) = log(Prior) + Sum(log(Likelihood)).

    Parameters
    ----------
    X_new : array-like
        The 0/1 feature values of a single sample, either 1-D or of shape
        (1, n_features). Every entry contributes to one score per class,
        so passing several rows does NOT give per-row predictions.
    priors : dict
        Maps each class label to its prior probability.
    feature_probs : dict
        Maps each class label to an array-like of P(feature = 1 | class).

    Returns
    -------
    The class label with the highest score. On a tie the smallest label
    wins, because classes are scored in sorted order.
    """
    eps = 1e-9  # Prevent log(0)

    # Coerce once so that plain Python lists work as well as ndarrays.
    X_new = np.asarray(X_new)
    class_scores = {}

    for c in sorted(priors):
        theta = np.asarray(feature_probs[c])

        # Bernoulli log-likelihood: log(theta) where the feature is 1,
        # log(1 - theta) where it is 0.
        log_likelihood = (
            X_new * np.log(theta + eps) +
            (1 - X_new) * np.log(1 - theta + eps)
        )

        class_scores[c] = np.log(priors[c]) + np.sum(log_likelihood)

    return max(class_scores, key=class_scores.get)


# RANDOM TEST DATA GENERATOR
def test_random():
    """
    Draw one random 0/1 feature vector, classify it, and print the outcome.
    """
    sample = np.random.randint(0, 2, size=(1, 4))
    predicted = predict_bernoulli(sample, priors, feature_probs)

    # For each feature, in order: (label for bit 0, label for bit 1).
    bit_labels = (
        ("Lunch", "Dinner"),
        ("Weekday", "Weekend"),
        ("Small (<=2)", "Large (>2)"),
        ("Male", "Female"),
    )
    time_str, day_str, size_str, gender_str = (
        names[bit] for names, bit in zip(bit_labels, sample[0])
    )

    if predicted == 1:
        result_str = "Smoker (1)"
    else:
        result_str = "Non-Smoker (0)"

    # The trailing empty string reproduces the blank separator line.
    report = (
        "---- Random Test Case ----",
        f"Case: [{time_str}, {day_str}, {size_str}, {gender_str}]",
        f"Model Predicts: {result_str}",
        "",
    )
    for line in report:
        print(line)
# RUN TESTS
# Seed NumPy's global random generator first so that the five random test
# cases below are the same on every run.
np.random.seed(50)
for _ in range(5):
    test_random()

---- Training Data Info ----
Non-Smokers: 151
Smokers: 130 (Upsampled)
Total: 281
Features: ['dinner', 'weekend', 'no_of_ppl', 'is_female'] (0/1)
------------------------------

---- Random Test Case ----
Case: [Lunch, Weekday, Large (>2), Female]
Model Predicts: Non-Smoker (0)

---- Random Test Case ----
Case: [Dinner, Weekday, Small (<=2), Male]
Model Predicts: Non-Smoker (0)

---- Random Test Case ----
Case: [Dinner, Weekday, Small (<=2), Female]
Model Predicts: Smoker (1)

---- Random Test Case ----
Case: [Dinner, Weekday, Large (>2), Male]
Model Predicts: Non-Smoker (0)

---- Random Test Case ----
Case: [Dinner, Weekend, Small (<=2), Male]
Model Predicts: Smoker (1)

