<a href="https://colab.research.google.com/github/Tejaswini120805/Machinelearninglab_050/blob/main/bayesian_theoryml.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, classification_report

# --- Load ------------------------------------------------------------------
# Drop incomplete rows via a chained call instead of `inplace=True`, so
# re-running the cell always starts from the file contents.
df = pd.read_csv("Breast_Cancer.csv").dropna()

target_col = 'Status'
X = df.drop(columns=[target_col])
y = df[target_col]

# --- Encode categorical columns ---------------------------------------------
cat_cols = X.select_dtypes(include=['object']).columns

# Each string column is replaced by integer codes; the fitted encoders are
# kept so the codes can be mapped back with `inverse_transform`.
# NOTE(review): LabelEncoder is documented as a *target* encoder and imposes
# an arbitrary (sorted) order on nominal features, which GaussianNB then
# treats as a continuous value -- consider OrdinalEncoder / one-hot encoding.
label_encoders = {}
for col in cat_cols:
    le = LabelEncoder()
    X[col] = le.fit_transform(X[col])
    label_encoders[col] = le

# Encode the target; LabelEncoder assigns codes in sorted label order.
le_target = LabelEncoder()
y = le_target.fit_transform(y)

# --- Split first, then scale -------------------------------------------------
# The scaler must only see the training rows: fitting it on the full data
# before splitting leaks test-set mean/variance into preprocessing.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# --- Baseline: maximum-posterior Gaussian Naive Bayes ------------------------
model = GaussianNB()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print("=== Classification Report ===")
print(classification_report(y_test, y_pred, target_names=le_target.classes_))

print("=== Confusion Matrix ===")
print(confusion_matrix(y_test, y_pred))

# --- Minimum-expected-cost (Bayes risk) decision -----------------------------
# Because the risk below is computed as `probs @ cost_matrix.T`,
# cost_matrix[d, t] is the cost of DECIDING class d when the TRUE class is t
# (rows = decision, columns = truth -- the transpose of sklearn's
# confusion-matrix layout).  With these values class 1 is chosen only when
# P(class 1 | x) > 5/6.
# NOTE(review): as written, deciding class 1 for a true class 0 costs 5 and
# the opposite error costs 1.  If the costlier mistake is meant to be missing
# a true class 1, swap the off-diagonal entries -- confirm the intent.
cost_matrix = np.array([[0, 1],
                        [5, 0]])

probs = model.predict_proba(X_test)
assert cost_matrix.shape == (probs.shape[1], probs.shape[1]), \
    "cost_matrix must be square with one row/column per class"

# expected_loss[n, d] = sum_t P(t | x_n) * cost_matrix[d, t]
expected_loss = probs @ cost_matrix.T
# argmin yields a column index of predict_proba; map it through classes_ so
# the result is a class label even if labels are not exactly 0..k-1.
bayes_decision = model.classes_[np.argmin(expected_loss, axis=1)]

print("\n=== Bayesian Decision Theory with Cost Matrix ===")
print(classification_report(y_test, bayes_decision, target_names=le_target.classes_))
print("Confusion Matrix:\n", confusion_matrix(y_test, bayes_decision))


=== Classification Report ===
              precision    recall  f1-score   support

       Alive       0.91      0.90      0.91       685
        Dead       0.46      0.49      0.48       120

    accuracy                           0.84       805
   macro avg       0.69      0.70      0.69       805
weighted avg       0.84      0.84      0.84       805

=== Confusion Matrix ===
[[617  68]
 [ 61  59]]

=== Bayesian Decision Theory with Cost Matrix ===
              precision    recall  f1-score   support

       Alive       0.90      0.92      0.91       685
        Dead       0.47      0.42      0.45       120

    accuracy                           0.84       805
   macro avg       0.69      0.67      0.68       805
weighted avg       0.84      0.84      0.84       805

Confusion Matrix:
 [[628  57]
 [ 69  51]]


In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report, confusion_matrix

# Column names for the header-less Spambase file: 48 word frequencies,
# 6 character frequencies, 3 capital-run statistics and the class label.
columns = [
    'word_freq_make', 'word_freq_address', 'word_freq_all', 'word_freq_3d', 'word_freq_our',
    'word_freq_over', 'word_freq_remove', 'word_freq_internet', 'word_freq_order',
    'word_freq_mail', 'word_freq_receive', 'word_freq_will', 'word_freq_people',
    'word_freq_report', 'word_freq_addresses', 'word_freq_free', 'word_freq_business',
    'word_freq_email', 'word_freq_you', 'word_freq_credit', 'word_freq_your',
    'word_freq_font', 'word_freq_000', 'word_freq_money', 'word_freq_hp', 'word_freq_hpl',
    'word_freq_george', 'word_freq_650', 'word_freq_lab', 'word_freq_labs', 'word_freq_telnet',
    'word_freq_857', 'word_freq_data', 'word_freq_415', 'word_freq_85', 'word_freq_technology',
    'word_freq_1999', 'word_freq_parts', 'word_freq_pm', 'word_freq_direct', 'word_freq_cs',
    'word_freq_meeting', 'word_freq_original', 'word_freq_project', 'word_freq_re',
    'word_freq_edu', 'word_freq_table', 'word_freq_conference', 'char_freq_;', 'char_freq_(',
    'char_freq_[', 'char_freq_!', 'char_freq_$', 'char_freq_#', 'capital_run_length_average',
    'capital_run_length_longest', 'capital_run_length_total', 'spam_label'
]

# --- Load ------------------------------------------------------------------
# header=None makes explicit that the first line of the file is data.
df = pd.read_csv("spambase.data", header=None, names=columns)

X = df.drop(columns=['spam_label'])
y = df['spam_label']

# --- Split first, then scale -------------------------------------------------
# Fit the scaler on the training rows only; fitting it on the full data
# before splitting leaks test-set mean/variance into preprocessing.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# --- Baseline: maximum-posterior Gaussian Naive Bayes ------------------------
model = GaussianNB()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print("=== Classification Report ===")
print(classification_report(y_test, y_pred))
print("=== Confusion Matrix ===")
print(confusion_matrix(y_test, y_pred))

# --- Minimum-expected-cost (Bayes risk) decision -----------------------------
# Because the risk below is computed as `probs @ cost_matrix.T`,
# cost_matrix[d, t] is the cost of DECIDING class d when the TRUE class is t
# (rows = decision, columns = truth).  Deciding class 1 for a true class 0
# therefore costs 10, the opposite error costs 1, and class 1 is chosen only
# when P(class 1 | x) > 10/11.
# NOTE(review): presumably label 1 means spam, so this penalises flagging
# legitimate mail -- confirm against the dataset description.
cost_matrix = np.array([[0, 1],
                        [10, 0]])

probs = model.predict_proba(X_test)
assert cost_matrix.shape == (probs.shape[1], probs.shape[1]), \
    "cost_matrix must be square with one row/column per class"

# expected_loss[n, d] = sum_t P(t | x_n) * cost_matrix[d, t]
expected_loss = probs @ cost_matrix.T
# argmin yields a column index of predict_proba; map it through classes_ so
# the result is a class label even if labels are not exactly 0..k-1.
bayes_decision = model.classes_[np.argmin(expected_loss, axis=1)]

print("\n=== Bayesian Decision Theory with Cost Matrix ===")
print(classification_report(y_test, bayes_decision))
print(confusion_matrix(y_test, bayes_decision))


              precision    recall  f1-score   support

           0       0.94      0.74      0.83       531
           1       0.72      0.94      0.82       390

    accuracy                           0.82       921
   macro avg       0.83      0.84      0.82       921
weighted avg       0.85      0.82      0.82       921

[[391 140]
 [ 24 366]]
              precision    recall  f1-score   support

           0       0.94      0.74      0.83       531
           1       0.72      0.94      0.82       390

    accuracy                           0.82       921
   macro avg       0.83      0.84      0.82       921
weighted avg       0.85      0.82      0.82       921

[[392 139]
 [ 24 366]]
