In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report, log_loss

# Load the data
df = pd.read_csv('input/ds3.csv')

# Convert categorical variables to numeric variables
le = LabelEncoder()
categorical_cols = df.select_dtypes(include=['object']).columns
df[categorical_cols] = df[categorical_cols].apply(lambda col: le.fit_transform(col))

# Separate features and target
X = df.drop('Response', axis=1)
y = df['Response']

# Split data into training, validation, and test sets
X_train, X_val_test, y_train, y_val_test = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_val_test, y_val_test, test_size=0.5, random_state=42)

# Initialize and fit the model on the training set
model = GaussianNB()
model.fit(X_train, y_train)

# Make predictions on the training set and calculate loss
y_pred_train = model.predict(X_train)
y_pred_train_proba = model.predict_proba(X_train)
train_accuracy = accuracy_score(y_train, y_pred_train)
train_loss = log_loss(y_train, y_pred_train_proba)

# Make predictions on the validation set and calculate loss
y_pred_val = model.predict(X_val)
y_pred_val_proba = model.predict_proba(X_val)
val_accuracy = accuracy_score(y_val, y_pred_val)
val_loss = log_loss(y_val, y_pred_val_proba)

# Print training and validation loss and accuracy
print("Training Loss: %.2f" % train_loss)
print("Training Accuracy: %.2f%%" % (train_accuracy * 100.0))
print("Validation Loss: %.2f" % val_loss)
print("Validation Accuracy: %.2f%%" % (val_accuracy * 100.0))

# Print precision, recall, and F1 score for the validation set
print(classification_report(y_val, y_pred_val))


Training Loss: 0.38
Training Accuracy: 85.95%
Validation Loss: 0.41
Validation Accuracy: 83.92%
              precision    recall  f1-score   support

           0       0.84      1.00      0.91      1007
           1       0.00      0.00      0.00       193

    accuracy                           0.84      1200
   macro avg       0.42      0.50      0.46      1200
weighted avg       0.70      0.84      0.77      1200



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
