In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report

# Wine-quality classification with Gaussian Naive Bayes.
#
# Pipeline: load the red and white wine CSVs, tag each row with its wine
# type, combine them, hold out 20% of the rows for testing, standardize the
# features using statistics from the training rows only, then train and
# evaluate a GaussianNB classifier on the "quality" column.

# Load datasets (both files are semicolon-delimited)
red_wine = pd.read_csv("winequality-red.csv", sep=";")
white_wine = pd.read_csv("winequality-white.csv", sep=";")

# Add a feature to distinguish wine types
red_wine["wine_type"] = 0  # 0 for red wine
white_wine["wine_type"] = 1  # 1 for white wine

# Combine datasets; reset the index so row labels are unique after concat
wine_data = pd.concat([red_wine, white_wine], axis=0).reset_index(drop=True)

# Define features and target
X = wine_data.drop(columns=["quality"])
y = wine_data["quality"]

# Split into training (80%) and testing (20%) sets BEFORE scaling.
# Stratifying on y keeps the class proportions the same in both sets, and the
# fixed random_state makes the split reproducible.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Normalize features.
# The scaler is fitted on the training rows only, so the mean/variance used
# for standardization never see the held-out test rows (fitting on the full
# dataset would leak test-set information into preprocessing). The test rows
# are then transformed with the training statistics.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix transformed with the train-fitted scaler. Kept only so
# the name `X_scaled` remains available to any later code that refers to it.
X_scaled = scaler.transform(X)

# Train Naive Bayes model
model = GaussianNB()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

# Print results
print(f"Accuracy: {accuracy:.4f}")
print("Classification Report:\n", report)


Accuracy: 0.3215
Classification Report:
               precision    recall  f1-score   support

           3       0.09      0.33      0.14         6
           4       0.30      0.19      0.23        43
           5       0.52      0.42      0.46       428
           6       0.45      0.35      0.39       567
           7       0.25      0.12      0.17       216
           8       0.06      0.05      0.05        39
           9       0.00      1.00      0.01         1

    accuracy                           0.32      1300
   macro avg       0.24      0.35      0.21      1300
weighted avg       0.42      0.32      0.36      1300

