In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C, Matern
from sklearn.gaussian_process.kernels import WhiteKernel
from sklearn.metrics import mean_squared_error, accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Read the semicolon-delimited white wine quality table
white_wine_data = pd.read_csv('/content/winequality-white.csv', sep=';')

# Impute missing cells (if there are any) with the mean of their column
if white_wine_data.isna().any().any():
    white_wine_data = white_wine_data.fillna(white_wine_data.mean())

# 'quality' is the target (y); every remaining column is a feature (X)
y_white = white_wine_data['quality']
X_white = white_wine_data.drop(columns='quality')

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X_white,
    y_white,
    test_size=0.2,
    random_state=42,
)

# -------- Gaussian Process Regression Model --------
# Kernel: (signal variance * RBF) + learned observation noise.
# The WhiteKernel term lets the GP estimate a noise level for the quality
# scores instead of interpolating every training label exactly, which is what
# the default alpha=1e-10 amounts to.
# The RBF length-scale bounds are widened because the features are
# standardised below, so length scales are expressed in standard deviations.
kernel = (
    C(1.0, (1e-4, 1e1)) * RBF(1.0, (1e-2, 1e2))
    + WhiteKernel(1.0, (1e-3, 1e1))
)

# Standardise the features inside a pipeline. The raw columns live on very
# different scales, which is the situation the L-BFGS convergence warning
# ("... or scale the data") points at. Keeping the scaler in the pipeline also
# means it is re-fit on each training fold during cross-validation.
# normalize_y centres/scales the target so the GP's zero-mean prior is sensible;
# random_state makes the optimizer restarts reproducible.
gp = make_pipeline(
    StandardScaler(),
    GaussianProcessRegressor(
        kernel=kernel,
        n_restarts_optimizer=10,
        normalize_y=True,
        random_state=42,
    ),
)

# Fit the model to the training data
gp.fit(X_train, y_train)

# Cross-validation to evaluate model performance more robustly
# (cross_val_score clones and refits the whole pipeline for every fold)
cross_val_score_result = cross_val_score(gp, X_white, y_white, cv=3, scoring='neg_mean_squared_error')
mean_cross_val_score = np.mean(cross_val_score_result)
# Scores are negated MSE, so flip the sign for reporting
print(f"Mean Cross-Validation MSE (GPR): {-mean_cross_val_score}")

# Make predictions on the test data
y_pred_gpr = gp.predict(X_test)

# Evaluate the GPR model using Mean Squared Error (MSE)
mse_gpr = mean_squared_error(y_test, y_pred_gpr)
print(f"Mean Squared Error on Test Set (GPR): {mse_gpr}")

# -------- Naive Bayes Model (Bayesian Approach) --------
# Binarize wine quality for classification (0 = low, 1 = high; high means quality >= 6)
y_binarized = (y_white >= 6).astype(int)

# Reuse the existing train/test split so the classifier is scored on rows it
# has never seen; scoring on the training rows reports training accuracy only.
y_train_binarized = y_binarized.loc[y_train.index]
y_test_binarized = y_binarized.loc[y_test.index]

# Normalize the data before applying Naive Bayes.
# The scaler is fit on the training rows only, so no test-set statistics leak
# into the model.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train Naive Bayes model (simplified Bayesian approach)
nb = GaussianNB()
nb.fit(X_train_scaled, y_train_binarized)

# Make predictions with the Naive Bayes model on the held-out rows
y_pred_nb = nb.predict(X_test_scaled)

# Evaluate the Naive Bayes model using held-out accuracy
accuracy_nb = accuracy_score(y_test_binarized, y_pred_nb)
print(f"Accuracy of Naive Bayes Model: {accuracy_nb}")
# 1. Histogram of wine quality distribution
# Use unit-width bins centred on each score (edges at k - 0.5), with one tick
# per score. A fixed bins=10 splits the range into fractional-width bins, which
# leaves empty gaps and off-centre bars when the scores are integers.
# NOTE(review): assumes 'quality' holds integer scores -- confirm against the data.
quality_scores = np.arange(y_white.min(), y_white.max() + 1)
quality_bin_edges = np.arange(y_white.min(), y_white.max() + 2) - 0.5

plt.figure(figsize=(8, 6))
plt.hist(y_white, bins=quality_bin_edges, color='skyblue', edgecolor='black')
plt.xticks(quality_scores)
plt.xlabel('Wine Quality')
plt.ylabel('Frequency')
plt.title('Distribution of Wine Quality')
plt.show()

# 2. Residual Plot for GPR
# Residual = true quality minus GPR prediction, one value per test row
residuals_gpr = y_test - y_pred_gpr

_, ax_residuals = plt.subplots(figsize=(8, 6))
ax_residuals.scatter(y_pred_gpr, residuals_gpr, alpha=0.6)
# Dashed zero line: points on it are predicted exactly
ax_residuals.axhline(y=0, color='red', linestyle='--')
ax_residuals.set_xlabel('Predicted Quality (GPR)')
ax_residuals.set_ylabel('Residuals (True - Predicted)')
ax_residuals.set_title('Residuals: Predicted vs True Values (GPR)')
plt.show()

# 3. True vs Predicted Wine Quality (GPR)
_, ax_true_pred = plt.subplots(figsize=(8, 6))
ax_true_pred.scatter(y_test, y_pred_gpr, alpha=0.6, label="GPR Predictions")

# Diagonal y = x across the observed quality range: the line of perfect prediction
quality_span = [y_test.min(), y_test.max()]
ax_true_pred.plot(quality_span, quality_span, color='red', linestyle='--')

ax_true_pred.set_xlabel('True Quality')
ax_true_pred.set_ylabel('Predicted Quality (GPR)')
ax_true_pred.set_title('True vs Predicted Wine Quality (GPR)')
ax_true_pred.legend()
plt.show()


# 4. Feature Importance Visualization (for Naive Bayes)
# GaussianNB has no coefficients; nb.theta_ stores the per-class mean of each
# feature. The score used here is the magnitude of those class means, averaged
# over the classes, for each feature.
features = X_white.columns
feature_importance = np.mean(np.abs(nb.theta_), axis=0)

# Feature indices ordered from the largest score to the smallest
sorted_idx = np.flip(np.argsort(feature_importance))

_, ax_importance = plt.subplots(figsize=(8, 6))
ax_importance.barh(features[sorted_idx], feature_importance[sorted_idx], color='teal')
ax_importance.set_xlabel('Feature Importance')
ax_importance.set_title('Feature Importance (Naive Bayes)')
plt.show()


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)
ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)
ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)
