<a href="https://colab.research.google.com/github/Murugavell47/IBM_EDUNET_GENAI/blob/main/WINE_QUALITY_PREDICTION.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, f1_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
import seaborn as sns
import plotly.graph_objs as go
from plotly.subplots import make_subplots

# Load red wine dataset, skipping the first row for column names
red_wine_data = pd.read_csv('/content/winequality-red.csv', sep=';', header=None, skiprows=1)

# Assign column names for red wine
red_wine_data.columns = ['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
                         'chlorides', 'free sulfur dioxide', 'total sulfur dioxide',
                         'density', 'pH', 'sulphates', 'alcohol', 'quality']

# Preprocessing for red wine
X_red = red_wine_data.drop(columns=['quality'])
y_red = red_wine_data['quality']

# Classify wine quality into categories for red wine
def classify_quality_red(quality):
    if quality <= 4:
        return 'Low'
    elif quality <= 6:
        return 'Medium'
    else:
        return 'High'

y_class_red = y_red.apply(classify_quality_red)

# Split data into training and testing sets for red wine
X_train_red, X_test_red, y_train_red, y_test_red = train_test_split(X_red, y_class_red, test_size=0.2, random_state=42)

# Standardize features for red wine
scaler_red = StandardScaler()
X_train_scaled_red = scaler_red.fit_transform(X_train_red)
X_test_scaled_red = scaler_red.transform(X_test_red)

# Reshape input data for LSTM for red wine
X_train_lstm_red = X_train_scaled_red.reshape((X_train_scaled_red.shape[0], 1, X_train_scaled_red.shape[1]))
X_test_lstm_red = X_test_scaled_red.reshape((X_test_scaled_red.shape[0], 1, X_test_scaled_red.shape[1]))

# Define the LSTM model for red wine
model_red = Sequential([
    LSTM(64, input_shape=(X_train_lstm_red.shape[1], X_train_lstm_red.shape[2])),
    Dense(3, activation='softmax')  # 3 output classes: Low, Medium, High
])

# Compile the model for red wine
model_red.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model for red wine
history_red = model_red.fit(X_train_lstm_red, pd.get_dummies(y_train_red), epochs=50, batch_size=32, verbose=0)

# Evaluate the model for red wine
loss_red, accuracy_red = model_red.evaluate(X_test_lstm_red, pd.get_dummies(y_test_red), verbose=0)
print("Red Wine Model Accuracy:", accuracy_red)

# Make predictions for red wine
predictions_red = model_red.predict(X_test_lstm_red)
predicted_classes_red = np.argmax(predictions_red, axis=1)

# Print classification report for red wine
print("Classification Report for Red Wine:")
print(classification_report(np.argmax(pd.get_dummies(y_test_red).values, axis=1), predicted_classes_red))

# Calculate F1 score for red wine
f1_score_red = f1_score(np.argmax(pd.get_dummies(y_test_red).values, axis=1), predicted_classes_red, average='weighted')
print("F1 Score for Red Wine:", f1_score_red)

# Create confusion matrix for red wine
conf_matrix_red = confusion_matrix(np.argmax(pd.get_dummies(y_test_red).values, axis=1), predicted_classes_red)

# Load white wine dataset, skipping the first row for column names
white_wine_data = pd.read_csv('/content/winequality-white.csv', sep=';', header=None, skiprows=1)

# Assign column names for white wine
white_wine_data.columns = ['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
                           'chlorides', 'free sulfur dioxide', 'total sulfur dioxide',
                           'density', 'pH', 'sulphates', 'alcohol', 'quality']

# Preprocessing for white wine
X_white = white_wine_data.drop(columns=['quality'])
y_white = white_wine_data['quality']

# Classify wine quality into categories for white wine
def classify_quality_white(quality):
    if quality <= 4:
        return 'Low'
    elif quality <= 6:
        return 'Medium'
    else:
        return 'High'

y_class_white = y_white.apply(classify_quality_white)

# Split data into training and testing sets for white wine
X_train_white, X_test_white, y_train_white, y_test_white = train_test_split(X_white, y_class_white, test_size=0.2, random_state=42)

# Standardize features for white wine
scaler_white = StandardScaler()
X_train_scaled_white = scaler_white.fit_transform(X_train_white)
X_test_scaled_white = scaler_white.transform(X_test_white)

# Reshape input data for LSTM for white wine
X_train_lstm_white = X_train_scaled_white.reshape((X_train_scaled_white.shape[0], 1, X_train_scaled_white.shape[1]))
X_test_lstm_white = X_test_scaled_white.reshape((X_test_scaled_white.shape[0], 1, X_test_scaled_white.shape[1]))

# Define the LSTM model for white wine
model_white = Sequential([
    LSTM(64, input_shape=(X_train_lstm_white.shape[1], X_train_lstm_white.shape[2])),
    Dense(3, activation='softmax')  # 3 output classes: Low, Medium, High
])

# Compile the model for white wine
model_white.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model for white wine
history_white = model_white.fit(X_train_lstm_white, pd.get_dummies(y_train_white), epochs=50, batch_size=32, verbose=0)

# Evaluate the model for white wine
loss_white, accuracy_white = model_white.evaluate(X_test_lstm_white, pd.get_dummies(y_test_white), verbose=0)
print("White Wine Model Accuracy:", accuracy_white)

# Make predictions for white wine
predictions_white = model_white.predict(X_test_lstm_white)
predicted_classes_white = np.argmax(predictions_white, axis=1)

# Print classification report for white wine
print("Classification Report for White Wine:")
print(classification_report(np.argmax(pd.get_dummies(y_test_white).values, axis=1), predicted_classes_white))

# Calculate F1 score for white wine
f1_score_white = f1_score(np.argmax(pd.get_dummies(y_test_white).values, axis=1), predicted_classes_white, average='weighted')
print("F1 Score for White Wine:", f1_score_white)

# Create confusion matrix for white wine
conf_matrix_white = confusion_matrix(np.argmax(pd.get_dummies(y_test_white).values, axis=1), predicted_classes_white)

# Plot confusion matrices for red and white wine using Plotly
fig = make_subplots(rows=1, cols=2, subplot_titles=('Confusion Matrix for Red Wine', 'Confusion Matrix for White Wine'))

# Confusion Matrix for Red Wine
heatmap_red = go.Heatmap(z=conf_matrix_red,
                         x=['Low', 'Medium', 'High'],
                         y=['Low', 'Medium', 'High'],
                         colorscale='Blues',
                         showscale=True)
fig.add_trace(heatmap_red, row=1, col=1)

# Confusion Matrix for White Wine
heatmap_white = go.Heatmap(z=conf_matrix_white,
                           x=['Low', 'Medium', 'High'],
                           y=['Low', 'Medium', 'High'],
                           colorscale='Blues',
                           showscale=True)
fig.add_trace(heatmap_white, row=1, col=2)

fig.update_layout(title_text='Confusion Matrix Comparison',
                  height=600,
                  width=1000,
                  font=dict(size=14),
                  annotations=[dict(x=0.5, y=-0.15, showarrow=False, text="Predicted", xref="paper", yref="paper", font=dict(size=16)),
                               dict(x=-0.15, y=0.5, showarrow=False, text="Actual", textangle=-90, xref="paper", yref="paper", font=dict(size=16))])

fig.show()

# Model comparison visualization using Plotly
model_comparison_fig = go.Figure(data=[go.Bar(x=['Red Wine', 'White Wine'], y=[accuracy_red, accuracy_white], marker_color=['red', 'blue'])])
model_comparison_fig.update_layout(title_text='Model Accuracy Comparison',
                                   xaxis_title='Wine Type',
                                   yaxis_title='Accuracy',
                                   font=dict(size=14))
model_comparison_fig.show()

# F1 score comparison visualization using Plotly
f1_score_fig = go.Figure(data=[go.Bar(x=['Red Wine', 'White Wine'], y=[f1_score_red, f1_score_white], marker_color=['red', 'blue'])])
f1_score_fig.update_layout(title_text='F1 Score Comparison',
                           xaxis_title='Wine Type',
                           yaxis_title='F1 Score',
                           font=dict(size=14))
f1_score_fig.show()


Red Wine Model Accuracy: 0.8374999761581421
Classification Report for Red Wine:
              precision    recall  f1-score   support

           0       0.64      0.34      0.44        47
           1       0.00      0.00      0.00        11
           2       0.86      0.96      0.91       262

    accuracy                           0.84       320
   macro avg       0.50      0.43      0.45       320
weighted avg       0.80      0.84      0.81       320

F1 Score for Red Wine: 0.8074540367705836
White Wine Model Accuracy: 0.7877551317214966
Classification Report for White Wine:
              precision    recall  f1-score   support

           0       0.66      0.42      0.51       227
           1       0.55      0.20      0.29        30
           2       0.81      0.93      0.87       723

    accuracy                           0.79       980
   macro avg       0.67      0.52      0.56       980
weighted avg       0.77      0.79      0.77       980

F1 Score for White Wine: 0.76707

In [6]:
import plotly.graph_objs as go
from plotly.subplots import make_subplots

# Train the model for red wine
history_red = model_red.fit(X_train_lstm_red, pd.get_dummies(y_train_red), epochs=50, batch_size=32, verbose=1)

# Train the model for white wine
history_white = model_white.fit(X_train_lstm_white, pd.get_dummies(y_train_white), epochs=50, batch_size=32, verbose=1)

# Create a new figure for the interactive plot
fig = make_subplots(rows=1, cols=1)

# Add training loss trace for red wine
fig.add_trace(go.Scatter(x=list(range(1, len(history_red.history['loss']) + 1)),
                         y=history_red.history['loss'],
                         mode='lines',
                         name='Red Wine'),
              row=1, col=1)

# Add training loss trace for white wine
fig.add_trace(go.Scatter(x=list(range(1, len(history_white.history['loss']) + 1)),
                         y=history_white.history['loss'],
                         mode='lines',
                         name='White Wine'),
              row=1, col=1)

# Update layout
fig.update_layout(title='Training Loss Comparison for Red and White Wine Models',
                  xaxis_title='Epoch',
                  yaxis_title='Loss')

# Show the interactive plot
fig.show()


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/5

In [4]:
# Create F1 score comparison visualization with best F1 score line
f1_score_fig = go.Figure()

# Add F1 scores for our model
f1_score_fig.add_trace(go.Scatter(x=['Red Wine', 'White Wine'],
                                  y=[f1_score_red, f1_score_white],
                                  mode='markers+text',
                                  marker=dict(color=['red', 'blue'], size=10),
                                  text=[f"F1 Score: {f1_score_red:.2f}", f"F1 Score: {f1_score_white:.2f}"],
                                  textposition='top center',
                                  name='Our Model'))

# Add best F1 score line
f1_score_fig.add_shape(type="line",
                        x0=-0.5,
                        y0=best_f1_score,
                        x1=1.5,
                        y1=best_f1_score,
                        line=dict(color="green", width=3, dash="dash"),
                        name="Best F1 Score")

f1_score_fig.update_layout(title_text='F1 Score Comparison with Best F1 Score Line',
                           xaxis_title='Wine Type',
                           yaxis_title='F1 Score',
                           font=dict(size=14),
                           legend=dict(x=0.01, y=0.99))

f1_score_fig.show()
