In [None]:
import kagglehub

In [None]:
# Fetch the dataset identified by the handle below through kagglehub. The
# returned value is used later as the directory that contains the CSV file.
DATASET_HANDLE = "cpluzshrijayan/milkquality"
dataset_path = kagglehub.dataset_download(DATASET_HANDLE)

In [None]:
import pandas as pd
import os

# 1. Load and Understand Dataset

In [None]:
# Locate "milknew.csv" inside the downloaded dataset directory and read it
# into a DataFrame.
csv_path = os.path.join(dataset_path, "milknew.csv")
df = pd.read_csv(csv_path)

In [None]:
# Print a preview of the first rows, followed by the summary statistics
# produced by describe().
for summary in (df.head(), df.describe()):
    print(summary)

# 2. Problem Statement

Predict the quality grade of a milk sample (Low, Medium, or High) from its chemical and sensory features.

# 3. Data Visualization

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Bar chart of how many rows carry each value of the 'Grade' column.
ax = sns.countplot(data=df, x='Grade')
ax.set_title("Distribution of Milk Grades")
plt.show()

In [None]:
# Pairwise correlations between columns; numeric_only=True restricts the
# computation to the numeric columns of the frame.
correlations = df.corr(numeric_only=True)
ax = sns.heatmap(correlations, cmap='coolwarm', annot=True)
ax.set_title("Feature Correlation")
plt.show()

# 4. Data Cleaning
Handle missing values by dropping every row that contains at least one.

In [None]:
# Keep only the rows that have no missing value in any column.
df = df.dropna(axis=0, how="any")

# 5. Data Manipulation

In [None]:
from sklearn.preprocessing import LabelEncoder

In [None]:
# Replace the 'Grade' labels with integer codes. LabelEncoder numbers the
# classes in sorted order of the label values. The fitted encoder is kept in
# `label_enc` so the codes can be mapped back to labels later on.
label_enc = LabelEncoder()
label_enc.fit(df['Grade'])
df['Grade'] = label_enc.transform(df['Grade'])

Separate features and target

In [None]:
# Columns used as model inputs (names are kept exactly as selected, including
# the spelling 'Temprature').
# NOTE(review): any other column in the CSV is left out of the feature set;
# confirm that this is intended.
feature_columns = ["pH", "Temprature", "Taste", "Turbidity", "Colour"]
X = df[feature_columns]

In [None]:
# Target: the 'Grade' column of the frame.
y = df.loc[:, "Grade"]

# 6. Preprocessing for Model Building

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
# Standardise every feature column, then append a trailing axis of size 1 so
# the array has the 3-D (samples, features, 1) layout that the Conv1D input
# layer is declared with.
# NOTE(review): the scaler in this cell is fitted on all rows of X.
scaler = StandardScaler()
scaler.fit(X)
n_samples, n_features = X.shape
X_scaled = scaler.transform(X).reshape(n_samples, n_features, 1)

In [None]:
from tensorflow.keras.utils import to_categorical

In [None]:
# One-hot encode the integer grade codes (one column per class), which is the
# target format the categorical cross-entropy loss of the model works with.
y_cat = to_categorical(y.to_numpy())

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Split the *unscaled* features first and fit the scaler on the training rows
# only, so that no statistics of the test rows leak into the inputs the model
# is trained on. Stratifying on the class index keeps the grade proportions
# the same in the training and the test split.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X.to_numpy(),
    y_cat,
    test_size=0.2,
    random_state=42,
    stratify=y_cat.argmax(axis=1),
)

# Rebind `scaler` to the train-only fit so that any later use of it applies
# exactly the transformation the model inputs went through.
scaler = StandardScaler().fit(X_train_raw)

# Add the trailing channel axis of size 1: (samples, features) -> (samples, features, 1).
X_train = scaler.transform(X_train_raw)[:, :, None]
X_test = scaler.transform(X_test_raw)[:, :, None]

# 7. Model Building and Evaluation

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout

In [None]:
from tensorflow.keras.utils import set_random_seed

# Seed the Python, NumPy and TensorFlow generators so that weight
# initialisation, dropout masks and batch shuffling are repeatable across
# "Restart & Run All" (as far as the compute backend is deterministic).
set_random_seed(42)

# Size the output layer from the one-hot targets instead of hard-coding the
# number of grades.
n_classes = y_cat.shape[1]

# Small 1-D CNN: one convolution over the feature axis, pooling, then a dense
# classifier head with dropout; the softmax layer yields one probability per class.
model = Sequential([
    Conv1D(64, kernel_size=2, activation='relu', input_shape=(X_scaled.shape[1], 1)),
    MaxPooling1D(pool_size=2),
    Dropout(0.25),
    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(n_classes, activation='softmax'),
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

In [None]:
# Train for 50 epochs with mini-batches of 16 samples; a fraction of 0.1 of the
# training arrays is held out by Keras for validation. The returned training
# history object is kept in `history`.
fit_options = dict(epochs=50, batch_size=16, validation_split=0.1)
history = model.fit(X_train, y_train, **fit_options)

Evaluation

In [None]:
# evaluate() returns the loss followed by the compiled metrics; only the
# accuracy (second entry) is reported here, as a percentage.
test_metrics = model.evaluate(X_test, y_test)
accuracy = test_metrics[1]
print(f"Test Accuracy: {accuracy * 100:.2f}%")

In [None]:
import numpy as np
from sklearn.metrics import classification_report

In [None]:
# Predicted class = index of the largest softmax probability; the true class
# is recovered the same way from the one-hot test targets.
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_test, axis=1)

# Report per-class metrics under the original grade names rather than bare
# integer codes. Passing `labels` explicitly keeps rows and names aligned even
# if some class never occurs in the test targets or in the predictions.
class_names = [str(name) for name in label_enc.classes_]
print(classification_report(
    y_true,
    y_pred_classes,
    labels=list(range(len(class_names))),
    target_names=class_names,
))

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
# Pin the class order so the matrix always has one row/column per grade, and
# label both axes with the grade names instead of integer codes.
grade_names = [str(name) for name in label_enc.classes_]
conf_matrix = confusion_matrix(
    y_true,
    y_pred_classes,
    labels=list(range(len(grade_names))),
)

fig, ax = plt.subplots()
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=grade_names,
    yticklabels=grade_names,
    ax=ax,
)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion Matrix')
plt.show()

In [None]:
# Show which grade label each of the integer codes 0, 1 and 2 stands for.
class_codes = [0, 1, 2]
label_enc.inverse_transform(class_codes)