In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Load the dataset
df = pd.read_csv("../data/mock_milk_quality.csv")

# Preview the data. A notebook only renders a cell's *final* expression, so a
# bare `df.head()` in the middle of the cell shows nothing; display() (injected
# into the namespace by the IPython kernel) renders it explicitly.
display(df.head())

# Check dataset structure (info() prints its summary itself)
df.info()

# Check for missing values
print("\nMissing values:\n", df.isnull().sum())

# Encode 'Quality' into numeric labels. LabelEncoder sorts the distinct values,
# and each value's integer code is its position in le.classes_.
le = LabelEncoder()
df['Quality_Label'] = le.fit_transform(df['Quality'])

# Print the actual name -> code mapping instead of guessing which class is 0 or 1
print("\nQuality label mapping:", {str(name): code for code, name in enumerate(le.classes_)})

# Preview updated data (last expression, so it is rendered automatically)
df.head()


In [None]:
from sklearn.model_selection import train_test_split

# Feature matrix: everything except the raw target and its encoded counterpart
target_columns = ['Quality', 'Quality_Label']
X = df.drop(columns=target_columns)

# Label vector: the integer-encoded quality
y = df['Quality_Label']

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
split_parts = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts

# Show the (rows, columns) shape of the training and test feature sets
(X_train.shape, X_test.shape)


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Initialize and train the model (fixed seed so the forest is reproducible)
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions on the held-out rows
y_pred = model.predict(X_test)

# y holds LabelEncoder codes, where code i stands for le.classes_[i]. Passing the
# codes and their names explicitly makes the report show the original Quality
# values instead of bare integers, and keeps every class in the output even if
# one happens to be absent from the test split.
label_codes = list(range(len(le.classes_)))
class_names = [str(name) for name in le.classes_]

# Evaluate the model. Headers and bodies are printed separately: print("...\n", obj)
# inserts a separator space that shifts the first line of a multi-line body.
print("Accuracy:", accuracy_score(y_test, y_pred))

print("\nClassification Report:")
print(classification_report(y_test, y_pred, labels=label_codes, target_names=class_names))

# Rows are true classes, columns are predicted classes, both in class_names order
print("\nConfusion Matrix:")
print("Class order (rows = true, columns = predicted):", class_names)
print(confusion_matrix(y_test, y_pred, labels=label_codes))
