In [32]:
import os

import joblib
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [33]:
# Read the three-class milk-quality table and preview its first rows.
data_file = "../data/full_milk_quality_3class.csv"
df = pd.read_csv(data_file)
df.head()

Unnamed: 0,pH,Temperature,Fat_Content,SNF,Titratable_Acidity,Protein_Content,Lactose_Content,TPC,SCC,Quality
0,6.5,34.5,3.5,8.3,0.13,3.1,4.8,5000,20,Moderate
1,6.7,35.2,3.8,8.7,0.14,3.3,5.0,3000,18,Moderate
2,6.3,36.0,3.2,8.1,0.16,2.9,4.6,7000,25,Low
3,6.8,34.8,4.0,9.0,0.12,3.5,5.2,2500,17,High
4,7.0,35.5,3.9,8.9,0.11,3.4,5.1,2000,16,High


In [34]:
# Encode the textual quality grade as an ordinal integer target.
label_map = {'Low': 0, 'Moderate': 1, 'High': 2}
df['Quality_Label'] = df['Quality'].map(label_map)

# Series.map leaves NaN for every value that is not a key of label_map
# (typo, different casing, stray whitespace, blank cell). Stop here and name
# the offending values instead of letting NaN targets reach the split/fit cells.
unmapped = df['Quality_Label'].isna()
if unmapped.any():
    bad_values = sorted(df.loc[unmapped, 'Quality'].astype(str).unique())
    raise ValueError(f"Unrecognised values in 'Quality' column: {bad_values}")

In [35]:
# Feature columns used for training. Listing them explicitly (instead of
# dropping the two target columns) keeps any helper column in the CSV -- for
# example a saved index such as "Unnamed: 0" -- out of the model, fixes the
# column order that prediction inputs must follow, and raises a KeyError
# immediately if an expected column is missing.
feature_columns = [
    'pH',
    'Temperature',
    'Fat_Content',
    'SNF',
    'Titratable_Acidity',
    'Protein_Content',
    'Lactose_Content',
    'TPC',
    'SCC',
]
X = df[feature_columns]
y = df['Quality_Label']

# Hold out 30% of the rows for evaluation; stratify on the target so each
# split keeps the class proportions, and seed the shuffle for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

In [36]:
# Fit a 100-tree random forest on the training split; the seed is fixed so
# re-running the notebook rebuilds the same forest.
forest_params = {'n_estimators': 100, 'random_state': 42}
model = RandomForestClassifier(**forest_params)
model.fit(X_train, y_train)

In [37]:
# Score the fitted model on the held-out split.
# (accuracy_score, classification_report and confusion_matrix are imported in
# the notebook's import cell.)
y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))

# Pass the encoded class ids explicitly so the report rows and the
# confusion-matrix axes always cover all three classes in Low/Moderate/High
# order. Without `labels`, scikit-learn infers the classes from y_test/y_pred;
# if one class is absent from both, the 3-item target_names no longer matches
# and the matrix silently shrinks to 2x2.
target_names = ['Low', 'Moderate', 'High']
class_ids = [0, 1, 2]  # same encoding as label_map: Low=0, Moderate=1, High=2
print(
    "\nClassification Report:\n",
    classification_report(y_test, y_pred, labels=class_ids, target_names=target_names),
)

print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred, labels=class_ids))


Accuracy: 1.0

Classification Report:
               precision    recall  f1-score   support

         Low       1.00      1.00      1.00         1
    Moderate       1.00      1.00      1.00         1
        High       1.00      1.00      1.00         3

    accuracy                           1.00         5
   macro avg       1.00      1.00      1.00         5
weighted avg       1.00      1.00      1.00         5


Confusion Matrix:
 [[1 0 0]
 [0 1 0]
 [0 0 3]]


In [38]:
# Where to write the trained model. The default is the Flask app's model
# folder on the original development machine; set the DAIRYIQ_MODEL_PATH
# environment variable to save elsewhere without editing the notebook.
# `or` (rather than a .get default) also covers the variable being set but empty.
save_path = (
    os.environ.get("DAIRYIQ_MODEL_PATH")
    or "C:/Users/Administrator/dairyiq/ml_model/dairy_model_3class.pkl"
)

# Create the folder if it doesn't exist. A bare file name has an empty
# dirname, and os.makedirs("") raises, so only create when there is a folder part.
save_dir = os.path.dirname(save_path)
if save_dir:
    os.makedirs(save_dir, exist_ok=True)

# Save the model
joblib.dump(model, save_path)

['C:/Users/Administrator/dairyiq/ml_model/dairy_model_3class.pkl']

In [40]:
# Column names for a single prediction input; they must match the feature
# names the model was trained on.
columns = [
    'pH',
    'Temperature',
    'Fat_Content',
    'SNF',
    'Titratable_Acidity',
    'Protein_Content',
    'Lactose_Content',
    'TPC',   # Must match training data
    'SCC'    # Must match training data
]


# One hand-written sample, values listed in the same order as `columns`.
# NOTE(review): Temperature=3.8 and TPC=12 are far outside the values shown in
# the dataset preview (Temperature about 34.5-36.0, TPC 2000-7000) -- confirm
# the sample values are in the intended column order.
sample_df = pd.DataFrame([[6.8, 3.8, 4.2, 8.6, 0.13, 3.6, 4.8, 12, 17]], columns=columns)

pred = model.predict(sample_df)[0]

# Invert the training encoding instead of re-typing it, so the name shown here
# can never drift from label_map.
label_map_rev = {code: name for name, code in label_map.items()}
print("✅ Predicted Label:", label_map_rev[pred])

✅ Predicted Label: High


In [41]:
# Number of rows per encoded quality class (0=Low, 1=Moderate, 2=High).
quality_labels = df['Quality_Label']
quality_labels.value_counts()

Quality_Label
2    8
1    4
0    3
Name: count, dtype: int64

In [42]:
# Also save a copy under ml_model/ relative to the current working directory.
# joblib.dump does not create missing folders, so make sure the directory
# exists first (same guard as the absolute-path save cell).
local_model_path = "ml_model/dairy_model_3class.pkl"
os.makedirs(os.path.dirname(local_model_path), exist_ok=True)
joblib.dump(model, local_model_path)

['ml_model/dairy_model_3class.pkl']