In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load the dataset from the CSV in the current working directory.
df = pd.read_csv("dataset-rgb.csv")

# Features are the three Norm_* columns; the target is the "Tag" column.
X = df[["Norm_R", "Norm_G", "Norm_B"]]  # or use raw R, G, B
y = df["Tag"]

# Encode the tag strings as integer ids; id i corresponds to le.classes_[i].
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Every class id known to the encoder. Passed explicitly to the metric calls
# below so the report and the confusion matrix always cover all classes, in
# le.classes_ order, even if some class is absent from the test split (without
# this, classification_report raises when target_names and the observed
# classes differ in count, and the confusion matrix silently shrinks).
class_ids = list(range(len(le.classes_)))

# Split into training and testing sets (70% train, 30% test).
# NOTE(review): the split is not stratified; consider stratify=y_encoded if
# per-class proportions should be preserved (this would change the split).
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.3, random_state=42
)

# Train the classifier with a fixed seed so reruns give the same forest.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Predict on the held-out test set.
y_pred = model.predict(X_test)

# Overall accuracy.
# NOTE(review): a perfect held-out score is worth double-checking, e.g. for
# identical rows appearing on both sides of the split - TODO confirm.
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Per-class precision / recall / F1, labelled with the original tag names.
print("\nClassification Report:")
print(
    classification_report(
        y_test, y_pred, labels=class_ids, target_names=le.classes_
    )
)

# Confusion matrix: rows are true classes, columns are predicted classes,
# both ordered as le.classes_.
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred, labels=class_ids))


Accuracy: 1.00

Classification Report:
              precision    recall  f1-score   support

     Control       1.00      1.00      1.00        45
        High       1.00      1.00      1.00        56
         Low       1.00      1.00      1.00        37
      Medium       1.00      1.00      1.00        51
 Medium-High       1.00      1.00      1.00        53
    Very Low       1.00      1.00      1.00        58

    accuracy                           1.00       300
   macro avg       1.00      1.00      1.00       300
weighted avg       1.00      1.00      1.00       300


Confusion Matrix:
[[45  0  0  0  0  0]
 [ 0 56  0  0  0  0]
 [ 0  0 37  0  0  0]
 [ 0  0  0 51  0  0]
 [ 0  0  0  0 53  0]
 [ 0  0  0  0  0 58]]


In [3]:
import joblib
# Persist the fitted classifier and its label encoder next to the notebook.
# Both are needed at inference time: the model outputs integer class ids and
# the encoder maps them back to tag names.
joblib.dump(value=model, filename="color_tag_model.pkl")
# Last expression of the cell, so the list of written file names is echoed.
joblib.dump(value=le, filename="label_encoder.pkl")


['label_encoder.pkl']

In [None]:
# Reload the persisted classifier and label encoder from disk, rebinding the
# `model` and `le` names to the deserialized objects.
# These files are pickle-based: only load artifacts from a trusted source.
model = joblib.load(filename="color_tag_model.pkl")
le = joblib.load(filename="label_encoder.pkl")