In [None]:

from pathlib import Path

import joblib
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder

In [None]:
# Load the pre-split training and test sets from the working directory.
train_df, test_df = (
    pd.read_csv(csv_name) for csv_name in ("Train_data.csv", "Test_data.csv")
)

# Report (rows, columns) of each split as a quick sanity check.
for caption, frame in (("Train size:", train_df), ("Test size:", test_df)):
    print(caption, frame.shape)



Train size: (3999, 20)
Test size: (1000, 20)


In [37]:
# Build the model inputs: every column except 'label' is a feature, and the
# class labels are mapped to integer ids.

# Index.difference drops 'label' and returns the remaining column names sorted
# (when sortable); the same list is used to select columns from both splits.
features = train_df.columns.difference(['label']).tolist()

X_train = train_df[features].to_numpy()
X_test = test_df[features].to_numpy()

# Fit the encoder on the training labels only, then reuse that exact mapping
# for the test labels (transform raises on a label unseen during fit).
le = LabelEncoder()
y_train = le.fit_transform(train_df['label'])
y_test = le.transform(test_df['label'])

print("Labels:", le.classes_)


Labels: ['Heart' 'Oblong' 'Oval' 'Round' 'Square']


In [38]:
# Display the feature column names (every training column except 'label').
features

['RBot',
 'RTop',
 'RTop_RBot_diff',
 'cheekbone_width',
 'chin_angle',
 'chin_distance',
 'fAR',
 'face_length',
 'face_rectangularity',
 'forehead_distance',
 'forehead_rectangularity',
 'forehead_width',
 'jawline_width',
 'left_cheek_angle',
 'left_cheek_distance',
 'middle_face_rectangularity',
 'right_cheek_angle',
 'right_cheek_distance',
 'top_jaw_distance']

In [39]:
# Number of feature columns selected for the model input matrices.
len(features)

19

In [None]:
# Fit a 500-tree random forest on the training split; the fixed random_state
# keeps the result repeatable across runs. fit() returns the estimator itself.
rf_model = RandomForestClassifier(random_state=42, n_estimators=500).fit(X_train, y_train)

# Predict encoded class ids for the held-out test split.
y_pred_rf = rf_model.predict(X_test)

# Compute the metrics first, then print the summary in one place.
test_accuracy = accuracy_score(y_test, y_pred_rf)
per_class_report = classification_report(y_test, y_pred_rf, target_names=le.classes_)

print("=== Random Forest Classifier ===")
print("Accuracy: {:.2f}".format(test_accuracy))
print("Classification Report:")
print(per_class_report)



=== Random Forest Classifier ===
Accuracy: 0.46
Classification Report:
              precision    recall  f1-score   support

       Heart       0.42      0.49      0.45       200
      Oblong       0.47      0.47      0.47       200
        Oval       0.38      0.30      0.33       200
       Round       0.46      0.36      0.40       200
      Square       0.54      0.67      0.60       200

    accuracy                           0.46      1000
   macro avg       0.45      0.46      0.45      1000
weighted avg       0.45      0.46      0.45      1000



In [None]:
# Persist the fitted model together with its label encoder; the encoder is
# needed later to map predicted integer ids back to class names.
rf_model_filename = "models/rf_model.pkl"
label_encoder_filename = "models/label_encoder.pkl"

# joblib.dump opens the target path directly and does not create missing
# directories, so make sure the output folder exists (no-op if it already does).
for artifact_path in (rf_model_filename, label_encoder_filename):
    Path(artifact_path).parent.mkdir(parents=True, exist_ok=True)

joblib.dump(rf_model, rf_model_filename)
joblib.dump(le, label_encoder_filename)
print(f"Saved Random Forest model as: {rf_model_filename}")

Saved Random Forest model as: models/rf_model.pkl
