In [13]:
import numpy as np
import pandas as pd
import sklearn
import optuna
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split,cross_validate
from sklearn.ensemble import RandomForestClassifier



data = pd.read_csv(r"D:\amareshsirnotes\attendence_data.csv")
data.dropna(inplace=True)
x = data.iloc[:,:-1]
y = data.iloc[:,-1]
x_final = []

# preprocessing
for i in x.values:
    landmarks = i.reshape(478,3)
    centre = landmarks-landmarks[0]
    distance = np.linalg.norm(landmarks[33]-landmarks[263])
    normalize = centre/distance
    x_final.append(normalize.flatten())
    
x_f = pd.DataFrame(x_final)
x_f.dropna(inplace=True)
# train_test_split
x_train,x_test,y_train,y_test = train_test_split(x_final,y,test_size=0.2,stratify=y,random_state=12)

# Label encoding
le = LabelEncoder()
y_train_final = le.fit_transform(y_train)
y_test_final = le.transform(y_test)

# Model 
def objective(trial):
    clf = RandomForestClassifier(
                n_estimators=trial.suggest_int("n_estimators", 50, 300),
                max_depth=trial.suggest_int("max_depth", 5, 30),
                min_samples_split=trial.suggest_int("min_samples_split", 2, 10),
                min_samples_leaf=trial.suggest_int("min_samples_leaf", 1, 5),
            )
    scores = cross_validate(clf, x_train, y_train_final, cv=3, scoring='accuracy')
    return scores['test_score'].mean()

study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=30)

print("Best parameters:", study.best_params)

[I 2025-07-15 10:52:23,108] A new study created in memory with name: no-name-91da8be6-a0f0-429e-b154-d27a9b78bfd0
[I 2025-07-15 10:52:49,742] Trial 0 finished with value: 0.9574212518978341 and parameters: {'n_estimators': 175, 'max_depth': 23, 'min_samples_split': 6, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.9574212518978341.
[I 2025-07-15 10:53:14,841] Trial 1 finished with value: 0.9507821501556247 and parameters: {'n_estimators': 180, 'max_depth': 13, 'min_samples_split': 5, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.9574212518978341.
[I 2025-07-15 10:53:22,055] Trial 2 finished with value: 0.942577192312937 and parameters: {'n_estimators': 55, 'max_depth': 23, 'min_samples_split': 10, 'min_samples_leaf': 5}. Best is trial 0 with value: 0.9574212518978341.
[I 2025-07-15 10:53:50,008] Trial 3 finished with value: 0.951561417525325 and parameters: {'n_estimators': 205, 'max_depth': 26, 'min_samples_split': 9, 'min_samples_leaf': 3}. Best is trial 0 with value: 

Best parameters: {'n_estimators': 66, 'max_depth': 15, 'min_samples_split': 2, 'min_samples_leaf': 1}


In [14]:
model = RandomForestClassifier(**study.best_params,random_state=42)

In [15]:
model.fit(x_train,y_train_final)

In [17]:
pred_ytest = model.predict(x_test)

In [18]:
from sklearn.metrics import accuracy_score

In [19]:
accuracy_score(pred_ytest,y_test_final)

0.9609375