In [30]:
import os
import numpy as np
from tqdm import tqdm
import joblib
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
import xgboost as xgb
import cv2
from imgaug import augmenters as iaa

In [4]:
# Path to the dataset
DATASET_PATH = 'Silhouettes of human posture/'
CLASSES = ['bending', 'lying', 'sitting', 'standing']

In [5]:
# --- Data Augmentation ---
aug = iaa.Sequential([
    iaa.Sometimes(0.5, iaa.Affine(rotate=(-10, 10))),  # Rotate images slightly
    iaa.Sometimes(0.5, iaa.Affine(scale=(0.9, 1.1))),   # Scale images slightly
])

In [6]:
# --- Feature Extraction Function ---
def extract_hu_moments(image, apply_augmentation=False):
    """Loads an image, preprocesses it, and extracts Hu Moments."""
    if apply_augmentation:
        image = aug.augment_image(image)
    
    # Resize and threshold
    image = cv2.resize(image, (100, 100))
    _, thresh = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY)
    
    # Calculate moments
    moments = cv2.moments(thresh)
    hu_moments = cv2.HuMoments(moments)
    
    # Log transform
    hu_moments = -1 * np.sign(hu_moments) * np.log10(np.abs(hu_moments) + 1e-7)
    
    return hu_moments.flatten()

In [7]:
# --- Load Data and Extract Features ---
features = []
labels = []

In [8]:
print("Extracting features from the dataset...")
for class_idx, class_name in enumerate(CLASSES):
    class_path = os.path.join(DATASET_PATH, class_name)
    for image_name in tqdm(os.listdir(class_path), desc=f"Processing {class_name}"):
        image_path = os.path.join(class_path, image_name)
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            continue
        
        # Extract original features
        hu_features = extract_hu_moments(image)
        if hu_features is not None:
            features.append(hu_features)
            labels.append(class_idx)
        
        # Extract augmented features (e.g., one augmented version per image)
        hu_features_aug = extract_hu_moments(image, apply_augmentation=True)
        if hu_features_aug is not None:
            features.append(hu_features_aug)
            labels.append(class_idx)

Extracting features from the dataset...


Processing bending: 100%|█████████████████████████████████████████████████████████| 1200/1200 [00:08<00:00, 138.47it/s]
Processing lying: 100%|███████████████████████████████████████████████████████████| 1200/1200 [00:09<00:00, 126.34it/s]
Processing sitting: 100%|█████████████████████████████████████████████████████████| 1200/1200 [00:08<00:00, 136.21it/s]
Processing standing: 100%|████████████████████████████████████████████████████████| 1200/1200 [00:08<00:00, 146.23it/s]


In [9]:
# Convert to numpy arrays
X = np.array(features)
y = np.array(labels)

print(f"\nFeature extraction complete!")
print(f"Data shapes: X={X.shape}, y={y.shape}")


Feature extraction complete!
Data shapes: X=(9600, 7), y=(9600,)


In [10]:
# --- Feature Standardization ---
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [11]:
# Saving the scaler for use in frontend
joblib.dump(scaler, 'scaler.pkl')

['scaler.pkl']

In [12]:
# --- Split Data ---
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [38]:
# --- Hyperparameter Tuning ---
print("Tuning XGBoost hyperparameters...")
param_grid = {
    'learning_rate': [0.01, 0.1, 0.3],
    'max_depth': [3, 5, 7],
    'n_estimators': [100, 200, 300],
    'subsample': [0.8, 1.0],
    'colsample_bytree': [0.8, 1.0]
}

xgb_model = xgb.XGBClassifier(
    objective='multi:softmax',
    num_class=len(CLASSES),
    eval_metric='mlogloss',
    random_state=42
)

grid_search = GridSearchCV(
    estimator=xgb_model,
    param_grid=param_grid,
    cv=5,  # 5-fold cross-validation
    scoring='accuracy',
    n_jobs=1,
    verbose=1
)
# random_search = RandomizedSearchCV(
#     estimator=xgb_model,
#     param_distributions=param_grid,
#     n_iter=10,  # Only 10 random combinations instead of all
#     cv=5,
#     scoring='accuracy',
#     n_jobs=1,
#     verbose=1,
#     random_state=42
# )

Tuning XGBoost hyperparameters...


In [39]:
# random_search.fit(X_train, y_train)

# print("Best parameters:", random_search.best_params_)
# print("Best cross-validation accuracy:", random_search.best_score_ * 100)

grid_search.fit(X_train, y_train)

print("Best parameters:", grid_search.best_params_)
print("Best cross-validation accuracy:", grid_search.best_score_ * 100)

Fitting 5 folds for each of 108 candidates, totalling 540 fits
Best parameters: {'colsample_bytree': 1.0, 'learning_rate': 0.3, 'max_depth': 7, 'n_estimators': 300, 'subsample': 0.8}
Best cross-validation accuracy: 72.87760416666667


In [40]:
# --- Train Final Model with Best Parameters ---
xgb_model = grid_search.best_estimator_
xgb_model.fit(X_train, y_train)

0,1,2
,objective,'multi:softmax'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,1.0
,device,
,early_stopping_rounds,
,enable_categorical,False


In [41]:
# --- Evaluate Model ---
y_pred = xgb_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"\nModel Accuracy: {accuracy * 100:.2f}%")
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=CLASSES))


Model Accuracy: 75.94%

Classification Report:
              precision    recall  f1-score   support

     bending       0.76      0.79      0.77       480
       lying       0.76      0.68      0.72       480
     sitting       0.77      0.78      0.77       480
    standing       0.75      0.79      0.77       480

    accuracy                           0.76      1920
   macro avg       0.76      0.76      0.76      1920
weighted avg       0.76      0.76      0.76      1920



In [42]:
# --- Save Model and Scaler ---
joblib.dump(xgb_model, 'posture_model.pkl')
print("Model saved as 'posture_model.pkl'")

Model saved as 'posture_model.pkl'
