In [1]:
import pandas as pd
import numpy as np
import ast
import joblib
import os

In [2]:
# Sklearn imports
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

In [4]:
# Load the cleaned CSV into `df`, failing fast with a readable message when
# the file is not at the expected location.
DATA_FILE = '/content/cleaned_data.csv'
if os.path.exists(DATA_FILE):
    df = pd.read_csv(DATA_FILE)
else:
    raise FileNotFoundError(f"CSV file '{DATA_FILE}' not found. Please check the path.")

In [5]:
# Each "data" cell is read from the CSV as text; ast.literal_eval turns that
# text back into the Python tuple/list it describes.
def parse_data_column(data_str):
    """Convert one "data" cell into the Python literal it represents.

    Parameters
    ----------
    data_str : str, list or tuple
        Text form of a literal, e.g. "(1, 2, 3)" or "[1, 2, 3]". A value that
        is already a list or tuple is returned unchanged, so applying this
        function to an already-parsed column is harmless.

    Returns
    -------
    object
        The evaluated literal (presumably a tuple/list of numbers; the
        element types are not checked here).

    Raises
    ------
    Exception
        Whatever ast.literal_eval raised for an unparsable value. The
        offending value is printed before the exception propagates.
    """
    # Already parsed (e.g. the conversion was applied twice): nothing to do.
    if isinstance(data_str, (list, tuple)):
        return data_str
    try:
        return ast.literal_eval(data_str)
    except Exception:
        print(f"Error parsing data: {data_str}")
        # Bare raise keeps the original traceback intact.
        raise

In [6]:
# Parse every cell of the "data" column, then write the parsed values back
# over the original text column.
parsed_data = df['data'].apply(parse_data_column)
df['data'] = parsed_data

In [7]:
# Spread each parsed "data" sequence across its own columns (one column per
# position), keep df's row index, and label the columns data_0, data_1, ...
# NOTE(review): presumably every row holds the same number of values - verify.
data_expanded = pd.DataFrame(
    df['data'].tolist(),
    index=df.index,
).rename(columns=lambda i: f"data_{i}")

In [8]:
# Swap the raw "data" column for the expanded data_* columns by placing them
# side by side with the remaining original columns.
df = pd.concat(
    objs=[df.drop(columns='data'), data_expanded],
    axis='columns',
)

In [9]:
# Feature matrix: the four per-row radio columns followed by every expanded
# data_* column, in that order.
base_columns = ['rssi', 'rate', 'noise_floor', 'channel']
feature_columns = [*base_columns, *data_expanded.columns]
X = df[feature_columns]

In [10]:
# Target vector: the "label" column of df.
y = df.loc[:, 'label']

In [11]:
# Map the class labels to integer codes. The fitted encoder is kept so the
# codes can be translated back to the original label names later.
label_encoder = LabelEncoder().fit(y)
y_encoded = label_encoder.transform(y)

In [12]:
# Hold out 20% of the rows for testing. The split is stratified on the encoded
# label and seeded so that it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    stratify=y_encoded,
    test_size=0.2,
    random_state=42,
)

In [14]:
# Standardize the features. The scaler learns its statistics from the training
# split only; the same fitted transform is then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [15]:
# Baseline model: a 100-tree random forest with a fixed seed, fitted on the
# scaled training split. Note that `clf` is rebound to the grid-search winner
# in the hyperparameter-tuning cell.
clf = RandomForestClassifier(random_state=42, n_estimators=100)
clf.fit(X=X_train_scaled, y=y_train)

In [16]:
# Hyperparameter tuning: exhaustive 5-fold cross-validated search over forest
# size, tree depth and split threshold, scored by accuracy on the training
# split. The best refitted estimator replaces `clf`.
param_grid = dict(
    n_estimators=[100, 200],
    max_depth=[None, 10, 20],
    min_samples_split=[2, 5],
)
grid_search = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
)
grid_search.fit(X_train_scaled, y_train)
print("Best parameters from GridSearchCV:", grid_search.best_params_)
clf = grid_search.best_estimator_


Best parameters from GridSearchCV: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 200}


In [17]:
# Evaluate the tuned model on the held-out test split: overall accuracy,
# per-class precision/recall/F1, and the confusion matrix.
# NOTE(review): the recorded run scores 100% on every class - confirm the
# random split does not put near-duplicate rows in both train and test.
y_pred = clf.predict(X_test_scaled)

accuracy = accuracy_score(y_test, y_pred)
class_report = classification_report(
    y_test,
    y_pred,
    target_names=label_encoder.classes_,
)
conf_matrix = confusion_matrix(y_test, y_pred)

print("Test Accuracy: {:.2f}%".format(accuracy * 100))
print("\nClassification Report:")
print(class_report)
print("Confusion Matrix:")
print(conf_matrix)

Test Accuracy: 100.00%

Classification Report:
               precision    recall  f1-score   support

Standing1Pose       1.00      1.00      1.00       466
Standing2Pose       1.00      1.00      1.00       510
   StandingTR       1.00      1.00      1.00       317
        Still       1.00      1.00      1.00       317

     accuracy                           1.00      1610
    macro avg       1.00      1.00      1.00      1610
 weighted avg       1.00      1.00      1.00      1610

Confusion Matrix:
[[466   0   0   0]
 [  0 510   0   0]
 [  0   0 317   0]
 [  0   0   0 317]]


In [18]:
# Persist everything needed to reuse the model: the tuned classifier, the
# fitted scaler, and the label encoder. File names are relative, so the files
# are written to the current working directory.
artifacts = (
    (clf, 'human_presence_model.pkl'),
    (scaler, 'scaler.pkl'),
    (label_encoder, 'label_encoder.pkl'),
)
for fitted_object, file_name in artifacts:
    joblib.dump(fitted_object, file_name)

print("\nModel, scaler, and label encoder have been saved successfully.")


Model, scaler, and label encoder have been saved successfully.
