In [27]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [28]:
# Load cleaned data
df = pd.read_csv("cleaned_pet_activity_data.csv")
df.head()


Unnamed: 0,timestamp,steps_cleaned,heart_rate_cleaned,hour,day_of_week,activity_type_cleaned,activity_label
0,2023-01-01 00:00:00,1.0,100.2,0,6,light_activity,1
1,2023-01-01 00:01:00,2.0,100.2,0,6,light_activity,1
2,2023-01-01 00:02:00,1.0,100.2,0,6,light_activity,1
3,2023-01-01 00:03:00,1.0,100.2,0,6,light_activity,1
4,2023-01-01 00:04:00,1.0,100.2,0,6,resting,2


In [29]:
# Separate features and target
x = df.drop(["activity_type_cleaned", "timestamp","activity_label"], axis=1)
y = df["activity_label"]

In [30]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=42)

In [40]:
x_train.sample()
x_test.sample()

Unnamed: 0,steps_cleaned,heart_rate_cleaned,hour,day_of_week
6463,1.0,100.2,11,3


In [41]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

In [31]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

In [42]:
# Initialize and train
model_rf = RandomForestClassifier(n_estimators=200, max_depth=10, random_state=42)
model_rf.fit(x_train_scaled, y_train)

# Predict
y_pred_rf = model_rf.predict(x_test_scaled)

In [43]:
print(classification_report(y_test, y_pred_rf))

              precision    recall  f1-score   support

           0       0.96      0.29      0.45        78
           1       0.71      0.88      0.79      1232
           2       0.65      0.44      0.52       706

    accuracy                           0.70      2016
   macro avg       0.77      0.54      0.59      2016
weighted avg       0.70      0.70      0.68      2016



In [44]:
#Hyperparameter tuning

from sklearn.model_selection import GridSearchCV

# Define parameter grid
param_grid = {
    "n_estimators": [100, 200, 300],
    "max_depth": [5, 10, 15],
    "min_samples_split": [2, 5]
}

# Search
grid_rf = GridSearchCV(RandomForestClassifier(random_state=42), param_grid, cv=3)
grid_rf.fit(x_train_scaled, y_train)

# Best parameters
print("Best params:", grid_rf.best_params_)

Best params: {'max_depth': 5, 'min_samples_split': 2, 'n_estimators': 100}


In [45]:
#So now I will train random forest by giving their best params

params = {
    "n_estimators": [100],
    "max_depth": [5],
    "min_samples_split": [2]
}

model_rf_params = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42 ,min_samples_split=2)
model_rf_params.fit(x_train_scaled, y_train)

# Predict
y_pred_rf_param = model_rf.predict(x_test_scaled)

In [46]:
print(classification_report(y_test, y_pred_rf_param))

              precision    recall  f1-score   support

           0       0.96      0.29      0.45        78
           1       0.71      0.88      0.79      1232
           2       0.65      0.44      0.52       706

    accuracy                           0.70      2016
   macro avg       0.77      0.54      0.59      2016
weighted avg       0.70      0.70      0.68      2016



In [47]:
from sklearn.metrics import confusion_matrix, f1_score, recall_score, precision_score

# Random Forest
print("Random Forest Metrics:")
print("F1-Score:", f1_score(y_test, y_pred_rf_param, average="weighted"))
print("Recall:", recall_score(y_test, y_pred_rf_param, average="weighted"))
print("Precision:", precision_score(y_test, y_pred_rf_param, average="weighted"))


Random Forest Metrics:
F1-Score: 0.6798392943585114
Recall: 0.6994047619047619
Precision: 0.6988207700984498


In [38]:
#Saving the model
import pickle
with open("model.pkl", "wb") as f:
    pickle.dump(model_rf_params, f)

In [48]:
with open("scaler.pkl","wb") as file:
    pickle.dump(scaler,file)
