In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error, r2_score

# Step 1: Load the Dataset
data = pd.read_csv('calendar_activity_dataset_enhanced.csv')

data.head()

Unnamed: 0,Gender,Activity begin time,Activity Name,Physical Intensity,Mental Intensity,Health Score,Activity end time,total time
0,M,06:51,Running,High,Low,63,07:51,1.0
1,F,23:02,Reading,Low,High,49,00:32,1.5
2,M,11:57,Meditation,Low,High,52,14:03,2.1
3,M,09:27,Cooking,Medium,Medium,55,10:33,1.1
4,M,02:36,Work Meeting,Medium,High,53,04:12,1.6


In [5]:
# Step 2: Preprocess the Data
# Convert Activity Begin and End Times into minutes since midnight
data['Activity begin time'] = pd.to_datetime(data['Activity begin time'], format='%H:%M').dt.hour * 60 + pd.to_datetime(data['Activity begin time'], format='%H:%M').dt.minute
data['Activity end time'] = pd.to_datetime(data['Activity end time'], format='%H:%M').dt.hour * 60 + pd.to_datetime(data['Activity end time'], format='%H:%M').dt.minute

# Create a new column for total duration in minutes
data['Total Duration'] = data['Activity end time'] - data['Activity begin time']

# Label Encoding for categorical variables
label_encoder_activity = LabelEncoder()
data['Activity Name'] = label_encoder_activity.fit_transform(data['Activity Name'])

# Define Inputs and Outputs
features = data[['Activity begin time', 'Activity end time', 'Physical Intensity', 'Mental Intensity']]
target = data['Health Score']

# Encode categorical intensity values
features['Physical Intensity'] = features['Physical Intensity'].map({'Low': 1, 'Medium': 2, 'High': 3})
features['Mental Intensity'] = features['Mental Intensity'].map({'Low': 1, 'Medium': 2, 'High': 3})

# Step 3: Split the Data
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

# Train the Decision Tree Model for Regression
regressor = DecisionTreeRegressor()
regressor.fit(X_train, y_train)

# Step 4: Make Predictions
y_pred = regressor.predict(X_test)

# Evaluate the Model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print('Mean Squared Error:', mse)
print('R-squared:', r2)

Mean Squared Error: 7.18
R-squared: 0.7616737934943252


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  features['Physical Intensity'] = features['Physical Intensity'].map({'Low': 1, 'Medium': 2, 'High': 3})
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  features['Mental Intensity'] = features['Mental Intensity'].map({'Low': 1, 'Medium': 2, 'High': 3})


In [6]:
import pickle

# Save the model to a file
with open('decision_tree_model.pkl', 'wb') as file:
    pickle.dump(regressor, file)