In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder

In [2]:
# Load dataset
data = pd.read_csv("activity_recommendation_dataset.csv")

In [3]:
# Display first few rows
print(data.head())


                                User ID  Age  Gender Health Condition  \
0  1cd9a5ee-c54b-4ba1-98bc-0ee55ba0f5cc   56   Other     Heart Issues   
1  a288d062-1d6e-44b0-8bbd-1c146a3f0f4d   69  Female     Heart Issues   
2  416c09c4-7a78-48c7-bf72-adeba9a20188   46  Female     Heart Issues   
3  9cd3f87d-5996-49ed-a3b3-07a0ab60d340   32    Male              NaN   
4  309cff58-caf9-4c52-9929-76675e47f593   60  Female              NaN   

  Activity Level Preference  Temperature (°C)  Humidity (%)  \
0         Active     Indoor         17.540195     88.264820   
1      Sedentary    Outdoor          9.022857     52.431460   
2         Active     Social         -5.652037     70.552206   
3       Moderate       Solo         21.461663     56.251874   
4      Sedentary    Outdoor          2.722871     24.858035   

   Wind Speed (km/h)  Air Quality Index  Crime Rate  Traffic Congestion Index  \
0           3.926254                281   10.092672                 62.305971   
1          17.644643

In [4]:
# Check the column names to debug
print(data.columns.tolist())

# Clean column names (remove extra spaces)
data.columns = data.columns.str.strip()

# Print cleaned column names
print("\nCleaned column names:", data.columns.tolist())

# Display unique recommended activities
print("\nUnique Recommended Activities:", data["Recommended Activity"].unique())

# Initialize and fit target encoder ONCE
target_encoder = LabelEncoder()
data["Recommended Activity Encoded"] = target_encoder.fit_transform(data["Recommended Activity"])

# Display the activity mapping for reference
activity_mapping = dict(zip(target_encoder.classes_, target_encoder.transform(target_encoder.classes_)))
print("\nActivity Name Mapping:", activity_mapping)



['User ID', 'Age', 'Gender', 'Health Condition', 'Activity Level', 'Preference', 'Temperature (°C)', 'Humidity (%)', 'Wind Speed (km/h)', 'Air Quality Index', 'Crime Rate', 'Traffic Congestion Index', 'Community Event', 'Health Advisory', 'Recommended Activity']

Cleaned column names: ['User ID', 'Age', 'Gender', 'Health Condition', 'Activity Level', 'Preference', 'Temperature (°C)', 'Humidity (%)', 'Wind Speed (km/h)', 'Air Quality Index', 'Crime Rate', 'Traffic Congestion Index', 'Community Event', 'Health Advisory', 'Recommended Activity']

Unique Recommended Activities: ['Stay Indoors - Meditation or Gym' 'Home Workouts or Reading' 'Gym'
 'Volunteer' 'Group Workout' 'Community Event' 'Book Club' 'Yoga' 'Hiking'
 'Running' 'Biking']

Activity Name Mapping: {'Biking': 0, 'Book Club': 1, 'Community Event': 2, 'Group Workout': 3, 'Gym': 4, 'Hiking': 5, 'Home Workouts or Reading': 6, 'Running': 7, 'Stay Indoors - Meditation or Gym': 8, 'Volunteer': 9, 'Yoga': 10}


In [5]:
# Encode categorical features
label_encoders = {}
categorical_columns = ["Gender", "Health Condition", "Activity Level", "Preference", "Community Event", "Health Advisory", "Recommended Activity"]

for col in categorical_columns:
    if col in data.columns:
        le = LabelEncoder()
        data[col] = le.fit_transform(data[col])
        label_encoders[col] = le

In [6]:
# Define features and target variable
features = data.drop(columns=["Recommended Activity", "User ID"])  # Dropping 'User ID' as it's not a useful feature
target = data["Recommended Activity"]


In [7]:
# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)


In [8]:
# Train Random Forest Classifier
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

In [9]:
# Make predictions
y_pred = rf_model.predict(X_test)

# Evaluate model (Accuracy)
accuracy = (y_pred == y_test).mean()
print(f"\nModel Accuracy: {accuracy * 100:.2f}%")


Model Accuracy: 100.00%


In [10]:
print("Target Encoder Classes:", target_encoder.classes_)

Target Encoder Classes: ['Biking' 'Book Club' 'Community Event' 'Group Workout' 'Gym' 'Hiking'
 'Home Workouts or Reading' 'Running' 'Stay Indoors - Meditation or Gym'
 'Volunteer' 'Yoga']


In [11]:
# After training the model and fitting the target encoder, predict with inverse_transform

# Example new data for prediction
new_data = pd.DataFrame({
    "Age": [30],
    "Gender": [label_encoders["Gender"].transform(["Male"])[0] if "Male" in label_encoders["Gender"].classes_ else 0],
    "Health Condition": [label_encoders["Health Condition"].transform(["Good"])[0] if "Good" in label_encoders["Health Condition"].classes_ else 0],
    "Activity Level": [label_encoders["Activity Level"].transform(["Moderate"])[0] if "Moderate" in label_encoders["Activity Level"].classes_ else 0],
    "Preference": [label_encoders["Preference"].transform(["Nature"])[0] if "Nature" in label_encoders["Preference"].classes_ else 0],
    "Temperature (°C)": [22],
    "Humidity (%)": [55],
    "Wind Speed (km/h)": [10],
    "Air Quality Index": [50],
    "Crime Rate": [20],
    "Traffic Congestion Index": [35],
    "Community Event": [label_encoders["Community Event"].transform(["Yes"])[0] if "Yes" in label_encoders["Community Event"].classes_ else 0],
    "Health Advisory": [label_encoders["Health Advisory"].transform(["Low Risk"])[0] if "Low Risk" in label_encoders["Health Advisory"].classes_ else 0]
})



In [14]:
# Example new data for prediction
new_data = pd.DataFrame({
    "Age": [30],
    "Gender": [label_encoders["Gender"].transform(["Male"])[0] if "Male" in label_encoders["Gender"].classes_ else 0],
    # ... (rest of your columns)
})

# Ensure all feature columns are present and in correct order
for col in features.columns:
    if col not in new_data.columns:
        new_data[col] = 0  # Add missing columns with default value
new_data = new_data[features.columns]  # Reorder to match training

# Make prediction
predicted_activity_encoded = rf_model.predict(new_data)[0]
predicted_activity_name = target_encoder.inverse_transform([predicted_activity_encoded])[0]

print("\nRecommended Activity:", predicted_activity_name)


Recommended Activity: Book Club
