In [None]:
import pandas as pd
import numpy as np
import re
import os

from google.colab import files

# Prompt for a file upload through the Colab widget. The next cell reads
# 'combined_df.xlsx' from the working directory, so that is the file to pick.
uploaded = files.upload()

Saving combined_df.xlsx to combined_df.xlsx


In [None]:
# Load the 'Sheet1' worksheet of the uploaded workbook into a DataFrame.
combined_df = pd.read_excel('combined_df.xlsx', sheet_name='Sheet1')

# Print all column names so the exact spellings used in later cells can be checked.
print(combined_df.columns.tolist())

['Date', 'Time of Meal', 'Type of Meal', 'Main Dish', 'Side Dish(es)', 'Beverage', 'Calories Consumed (kcal)_meal', 'Healthiness Rating_meal', 'Satisfaction Level', 'Mood Before Meal', 'Mood After Meal', 'Location of Meal', 'Reason for Meal Choice', 'Company During Meal', 'Time Woken Up', 'First Activity', 'Duration of First Activity (min)', 'Time Started Breakfast', 'Type of Breakfast', 'Calories Consumed (kcal)_morning', 'Healthiness Rating_morning', 'Exercise Performed', 'Type of Exercise', 'Duration of Exercise (min)', 'Time Started Work/Study', 'Mood After Morning Routine', 'Weather', 'Time Spent Preparing (min)', 'Time Left Home', 'Mode of Transportation', 'Departure Time (am)', 'Arrival Time (am)', 'Duration (min)', 'Distance Covered (KM)', 'Route Taken', 'Traffic Condition', 'Weather_commute', 'Delays Encountered', 'Stress Level During Commute', 'Company During Commute', 'Reason for Commute', 'Commute Start Location', 'Commute End Location', 'Start Time', 'End Time', 'Duration 

In [None]:
# Columns the model relies on (four features plus the target); confirm none has gaps.
columns_to_check = [
    'Time Woken Up',            # wake-up time (exact column name required)
    'Healthiness Rating_meal',  # healthiness rating of the meal
    'Exercise Performed',       # exercise performed during the morning routine
    'Traffic Condition',        # traffic condition during the commute
    'Productivity Level',       # productivity level, the target variable
]

# Count the nulls in each selected column and print them under a heading.
missing_values = combined_df[columns_to_check].isna().sum()
print("Missing values in relevant columns:", missing_values, sep="\n")


Missing values in relevant columns:
Time Woken Up              0
Healthiness Rating_meal    0
Exercise Performed         0
Traffic Condition          0
Productivity Level         0
dtype: int64


In [None]:
import pandas as pd


# --- Step 1: Handle 'Time Woken Up' properly ---
# Parse 'Time Woken Up' as HH:MM:SS so it can be compared with a reference time.
# errors='coerce' turns values that do not match the format into NaT instead of raising.
# NOTE(review): the raw column is overwritten in place; re-running this cell after the
# categorisation further down would feed category labels back into the parser.
combined_df['Time Woken Up'] = pd.to_datetime(combined_df['Time Woken Up'], format='%H:%M:%S', errors='coerce')

# Reference point for the comparison: 7:00 AM, parsed with the same format as the column.
seven_am = pd.to_datetime('07:00:00', format='%H:%M:%S')


def categorize_time_of_day(time):
    """Label a wake-up time by where it falls relative to 7:00 AM.

    Args:
        time: A parsed timestamp comparable with ``seven_am``, or a missing value.

    Returns:
        'Unknown' for a missing value, otherwise one of 'Before 7:00 AM',
        'Exactly 7:00 AM' or 'After 7:00 AM'.
    """
    # Missing values are handled first because they cannot be ordered.
    if pd.isna(time):
        return 'Unknown'
    if time < seven_am:
        return 'Before 7:00 AM'
    if time == seven_am:
        return 'Exactly 7:00 AM'
    return 'After 7:00 AM'

# Replace every parsed wake-up time with its category label.
combined_df['Time Woken Up'] = combined_df['Time Woken Up'].apply(categorize_time_of_day)

# --- Step 2: Standardize Exercise Performed to 'Yes'/'No' ---
# 'yes', 'yoga' and 'jogging' (any letter case) count as exercise; everything else is 'No'.
combined_df['Exercise Performed'] = combined_df['Exercise Performed'].apply(
    lambda entry: 'No' if entry.lower() not in ('yes', 'yoga', 'jogging') else 'Yes'
)

# Show the raw traffic categories, followed by a separator line.
print(
    "Unique Values in 'Traffic Condition':",
    combined_df['Traffic Condition'].unique(),
    "-" * 40,
    sep="\n",
)

# --- Step 4: Normalize 'Healthiness Rating_meal' labels ---
# Canonical label for each spelling, keyed by the trimmed, lower-cased text.
# The lookup below runs on text that has already been stripped and lower-cased,
# so whitespace-padded or mixed-case variants need no entries of their own.
healthiness_mapping = {
    'healthy': 'Healthy',
    'moderately healthy': 'Moderately Healthy',
    'not healthy': 'Unhealthy',
    'unhealthy': 'Unhealthy',
    'moderate': 'Unhealthy',  # "Moderate" should be considered "Unhealthy"
    'unknown': 'Unknown',     # placeholder for missing ratings, restored to title case
}

combined_df['Healthiness Rating_meal'] = (
    combined_df['Healthiness Rating_meal']
    .fillna('Unknown')  # Missing ratings become an explicit category
    .astype(str)        # Convert everything to string type
    .str.strip()        # Strip any leading/trailing whitespace
    .str.lower()        # Convert to lowercase
    .replace(healthiness_mapping)  # Exact-value replacement with the canonical label
)

# Canonical capitalisation of the target labels, keyed by their lower-case form.
productivity_label_mapping = {label.lower(): label for label in ('Low', 'Medium', 'High')}

# Lower-case the raw target text, then substitute the canonical labels.
combined_df['Productivity Level'] = (
    combined_df['Productivity Level']
    .str.lower()
    .replace(productivity_label_mapping)
)

# --- Step 5: Check unique values and count again ---
# Each entry pairs the heading printed verbatim with the column it describes.
for heading, column in (
    ("Unique Values in 'Healthiness Rating_meal':", 'Healthiness Rating_meal'),
    ("Unique Values in 'Time Woken Up':", 'Time Woken Up'),
    ("Unique Values in 'Exercise Performed':", 'Exercise Performed'),
    ("Unique Values in Traffic Condition:", 'Traffic Condition'),
):
    print(heading)
    print(combined_df[column].unique())
    print("-" * 40)

# --- Step 6: Print the distribution of features ---
print("Distribution of features and target:")
for feature in ['Time Woken Up', 'Exercise Performed', 'Traffic Condition', 'Healthiness Rating_meal']:
    # Heading, per-category counts and a separator, one per line.
    print(
        f"Distribution of {feature}:",
        combined_df[feature].value_counts(),
        "-" * 40,
        sep="\n",
    )

# The target gets its own heading and no trailing separator.
print("Distribution of target (Productivity Level):")
print(combined_df['Productivity Level'].value_counts())


Unique Values in 'Traffic Condition':
['Moderate' 'Heavy' 'Light']
----------------------------------------
Unique Values in 'Healthiness Rating_meal':
['Healthy' 'Moderately Healthy' 'Unhealthy']
----------------------------------------
Unique Values in 'Time Woken Up':
['After 7:00 AM' 'Before 7:00 AM' 'Exactly 7:00 AM']
----------------------------------------
Unique Values in 'Exercise Performed':
['Yes' 'No']
----------------------------------------
Unique Values in Traffic Condition:
['Moderate' 'Heavy' 'Light']
----------------------------------------
Distribution of features and target:
Distribution of Time Woken Up:
Time Woken Up
Before 7:00 AM     768
After 7:00 AM      311
Exactly 7:00 AM     32
Name: count, dtype: int64
----------------------------------------
Distribution of Exercise Performed:
Exercise Performed
No     660
Yes    451
Name: count, dtype: int64
----------------------------------------
Distribution of Traffic Condition:
Traffic Condition
Moderate    711
Ligh

In [None]:
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import export_text
import joblib

# Encode the target with its own encoder so its code -> label mapping is kept.
label_encoder = LabelEncoder()
combined_df['Productivity Level'] = label_encoder.fit_transform(combined_df['Productivity Level'])

# Encode categorical features, one dedicated encoder per column. Refitting a
# single shared encoder would discard the classes learned for earlier columns.
# NOTE: the columns are overwritten in place, so run this cell once per freshly
# loaded DataFrame; a second run would fit the encoders on integer codes.
categorical_columns = ['Time Woken Up', 'Exercise Performed', 'Traffic Condition', 'Healthiness Rating_meal']
feature_encoders = {}
for col in categorical_columns:
    feature_encoders[col] = LabelEncoder()
    combined_df[col] = feature_encoders[col].fit_transform(combined_df[col])

# Show the codes that were actually assigned (LabelEncoder numbers the labels in
# sorted order), so any decoding of the tree can be checked against them.
print("Label encodings (code -> label):")
print(f"Productivity Level: {dict(enumerate(label_encoder.classes_.tolist()))}")
for col in categorical_columns:
    print(f"{col}: {dict(enumerate(feature_encoders[col].classes_.tolist()))}")

# Split data into features and target
X = combined_df[categorical_columns]
y = combined_df['Productivity Level']

# Train a decision tree that splits on entropy (information gain).
# random_state is fixed because scikit-learn permutes the features at every split,
# so ties between equally good splits could otherwise yield a different tree per run.
clf = DecisionTreeClassifier(criterion='entropy', random_state=42)
clf.fit(X, y)

# Extract decision tree rules in text format
tree_rules = export_text(clf, feature_names=categorical_columns)
print("Generated Decision Tree Rules:")
print(tree_rules)


Generated Decision Tree Rules:
|--- Time Woken Up <= 1.50
|   |--- Exercise Performed <= 0.50
|   |   |--- Traffic Condition <= 1.50
|   |   |   |--- Traffic Condition <= 0.50
|   |   |   |   |--- Time Woken Up <= 0.50
|   |   |   |   |   |--- Healthiness Rating_meal <= 1.50
|   |   |   |   |   |   |--- Healthiness Rating_meal <= 0.50
|   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |--- Healthiness Rating_meal >  0.50
|   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |--- Healthiness Rating_meal >  1.50
|   |   |   |   |   |   |--- class: 2
|   |   |   |   |--- Time Woken Up >  0.50
|   |   |   |   |   |--- Healthiness Rating_meal <= 1.50
|   |   |   |   |   |   |--- Healthiness Rating_meal <= 0.50
|   |   |   |   |   |   |   |--- class: 2
|   |   |   |   |   |   |--- Healthiness Rating_meal >  0.50
|   |   |   |   |   |   |   |--- class: 2
|   |   |   |   |   |--- Healthiness Rating_meal >  1.50
|   |   |   |   |   |   |--- class: 2
|   |   |   |--- Traff

In [None]:
# Reverse encoding mappings (integer code -> category label).
# LabelEncoder assigns codes in sorted (alphabetical) order of the labels, not in
# any semantic order, so each mapping is built by enumerating the sorted labels.
# The label sets are the ones present in the data after cleaning; an extra category
# (for example 'Unknown') would shift the codes and must be added to the list.
time_woken_up_mapping = dict(enumerate(sorted(['Before 7:00 AM', 'Exactly 7:00 AM', 'After 7:00 AM'])))
exercise_performed_mapping = dict(enumerate(sorted(['No', 'Yes'])))
traffic_condition_mapping = dict(enumerate(sorted(['Light', 'Moderate', 'Heavy'])))
healthiness_rating_mapping = dict(enumerate(sorted(['Unhealthy', 'Moderately Healthy', 'Healthy'])))
productivity_level_mapping = dict(enumerate(sorted(['Low', 'Medium', 'High'])))

def reverse_encoding(value, mapping):
    """Translate an encoded value into its label, passing unknown values through.

    Args:
        value: Key to look up.
        mapping: Dictionary from encoded values to labels.

    Returns:
        ``mapping[value]`` when the key is present, otherwise ``value`` itself.
    """
    if value in mapping:
        return mapping[value]
    return value

def decode_tree_rules(rules_text):
    """Rewrite exported tree rules with category labels instead of numbers.

    Every line that mentions a feature has its threshold text replaced: '0.50'
    becomes the label for code 0, '1.50' the label for code 1 and, except for
    'Exercise Performed', '2.50' the label for code 2. Leaf lines have
    'class: N' rewritten with the matching productivity label.

    Args:
        rules_text: Multi-line rule text as produced by ``export_text``.

    Returns:
        The rewritten text, with lines joined by newline characters.
    """
    # (feature name, code -> label mapping, codes whose thresholds are rewritten)
    feature_decoders = (
        ('Time Woken Up', time_woken_up_mapping, (0, 1, 2)),
        ('Exercise Performed', exercise_performed_mapping, (0, 1)),
        ('Traffic Condition', traffic_condition_mapping, (0, 1, 2)),
        ('Healthiness Rating_meal', healthiness_rating_mapping, (0, 1, 2)),
    )

    readable_lines = []
    for raw_line in rules_text.split('\n'):
        text = raw_line

        # Swap numeric thresholds for labels on lines that name a feature.
        for feature_name, mapping, codes in feature_decoders:
            if feature_name not in text:
                continue
            for code in codes:
                text = text.replace(f'{code}.50', reverse_encoding(code, mapping))

        # Swap the numeric class on leaf lines; only the first matching code is used.
        for code in (0, 1, 2):
            marker = f'class: {code}'
            if marker in text:
                text = text.replace(marker, f'class: {reverse_encoding(code, productivity_level_mapping)}')
                break

        readable_lines.append(text)

    return '\n'.join(readable_lines)


# Decode the exported rules into category labels and show the result.
decoded_rules = decode_tree_rules(tree_rules)  # `tree_rules` is the export_text output from the training cell
print("Decoded Decision Tree Rules:")
print(decoded_rules)

# Persist the fitted classifier in the current working directory.
joblib.dump(clf, 'decision_tree_model.pkl')

# Write the decoded rule text next to the model file.
with open('decision_tree_rules.txt', 'w') as file:
    file.write(decoded_rules)

print("Model and rules have been saved successfully.")


Decoded Decision Tree Rules:
|--- Time Woken Up <= Exactly 7:00 AM
|   |--- Exercise Performed <= No
|   |   |--- Traffic Condition <= Moderate
|   |   |   |--- Traffic Condition <= Light
|   |   |   |   |--- Time Woken Up <= Before 7:00 AM
|   |   |   |   |   |--- Healthiness Rating_meal <= Moderately Healthy
|   |   |   |   |   |   |--- Healthiness Rating_meal <= Unhealthy
|   |   |   |   |   |   |   |--- class: Low
|   |   |   |   |   |   |--- Healthiness Rating_meal >  Unhealthy
|   |   |   |   |   |   |   |--- class: Medium
|   |   |   |   |   |--- Healthiness Rating_meal >  Moderately Healthy
|   |   |   |   |   |   |--- class: High
|   |   |   |   |--- Time Woken Up >  Before 7:00 AM
|   |   |   |   |   |--- Healthiness Rating_meal <= Moderately Healthy
|   |   |   |   |   |   |--- Healthiness Rating_meal <= Unhealthy
|   |   |   |   |   |   |   |--- class: High
|   |   |   |   |   |   |--- Healthiness Rating_meal >  Unhealthy
|   |   |   |   |   |   |   |--- class: High
|   |  

In [None]:
import joblib

# Save the model to the specified path with a filename
# NOTE(review): absolute, user-specific Windows path; presumably it only resolves on
# the original author's machine. Confirm the intended output location before re-running.
model_path = r"C:\Users\bida22-068\OneDrive - Botswana Accountancy College\IS ASSIGNENT DEIRDRE\decision_tree_model.pkl"
joblib.dump(clf, model_path)

print(f"Model saved to {model_path}")

# Save the rules to a specific path with a filename
rules_path = r"C:\Users\bida22-068\OneDrive - Botswana Accountancy College\IS ASSIGNENT DEIRDRE\decision_tree_rules.txt"

# Write the decoded rule text held in `decoded_rules`.
with open(rules_path, 'w') as file:
    file.write(decoded_rules)

print(f"Rules saved to {rules_path}")


Model saved to C:\Users\bida22-068\OneDrive - Botswana Accountancy College\IS ASSIGNENT DEIRDRE\decision_tree_model.pkl
Rules saved to C:\Users\bida22-068\OneDrive - Botswana Accountancy College\IS ASSIGNENT DEIRDRE\decision_tree_rules.txt


In [None]:
# Decision rules in dictionary format
# Nesting order is: wake-up category -> exercise performed -> traffic condition ->
# meal healthiness -> productivity level. A string value ends the path early.
# NOTE(review): presumably transcribed by hand from the decoded tree text; verify
# against the fitted tree. Two branches below do not follow the nesting order.
rules = {
    'Before 7:00 AM': {
        'No': {
            'Moderate': {
                'Moderately Healthy': 'Medium',
                'Unhealthy': 'Low'
            },
            'Light': 'High'
        },
        'Yes': 'Low'  # If exercise is performed
    },
    'After 7:00 AM': {
        'No': {
            'Moderate': {
                'Moderately Healthy': 'High',
                'Unhealthy': 'High'
            },
            'Light': 'High',
            'Heavy': 'High'
        },
        # NOTE(review): keyed by meal healthiness directly, with no traffic level.
        'Yes': {
            'Moderately Healthy': 'High',
            'Unhealthy': 'High'
        }
    },
    'Exactly 7:00 AM': {
        'No': {
            'Light': {
                # NOTE(review): the innermost keys repeat the exercise answer ('No'/'Yes').
                'Moderately Healthy': {
                    'No': 'High',
                    'Yes': 'High'
                }
            },
            'Heavy': 'High',
            'Moderate': {
                'Moderately Healthy': 'Medium',
                'Unhealthy': 'Low'
            }
        },
        'Yes': 'High'
    }
}


In [None]:
import json
# Save the decision rules as a JSON file
# NOTE(review): this rebinds `rules_path`, which an earlier cell used for the .txt rules file.
rules_path = r"C:\Users\bida22-068\OneDrive - Botswana Accountancy College\IS ASSIGNENT DEIRDRE\decision_tree_rules.json"

# indent=4 pretty-prints the nested `rules` dictionary.
with open(rules_path, 'w') as file:
    json.dump(rules, file, indent=4)

print(f"Decision rules saved to {rules_path}")

Decision rules saved to C:\Users\bida22-068\OneDrive - Botswana Accountancy College\IS ASSIGNENT DEIRDRE\decision_tree_rules.json


In [None]:
# Take the first row of the (label-encoded) dataset as a worked example and show it.
row = combined_df.iloc[0]
print(row)

# Pull out the four encoded feature values the rules depend on.
time_woken_up, exercise_performed, traffic_condition, healthiness_rating = (
    row[column_name]
    for column_name in (
        'Time Woken Up',
        'Exercise Performed',
        'Traffic Condition',
        'Healthiness Rating_meal',
    )
)

# Translate each code into its label; a code with no mapping becomes 'Unknown'.
facts = dict(zip(
    ('Time Woken Up', 'Exercise Performed', 'Traffic Condition', 'Healthiness Rating_meal'),
    (
        time_woken_up_mapping.get(time_woken_up, 'Unknown'),
        exercise_performed_mapping.get(exercise_performed, 'Unknown'),
        traffic_condition_mapping.get(traffic_condition, 'Unknown'),
        healthiness_rating_mapping.get(healthiness_rating, 'Unknown'),
    ),
))

print(facts)




Date                                                   2024-08-07 00:00:00
Time of Meal                                                      13:00:00
Type of Meal                                                         Lunch
Main Dish                                                            Pasta
Side Dish(es)                                                        Fruit
Beverage                                                             Water
Calories Consumed (kcal)_meal                                   432.102006
Healthiness Rating_meal                                                  0
Satisfaction Level                                               Satisfied
Mood Before Meal                                                    Hungry
Mood After Meal                                                  Satisfied
Location of Meal                                                      Home
Reason for Meal Choice                                             Craving
Company During Meal      

In [None]:
# Forward chaining: start from the known facts and apply rules until a level is reached.
def forward_chaining(facts):
    """Derive a productivity level from the facts using hand-written rules.

    Args:
        facts: Dictionary with the keys 'Time Woken Up', 'Exercise Performed',
            'Traffic Condition' and 'Healthiness Rating_meal', each holding a
            decoded category label.

    Returns:
        'High', 'Medium' or 'Low' when a rule applies, otherwise 'Unknown'.
    """
    wake_time = facts['Time Woken Up']

    if wake_time in ('Before 7:00 AM', 'After 7:00 AM'):
        # Early and late risers share one rule set.
        if facts['Exercise Performed'] != 'No':
            return 'Low'  # Simplified logic: any answer other than 'No' ends here

        traffic = facts['Traffic Condition']
        if traffic == 'Light':
            return 'High'
        if traffic == 'Moderate':
            # Only moderate traffic looks at the meal rating.
            meal = facts['Healthiness Rating_meal']
            if meal == 'Moderately Healthy':
                return 'Medium'
            if meal == 'Unhealthy':
                return 'Low'
        return 'Unknown'  # No rule for heavy traffic or other meal ratings

    if wake_time == 'Exactly 7:00 AM':
        # The exercise answer is read but leads to the same traffic-based outcome either way.
        _exercise, traffic = facts['Exercise Performed'], facts['Traffic Condition']
        if traffic == 'Light':
            return 'High'
        if traffic == 'Moderate':
            return 'Medium'
        if traffic == 'Heavy':
            return 'Low'
        return 'Unknown'

    return 'Unknown'  # Default if no conditions are met

# Run forward chaining with the mapped facts from the row
# (`facts` is the dictionary of decoded labels built from the first dataset row).
predicted_productivity = forward_chaining(facts)
print(f"Predicted Productivity Level: {predicted_productivity}")



Predicted Productivity Level: Low


In [None]:
def backward_chaining(goal, facts):
    """Check whether the known facts support a given productivity goal.

    Works backwards from the goal: only rules that conclude ``goal`` are
    considered, and the goal holds when every premise of at least one of them
    matches ``facts``. The rule base lists the same rule paths as
    ``forward_chaining`` for the known category labels, so for those labels the
    two functions agree and a fact set can support at most one level.

    Args:
        goal: Productivity level to test: 'High', 'Medium' or 'Low'.
        facts: Dictionary of decoded labels keyed by 'Time Woken Up',
            'Exercise Performed', 'Traffic Condition' and
            'Healthiness Rating_meal'.

    Returns:
        True if some rule concluding ``goal`` has all of its premises satisfied,
        otherwise False. An unrecognised goal, a missing fact or a label outside
        the known categories (for example 'Unknown') never satisfies a rule.
    """
    time_key = 'Time Woken Up'
    exercise_key = 'Exercise Performed'
    traffic_key = 'Traffic Condition'
    meal_key = 'Healthiness Rating_meal'

    # Each rule is (conclusion, premises); premises map a fact name to its required label.
    rule_base = []

    # Waking before or after 7:00 AM shares one rule set.
    for wake in ('Before 7:00 AM', 'After 7:00 AM'):
        rule_base.extend([
            ('High', {time_key: wake, exercise_key: 'No', traffic_key: 'Light'}),
            ('Medium', {time_key: wake, exercise_key: 'No', traffic_key: 'Moderate',
                        meal_key: 'Moderately Healthy'}),
            ('Low', {time_key: wake, exercise_key: 'No', traffic_key: 'Moderate',
                     meal_key: 'Unhealthy'}),
            ('Low', {time_key: wake, exercise_key: 'Yes'}),
        ])

    # Waking at exactly 7:00 AM depends on traffic alone.
    for traffic, level in (('Light', 'High'), ('Moderate', 'Medium'), ('Heavy', 'Low')):
        rule_base.append((level, {time_key: 'Exactly 7:00 AM', traffic_key: traffic}))

    # facts.get() makes an absent fact fail the premise instead of raising KeyError.
    return any(
        conclusion == goal
        and all(facts.get(name) == required for name, required in premises.items())
        for conclusion, premises in rule_base
    )

# Ask whether the facts for the sampled row support the 'High' productivity goal.
goal = 'High'
can_achieve_goal = backward_chaining(goal, facts)

# Report the boolean result as Yes/No.
print(f"Can we achieve {goal} productivity? {'Yes' if can_achieve_goal else 'No'}")


Can we achieve High productivity? No
