In [21]:
import csv
from collections import Counter, defaultdict

# Step 1: Parse the CSV file and extract the necessary information
def parse_csv(file_path):
    """Return the names of all activities whose lifecycle is ``'start'``.

    The file is read with ``csv.DictReader``, so its first row is treated as
    the header and must contain the ``lifecycle`` and ``name`` columns.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A list holding the ``name`` value of every row whose ``lifecycle``
        value is exactly ``'start'``, in file order.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If the header lacks the ``lifecycle`` or ``name`` column.
    """
    # newline='' hands newline handling to the csv module, so line breaks
    # embedded in quoted fields are not rewritten by the text layer.
    with open(file_path, 'r', newline='') as csvfile:
        return [
            row['name']
            for row in csv.DictReader(csvfile)
            if row['lifecycle'] == 'start'
        ]

# Step 2: Build a Markov model
def build_markov_model(data):
    """Collect, for every activity, the activities observed right after it.

    Args:
        data: Sequence of activity names in the order they occurred.

    Returns:
        A ``defaultdict(list)`` mapping each activity to the list of its
        immediate successors, one entry per occurrence (duplicates are kept,
        in observation order). An activity that only appears as the final
        element gets no entry.
    """
    successors = defaultdict(list)
    # Pair each element with the one that follows it.
    for here, after in zip(data, data[1:]):
        successors[here].append(after)
    return successors

# Step 3: Predict the sequence of activities for the whole day
def predict_activities(markov_model, initial_activity, num_steps=24):
    """Predict a chain of activities by following the most frequent transition.

    Starting from ``initial_activity``, each step moves to the successor that
    was recorded most often for the current activity. When several successors
    are equally frequent, the one recorded first wins.

    Args:
        markov_model: Mapping from an activity to the list of successors
            observed after it (duplicates encode frequency). It is not
            modified.
        initial_activity: Activity the prediction starts from.
        num_steps: Maximum number of transitions to follow.

    Returns:
        A list starting with ``initial_activity`` followed by up to
        ``num_steps`` predicted activities. The list is shorter when an
        activity with no recorded successor is reached.
    """
    predicted_activities = [initial_activity]
    current_activity = initial_activity
    for _ in range(num_steps):
        # .get() rather than indexing: indexing a defaultdict would insert an
        # empty entry for an unseen activity, and a plain dict would raise.
        followers = markov_model.get(current_activity)
        if not followers:
            break  # Dead end: nothing was ever observed after this activity.
        # Take the most frequent successor, not merely the first one recorded.
        current_activity = Counter(followers).most_common(1)[0][0]
        predicted_activities.append(current_activity)
    return predicted_activities

# Step 4: Compute accuracy
def compute_accuracy(predicted_activities, actual_activities):
    """Return the fraction of actual activities that were predicted correctly.

    The two sequences are compared position by position. The result is
    divided by the number of actual activities, so any actual activity that
    has no prediction at its position counts as a miss.

    Args:
        predicted_activities: Predicted activity names, in order.
        actual_activities: Observed activity names, in order.

    Returns:
        The accuracy as a float, or ``0.0`` when ``actual_activities`` is
        empty (there is nothing to score, and dividing would fail).
    """
    total = len(actual_activities)
    if total == 0:
        return 0.0
    total_correct = sum(
        1
        for predicted, actual in zip(predicted_activities, actual_activities)
        if predicted == actual
    )
    return total_correct / total

# Main function
def main():
    """Build a model from ``data.csv``, predict a chain and print its accuracy.

    The started activities are read from ``data.csv``, a transition model is
    built from them, and a chain is predicted starting from the first
    activity in the file. The prediction is scored against the very same
    activity list the model was built from, then printed with the accuracy.
    """
    file_path = 'data.csv'
    data = parse_csv(file_path)
    if not data:
        # Without a single started activity there is no initial state.
        print(f"No activities with lifecycle 'start' found in {file_path}")
        return

    markov_model = build_markov_model(data)

    # Assuming the first activity in the data is the initial activity
    initial_activity = data[0]

    # Predict activities for the whole day
    predicted_activities = predict_activities(markov_model, initial_activity)

    # The actual activities come from the same file, so reuse the parsed
    # list instead of reading and parsing the file a second time.
    accuracy = compute_accuracy(predicted_activities, data)

    # Print predicted activities and accuracy
    print("Predicted activities for the whole day:")
    for hour, activity in enumerate(predicted_activities):
        print(f"Hour {hour}: {activity}")
    print(f"Accuracy: {accuracy:.2%}")

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()


Predicted activities for the whole day:

Hour 0: Start

Hour 1: washing

Hour 2: watchingtv

Hour 3: toilet

Hour 4: washing

Hour 5: watchingtv

Hour 6: toilet

Hour 7: washing

Hour 8: watchingtv

Hour 9: toilet

Hour 10: washing

Hour 11: watchingtv

Hour 12: toilet

Hour 13: washing

Hour 14: watchingtv

Hour 15: toilet

Hour 16: washing

Hour 17: watchingtv

Hour 18: toilet

Hour 19: washing

Hour 20: watchingtv

Hour 21: toilet

Hour 22: washing

Hour 23: watchingtv

Hour 24: toilet

Accuracy: 1.01%
