In [1]:

import pandas as pd

# Load the .csv file
file_path = "test.csv"
df = pd.read_csv(file_path)

# Idle.py functionality: converting 'Time' and calculating Idle status (from Sean's code)
df['Time'] = pd.to_datetime(df['Time'], format='mixed', errors='coerce')
df = df.sort_values(by=['Session Id', 'Time'])
df['Time_Diff_Minutes'] = df.groupby('Session Id')['Time'].diff().dt.total_seconds() / 60
df['Idle'] = df['Time_Diff_Minutes'] > 5

# Monitor.ipynb functionality: rolling monitor status (from Jocelyn's code)
def rolling_monitor_status(outcomes):
    monitor_status = []
    attempt_counts = []
    working_status = []
    struggle_status = []
    hint_abuse_status = []
    
    for i in range(len(outcomes)):
        last_10 = outcomes[max(0, i - 9): i + 1].astype(str)
        attempt_count = last_10.str.count("Attempt").sum()
        ok_count = last_10.str.count("OK").sum()
        error_count = last_10.str.count("ERROR").sum()
        initial_hint_count = last_10.str.count("INITIAL_HINT").sum()

        if i < 9:
            monitor_status.append(False)
            working_status.append(False)
            struggle_status.append(False)
            hint_abuse_status.append(False)
        else:
            monitor_status.append(ok_count <= 3)
            working_status.append(True if monitor_status[-1] == False else False)
            struggle_status.append(error_count >= 6)
            hint_abuse_status.append(error_count >= 3 and initial_hint_count >= 3)

        attempt_counts.append(attempt_count)
    
    return pd.DataFrame({
        'Monitor': monitor_status,
        'Working': working_status,
        'Struggle': struggle_status,
        'Hint Abuse': hint_abuse_status,
        'Attempt_Count': attempt_counts
    })

# Applying the monitor logic to each session
for session_id in df['Session Id'].unique():
    session_df = df[df['Session Id'] == session_id]
    temp = rolling_monitor_status(session_df['Outcome'])
    temp.index = session_df.index
    session_df[['Monitor', 'Working', 'Struggle', 'Hint Abuse', 'Attempt_Count']] = temp

    # Replace the session-specific dataframe back into the main dataframe
    df.loc[df['Session Id'] == session_id, ['Monitor', 'Working', 'Struggle', 'Hint Abuse', 'Attempt_Count']] = session_df[['Monitor', 'Working', 'Struggle', 'Hint Abuse', 'Attempt_Count']]

# Ensure the new columns are present and save the dataframe in a new .csv
output_file = "test-changed.csv"
df.to_csv(output_file, index=False)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  session_df[['Monitor', 'Working', 'Struggle', 'Hint Abuse', 'Attempt_Count']] = temp
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  session_df[['Monitor', 'Working', 'Struggle', 'Hint Abuse', 'Attempt_Count']] = temp
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  session_df[['Monitor', 'Working', '