# In [None]:  (Jupyter cell marker left over from the notebook export)
# Efficient History Tracking Implementation
import pandas as pd

# Bayesian Knowledge Tracing parameters used when the caller passes
# BKTparams=None to calculate_struggle_status.
_DEFAULT_BKT_PARAMS = {
    "p_transit": 0.2,
    "p_slip": 0.1,
    "p_guess": 0.2,
    "p_know": 0.25,  # Initial knowledge probability
}


def _bkt_update(p_know, is_correct, p_slip, p_guess, p_transit):
    """Return the BKT knowledge estimate after one observed attempt.

    Applies Bayes' rule to the prior ``p_know`` given a correct/incorrect
    observation, then applies the learning transition ``p_transit``.
    """
    if is_correct:
        known_and_observed = p_know * (1 - p_slip)
        unknown_and_observed = (1 - p_know) * p_guess
    else:
        known_and_observed = p_know * p_slip
        unknown_and_observed = (1 - p_know) * (1 - p_guess)

    evidence = known_and_observed + unknown_and_observed
    # Zero evidence means the parameters rule this observation out entirely;
    # keep the prior instead of dividing by zero.
    posterior = known_and_observed / evidence if evidence else p_know
    return posterior + (1 - posterior) * p_transit


# Function to calculate the "Struggle" status based on conditions
def calculate_struggle_status(df, window_size=10, threshold=3, BKTparams=None,
                              mastery_threshold=0.8):
    """Flag, row by row, whether the student is struggling on a step.

    A row is flagged when either p-Known column is below
    ``mastery_threshold`` and the attempt count has reached ``threshold``.

    Args:
        df: DataFrame with the columns 'CF (Skill Previous p-Known)',
            'CF (Skill New p-Known)', 'Attempt At Step', 'Outcome',
            'Problem Name' and 'Step Name'.
        window_size: Accepted for interface compatibility; the current
            struggle rule does not use it.
        threshold: Minimum 'Attempt At Step' value for a row to be flagged.
        BKTparams: Mapping with the keys "p_know", "p_slip", "p_guess" and
            "p_transit". ``None`` selects the module defaults.
        mastery_threshold: p-Known value below which a skill counts as not
            yet mastered.

    Returns:
        A ``(struggle_status, history)`` tuple: a list with one bool per row,
        and a list with one dict per row holding the keys 'step', 'skill',
        'previous_p_know', 'new_p_know', 'attempts', 'action_evaluation' and
        'struggle'.

    Raises:
        KeyError: If a required column or BKT parameter is missing.
    """
    # Nothing to read from an empty frame (which may not even have columns).
    if len(df) == 0:
        return [], []

    params = _DEFAULT_BKT_PARAMS if BKTparams is None else BKTparams
    initial_p_know = params["p_know"]
    p_slip = params["p_slip"]
    p_guess = params["p_guess"]
    p_transit = params["p_transit"]

    # Running BKT estimate per skill. It is maintained across rows but is not
    # part of the return value and does not influence the struggle flag.
    # NOTE(review): skills are keyed by 'Problem Name' -- confirm this is the
    # intended skill identifier.
    skill_estimates = {}

    struggle_status = []
    history = []

    # Pull each column out once instead of doing an .iloc lookup per cell.
    rows = zip(
        df['Step Name'].to_numpy(),
        df['Problem Name'].to_numpy(),
        df['CF (Skill Previous p-Known)'].to_numpy(),
        df['CF (Skill New p-Known)'].to_numpy(),
        df['Attempt At Step'].to_numpy(),
        df['Outcome'].to_numpy(),
    )

    for step, skill_name, prev_p_know, new_p_know, attempts, outcome in rows:
        # A missing or non-string outcome (e.g. NaN) counts as not correct.
        is_correct = isinstance(outcome, str) and outcome.lower() == "correct"

        skill_estimates[skill_name] = _bkt_update(
            skill_estimates.get(skill_name, initial_p_know),
            is_correct,
            p_slip,
            p_guess,
            p_transit,
        )

        # Struggle detection logic
        below_mastery = (
            new_p_know < mastery_threshold or prev_p_know < mastery_threshold
        )
        struggling = bool(below_mastery and attempts >= threshold)
        struggle_status.append(struggling)

        # Store history as a dictionary for each student step
        history.append({
            'step': step,
            'skill': skill_name,
            'previous_p_know': prev_p_know,
            'new_p_know': new_p_know,
            'attempts': attempts,
            'action_evaluation': outcome,
            'struggle': struggling,
        })

    return struggle_status, history

# Read the step-level log that will receive the "Struggle" column
df = pd.read_csv('new_test.csv')

# Bayesian Knowledge Tracing parameters handed to the struggle calculation
BKTparams = dict(
    p_transit=0.2,
    p_slip=0.1,
    p_guess=0.2,
    p_know=0.25,  # Initial knowledge probability
)

# Compute one struggle flag and one history record per row
struggle_status, history = calculate_struggle_status(
    df,
    window_size=10,
    threshold=3,
    BKTparams=BKTparams,
)

# Attach the flags as the 'Struggle' column
df = df.assign(Struggle=struggle_status)

# Write the augmented table next to the input, without the index column
output_file_path = 'new_test-struggle.csv'
df.to_csv(output_file_path, index=False)

# Last expression of the cell: shows the path of the file just written
output_file_path
