In [1]:
import psycopg2
import pandas as pd
import matplotlib.pyplot as plt
from docx import Document
from docx.shared import Inches
import json
from io import BytesIO
import os

In [2]:
import urllib.parse as up

def build_db_config(uri):
    """Translate a PostgreSQL connection URI into psycopg2.connect() keyword arguments.

    Args:
        uri: Connection string such as ``postgresql://user:secret@host:5432/dbname``.

    Returns:
        dict with the keys ``dbname``, ``user``, ``password``, ``host`` and ``port``.
        ``port`` falls back to 5432 when the URI does not carry one.
    """
    parsed = up.urlparse(uri)

    def _decoded(component):
        # urlparse returns the user-info fields still percent-encoded, so a
        # password written as "p%40ss" must be unquoted before it is used.
        return up.unquote(component) if component is not None else None

    return {
        "dbname": parsed.path.lstrip("/"),
        "user": _decoded(parsed.username),
        "password": _decoded(parsed.password),
        "host": parsed.hostname,
        "port": parsed.port or 5432,
    }


# Prefer DATABASE_URL from the environment so the credential does not have to
# live in the notebook; the literal below is only the fallback.
uri = os.environ.get("DATABASE_URL", "hidden")
result = up.urlparse(uri)

DB_CONFIG = build_db_config(uri)

In [3]:
# Participants to generate reports for: the labels P1 through P10.
PARTICIPANT_NAMES = ["P" + str(number) for number in range(1, 11)]

In [6]:
def get_db_connection():
    """Open a psycopg2 connection from the DB_CONFIG keyword arguments.

    Returns:
        The open connection, or None when psycopg2 raises OperationalError
        (the error is printed rather than re-raised).
    """
    try:
        return psycopg2.connect(**DB_CONFIG)
    except psycopg2.OperationalError as exc:
        print("Error: Could not connect to the database. Please check DB_CONFIG.")
        print(f"Details: {exc}")
    return None

def fetch_lookup_data(conn):
    """Load the static reference tables as nested dictionaries keyed by row id.

    Args:
        conn: Open DB-API connection that pandas.read_sql can query.

    Returns:
        dict with the keys "kcs", "goals" and "metrics"; each value maps a row
        id to a dict of that row's remaining selected columns.
    """
    print("Fetching lookup data (KCs, Goals, Metrics)...")
    queries = {
        "kcs": "SELECT id, kc_identifier, name FROM kcs",
        "goals": "SELECT id, name, description FROM goals",
        "metrics": "SELECT id, name FROM metrics",
    }
    return {
        table: pd.read_sql(sql, conn).set_index('id').to_dict('index')
        for table, sql in queries.items()
    }

def create_metric_plot(df_metric_history, lookup_tables):
    """Render a line chart of metric values per decision as an in-memory PNG.

    Args:
        df_metric_history: DataFrame with the columns 'metric_id',
            'decision_number' and 'value'. It is never modified; all cleaning
            happens on a derived copy.
        lookup_tables: dict whose 'metrics' entry maps metric id to a dict
            holding the metric's 'name'.

    Returns:
        A BytesIO positioned at the start of the PNG data, or None when there
        is nothing to plot (no rows, or no row with a numeric 'value').
    """
    if df_metric_history.empty:
        return None

    # Coerce on a derived frame: callers hand in a filtered slice of a larger
    # DataFrame, and assigning into that slice is chained assignment.
    history = df_metric_history.assign(
        value=pd.to_numeric(df_metric_history['value'], errors='coerce')
    ).dropna(subset=['value'])
    if history.empty:
        # Every value was non-numeric; plotting an empty pivot would raise.
        return None

    # Replace metric_id with the metric name so the legend is readable.
    metric_lookup = lookup_tables['metrics']
    history = history.assign(
        metric_name=history['metric_id'].apply(
            lambda x: metric_lookup.get(x, {}).get('name', f'Metric {x}')
        )
    )

    # One column per metric; keep the last recorded value for each decision.
    pivot_df = history.pivot_table(
        index='decision_number', columns='metric_name', values='value', aggfunc='last'
    )
    if pivot_df.empty:
        return None

    fig, ax = plt.subplots(figsize=(8, 4))
    try:
        pivot_df.plot(kind='line', marker='o', ax=ax)

        ax.set_title("Metric Performance Over Decisions")
        ax.set_xlabel("Decision Number")
        ax.set_ylabel("Metric Value")
        ax.grid(True, linestyle='--', alpha=0.6)
        ax.legend(title='Metrics')
        fig.tight_layout()

        # Save through the figure itself rather than pyplot's "current figure".
        img_buffer = BytesIO()
        fig.savefig(img_buffer, format='png')
        img_buffer.seek(0)
    finally:
        # Always release the figure, even when plotting or saving fails.
        plt.close(fig)

    return img_buffer

def parse_and_plot_kcs(dialogue_history_json, target_kcs, lookup_tables):
    """Replay a goal's dialogue to build cumulative KC scores and plot them.

    Args:
        dialogue_history_json: The goal's dialogue history, either as a list of
            turn dicts ('role', 'content') or as the JSON text of such a list.
        target_kcs: Iterable of KC ids to track; each starts at a score of 0.
        lookup_tables: dict whose 'kcs' entry maps KC id to a dict with the
            keys 'kc_identifier' and 'name'.

    Returns:
        (image, message): image is a BytesIO holding the PNG chart, or None when
        no chart could be produced; message explains the outcome either way.
    """
    no_history = "No dialogue history available for this goal."
    if not dialogue_history_json:
        return None, no_history

    marker = 'User chose decision index'

    # Every tracked KC starts at 0 and gains one point per scored decision.
    kc_progress = {kc_id: [0] for kc_id in target_kcs}

    try:
        history = dialogue_history_json
        if isinstance(history, str):
            history = json.loads(history)
        if not history:
            return None, no_history

        # Reverse index built once; the first KC carrying an identifier wins.
        kc_id_by_identifier = {}
        for k_id, k_info in lookup_tables['kcs'].items():
            kc_id_by_identifier.setdefault(k_info['kc_identifier'], k_id)

        for i, turn in enumerate(history):
            if turn.get('role') != 'user' or marker not in turn.get('content', ''):
                continue
            if i == 0:
                # No assistant turn precedes this choice; history[i - 1] would
                # silently wrap around to the last turn of the dialogue.
                continue

            # Read the integer that follows the marker, so both
            # "...index: 0" and "...index: 0: 'option text'" are understood.
            tail = turn['content'].split(marker, 1)[1].lstrip(' :')
            decision_index = int(tail.split(':', 1)[0].strip())

            # The assistant turn holding the question precedes the user's choice.
            assistant_content = json.loads(history[i - 1]['content'])
            options = assistant_content.get('decisionPoint', {}).get('options', [])
            if not 0 <= decision_index < len(options):
                continue
            kc_impacts = options[decision_index].get('kc_impacts', [])

            # Carry every KC's previous score forward, then apply the deltas.
            for scores in kc_progress.values():
                scores.append(scores[-1])

            for impact in kc_impacts:
                k_id = kc_id_by_identifier.get(impact['kc_identifier'])
                if k_id is not None and k_id in kc_progress:
                    kc_progress[k_id][-1] += impact.get('score', 0)

    except (json.JSONDecodeError, KeyError, IndexError, TypeError, ValueError, AttributeError) as e:
        return None, f"Could not parse KC data from dialogue history. Error: {e}"

    if not any(len(v) > 1 for v in kc_progress.values()):
        return None, "No scorable KC decisions were made in this goal."

    # Long-format rows: one per (KC, decision step).
    plot_data = []
    for kc_id, scores in kc_progress.items():
        kc_name = lookup_tables['kcs'].get(kc_id, {}).get('name', f'KC {kc_id}')
        for step, score in enumerate(scores):
            plot_data.append({'decision': step, 'kc_name': kc_name, 'score': score})

    df_plot = pd.DataFrame(plot_data)
    pivot_df = df_plot.pivot_table(index='decision', columns='kc_name', values='score')

    fig, ax = plt.subplots(figsize=(8, 4))
    try:
        pivot_df.plot(kind='line', marker='o', ax=ax)

        ax.set_title("Knowledge Component (KC) Learning Curve")
        ax.set_xlabel("Decision Number")
        ax.set_ylabel("Cumulative KC Score")
        ax.grid(True, linestyle='--', alpha=0.6)
        ax.legend(title='Knowledge Components')
        fig.tight_layout()

        img_buffer = BytesIO()
        fig.savefig(img_buffer, format='png')
        img_buffer.seek(0)
    finally:
        # Always release the figure, even when plotting or saving fails.
        plt.close(fig)

    return img_buffer, "KC learning curve generated."


def add_decision_path_to_doc(doc, dialogue_history_json, lookup_tables):
    """Append the "C. Decision Path & Rationale" subsection to the report.

    For each assistant turn that poses a decision point and is immediately
    followed by a "User chose decision index" turn, writes the question, the
    chosen option's text and its KC impacts.

    Args:
        doc: Document-like object providing add_heading() and add_paragraph().
        dialogue_history_json: List of turn dicts ('role', 'content'), or the
            JSON text of such a list. Assistant decision turns carry a JSON
            payload with a 'decisionPoint' object.
        lookup_tables: Unused; kept so the signature matches the sibling helpers.

    Returns:
        None. Parsing problems are reported as a paragraph in the document
        instead of being raised.
    """
    # Level 4 keeps this subsection alongside the "A." and "B." subsections,
    # which sit beneath each level-3 goal heading.
    doc.add_heading("C. Decision Path & Rationale", level=4)

    if not dialogue_history_json:
        doc.add_paragraph("No dialogue history available.")
        return

    marker = 'User chose decision index'
    decision_counter = 1
    try:
        history = dialogue_history_json
        if isinstance(history, str):
            history = json.loads(history)

        for i, turn in enumerate(history):
            if turn.get('role') != 'assistant' or i + 1 >= len(history):
                continue

            # Only assistant turns answered by a choice are parsed as JSON, so
            # free-text assistant messages cannot abort the whole section.
            user_turn = history[i + 1]
            if marker not in user_turn.get('content', ''):
                continue

            decision_point = json.loads(turn['content']).get('decisionPoint')
            if not decision_point or 'question' not in decision_point:
                continue

            # Read the integer that follows the marker, so both
            # "...index: 0" and "...index: 0: 'option text'" are understood.
            tail = user_turn['content'].split(marker, 1)[1].lstrip(' :')
            decision_index = int(tail.split(':', 1)[0].strip())
            if decision_index < 0:
                # A negative index would silently select from the end of the list.
                raise IndexError(f"decision index {decision_index} is out of range")
            chosen_option = decision_point['options'][decision_index]

            # Resolve every field before writing, so a malformed option does
            # not leave a dangling "Decision N:" bullet in the document.
            question_text = decision_point['question']
            choice_text = chosen_option['text']
            kc_impact_str = ", ".join(
                f"{imp.get('score', 0)} to {imp['kc_identifier']}"
                for imp in chosen_option.get('kc_impacts', [])
            )

            doc.add_paragraph(f"Decision {decision_counter}:", style='List Bullet')
            p = doc.add_paragraph()
            p.add_run("Scenario: ").bold = True
            p.add_run(question_text)

            p = doc.add_paragraph()
            p.add_run("Participant's Choice: ").bold = True
            p.add_run(choice_text)

            p = doc.add_paragraph()
            p.add_run("Immediate KC Impact: ").bold = True
            p.add_run(kc_impact_str if kc_impact_str else "None")

            doc.add_paragraph()  # Blank paragraph as spacing between decisions
            decision_counter += 1

    except (json.JSONDecodeError, KeyError, IndexError, TypeError, ValueError, AttributeError) as e:
        doc.add_paragraph(f"Error parsing decision path: {e}")

def generate_report_for_user(user_name, conn, lookup_tables):
    """Build and save the Word learning report for a single participant.

    Queries the participant's goals, metric history, KC scores and metric
    scores, then writes an executive summary, a goal-by-goal analysis (metric
    chart, KC chart, decision path) and a simple playstyle profile into
    ``Learning_Report_<user_name>.docx`` in the current working directory.

    Args:
        user_name: Value matched against ``users.name``.
        conn: Open database connection accepted by pandas.read_sql.
        lookup_tables: dict with the keys 'kcs', 'goals' and 'metrics', each
            mapping a row id to that row's attributes (see fetch_lookup_data).

    Returns:
        None. When the user does not exist a message is printed and no file is
        written.
    """
    print(f"\n--- Generating report for {user_name} ---")

    # 1. Fetch all data for this specific user
    user_info = pd.read_sql("SELECT id FROM users WHERE name = %(name)s", conn, params={"name": user_name})
    if user_info.empty:
        print(f"User '{user_name}' not found in the database. Skipping.")
        return
    user_id = user_info['id'].iloc[0]

    df_user_goals = pd.read_sql("SELECT * FROM user_goals WHERE user_id = %(user_id)s ORDER BY goal_id", conn, params={"user_id": str(user_id)})
    df_metric_history = pd.read_sql("SELECT * FROM user_metric_history WHERE user_id = %(user_id)s", conn, params={"user_id": str(user_id)})
    df_kc_scores = pd.read_sql("SELECT * FROM user_kc_scores WHERE user_id = %(user_id)s", conn, params={"user_id": str(user_id)})
    df_metric_scores = pd.read_sql("SELECT * FROM user_metric_scores WHERE user_id = %(user_id)s", conn, params={"user_id": str(user_id)})
    df_goal_kcs = pd.read_sql("SELECT * FROM goal_kcs", conn)

    # Coerce score columns to numbers and drop rows that cannot be coerced,
    # so the formatting and nlargest/nsmallest calls below see numeric data.
    df_kc_scores['current_score'] = pd.to_numeric(df_kc_scores['current_score'], errors='coerce')
    df_kc_scores = df_kc_scores.dropna(subset=['current_score'])

    df_metric_scores['current_value'] = pd.to_numeric(df_metric_scores['current_value'], errors='coerce')
    df_metric_scores = df_metric_scores.dropna(subset=['current_value'])

    # 2. Start creating the Word Document
    doc = Document()
    doc.add_heading('AI Entrepreneurship Game: Learning Report', 0)
    doc.add_heading(f"Participant: {user_name}", level=1)
    doc.add_paragraph(f"Participant ID: {user_id}")

    # 3. Section 1: Executive Summary
    doc.add_heading("1. Executive Summary & Overall Performance", level=2)

    # Final Metrics
    doc.add_paragraph().add_run("Final Key Metrics:").bold = True
    if df_metric_scores.empty:
        doc.add_paragraph("No final metric scores available.", style='List Bullet')
    else:
        for _, row in df_metric_scores.iterrows():
            metric_name = lookup_tables['metrics'].get(row['metric_id'], {}).get('name', f'Metric {row["metric_id"]}')
            doc.add_paragraph(f"{metric_name}: {row['current_value']:.2f}", style='List Bullet')

    # KC Strengths/Weaknesses
    df_kc_scores_named = df_kc_scores.copy()
    df_kc_scores_named['kc_name'] = df_kc_scores_named['kc_id'].apply(lambda x: lookup_tables['kcs'].get(x, {}).get('name', f'KC {x}'))

    doc.add_paragraph().add_run("Key Strengths (Top KCs):").bold = True
    if df_kc_scores_named.empty:
        doc.add_paragraph("No KC scores available.", style='List Bullet')
    else:
        for _, row in df_kc_scores_named.nlargest(3, 'current_score').iterrows():
            doc.add_paragraph(f"{row['kc_name']} - Score: {row['current_score']}", style='List Bullet')

    doc.add_paragraph().add_run("Areas for Improvement (Lowest KCs):").bold = True
    if df_kc_scores_named.empty:
        doc.add_paragraph("No KC scores available.", style='List Bullet')
    else:
        for _, row in df_kc_scores_named.nsmallest(3, 'current_score').iterrows():
            doc.add_paragraph(f"{row['kc_name']} - Score: {row['current_score']}", style='List Bullet')

    # 4. Section 2: Goal-by-Goal Analysis
    doc.add_heading("2. Detailed Goal-by-Goal Analysis", level=2)

    if df_user_goals.empty:
        doc.add_paragraph("This participant has not attempted any goals yet.")
    else:
        for _, goal_row in df_user_goals.iterrows():
            goal_id = goal_row['goal_id']
            goal_name = lookup_tables['goals'].get(goal_id, {}).get('name', f'Goal {goal_id}')

            doc.add_heading(f"Goal: {goal_name}", level=3)
            doc.add_paragraph(f"Status: {goal_row['status']}, Attempts: {goal_row['attempts_for_current_goal_cycle']}")

            # A. Metric Performance Curve
            doc.add_heading("A. Metric Performance Curve", level=4)
            # Hand the plot helper an independent copy: a boolean-filtered
            # slice must not be written to, and the full history is reused
            # for the next goal.
            df_goal_metrics = df_metric_history[df_metric_history['goal_id'] == goal_id].copy()
            metric_plot_img = create_metric_plot(df_goal_metrics, lookup_tables)
            if metric_plot_img:
                doc.add_picture(metric_plot_img, width=Inches(6.0))
            else:
                doc.add_paragraph("No metric history recorded for this goal.")

            # B. KC Learning Curve
            doc.add_heading("B. Knowledge Component (KC) Learning Curve", level=4)
            target_kcs_for_goal = df_goal_kcs[df_goal_kcs['goal_id'] == goal_id]['kc_id'].tolist()
            kc_plot_img, message = parse_and_plot_kcs(goal_row['dialogue_history'], target_kcs_for_goal, lookup_tables)
            if kc_plot_img:
                doc.add_picture(kc_plot_img, width=Inches(6.0))
            else:
                doc.add_paragraph(message)

            # C. Decision Path
            add_decision_path_to_doc(doc, goal_row['dialogue_history'], lookup_tables)

    # 5. Section 3: Final Entrepreneurial Profile (Simplified logic)
    doc.add_heading("3. Final Entrepreneurial Profile", level=2)

    playstyle = "Balanced Entrepreneur"
    if not df_kc_scores_named.empty:
        top_kc = df_kc_scores_named.nlargest(1, 'current_score')
        if not top_kc.empty:
            # Compare case-insensitively so that e.g. "Profit" and "profit"
            # both count as a match for the same keyword.
            top_kc_name = str(top_kc['kc_name'].iloc[0]).lower()
            if any(keyword in top_kc_name for keyword in ('trust', 'reputation', 'ethical')):
                playstyle = "The Community Builder"
            elif any(keyword in top_kc_name for keyword in ('revenue', 'pricing', 'profit')):
                playstyle = "The Profit Maximizer"

    doc.add_paragraph().add_run("Identified Playstyle: ").bold = True
    doc.add_paragraph(playstyle)
    doc.add_paragraph().add_run("Recommendations:").bold = True
    doc.add_paragraph("Based on the KC scores, this participant could benefit from focusing on scenarios that challenge their lower-scoring skills to develop a more well-rounded entrepreneurial profile.")

    # 6. Save the document
    file_name = f"Learning_Report_{user_name}.docx"
    doc.save(file_name)
    print(f"Successfully generated report: {file_name}")

In [7]:
# Driver cell: open one connection, load the lookup tables once, then write a
# report for every name in PARTICIPANT_NAMES.
# NOTE(review): a later cell defines another generate_report_for_user taking
# (user_info, data, peer_averages); re-running this cell after that one would
# call the newer definition with the wrong arguments.
if __name__ == "__main__":
    conn = get_db_connection()
    # get_db_connection() returns None after printing the error on failure.
    if conn:
        try:
            lookup_data = fetch_lookup_data(conn)
            for name in PARTICIPANT_NAMES:
                generate_report_for_user(name, conn, lookup_data)
        finally:
            # Close the connection even if one of the reports raised.
            conn.close()
            print("\nDatabase connection closed.")

Fetching lookup data (KCs, Goals, Metrics)...

--- Generating report for P1 ---


  kcs = pd.read_sql("SELECT id, kc_identifier, name FROM kcs", conn).set_index('id').to_dict('index')
  goals = pd.read_sql("SELECT id, name, description FROM goals", conn).set_index('id').to_dict('index')
  metrics = pd.read_sql("SELECT id, name FROM metrics", conn).set_index('id').to_dict('index')
  user_info = pd.read_sql("SELECT id FROM users WHERE name = %(name)s", conn, params={"name": user_name})
  df_user_goals = pd.read_sql("SELECT * FROM user_goals WHERE user_id = %(user_id)s ORDER BY goal_id", conn, params={"user_id": str(user_id)})
  df_metric_history = pd.read_sql("SELECT * FROM user_metric_history WHERE user_id = %(user_id)s", conn, params={"user_id": str(user_id)})
  df_kc_scores = pd.read_sql("SELECT * FROM user_kc_scores WHERE user_id = %(user_id)s", conn, params={"user_id": str(user_id)})
  df_metric_scores = pd.read_sql("SELECT * FROM user_metric_scores WHERE user_id = %(user_id)s", conn, params={"user_id": str(user_id)})
  df_goal_kcs = pd.read_sql("SELECT * FROM go

Successfully generated report: Learning_Report_P1.docx

--- Generating report for P2 ---


  user_info = pd.read_sql("SELECT id FROM users WHERE name = %(name)s", conn, params={"name": user_name})
  df_user_goals = pd.read_sql("SELECT * FROM user_goals WHERE user_id = %(user_id)s ORDER BY goal_id", conn, params={"user_id": str(user_id)})
  df_metric_history = pd.read_sql("SELECT * FROM user_metric_history WHERE user_id = %(user_id)s", conn, params={"user_id": str(user_id)})
  df_kc_scores = pd.read_sql("SELECT * FROM user_kc_scores WHERE user_id = %(user_id)s", conn, params={"user_id": str(user_id)})
  df_metric_scores = pd.read_sql("SELECT * FROM user_metric_scores WHERE user_id = %(user_id)s", conn, params={"user_id": str(user_id)})
  df_goal_kcs = pd.read_sql("SELECT * FROM goal_kcs", conn)


Successfully generated report: Learning_Report_P2.docx

--- Generating report for P3 ---


  user_info = pd.read_sql("SELECT id FROM users WHERE name = %(name)s", conn, params={"name": user_name})
  df_user_goals = pd.read_sql("SELECT * FROM user_goals WHERE user_id = %(user_id)s ORDER BY goal_id", conn, params={"user_id": str(user_id)})
  df_metric_history = pd.read_sql("SELECT * FROM user_metric_history WHERE user_id = %(user_id)s", conn, params={"user_id": str(user_id)})
  df_kc_scores = pd.read_sql("SELECT * FROM user_kc_scores WHERE user_id = %(user_id)s", conn, params={"user_id": str(user_id)})
  df_metric_scores = pd.read_sql("SELECT * FROM user_metric_scores WHERE user_id = %(user_id)s", conn, params={"user_id": str(user_id)})
  df_goal_kcs = pd.read_sql("SELECT * FROM goal_kcs", conn)


Successfully generated report: Learning_Report_P3.docx

--- Generating report for P4 ---
Successfully generated report: Learning_Report_P4.docx

--- Generating report for P5 ---


  user_info = pd.read_sql("SELECT id FROM users WHERE name = %(name)s", conn, params={"name": user_name})
  df_user_goals = pd.read_sql("SELECT * FROM user_goals WHERE user_id = %(user_id)s ORDER BY goal_id", conn, params={"user_id": str(user_id)})
  df_metric_history = pd.read_sql("SELECT * FROM user_metric_history WHERE user_id = %(user_id)s", conn, params={"user_id": str(user_id)})
  df_kc_scores = pd.read_sql("SELECT * FROM user_kc_scores WHERE user_id = %(user_id)s", conn, params={"user_id": str(user_id)})
  df_metric_scores = pd.read_sql("SELECT * FROM user_metric_scores WHERE user_id = %(user_id)s", conn, params={"user_id": str(user_id)})
  df_goal_kcs = pd.read_sql("SELECT * FROM goal_kcs", conn)
  user_info = pd.read_sql("SELECT id FROM users WHERE name = %(name)s", conn, params={"name": user_name})
  df_user_goals = pd.read_sql("SELECT * FROM user_goals WHERE user_id = %(user_id)s ORDER BY goal_id", conn, params={"user_id": str(user_id)})
  df_metric_history = pd.read_sql("SE

Successfully generated report: Learning_Report_P5.docx

--- Generating report for P6 ---


  user_info = pd.read_sql("SELECT id FROM users WHERE name = %(name)s", conn, params={"name": user_name})
  df_user_goals = pd.read_sql("SELECT * FROM user_goals WHERE user_id = %(user_id)s ORDER BY goal_id", conn, params={"user_id": str(user_id)})
  df_metric_history = pd.read_sql("SELECT * FROM user_metric_history WHERE user_id = %(user_id)s", conn, params={"user_id": str(user_id)})
  df_kc_scores = pd.read_sql("SELECT * FROM user_kc_scores WHERE user_id = %(user_id)s", conn, params={"user_id": str(user_id)})
  df_metric_scores = pd.read_sql("SELECT * FROM user_metric_scores WHERE user_id = %(user_id)s", conn, params={"user_id": str(user_id)})
  df_goal_kcs = pd.read_sql("SELECT * FROM goal_kcs", conn)


Successfully generated report: Learning_Report_P6.docx

--- Generating report for P7 ---


  user_info = pd.read_sql("SELECT id FROM users WHERE name = %(name)s", conn, params={"name": user_name})
  df_user_goals = pd.read_sql("SELECT * FROM user_goals WHERE user_id = %(user_id)s ORDER BY goal_id", conn, params={"user_id": str(user_id)})
  df_metric_history = pd.read_sql("SELECT * FROM user_metric_history WHERE user_id = %(user_id)s", conn, params={"user_id": str(user_id)})
  df_kc_scores = pd.read_sql("SELECT * FROM user_kc_scores WHERE user_id = %(user_id)s", conn, params={"user_id": str(user_id)})
  df_metric_scores = pd.read_sql("SELECT * FROM user_metric_scores WHERE user_id = %(user_id)s", conn, params={"user_id": str(user_id)})
  df_goal_kcs = pd.read_sql("SELECT * FROM goal_kcs", conn)


Successfully generated report: Learning_Report_P7.docx

--- Generating report for P8 ---


  user_info = pd.read_sql("SELECT id FROM users WHERE name = %(name)s", conn, params={"name": user_name})
  df_user_goals = pd.read_sql("SELECT * FROM user_goals WHERE user_id = %(user_id)s ORDER BY goal_id", conn, params={"user_id": str(user_id)})
  df_metric_history = pd.read_sql("SELECT * FROM user_metric_history WHERE user_id = %(user_id)s", conn, params={"user_id": str(user_id)})
  df_kc_scores = pd.read_sql("SELECT * FROM user_kc_scores WHERE user_id = %(user_id)s", conn, params={"user_id": str(user_id)})
  df_metric_scores = pd.read_sql("SELECT * FROM user_metric_scores WHERE user_id = %(user_id)s", conn, params={"user_id": str(user_id)})
  df_goal_kcs = pd.read_sql("SELECT * FROM goal_kcs", conn)


Successfully generated report: Learning_Report_P8.docx

--- Generating report for P9 ---


  user_info = pd.read_sql("SELECT id FROM users WHERE name = %(name)s", conn, params={"name": user_name})
  df_user_goals = pd.read_sql("SELECT * FROM user_goals WHERE user_id = %(user_id)s ORDER BY goal_id", conn, params={"user_id": str(user_id)})
  df_metric_history = pd.read_sql("SELECT * FROM user_metric_history WHERE user_id = %(user_id)s", conn, params={"user_id": str(user_id)})
  df_kc_scores = pd.read_sql("SELECT * FROM user_kc_scores WHERE user_id = %(user_id)s", conn, params={"user_id": str(user_id)})
  df_metric_scores = pd.read_sql("SELECT * FROM user_metric_scores WHERE user_id = %(user_id)s", conn, params={"user_id": str(user_id)})
  df_goal_kcs = pd.read_sql("SELECT * FROM goal_kcs", conn)


Successfully generated report: Learning_Report_P9.docx

--- Generating report for P10 ---


  user_info = pd.read_sql("SELECT id FROM users WHERE name = %(name)s", conn, params={"name": user_name})
  df_user_goals = pd.read_sql("SELECT * FROM user_goals WHERE user_id = %(user_id)s ORDER BY goal_id", conn, params={"user_id": str(user_id)})
  df_metric_history = pd.read_sql("SELECT * FROM user_metric_history WHERE user_id = %(user_id)s", conn, params={"user_id": str(user_id)})
  df_kc_scores = pd.read_sql("SELECT * FROM user_kc_scores WHERE user_id = %(user_id)s", conn, params={"user_id": str(user_id)})
  df_metric_scores = pd.read_sql("SELECT * FROM user_metric_scores WHERE user_id = %(user_id)s", conn, params={"user_id": str(user_id)})
  df_goal_kcs = pd.read_sql("SELECT * FROM goal_kcs", conn)


Successfully generated report: Learning_Report_P10.docx

Database connection closed.


In [11]:
import pandas as pd
from docx import Document
from docx.shared import Pt
import json

# --- Configuration & Business Logic ---

# Multiplier applied to a KC score change for each named metric (per the
# original note, this encodes the logic from route.ts). The "default" entry is
# the fallback weight for metric names not listed here.
METRIC_WEIGHTS = dict([
    ("Revenue", 450),
    ("Customer Satisfaction", 2),
    ("Reputation", 0.15),
    ("Ethical Decision Making", 2),
    ("Risk-Taking", 2),
    ("default", 2),
])

# Participant labels P1 through P10.
PARTICIPANT_NAMES = ["P" + str(number) for number in range(1, 11)]

def load_data():
    """Read every ``<table>_rows.csv`` export from the working directory.

    Returns:
        dict mapping table name to its DataFrame, or None when any of the
        files is missing (the error is printed).
    """
    table_names = (
        'users',
        'user_goals',
        'user_kc_scores',
        'user_metric_scores',
        'kcs',
        'metrics',
        'goals',
        'kc_metric_effects',
    )

    print("Loading data from CSV files...")
    try:
        tables = {}
        for table in table_names:
            tables[table] = pd.read_csv(f'{table}_rows.csv')
    except FileNotFoundError as e:
        print(f"Error: Could not find a required CSV file. {e}")
        return None

    print("Data loaded successfully.")
    return tables

def calculate_peer_averages(df_metric_scores, df_kc_scores):
    """Average the final metric values and KC scores over all rows provided.

    Returns:
        (avg_metrics, avg_kcs): mean 'current_value' per 'metric_id' and mean
        'current_score' per 'kc_id', each as a Series indexed by the id.
    """
    def _mean_by(frame, key, value):
        return frame.groupby(key)[value].mean()

    return (
        _mean_by(df_metric_scores, 'metric_id', 'current_value'),
        _mean_by(df_kc_scores, 'kc_id', 'current_score'),
    )

def get_metric_change(kc_id, score_change, data_tables):
    """Translate one KC score change into the metric changes it causes.

    Args:
        kc_id: Id of the KC whose score changed.
        score_change: Size of the KC score change.
        data_tables: dict of DataFrames; reads 'kc_metric_effects' (columns
            'kc_id', 'metric_id') and 'metrics' (columns 'id', 'name').

    Returns:
        List of ``{'metric_name': ..., 'change': ...}`` dicts, one per metric
        linked to the KC, where change is score_change multiplied by the
        metric's weight in METRIC_WEIGHTS (or its 'default' weight).

    Raises:
        KeyError: when a linked metric_id is not present in the metrics table.
    """
    effects = data_tables['kc_metric_effects']
    # Find which metrics this KC affects
    affected_links = effects[effects['kc_id'] == kc_id]
    if affected_links.empty:
        return []

    # Index the metrics table once instead of rebuilding it for every link.
    metrics_by_id = data_tables['metrics'].set_index('id')

    metric_changes = []
    for _, link in affected_links.iterrows():
        metric_name = metrics_by_id.loc[link['metric_id'], 'name']
        weight = METRIC_WEIGHTS.get(metric_name, METRIC_WEIGHTS['default'])
        metric_changes.append({'metric_name': metric_name, 'change': score_change * weight})

    return metric_changes

def parse_final_attempt(dialogue_history_json, data_tables):
    """Extract per-decision KC changes and their metric effects from a dialogue.

    Args:
        dialogue_history_json: JSON text of a list of turn dicts ('role',
            'content'). Any non-string value (e.g. NaN for a missing CSV cell)
            yields an empty result.
        data_tables: dict of DataFrames; reads 'kcs' (columns 'id',
            'kc_identifier', 'name') and is passed on to get_metric_change.

    Returns:
        dict mapping the 1-based decision number to a list of impact dicts
        with the keys 'kc_id', 'kc_name', 'score_change' and 'metric_impacts'.
        If parsing fails part-way, a warning is printed and the decisions
        collected so far are returned.
    """
    kc_changes_by_decision = {}

    if not isinstance(dialogue_history_json, str):
        return kc_changes_by_decision

    marker = 'User chose decision index'
    try:
        history = json.loads(dialogue_history_json)
        decision_counter = 0
        for i, turn in enumerate(history):
            if turn.get('role') != 'user' or marker not in turn.get('content', ''):
                continue
            if i == 0:
                # No assistant turn precedes this choice; history[i - 1] would
                # silently wrap around to the last turn of the dialogue.
                continue

            decision_counter += 1
            assistant_content = json.loads(history[i - 1]['content'])
            decision_point = assistant_content.get('decisionPoint', {})

            # Read the integer that follows the marker, so both
            # "...index: 0" and "...index: 0: 'option text'" are understood.
            tail = turn['content'].split(marker, 1)[1].lstrip(' :')
            decision_index = int(tail.split(':', 1)[0].strip())
            if decision_index < 0:
                # A negative index would silently select from the end of the list.
                raise IndexError(f"decision index {decision_index} is out of range")
            chosen_option = decision_point.get('options', [])[decision_index]

            impacts = []
            for impact in chosen_option.get('kc_impacts', []):
                kc_identifier = impact['kc_identifier']
                score_change = impact.get('score', 0)

                # Resolve the identifier to its id and display name; impacts
                # on identifiers missing from the kcs table are skipped.
                kc_row = data_tables['kcs'][data_tables['kcs']['kc_identifier'] == kc_identifier]
                if not kc_row.empty:
                    kc_id = kc_row.iloc[0]['id']
                    kc_name = kc_row.iloc[0]['name']
                    metric_deltas = get_metric_change(kc_id, score_change, data_tables)
                    impacts.append({
                        'kc_id': kc_id,
                        'kc_name': kc_name,
                        'score_change': score_change,
                        'metric_impacts': metric_deltas
                    })
            kc_changes_by_decision[decision_counter] = impacts
    except (json.JSONDecodeError, KeyError, IndexError, TypeError, ValueError, AttributeError) as e:
        print(f"Warning: Could not parse dialogue history. {e}")

    return kc_changes_by_decision

def generate_report_for_user(user_info, data, peer_averages):
    """Generates a single DOCX report for a given user.

    The report has an executive summary (final metrics vs. peer average, the
    three highest and three lowest KC scores) followed by one table per goal
    listing KC and metric changes per decision. Only the final attempt of a
    goal is parsed from its dialogue history; decisions of earlier attempts
    are reconstructed by spreading the remaining KC score evenly over them.

    Args:
        user_info: Mapping with 'id' and 'name' of the user.
        data: Dict of DataFrames; reads 'user_goals', 'user_kc_scores',
            'user_metric_scores', 'metrics', 'kcs' and 'goals', and is passed
            on to parse_final_attempt / get_metric_change.
        peer_averages: Dict whose 'metrics' entry supports
            ``.get(metric_id, default)`` and yields the peer average.

    Side effects:
        Prints progress and saves ``Learning_Report_<name>.docx`` (relative path).
    """
    user_id = user_info['id']
    user_name = user_info['name']

    print(f"\n--- Generating report for {user_name} ---")

    # Filter data for the current user
    user_goals = data['user_goals'][data['user_goals']['user_id'] == user_id]
    user_kc_scores = data['user_kc_scores'][data['user_kc_scores']['user_id'] == user_id]
    user_metric_scores = data['user_metric_scores'][data['user_metric_scores']['user_id'] == user_id]

    # Build the id -> name lookups once instead of re-indexing the tables in every loop pass.
    metric_names = data['metrics'].set_index('id')['name']
    kc_names = data['kcs'].set_index('id')['name']
    goal_names = data['goals'].set_index('id')['name']

    doc = Document()
    doc.add_heading('AI Entrepreneurship Game: Learning Report', 0)
    doc.add_heading(f"Participant: {user_name}", level=1)

    # --- Section 1: Executive Summary ---
    doc.add_heading("1. Executive Summary & Overall Performance", level=2)
    doc.add_paragraph().add_run("Final Key Metrics (vs. Peer Average):").bold = True

    for _, row in user_metric_scores.iterrows():
        metric_id = row['metric_id']
        metric_name = metric_names.loc[metric_id]
        peer_avg = peer_averages['metrics'].get(metric_id, 0)
        doc.add_paragraph(f"{metric_name}: {row['current_value']:.2f} (Peer Avg: {peer_avg:.2f})", style='List Bullet')

    doc.add_paragraph().add_run("Key Strengths (Top KCs):").bold = True
    for _, row in user_kc_scores.nlargest(3, 'current_score').iterrows():
        kc_name = kc_names.loc[row['kc_id']]
        doc.add_paragraph(f"{kc_name} - Final Score: {row['current_score']}", style='List Bullet')

    doc.add_paragraph().add_run("Areas for Improvement (Lowest KCs):").bold = True
    for _, row in user_kc_scores.nsmallest(3, 'current_score').iterrows():
        kc_name = kc_names.loc[row['kc_id']]
        doc.add_paragraph(f"{kc_name} - Final Score: {row['current_score']}", style='List Bullet')

    # --- Section 2: Reconstructed Decision Analysis ---
    doc.add_heading("2. Decision & KC Impact Analysis", level=2)

    for _, goal_row in user_goals.iterrows():
        goal_id = goal_row['goal_id']
        goal_name = goal_names.loc[goal_id]

        doc.add_heading(f"Goal: {goal_name}", level=3)
        doc.add_paragraph(f"Status: {goal_row['status']}, Attempts: {goal_row['attempts_for_current_goal_cycle']}")

        # Step 1: Analyze the final, known attempt
        final_attempt_impacts = parse_final_attempt(goal_row['dialogue_history'], data)

        # Sum up changes from the final attempt (plain dict: no Series enlargement per item)
        final_attempt_kc_delta = {}
        for impacts in final_attempt_impacts.values():
            for impact in impacts:
                kc_id = impact['kc_id']
                final_attempt_kc_delta[kc_id] = final_attempt_kc_delta.get(kc_id, 0) + impact['score_change']

        # Step 2: Calculate the total change needed in the missing attempts
        kc_scores_for_goal = user_kc_scores[user_kc_scores['kc_id'].isin(list(final_attempt_kc_delta))]
        kc_delta_for_missing_attempts = {}
        for _, kc_row in kc_scores_for_goal.iterrows():
            kc_id = kc_row['kc_id']
            final_score = kc_row['current_score']
            change_from_last_attempt = final_attempt_kc_delta.get(kc_id, 0)
            kc_delta_for_missing_attempts[kc_id] = final_score - change_from_last_attempt

        # Step 3: Reconstruct and combine with known data
        num_attempts = goal_row['attempts_for_current_goal_cycle']
        # A missing, zero or negative attempt count would give a negative number of
        # missing decisions (and decision numbers <= 0); treat it as a single attempt.
        if pd.isna(num_attempts) or num_attempts < 1:
            num_attempts = 1
        # The column is float when the CSV had blanks; range() below needs an int.
        num_attempts = int(num_attempts)
        num_decisions_per_attempt = 3
        num_missing_decisions = (num_attempts - 1) * num_decisions_per_attempt

        table = doc.add_table(rows=1, cols=4)
        table.style = 'Table Grid'
        hdr_cells = table.rows[0].cells
        hdr_cells[0].text = 'Decision #'
        hdr_cells[1].text = 'KC Impacted'
        hdr_cells[2].text = 'KC Score Change'
        hdr_cells[3].text = 'Metric Change'

        # Reconstruct missing decisions
        if num_missing_decisions > 0:
            # Every reconstructed decision gets the same even share per KC, so the
            # row texts are computed once and then repeated for each decision.
            reconstructed_rows = []
            for kc_id, total_change in kc_delta_for_missing_attempts.items():
                score_change_per_step = total_change / num_missing_decisions
                metric_changes = get_metric_change(kc_id, score_change_per_step, data)
                reconstructed_rows.append((
                    kc_names.loc[kc_id],
                    f"{score_change_per_step:+.2f}",
                    "\n".join([f"{mc['change']:+.2f} {mc['metric_name']}" for mc in metric_changes]),
                ))

            for decision_num in range(1, num_missing_decisions + 1):
                for kc_name, score_text, metric_text in reconstructed_rows:
                    row_cells = table.add_row().cells
                    row_cells[0].text = f"{decision_num}"
                    row_cells[1].text = kc_name
                    row_cells[2].text = score_text
                    row_cells[3].text = metric_text

        # Add known decisions from final attempt (numbered after the reconstructed ones)
        start_decision = num_missing_decisions + 1
        for i, impacts in final_attempt_impacts.items():
            decision_num = start_decision + i - 1
            if not impacts:
                row_cells = table.add_row().cells
                row_cells[0].text = str(decision_num)
                row_cells[1].text = "No KC Impact"
            for impact in impacts:
                row_cells = table.add_row().cells
                row_cells[0].text = str(decision_num)
                row_cells[1].text = impact['kc_name']
                row_cells[2].text = f"{impact['score_change']:+.2f}"
                row_cells[3].text = "\n".join([f"{mc['change']:+.2f} {mc['metric_name']}" for mc in impact['metric_impacts']])

    # Save the document
    file_name = f"Learning_Report_{user_name}.docx"
    doc.save(file_name)
    print(f"Successfully generated report: {file_name}")


if __name__ == "__main__":
    # Driver: load the CSV tables, then write one DOCX report per selected participant.
    all_data = load_data()
    # load_data() is expected to return a falsy value when loading fails; skip everything then.
    if all_data:
        # Pre-process data types: make the score columns numeric, in place on the loaded
        # frames (pd.to_numeric raises here if a value cannot be parsed).
        all_data['user_kc_scores']['current_score'] = pd.to_numeric(all_data['user_kc_scores']['current_score'])
        all_data['user_metric_scores']['current_value'] = pd.to_numeric(all_data['user_metric_scores']['current_value'])

        # NOTE(review): calculate_peer_averages is defined outside this cell; it is assumed to
        # return (metric averages, KC averages) computed over all users - confirm.
        peer_avg_metrics, peer_avg_kcs = calculate_peer_averages(all_data['user_metric_scores'], all_data['user_kc_scores'])

        # Rows of the users table whose name is in PARTICIPANT_NAMES
        # (a filtered DataFrame, not a name -> id mapping).
        user_map = all_data['users'][all_data['users']['name'].isin(PARTICIPANT_NAMES)]

        # Reports are generated in the row order of the users table.
        for _, user_row in user_map.iterrows():
            generate_report_for_user(user_row, all_data, {'metrics': peer_avg_metrics, 'kcs': peer_avg_kcs})

Loading data from CSV files...
Data loaded successfully.

--- Generating report for P9 ---
Successfully generated report: Learning_Report_P9.docx

--- Generating report for P8 ---
Successfully generated report: Learning_Report_P8.docx

--- Generating report for P7 ---
Successfully generated report: Learning_Report_P7.docx

--- Generating report for P6 ---
Successfully generated report: Learning_Report_P6.docx

--- Generating report for P5 ---
Successfully generated report: Learning_Report_P5.docx

--- Generating report for P4 ---
Successfully generated report: Learning_Report_P4.docx

--- Generating report for P3 ---
Successfully generated report: Learning_Report_P3.docx

--- Generating report for P2 ---
Successfully generated report: Learning_Report_P2.docx

--- Generating report for P10 ---
Successfully generated report: Learning_Report_P10.docx

--- Generating report for P1 ---
Successfully generated report: Learning_Report_P1.docx


In [12]:
import pandas as pd
from docx import Document
from docx.shared import Pt, Inches
import json
import matplotlib.pyplot as plt
from io import BytesIO

# --- Configuration & Business Logic ---
# Multiplier applied to a KC score change to obtain the change of a metric,
# keyed by metric name (used by get_metric_change). A metric name without its
# own entry falls back to the "default" multiplier.
METRIC_WEIGHTS = {
    "Revenue": 450,
    "Customer Satisfaction": 2,
    "Reputation": 0.15,
    "Ethical Decision Making": 2,
    "Risk-Taking": 2,
    "default": 2
}
# User names to generate reports for: 'P1' through 'P10'.
PARTICIPANT_NAMES = [f'P{i}' for i in range(1, 11)]

def load_data():
    """Read all CSV exports needed for the reports.

    Returns:
        A dict mapping table name to its DataFrame, or None (after printing
        an error message) when one of the CSV files cannot be found.
    """
    print("Loading data from CSV files...")
    # (table key, CSV path) pairs, read in this order.
    csv_sources = (
        ('users', 'users_rows.csv'),
        ('user_goals', 'user_goals_rows.csv'),
        ('user_kc_scores', 'user_kc_scores_rows.csv'),
        ('user_metric_scores', 'user_metric_scores_rows.csv'),
        ('kcs', 'kcs_rows.csv'),
        ('metrics', 'metrics_rows.csv'),
        ('goals', 'goals_rows.csv'),
        ('kc_metric_effects', 'kc_metric_effects_rows.csv'),
    )
    try:
        tables = {key: pd.read_csv(csv_path) for key, csv_path in csv_sources}
    except FileNotFoundError as missing:
        print(f"Error: Could not find a required CSV file. {missing}")
        return None
    else:
        print("Data loaded successfully.")
        return tables

def calculate_peer_stats(df_metric_scores, df_kc_scores, data_tables): # <<< NEW FEATURE
    """Summarise all participants' scores for peer comparison.

    Returns:
        (peer_metrics, leaderboard): peer_metrics is indexed by metric_id with
        'min', 'mean' and 'max' columns of current_value; leaderboard has the
        columns 'name' and 'current_score' (sum of a user's KC scores), sorted
        from highest to lowest total.
    """
    values_by_metric = df_metric_scores.groupby('metric_id')['current_value']
    peer_metrics = values_by_metric.agg(['min', 'mean', 'max'])

    # Total KC score per user, joined to the users table to obtain display names.
    kc_totals = df_kc_scores.groupby('user_id')['current_score'].sum().reset_index()
    user_names = data_tables['users'][['id', 'name']]
    named_totals = kc_totals.merge(user_names, left_on='user_id', right_on='id')
    ranking_columns = named_totals[['name', 'current_score']]
    leaderboard = ranking_columns.sort_values(by='current_score', ascending=False)

    return peer_metrics, leaderboard

def get_metric_change(kc_id, score_change, data_tables, weights=None):
    """Calculates the metric changes for a given KC score change based on business logic.

    Every metric linked to ``kc_id`` in the 'kc_metric_effects' table changes by
    ``score_change`` multiplied by that metric's weight.

    Args:
        kc_id: Id of the knowledge component whose score changed.
        score_change: Size of the KC score change.
        data_tables: Dict of DataFrames; reads 'kc_metric_effects' (columns
            kc_id, metric_id) and 'metrics' (columns id, name).
        weights: Optional mapping of metric name to multiplier. It must contain
            a 'default' entry, used for metric names without their own entry.
            Defaults to the module-level METRIC_WEIGHTS.

    Returns:
        List of ``{'metric_name': ..., 'change': ...}`` dicts, one per linked
        metric in table order; empty when the KC has no linked metrics.

    Raises:
        KeyError: If a linked metric_id is missing from the metrics table or
            the weights mapping has no 'default' entry.
    """
    effects = data_tables['kc_metric_effects']
    affected_links = effects[effects['kc_id'] == kc_id]
    if affected_links.empty:
        return []

    if weights is None:
        weights = METRIC_WEIGHTS
    default_weight = weights['default']

    # Index the metrics table once; the original rebuilt this index for every link.
    metric_names = data_tables['metrics'].set_index('id')['name']

    metric_changes = []
    for metric_id in affected_links['metric_id']:
        metric_name = metric_names.loc[metric_id]
        weight = weights.get(metric_name, default_weight)
        metric_changes.append({'metric_name': metric_name, 'change': score_change * weight})

    return metric_changes

def parse_final_attempt(dialogue_history_json, data_tables):
    """Parses the dialogue history for the final attempt to get KC and metric changes.

    A decision is a 'user' turn whose content contains 'User chose decision index'.
    The turn directly before it must hold, as JSON in its 'content', a
    'decisionPoint' with an 'options' list; the chosen option's 'kc_impacts'
    are resolved against the 'kcs' table.

    Args:
        dialogue_history_json: JSON string encoding a list of turn dicts.
            Any non-string value (e.g. NaN from an empty CSV cell) yields {}.
        data_tables: Dict of DataFrames; reads 'kcs' (columns id, kc_identifier,
            name) and is passed on to get_metric_change.

    Returns:
        Dict mapping the 1-based decision number to a list of dicts with keys
        'kc_id', 'kc_name', 'score_change' and 'metric_impacts'. Impacts whose
        kc_identifier is not in the 'kcs' table are dropped. Parsing is
        best-effort: on the first malformed decision a warning is printed and
        the decisions parsed so far are returned.
    """
    kc_changes_by_decision = {}

    if not isinstance(dialogue_history_json, str):
        return kc_changes_by_decision

    try:
        history = json.loads(dialogue_history_json)
        decision_counter = 0
        for i, turn in enumerate(history):
            content = turn.get('content')
            # Turns without string content cannot be decision turns; skip them
            # instead of aborting the whole parse.
            if turn.get('role') != 'user' or not isinstance(content, str):
                continue
            if 'User chose decision index' not in content:
                continue

            decision_counter += 1
            if i == 0:
                # history[i - 1] would silently wrap around to the LAST turn.
                raise ValueError("decision turn has no preceding assistant turn")
            assistant_turn = history[i - 1]
            assistant_content = json.loads(assistant_turn['content'])
            decision_point = assistant_content.get('decisionPoint', {})

            # The index follows the first ': ' and may be trailed by ':<label>'.
            content_part = content.split(': ', 1)[1]
            decision_index = int(content_part.split(':')[0].strip())
            if decision_index < 0:
                # A negative index would pick an option counted from the end.
                raise IndexError(f"negative decision index {decision_index}")

            chosen_option = decision_point.get('options', [])[decision_index]

            impacts = []
            for impact in chosen_option.get('kc_impacts', []):
                kc_identifier = impact['kc_identifier']
                score_change = impact.get('score', 0)

                kc_row = data_tables['kcs'][data_tables['kcs']['kc_identifier'] == kc_identifier]
                if not kc_row.empty:
                    kc_id = kc_row.iloc[0]['id']
                    kc_name = kc_row.iloc[0]['name']
                    metric_deltas = get_metric_change(kc_id, score_change, data_tables)
                    impacts.append({
                        'kc_id': kc_id, 'kc_name': kc_name,
                        'score_change': score_change, 'metric_impacts': metric_deltas
                    })
            kc_changes_by_decision[decision_counter] = impacts
    except Exception as e:
        # Deliberate best-effort: a malformed history must not stop report generation.
        print(f"Warning: Could not parse dialogue history for a goal. Error: {e}")

    return kc_changes_by_decision

def create_cumulative_kc_plot(decision_data): # <<< NEW FEATURE
    """Creates a line chart of cumulative KC scores over decisions.

    Args:
        decision_data: List of dicts with 'decision_num', 'kc_id' and
            'score_change'. Empty or None input yields None.

    Returns:
        BytesIO positioned at 0 holding the PNG image (one line per kc_id),
        or None when there is nothing to plot.
    """
    if not decision_data:
        return None

    df = pd.DataFrame(decision_data)

    # Net change per (decision, KC). Summing first means several impacts on the
    # same KC within one decision add up (a pivot of running totals would
    # average them), and fill_value=0 keeps a KC's line flat - not broken -
    # at decisions that did not touch it.
    change_per_decision = df.pivot_table(
        index='decision_num', columns='kc_id', values='score_change',
        aggfunc='sum', fill_value=0,
    )
    # pivot_table returns the index sorted, so a row-wise cumsum is the running total.
    cumulative_scores = change_per_decision.cumsum()

    fig, ax = plt.subplots(figsize=(8, 4))
    try:
        # KC ids (not names) label the lines to keep the legend short.
        cumulative_scores.plot(kind='line', marker='o', ax=ax)

        ax.set_title("Cumulative KC Score Movement per Goal")
        ax.set_xlabel("Decision Number")
        ax.set_ylabel("Cumulative KC Score")
        ax.grid(True, linestyle='--', alpha=0.6)
        ax.legend(title='KC ID')
        fig.tight_layout()

        img_buffer = BytesIO()
        fig.savefig(img_buffer, format='png')
        img_buffer.seek(0)
    finally:
        # Always release the figure, also when plotting or saving fails.
        plt.close(fig)

    return img_buffer

def generate_report_for_user(user_info, data, peer_stats, leaderboard): # <<< NEW FEATURE
    """Generates a single DOCX report for a given user.

    Sections: (1) executive summary with final metrics vs. peer min/avg/max and
    the three highest / lowest KC scores, (2) per goal a decision table, a
    cumulative KC plot and a KC id legend, (3) the leaderboard with the current
    user's row in bold. Only the final attempt of a goal is parsed from its
    dialogue history; decisions of earlier attempts are reconstructed by
    spreading the remaining KC score evenly over them.

    Args:
        user_info: Mapping with 'id' and 'name' of the user.
        data: Dict of DataFrames; reads 'user_goals', 'user_kc_scores',
            'user_metric_scores', 'metrics', 'kcs' and 'goals', and is passed
            on to parse_final_attempt / get_metric_change.
        peer_stats: Dict whose 'metrics' entry is a DataFrame indexed by
            metric_id with 'min', 'mean' and 'max' columns.
        leaderboard: DataFrame with 'name' and 'current_score' columns, already
            in ranking order.

    Side effects:
        Prints progress and saves ``Learning_Report_<name>.docx`` (relative path).
    """
    user_id = user_info['id']
    user_name = user_info['name']

    print(f"\n--- Generating report for {user_name} ---")

    user_goals = data['user_goals'][data['user_goals']['user_id'] == user_id]
    user_kc_scores = data['user_kc_scores'][data['user_kc_scores']['user_id'] == user_id]
    user_metric_scores = data['user_metric_scores'][data['user_metric_scores']['user_id'] == user_id]

    # Build the id -> name lookups once instead of re-indexing the tables in every loop pass.
    metric_names = data['metrics'].set_index('id')['name']
    kc_names = data['kcs'].set_index('id')['name']
    goal_names = data['goals'].set_index('id')['name']

    doc = Document()
    doc.add_heading('AI Entrepreneurship Game: Learning Report', 0)
    doc.add_heading(f"Participant: {user_name}", level=1)

    # --- Section 1: Executive Summary ---
    doc.add_heading("1. Executive Summary & Overall Performance", level=2)
    doc.add_paragraph().add_run("Final Key Metrics (vs. Peers):").bold = True # <<< NEW FEATURE

    for _, row in user_metric_scores.iterrows():
        metric_id = row['metric_id']
        metric_name = metric_names.loc[metric_id]
        # Metrics without peer statistics are left out of the summary.
        if metric_id in peer_stats['metrics'].index:
            peer_stat = peer_stats['metrics'].loc[metric_id]
            doc.add_paragraph(
                f"{metric_name}: {row['current_value']:.2f} "
                f"(Peer Min: {peer_stat['min']:.2f}, Avg: {peer_stat['mean']:.2f}, Max: {peer_stat['max']:.2f})",
                style='List Bullet'
            )

    doc.add_paragraph().add_run("Key Strengths (Top KCs):").bold = True
    for _, row in user_kc_scores.nlargest(3, 'current_score').iterrows():
        kc_name = kc_names.loc[row['kc_id']]
        doc.add_paragraph(f"{kc_name} - Final Score: {row['current_score']}", style='List Bullet')

    doc.add_paragraph().add_run("Areas for Improvement (Lowest KCs):").bold = True
    for _, row in user_kc_scores.nsmallest(3, 'current_score').iterrows():
        kc_name = kc_names.loc[row['kc_id']]
        doc.add_paragraph(f"{kc_name} - Final Score: {row['current_score']}", style='List Bullet')

    # --- Section 2: Reconstructed Decision Analysis ---
    doc.add_heading("2. Decision & KC Impact Analysis", level=2)

    for _, goal_row in user_goals.iterrows():
        goal_id = goal_row['goal_id']
        goal_name = goal_names.loc[goal_id]

        doc.add_heading(f"Goal: {goal_name}", level=3)
        doc.add_paragraph(f"Status: {goal_row['status']}, Attempts: {goal_row['attempts_for_current_goal_cycle']}")

        # Known KC changes of the final attempt, summed per KC
        # (plain dict: no Series enlargement per item).
        final_attempt_impacts = parse_final_attempt(goal_row['dialogue_history'], data)
        final_attempt_kc_delta = {}
        for impacts in final_attempt_impacts.values():
            for impact in impacts:
                kc_id = impact['kc_id']
                final_attempt_kc_delta[kc_id] = final_attempt_kc_delta.get(kc_id, 0) + impact['score_change']

        # Whatever part of the final score the last attempt does not explain is
        # attributed to the earlier (unrecorded) attempts.
        kc_scores_for_goal = user_kc_scores[user_kc_scores['kc_id'].isin(list(final_attempt_kc_delta))]
        kc_delta_for_missing_attempts = {}
        for _, kc_row in kc_scores_for_goal.iterrows():
            kc_id = kc_row['kc_id']
            final_score = kc_row['current_score']
            change_from_last_attempt = final_attempt_kc_delta.get(kc_id, 0)
            kc_delta_for_missing_attempts[kc_id] = final_score - change_from_last_attempt

        num_attempts = goal_row['attempts_for_current_goal_cycle']
        # Handle cases where attempts might be 0, missing or negative: treat as one attempt.
        if pd.isna(num_attempts) or num_attempts < 1:
            num_attempts = 1
        # The column is float when the CSV had blanks; range() below needs an int.
        num_attempts = int(num_attempts)
        num_decisions_per_attempt = 3
        num_missing_decisions = (num_attempts - 1) * num_decisions_per_attempt

        table = doc.add_table(rows=1, cols=4)
        table.style = 'Table Grid'
        hdr_cells = table.rows[0].cells
        hdr_cells[0].text = 'Decision #'
        hdr_cells[1].text = 'KC Impacted'
        hdr_cells[2].text = 'KC Score Change'
        hdr_cells[3].text = 'Metric Change'

        all_decision_data = [] # For plotting

        # Reconstruct: spread each KC's unexplained change evenly over the missing decisions
        if num_missing_decisions > 0:
            for decision_num in range(1, num_missing_decisions + 1):
                for kc_id, total_change in kc_delta_for_missing_attempts.items():
                    score_change_per_step = total_change / num_missing_decisions
                    all_decision_data.append({'decision_num': decision_num, 'kc_id': kc_id, 'score_change': score_change_per_step})

        # Add known data, numbered after the reconstructed decisions
        start_decision = num_missing_decisions + 1
        for i, impacts in sorted(final_attempt_impacts.items()):
            decision_num = start_decision + i - 1
            for impact in impacts:
                all_decision_data.append({'decision_num': decision_num, 'kc_id': impact['kc_id'], 'score_change': impact['score_change']})

        # Populate table from combined data
        for item in sorted(all_decision_data, key=lambda x: x['decision_num']):
            kc_name = kc_names.loc[item['kc_id']]
            metric_changes = get_metric_change(item['kc_id'], item['score_change'], data)

            row_cells = table.add_row().cells
            row_cells[0].text = str(item['decision_num'])
            row_cells[1].text = kc_name
            row_cells[2].text = f"{item['score_change']:+.2f}"
            row_cells[3].text = "\n".join([f"{mc['change']:+.2f} {mc['metric_name']}" for mc in metric_changes])

        # --- Section 2b: Cumulative KC Plot & Legend ---
        # <<< NEW FEATURE
        kc_plot_img = create_cumulative_kc_plot(all_decision_data)
        if kc_plot_img:
            doc.add_paragraph() # spacing
            doc.add_picture(kc_plot_img, width=Inches(6.0))

            # Add KC Legend Table (the plot labels its lines with KC ids only)
            doc.add_paragraph("KC ID Legend:")
            involved_kcs = pd.DataFrame(all_decision_data)['kc_id'].unique()
            legend_table = doc.add_table(rows=1, cols=2)
            legend_table.style = 'Table Grid'
            legend_table.rows[0].cells[0].text = 'KC ID'
            legend_table.rows[0].cells[1].text = 'KC Name'
            for kc_id in involved_kcs:
                cells = legend_table.add_row().cells
                cells[0].text = str(kc_id)
                cells[1].text = kc_names.loc[kc_id]
        doc.add_paragraph() # spacing

    # --- Section 3: Learning Leaderboard ---
    # <<< NEW FEATURE
    doc.add_heading("3. Learning Performance Leaderboard", level=2)
    doc.add_paragraph("This ranks participants by their total cumulative score across all Knowledge Components.")
    leaderboard_table = doc.add_table(rows=1, cols=3)
    leaderboard_table.style = 'Table Grid'
    hdr_cells = leaderboard_table.rows[0].cells
    hdr_cells[0].text = 'Rank'
    hdr_cells[1].text = 'Participant'
    hdr_cells[2].text = 'Total KC Score'

    # Rank is the row position in the (pre-sorted) leaderboard, starting at 1.
    for i, (_, row) in enumerate(leaderboard.iterrows()):
        cells = leaderboard_table.add_row().cells
        cells[0].text = str(i + 1)
        cells[1].text = row['name']
        cells[2].text = f"{row['current_score']}"
        if row['name'] == user_name: # Highlight the current user
            for cell in cells:
                for paragraph in cell.paragraphs:
                    for run in paragraph.runs:
                        run.bold = True

    # Save the document
    file_name = f"Learning_Report_{user_name}.docx"
    doc.save(file_name)
    print(f"Successfully generated report: {file_name}")

if __name__ == "__main__":
    # Driver: load the CSV tables, then write one DOCX report per selected participant.
    all_data = load_data()
    # load_data() returns None when a CSV file is missing; nothing is generated then.
    if all_data:
        # Make the score columns numeric, in place on the loaded frames
        # (pd.to_numeric raises here if a value cannot be parsed).
        all_data['user_kc_scores']['current_score'] = pd.to_numeric(all_data['user_kc_scores']['current_score'])
        all_data['user_metric_scores']['current_value'] = pd.to_numeric(all_data['user_metric_scores']['current_value'])

        # Peer statistics and the leaderboard are computed over ALL users in the
        # score tables, not only the participants selected below.
        peer_stats_metrics, leaderboard_data = calculate_peer_stats(all_data['user_metric_scores'], all_data['user_kc_scores'], all_data)

        # Rows of the users table whose name is in PARTICIPANT_NAMES.
        user_map = all_data['users'][all_data['users']['name'].isin(PARTICIPANT_NAMES)]

        # Reports are generated in the row order of the users table.
        for _, user_row in user_map.iterrows():
            generate_report_for_user(user_row, all_data, {'metrics': peer_stats_metrics}, leaderboard_data)

Loading data from CSV files...
Data loaded successfully.

--- Generating report for P9 ---
Successfully generated report: Learning_Report_P9.docx

--- Generating report for P8 ---
Successfully generated report: Learning_Report_P8.docx

--- Generating report for P7 ---
Successfully generated report: Learning_Report_P7.docx

--- Generating report for P6 ---
Successfully generated report: Learning_Report_P6.docx

--- Generating report for P5 ---
Successfully generated report: Learning_Report_P5.docx

--- Generating report for P4 ---
Successfully generated report: Learning_Report_P4.docx

--- Generating report for P3 ---
Successfully generated report: Learning_Report_P3.docx

--- Generating report for P2 ---
Successfully generated report: Learning_Report_P2.docx

--- Generating report for P10 ---
Successfully generated report: Learning_Report_P10.docx

--- Generating report for P1 ---
Successfully generated report: Learning_Report_P1.docx


In [13]:
import pandas as pd
from docx import Document
from docx.shared import Pt, Inches
import json
import matplotlib.pyplot as plt
from io import BytesIO

# --- Configuration & Business Logic ---
# Multiplier applied to a KC score change to obtain the change of a metric,
# keyed by metric name (used by get_metric_change). A metric name without its
# own entry falls back to the "default" multiplier.
METRIC_WEIGHTS = {
    "Revenue": 450,
    "Customer Satisfaction": 2,
    "Reputation": 0.15,
    "Ethical Decision Making": 2,
    "Risk-Taking": 2,
    "default": 2
}
# User names to generate reports for: 'P1' through 'P10'.
PARTICIPANT_NAMES = [f'P{i}' for i in range(1, 11)]

# <<< NEW FEATURE: Map key metrics to their primary learning component for analysis
# Keys are metric names, values are kc_identifier strings looked up in the 'kcs'
# table by create_dual_axis_plot; metrics not listed here get no dual-axis plot.
METRIC_TO_KC_MAP = {
    'Revenue': 'KC6',  # KC6 is 'Calculate revenue, costs, and profit'
    'Reputation': 'KC5', # KC5 is 'Recognize how trust and reputation grow business'
    'Ethical Decision Making': 'KC18' # KC18 is 'Navigate ethical dilemmas'
}

def load_data():
    """Read all CSV exports needed for the reports.

    Returns:
        A dict mapping table name to its DataFrame, or None (after printing
        an error message) when one of the CSV files cannot be found.
    """
    print("Loading data from CSV files...")
    # (table key, CSV path) pairs, read in this order.
    csv_sources = (
        ('users', 'users_rows.csv'),
        ('user_goals', 'user_goals_rows.csv'),
        ('user_kc_scores', 'user_kc_scores_rows.csv'),
        ('user_metric_scores', 'user_metric_scores_rows.csv'),
        ('kcs', 'kcs_rows.csv'),
        ('metrics', 'metrics_rows.csv'),
        ('goals', 'goals_rows.csv'),
        ('kc_metric_effects', 'kc_metric_effects_rows.csv'),
    )
    try:
        tables = {key: pd.read_csv(csv_path) for key, csv_path in csv_sources}
    except FileNotFoundError as missing:
        print(f"Error: Could not find a required CSV file. {missing}")
        return None
    else:
        print("Data loaded successfully.")
        return tables

def calculate_peer_stats(df_metric_scores, df_kc_scores, data_tables):
    """Summarise all participants' scores for peer comparison.

    Returns:
        (peer_metrics, leaderboard): peer_metrics is indexed by metric_id with
        'min', 'mean' and 'max' columns of current_value; leaderboard has the
        columns 'name' and 'current_score' (sum of a user's KC scores), sorted
        from highest to lowest total.
    """
    values_by_metric = df_metric_scores.groupby('metric_id')['current_value']
    peer_metrics = values_by_metric.agg(['min', 'mean', 'max'])

    # Total KC score per user, joined to the users table to obtain display names.
    kc_totals = df_kc_scores.groupby('user_id')['current_score'].sum().reset_index()
    user_names = data_tables['users'][['id', 'name']]
    named_totals = kc_totals.merge(user_names, left_on='user_id', right_on='id')
    ranking_columns = named_totals[['name', 'current_score']]
    leaderboard = ranking_columns.sort_values(by='current_score', ascending=False)

    return peer_metrics, leaderboard

def get_metric_change(kc_id, score_change, data_tables, weights=None):
    """Calculates the metric changes for a given KC score change based on business logic.

    Every metric linked to ``kc_id`` in the 'kc_metric_effects' table changes by
    ``score_change`` multiplied by that metric's weight.

    Args:
        kc_id: Id of the knowledge component whose score changed.
        score_change: Size of the KC score change.
        data_tables: Dict of DataFrames; reads 'kc_metric_effects' (columns
            kc_id, metric_id) and 'metrics' (columns id, name).
        weights: Optional mapping of metric name to multiplier. It must contain
            a 'default' entry, used for metric names without their own entry.
            Defaults to the module-level METRIC_WEIGHTS.

    Returns:
        List of ``{'metric_name': ..., 'change': ...}`` dicts, one per linked
        metric in table order; empty when the KC has no linked metrics.

    Raises:
        KeyError: If a linked metric_id is missing from the metrics table or
            the weights mapping has no 'default' entry.
    """
    effects = data_tables['kc_metric_effects']
    affected_links = effects[effects['kc_id'] == kc_id]
    if affected_links.empty:
        return []

    if weights is None:
        weights = METRIC_WEIGHTS
    default_weight = weights['default']

    # Index the metrics table once; the original rebuilt this index for every link.
    metric_names = data_tables['metrics'].set_index('id')['name']

    metric_changes = []
    for metric_id in affected_links['metric_id']:
        metric_name = metric_names.loc[metric_id]
        weight = weights.get(metric_name, default_weight)
        metric_changes.append({'metric_name': metric_name, 'change': score_change * weight})
    return metric_changes

def parse_final_attempt(dialogue_history_json, data_tables):
    """Parses the dialogue history for the final attempt to get KC and metric changes.

    A decision is a 'user' turn whose content contains 'User chose decision index'.
    The turn directly before it must hold, as JSON in its 'content', a
    'decisionPoint' with an 'options' list; the chosen option's 'kc_impacts'
    are resolved against the 'kcs' table.

    Args:
        dialogue_history_json: JSON string encoding a list of turn dicts.
            Any non-string value (e.g. NaN from an empty CSV cell) yields {}.
        data_tables: Dict of DataFrames; reads 'kcs' (columns id, kc_identifier,
            name) and is passed on to get_metric_change.

    Returns:
        Dict mapping the 1-based decision number to a list of dicts with keys
        'kc_id', 'kc_name', 'score_change' and 'metric_impacts'. Impacts whose
        kc_identifier is not in the 'kcs' table are dropped. Parsing is
        best-effort: on the first malformed decision a warning is printed and
        the decisions parsed so far are returned.
    """
    kc_changes_by_decision = {}
    if not isinstance(dialogue_history_json, str):
        return kc_changes_by_decision
    try:
        history = json.loads(dialogue_history_json)
        decision_counter = 0
        for i, turn in enumerate(history):
            content = turn.get('content')
            # Turns without string content cannot be decision turns; skip them
            # instead of aborting the whole parse.
            if turn.get('role') != 'user' or not isinstance(content, str):
                continue
            if 'User chose decision index' not in content:
                continue

            decision_counter += 1
            if i == 0:
                # history[i - 1] would silently wrap around to the LAST turn.
                raise ValueError("decision turn has no preceding assistant turn")
            assistant_turn = history[i - 1]
            assistant_content = json.loads(assistant_turn['content'])
            decision_point = assistant_content.get('decisionPoint', {})

            # The index follows the first ': ' and may be trailed by ':<label>'.
            content_part = content.split(': ', 1)[1]
            decision_index = int(content_part.split(':')[0].strip())
            if decision_index < 0:
                # A negative index would pick an option counted from the end.
                raise IndexError(f"negative decision index {decision_index}")
            chosen_option = decision_point.get('options', [])[decision_index]

            impacts = []
            for impact in chosen_option.get('kc_impacts', []):
                kc_identifier = impact['kc_identifier']
                score_change = impact.get('score', 0)
                kc_row = data_tables['kcs'][data_tables['kcs']['kc_identifier'] == kc_identifier]
                if not kc_row.empty:
                    kc_id = kc_row.iloc[0]['id']
                    kc_name = kc_row.iloc[0]['name']
                    metric_deltas = get_metric_change(kc_id, score_change, data_tables)
                    impacts.append({'kc_id': kc_id, 'kc_name': kc_name, 'score_change': score_change, 'metric_impacts': metric_deltas})
            kc_changes_by_decision[decision_counter] = impacts
    except Exception as e:
        # Deliberate best-effort: a malformed history must not stop report generation.
        print(f"Warning: Could not parse dialogue history for a goal. Error: {e}")
    return kc_changes_by_decision

def create_learning_quadrant_plot(decision_data, data_tables): # <<< NEW VISUALIZATION
    """Creates a Learning Quadrant scatter plot.

    Each KC is one point: x is its total score change over the given decisions
    ("growth"), y is its initial score plus that change ("competence").

    Args:
        decision_data: List of dicts with 'kc_id', 'score_change' and
            'initial_score'; per KC the first 'initial_score' is used.
            Empty or None input yields None.
        data_tables: Dict of DataFrames; reads 'kcs' (columns id,
            kc_identifier) to label the points.

    Returns:
        BytesIO positioned at 0 holding the PNG image, or None when there is
        nothing to plot.

    Raises:
        KeyError: If a kc_id in decision_data is missing from the 'kcs' table.
    """
    if not decision_data:
        return None
    df = pd.DataFrame(decision_data)

    # Calculate initial and final scores for each KC in this goal
    kc_summary = df.groupby('kc_id').agg(
        score_change=('score_change', 'sum'),
        initial_score=('initial_score', 'first')
    ).reset_index()
    kc_summary['final_score'] = kc_summary['initial_score'] + kc_summary['score_change']

    # Index the KC table once; the original re-indexed it for every row.
    kc_identifiers = data_tables['kcs'].set_index('id')['kc_identifier']
    kc_summary['kc_identifier'] = kc_summary['kc_id'].apply(lambda kc_id: kc_identifiers.loc[kc_id])

    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        ax.scatter(kc_summary['score_change'], kc_summary['final_score'], s=100, alpha=0.7)

        # Add labels for each point, nudged right so they do not cover the marker
        for point in kc_summary.itertuples(index=False):
            ax.text(point.score_change + 0.1, point.final_score, point.kc_identifier, fontsize=9)

        # Add quadrant lines and labels
        ax.axhline(0, color='grey', lw=0.5)
        ax.axvline(0, color='grey', lw=0.5)
        ax.set_title('Learning Quadrant for this Goal')
        ax.set_xlabel('Growth (KC Score Change during Goal)')
        ax.set_ylabel('Competence (Final KC Score after Goal)')
        ax.grid(True, linestyle='--', alpha=0.6)

        # Quadrant labels, anchored to the four corners of the current axes limits
        xlim = ax.get_xlim()
        ylim = ax.get_ylim()
        ax.text(xlim[1], ylim[1], ' Masters', ha='right', va='top', alpha=0.5, weight='bold')
        ax.text(xlim[0], ylim[1], 'Experts ', ha='left', va='top', alpha=0.5, weight='bold')
        ax.text(xlim[0], ylim[0], ' Struggling', ha='left', va='bottom', alpha=0.5, weight='bold')
        ax.text(xlim[1], ylim[0], 'Emerging ', ha='right', va='bottom', alpha=0.5, weight='bold')

        fig.tight_layout()
        img_buffer = BytesIO()
        fig.savefig(img_buffer, format='png')
        img_buffer.seek(0)
    finally:
        # Always release the figure, also when plotting or saving fails.
        plt.close(fig)
    return img_buffer

def create_dual_axis_plot(decision_data, metric_name, data_tables): # <<< NEW VISUALIZATION
    """Creates a dual-axis plot comparing a metric vs. its relevant KC.

    The left axis shows the cumulative change of ``metric_name`` caused by all
    KC changes, the right axis the cumulative score change of the KC that
    METRIC_TO_KC_MAP associates with the metric, both per decision number.

    Args:
        decision_data: List of dicts with 'decision_num', 'kc_id' and
            'score_change'. Empty or None input yields None.
        metric_name: Metric to plot; must be a key of METRIC_TO_KC_MAP.
        data_tables: Dict of DataFrames; reads 'kcs' (columns id,
            kc_identifier) and is passed on to get_metric_change.

    Returns:
        BytesIO positioned at 0 holding the PNG image, or None when there is no
        data, the metric has no mapped KC, or the mapped KC identifier is not
        in the 'kcs' table.
    """
    if not decision_data:
        return None

    # Find the KC identifier for the given metric name
    kc_identifier_to_track = METRIC_TO_KC_MAP.get(metric_name)
    if not kc_identifier_to_track:
        return None

    # Find the KC ID for that identifier; an unknown identifier means nothing to plot
    kcs = data_tables['kcs']
    matching_ids = kcs[kcs['kc_identifier'] == kc_identifier_to_track]['id']
    if matching_ids.empty:
        return None
    kc_id_to_track = matching_ids.iloc[0]

    df = pd.DataFrame(decision_data)

    # Cover every decision present in the data instead of a fixed 1..9 window,
    # which cut off goals with more than nine decisions.
    last_decision = max(int(df['decision_num'].max()), 1)
    decision_numbers = list(range(1, last_decision + 1))

    # Calculate cumulative metric and KC scores
    metric_cumulative = []
    kc_cumulative = []
    current_metric = 0
    current_kc = 0
    for decision_num in decision_numbers:
        step_data = df[df['decision_num'] == decision_num]
        # Contribution of every KC change in this decision to the requested metric
        step_metric_change = sum(
            m['change']
            for _, row in step_data.iterrows()
            for m in get_metric_change(row['kc_id'], row['score_change'], data_tables)
            if m['metric_name'] == metric_name
        )
        step_kc_change = step_data[step_data['kc_id'] == kc_id_to_track]['score_change'].sum()
        current_metric += step_metric_change
        current_kc += step_kc_change
        metric_cumulative.append(current_metric)
        kc_cumulative.append(current_kc)

    fig, ax1 = plt.subplots(figsize=(8, 4))
    try:
        # Plot Metric
        ax1.plot(decision_numbers, metric_cumulative, 'b-', marker='o', label=f'{metric_name} Change')
        ax1.set_xlabel('Decision Number')
        ax1.set_ylabel(f'Cumulative {metric_name} Change', color='b')
        ax1.tick_params('y', colors='b')

        # Create second y-axis sharing the decision-number x-axis
        ax2 = ax1.twinx()
        ax2.plot(decision_numbers, kc_cumulative, 'r-', marker='s', label=f'{kc_identifier_to_track} Score Change')
        ax2.set_ylabel(f'Cumulative {kc_identifier_to_track} Score Change', color='r')
        ax2.tick_params('y', colors='r')

        ax1.set_title(f'Performance ({metric_name}) vs. Learning ({kc_identifier_to_track})')
        fig.tight_layout()
        img_buffer = BytesIO()
        fig.savefig(img_buffer, format='png')
        img_buffer.seek(0)
    finally:
        # Always release the figure, also when plotting or saving fails.
        plt.close(fig)
    return img_buffer


def generate_report_for_user(user_info, data, peer_stats, leaderboard):
    """Build and save the Word learning report for one participant.

    The document contains three sections: an executive summary (final metrics
    compared with peers, top and bottom KCs), a goal-by-goal analysis with a
    learning-quadrant plot and one dual-axis plot per entry of
    METRIC_TO_KC_MAP, and the overall leaderboard with this participant's row
    shown in bold.

    Args:
        user_info: Row/mapping providing the participant's 'id' and 'name'.
        data: Dict of DataFrames. This function reads 'user_goals',
            'user_kc_scores', 'user_metric_scores', 'metrics', 'kcs' and
            'goals'; the whole dict is also forwarded to parse_final_attempt
            and the plotting helpers.
        peer_stats: Mapping whose 'metrics' entry is a DataFrame indexed by
            metric id with 'min', 'mean' and 'max' columns.
        leaderboard: DataFrame with 'name' and 'current_score' columns; rows
            are ranked in the order they are iterated.

    Returns:
        None. The report is written to
        "Learning_Report_<name>_alternative.docx" in the current working
        directory and progress is printed.
    """
    user_id = user_info['id']
    user_name = user_info['name']
    print(f"\n--- Generating report for {user_name} ---")

    user_goals = data['user_goals'][data['user_goals']['user_id'] == user_id]
    user_kc_scores = data['user_kc_scores'][data['user_kc_scores']['user_id'] == user_id]
    user_metric_scores = data['user_metric_scores'][data['user_metric_scores']['user_id'] == user_id]

    # Index the lookup tables by id once, instead of re-indexing them on every
    # loop iteration below.
    metric_lookup = data['metrics'].set_index('id')
    kc_lookup = data['kcs'].set_index('id')
    goal_lookup = data['goals'].set_index('id')

    doc = Document()
    doc.add_heading('AI Entrepreneurship Game: Learning Report', 0)
    doc.add_heading(f"Participant: {user_name}", level=1)

    # Section 1: Executive Summary
    doc.add_heading("1. Executive Summary & Overall Performance", level=2)
    doc.add_paragraph().add_run("Final Key Metrics (vs. Peers):").bold = True
    for _, row in user_metric_scores.iterrows():
        metric_id = row['metric_id']
        metric_name = metric_lookup.loc[metric_id, 'name']
        # Metrics without peer statistics are left out of the summary.
        if metric_id in peer_stats['metrics'].index:
            peer_stat = peer_stats['metrics'].loc[metric_id]
            doc.add_paragraph(f"{metric_name}: {row['current_value']:.2f} (Peer Min: {peer_stat['min']:.2f}, Avg: {peer_stat['mean']:.2f}, Max: {peer_stat['max']:.2f})", style='List Bullet')

    doc.add_paragraph().add_run("Key Strengths (Top KCs):").bold = True
    for _, row in user_kc_scores.nlargest(3, 'current_score').iterrows():
        kc_name = kc_lookup.loc[row['kc_id'], 'name']
        doc.add_paragraph(f"{kc_name} - Final Score: {row['current_score']}", style='List Bullet')

    doc.add_paragraph().add_run("Areas for Improvement (Lowest KCs):").bold = True
    for _, row in user_kc_scores.nsmallest(3, 'current_score').iterrows():
        kc_name = kc_lookup.loc[row['kc_id'], 'name']
        doc.add_paragraph(f"{kc_name} - Final Score: {row['current_score']}", style='List Bullet')

    # Section 2: Goal Analysis
    doc.add_heading("2. Goal-by-Goal Analysis", level=2)
    for _, goal_row in user_goals.iterrows():
        goal_id = goal_row['goal_id']
        goal_name = goal_lookup.loc[goal_id, 'name']
        doc.add_heading(f"Analysis for Goal: {goal_name}", level=3)

        # Per-decision KC impacts of the final attempt, keyed by decision number.
        final_attempt_impacts = parse_final_attempt(goal_row['dialogue_history'], data)

        # Total score change per KC accumulated over the final attempt.
        final_attempt_kc_delta = pd.Series(dtype=float)
        for _, impacts in final_attempt_impacts.items():
            for impact in impacts:
                final_attempt_kc_delta[impact['kc_id']] = final_attempt_kc_delta.get(impact['kc_id'], 0) + impact['score_change']

        # For every KC touched in the final attempt, attribute whatever part of
        # the final score is not explained by the final attempt to the earlier
        # attempts.
        initial_kc_scores_for_goal = {}
        kc_scores_for_goal = user_kc_scores[user_kc_scores['kc_id'].isin(final_attempt_kc_delta.index)]
        kc_delta_for_missing_attempts = pd.Series(dtype=float)
        for _, kc_row in kc_scores_for_goal.iterrows():
            kc_id = kc_row['kc_id']
            final_score = kc_row['current_score']
            change_from_last_attempt = final_attempt_kc_delta.get(kc_id, 0)
            kc_delta_for_missing_attempts[kc_id] = final_score - change_from_last_attempt
            # NOTE(review): this is final - d - (final - d), so the baseline is
            # 0 for every KC. Kept unchanged; confirm a zero baseline is intended.
            initial_kc_scores_for_goal[kc_id] = final_score - change_from_last_attempt - kc_delta_for_missing_attempts.get(kc_id, 0)

        # Coerce the attempt counter to a whole number >= 1. A missing (NaN/None),
        # non-numeric, zero or negative value is treated as a single attempt so
        # that range() below never receives a float or NaN.
        try:
            num_attempts = int(goal_row['attempts_for_current_goal_cycle'])
        except (TypeError, ValueError, OverflowError):
            num_attempts = 1
        if num_attempts < 1:
            num_attempts = 1
        num_decisions_per_attempt = 3  # hard-coded number of decisions per attempt
        num_missing_decisions = (num_attempts - 1) * num_decisions_per_attempt

        # Only the final attempt has per-decision data; the decisions of earlier
        # attempts are synthesized by spreading their combined KC change evenly.
        # The loop body never runs when there are no earlier attempts, so the
        # division is always by a positive number.
        all_decision_data = []
        for decision_num in range(1, num_missing_decisions + 1):
            for kc_id, total_change in kc_delta_for_missing_attempts.items():
                all_decision_data.append({
                    'decision_num': decision_num,
                    'kc_id': kc_id,
                    'score_change': total_change / num_missing_decisions,
                    'initial_score': initial_kc_scores_for_goal.get(kc_id, 0),
                })

        # Append the real final-attempt decisions after the synthesized ones;
        # the offset arithmetic assumes the impact keys are 1-based decision numbers.
        start_decision = num_missing_decisions + 1
        for i, impacts in sorted(final_attempt_impacts.items()):
            decision_num = start_decision + i - 1
            for impact in impacts:
                all_decision_data.append({
                    'decision_num': decision_num,
                    'kc_id': impact['kc_id'],
                    'score_change': impact['score_change'],
                    'initial_score': initial_kc_scores_for_goal.get(impact['kc_id'], 0),
                })

        # Add Learning Quadrant
        doc.add_heading("Learning Quadrant", level=4)
        quadrant_plot = create_learning_quadrant_plot(all_decision_data, data)
        if quadrant_plot:
            doc.add_picture(quadrant_plot, width=Inches(6.0))

        # Add Performance vs. Learning Charts
        doc.add_heading("Performance vs. Learning Analysis", level=4)
        for metric_name in METRIC_TO_KC_MAP.keys():
            dual_axis_plot = create_dual_axis_plot(all_decision_data, metric_name, data)
            if dual_axis_plot:
                doc.add_picture(dual_axis_plot, width=Inches(6.0))
        doc.add_page_break()

    # Section 3: Leaderboard
    doc.add_heading("3. Learning Performance Leaderboard", level=2)
    doc.add_paragraph("This ranks participants by their total cumulative score across all Knowledge Components.")
    leaderboard_table = doc.add_table(rows=1, cols=3)
    leaderboard_table.style = 'Table Grid'
    hdr_cells = leaderboard_table.rows[0].cells
    hdr_cells[0].text = 'Rank'
    hdr_cells[1].text = 'Participant'
    hdr_cells[2].text = 'Total KC Score'

    for rank, (_, row) in enumerate(leaderboard.iterrows(), start=1):
        cells = leaderboard_table.add_row().cells
        cells[0].text = str(rank)
        cells[1].text = row['name']
        cells[2].text = f"{row['current_score']}"
        # Highlight the participant this report belongs to.
        if row['name'] == user_name:
            for cell in cells:
                for p in cell.paragraphs:
                    for run in p.runs:
                        run.bold = True

    file_name = f"Learning_Report_{user_name}_alternative.docx"
    doc.save(file_name)
    print(f"Successfully generated report: {file_name}")

if __name__ == "__main__":
    # Driver: load the tables, clean the score columns, compute peer
    # statistics, then write one report per selected participant.
    all_data = load_data()
    if all_data:
        # Convert each score column to numbers and drop rows whose value is
        # missing or could not be parsed (those become NaN under 'coerce').
        score_columns = (
            ('user_kc_scores', 'current_score'),
            ('user_metric_scores', 'current_value'),
        )
        for table_key, value_col in score_columns:
            score_table = all_data[table_key]
            score_table[value_col] = pd.to_numeric(score_table[value_col], errors='coerce')
            score_table.dropna(subset=[value_col], inplace=True)

        peer_stats_metrics, leaderboard_data = calculate_peer_stats(
            all_data['user_metric_scores'],
            all_data['user_kc_scores'],
            all_data,
        )

        # Restrict the user table to the participants listed in PARTICIPANT_NAMES.
        is_selected = all_data['users']['name'].isin(PARTICIPANT_NAMES)
        user_map = all_data['users'][is_selected]

        for _, user_row in user_map.iterrows():
            generate_report_for_user(
                user_row,
                all_data,
                {'metrics': peer_stats_metrics},
                leaderboard_data,
            )

Loading data from CSV files...
Data loaded successfully.

--- Generating report for P9 ---
Successfully generated report: Learning_Report_P9_alternative.docx

--- Generating report for P8 ---
Successfully generated report: Learning_Report_P8_alternative.docx

--- Generating report for P7 ---
Successfully generated report: Learning_Report_P7_alternative.docx

--- Generating report for P6 ---
Successfully generated report: Learning_Report_P6_alternative.docx

--- Generating report for P5 ---
Successfully generated report: Learning_Report_P5_alternative.docx

--- Generating report for P4 ---
Successfully generated report: Learning_Report_P4_alternative.docx

--- Generating report for P3 ---
Successfully generated report: Learning_Report_P3_alternative.docx

--- Generating report for P2 ---
Successfully generated report: Learning_Report_P2_alternative.docx

--- Generating report for P10 ---
Successfully generated report: Learning_Report_P10_alternative.docx

--- Generating report for P1 --