In [1]:
import matplotlib.pyplot as plt
import csv

# Load data
with open("FCS1_Team2_Joshua.csv", newline='') as f:
    reader = csv.DictReader(f)
    data = list(reader)

# Convert types
for row in data:
    row['tutorial'] = row['tutorial'].strip()
    row['team'] = int(row['team'])
    row['cgpa'] = float(row['cgpa']) if row['cgpa'] else None
    row['gender'] = row['gender'].strip()
    row['school'] = row['school'].strip()

# Group into tutorial_teams
tutorial_teams = {}
for row in data:
    tut = row['tutorial']
    team = row['team']
    tutorial_teams.setdefault(tut, {})
    tutorial_teams[tut].setdefault(team, [])
    tutorial_teams[tut][team].append(row)

# Helper: count gender for each team
def count_gender(team):
    m = sum(1 for s in team if s['gender'].strip().lower().startswith('m'))
    f = sum(1 for s in team if s['gender'].strip().lower().startswith('f'))
    return m, f


# === MAIN SUMMARY FUNCTION WITH VISUALS ===
def print_summary(tutorial_teams):
    print("\n" + "="*140)
    print(f"{'TEAM DIVERSITY ANALYSIS':^140}")
    print("="*140)
    print(f"{'Tutorial':<10} {'Team':<6} {'Size':<5} {'Gender':<15} {'G-Score':<8} {'Schools':<30} {'S-Score':<8} {'CGPA':<15} {'C-Score':<8} {'OVERALL':<8}")
    print("-"*140)

    tut_keys = list(tutorial_teams.keys())
    tut_keys.sort()  # sort tutorials alphabetically/numerically if names contain numbers

    total_teams = total_students = 0
    gender_score_sum = school_score_sum = cgpa_score_sum = overall_score_sum = 0.0

    # storage for plotting
    tutBalance = {}

    for tut in tut_keys:
        g_scores, s_scores, c_scores, o_scores = [], [], [], []

        for i, team in enumerate(tutorial_teams[tut]):
            total_teams += 1
            total_students += len(team)
            team_size = len(team)

            # === GENDER BALANCE SCORE ===
            m, f = count_gender(team)
            if m > 0 and f > 0:
                best_ratio = 1.0 if team_size % 2 == 0 else (team_size - 1) / (team_size + 1)
                actual_ratio = min(m, f) / max(m, f)
                gender_score = (actual_ratio / best_ratio) * 100
            elif m > 0 or f > 0:
                gender_score = 0
            else:
                gender_score = 0
            gender_str = f"{m}M/{f}F"
            gender_score_sum += gender_score

            # === SCHOOL DIVERSITY SCORE ===
            school_counts = {}
            for st in team:
                sch = st['school'].strip().lower()
                school_counts[sch] = school_counts.get(sch, 0) + 1
            num_unique_schools = len(school_counts)
            uniqueness_score = (num_unique_schools / team_size) * 50 if team_size > 0 else 0
            simpson = sum((count/team_size)**2 for count in school_counts.values()) if team_size > 0 else 0
            evenness_score = (1 - simpson) * 50 if team_size > 0 else 0
            school_score = uniqueness_score + evenness_score
            school_score_sum += school_score
            schools_str = ", ".join([f"{s}:{school_counts[s]}" for s in list(school_counts)[:3]])
            if len(school_counts) > 3:
                schools_str += f"... (+{len(school_counts)-3})"

            # === CGPA BALANCE SCORE ===
            cgpa_list = [s['cgpa'] for s in team if s['cgpa'] is not None]
            mean_cgpa = sum(cgpa_list)/len(cgpa_list) if len(cgpa_list) > 0 else 0
            if len(cgpa_list) > 1:
                variance = sum((x - mean_cgpa)**2 for x in cgpa_list) / (len(cgpa_list) - 1)
                std_dev = variance**0.5
            else:
                std_dev = 0
            if std_dev <= 0.2:
                cgpa_score = 100 - (std_dev / 0.2) * 10
            elif std_dev <= 0.4:
                cgpa_score = 90 - ((std_dev - 0.2) / 0.2) * 20
            elif std_dev <= 0.6:
                cgpa_score = 70 - ((std_dev - 0.4) / 0.2) * 30
            else:
                cgpa_score = max(0, 40 - ((std_dev - 0.6) / 0.4) * 40)
            cgpa_score_sum += cgpa_score
            cgpa_str = f"μ={mean_cgpa:.2f},σ={std_dev:.2f}"

            # === OVERALL DIVERSITY SCORE ===
            overall_score = (gender_score * 0.30) + (school_score * 0.40) + (cgpa_score * 0.30)
            overall_score_sum += overall_score

            # save per-team for plotting
            g_scores.append(gender_score)
            s_scores.append(school_score)
            c_scores.append(cgpa_score)
            o_scores.append(overall_score)

            print(f"{tut:<10} T{i+1:<5} {len(team):<5} {gender_str:<15} {gender_score:>6.1f}% {schools_str:<30} {school_score:>6.1f}% {cgpa_str:<15} {cgpa_score:>6.1f}% {overall_score:>6.1f}%")

        # store tutorial-level averages for plotting
        tutBalance[tut] = {
            'genderStdDev': sum(g_scores)/len(g_scores) if g_scores else 0,
            'schoolStdDev': sum(s_scores)/len(s_scores) if s_scores else 0,
            'cgpaStdDev': sum(c_scores)/len(c_scores) if c_scores else 0,
            'overall': sum(o_scores)/len(o_scores) if o_scores else 0
        }

    # === PLOTS ===
    tutorials = list(tutBalance.keys())
    g_vals = [tutBalance[t]['genderStdDev'] for t in tutorials]
    s_vals = [tutBalance[t]['schoolStdDev'] for t in tutorials]
    c_vals = [tutBalance[t]['cgpaStdDev'] for t in tutorials]
    o_vals = [tutBalance[t]['overall'] for t in tutorials]

    def plot_bar(title, values, ylabel):
        plt.figure(figsize=(10,5))
        plt.bar(tutorials, values, color='skyblue')
        plt.axhline(80, color='green', linestyle='--', label='Excellent ≥80')
        plt.axhline(65, color='orange', linestyle='--', label='Good ≥65')
        plt.title(title)
        plt.xlabel("Tutorial")
        plt.ylabel(ylabel)
        plt.xticks(rotation=90)
        plt.legend()
        plt.tight_layout()
        plt.show()

    plot_bar("Average Gender Diversity Score per Tutorial", g_vals, "Gender Score (%)")
    plot_bar("Average School Diversity Score per Tutorial", s_vals, "School Score (%)")
    plot_bar("Average CGPA Balance Score per Tutorial", c_vals, "CGPA Score (%)")
    plot_bar("Overall Diversity Score per Tutorial", o_vals, "Overall (%)")

    # === Summary ===
    avg_gender = gender_score_sum / total_teams if total_teams else 0
    avg_school = school_score_sum / total_teams if total_teams else 0
    avg_cgpa = cgpa_score_sum / total_teams if total_teams else 0
    avg_overall = overall_score_sum / total_teams if total_teams else 0
    avg_size = total_students / total_teams if total_teams else 0

    print("="*140)
    print(f"{'OVERALL STATISTICS':^140}")
    print("="*140)
    print(f"Total Tutorials: {len(tut_keys)}")
    print(f"Total Teams: {total_teams}")
    print(f"Total Students: {total_students}")
    print(f"Average Team Size: {avg_size:.2f}")
    print()
    print(f"Average Gender Balance Score:    {avg_gender:>6.1f}% (Weight: 30%)")
    print(f"Average School Diversity Score:  {avg_school:>6.1f}% (Weight: 40%)")
    print(f"Average CGPA Balance Score:      {avg_cgpa:>6.1f}% (Weight: 30%)")
    print(f"{'─'*60}")
    print(f"Average Overall Diversity Score: {avg_overall:>6.1f}%")
    print()

    if avg_overall >= 80:
        rating = "EXCELLENT - Highly diverse teams"
    elif avg_overall >= 65:
        rating = "GOOD - Well-balanced teams"
    elif avg_overall >= 50:
        rating = "FAIR - Moderate diversity"
    else:
        rating = "NEEDS IMPROVEMENT - Low diversity"

    print(f"Overall Rating: {rating}")
    print("="*140 + "\n")

    print("SCORING GUIDE:")
    print("-" * 140)
    print("Gender Balance:   Score normalized for team size (even teams: 100% = 2/2, 3/3, etc. | odd teams: 100% = 3/2, 4/3, etc.)")
    print("School Diversity: Based on number of unique schools + evenness of distribution (Simpson's Index)")
    print("CGPA Balance:     100-90% = σ≤0.2 (Excellent) | 90-70% = σ≤0.4 (Good) | 70-40% = σ≤0.6 (Fair) | <40% = σ>0.6 (Poor)")
    print("="*140 + "\n")
    print_summary(tutorial_teams)



KeyError: 'tutorial'