In [16]:
import pandas as pd
import numpy as np
import math
from collections import defaultdict

In [23]:
# Path to the Excel workbook; later cells reuse this name to read other sheets.
file_path = 'sample_excel.xlsx'
# No sheet_name is passed, so pandas loads the workbook's first sheet.
df=pd.read_excel(file_path)

In [18]:
# Preview the last rows of the loaded sheet as a quick sanity check.
df.tail()

Unnamed: 0,roll_no.,year,th
187,56,2022,35
188,57,2022,57
189,58,2022,64
190,59,2022,56
191,60,2022,27


In [None]:

# Yearly end-semester summary.
# Assumes the 'Endsem_res_3y' sheet of the workbook has the columns
# 'year', 'roll_no.' and 'th' (theory marks).
df_Endsem_res = pd.read_excel(file_path, sheet_name="Endsem_res_3y")

# Maximum theory marks for each year.
# Hardcoded for now; TODO: take this as input from the user.
max_marks_dict = {
    2022: 80,
    2023: 80,
    2024: 80
}

# Fail fast when a year in the data has no configured maximum. Otherwise
# Index.map() yields NaN for that year, every percentage derived from it is
# NaN, and the overall averages silently ignore the year.
years_without_max_marks = sorted(
    set(df_Endsem_res["year"].dropna().unique().tolist()) - set(max_marks_dict)
)
if years_without_max_marks:
    raise ValueError(
        f"No maximum marks configured in max_marks_dict for year(s): {years_without_max_marks}"
    )

# --- Calculations for Yearly Data ---

# One row per year: number of distinct students and the sum of their marks.
summary = df_Endsem_res.groupby("year").agg(
    total_students=("roll_no.", "nunique"),
    th_total=("th", "sum")
)

summary["max_marks"] = summary.index.map(max_marks_dict)

# Class average marks for each year, in absolute marks and as % of the maximum.
summary["class_avg_marks"] = summary["th_total"] / summary["total_students"]
summary["%_class_avg_marks"] = (summary["class_avg_marks"] / summary["max_marks"]) * 100

# Students who scored at least their own year's class average.
# The merge attaches each year's average to every student row of that year.
df_merged = pd.merge(df_Endsem_res, summary[['class_avg_marks']], on='year', how='left')
achieved_avg_count = df_merged[df_merged['th'] >= df_merged['class_avg_marks']].groupby('year').size()
summary['students_achieved_avg'] = achieved_avg_count
summary['%_students_achieved_avg'] = np.round(
    (summary['students_achieved_avg'] / summary['total_students']) * 100,
    2
)

# Attainment: total marks obtained as a percentage of total marks available.
summary["attainment_%"] = np.round(
    (summary["th_total"] * 100) / (summary["total_students"] * summary["max_marks"]),
    2
)
# Scaled attainment (out of 3) is only filled in for the overall row below;
# the per-year cells are intentionally left empty.
summary['attainment_scaled_3'] = np.nan


# --- Overall Averages for ALL Columns ---

# 1. Mean of every column across the yearly rows.
overall_averages = summary.mean()

# 2. The scaled attainment is not a simple mean; derive it from the average
#    of the 'attainment_%' column, rescaled from 0-100 to 0-3.
avg_attainment_scaled = (overall_averages['attainment_%'] * 3) / 100

# 3. Append the 'Overall Average' row using the calculated means.
summary.loc['Overall Average'] = overall_averages

# 4. Place the specially calculated scaled value into the correct cell.
summary.loc['Overall Average', 'attainment_scaled_3'] = avg_attainment_scaled


# --- Display the Final DataFrame ---

print("--- Final Combined Summary DataFrame ---")
# Reorder columns for better readability.
summary = summary[[
    'total_students',
    'th_total',
    'class_avg_marks',
    '%_class_avg_marks',
    'students_achieved_avg',
    '%_students_achieved_avg',
    'max_marks',
    'attainment_%',
    'attainment_scaled_3'
]]

# Last expression of the cell: rendered as the rich table output.
summary



--- Final Combined Summary DataFrame ---


Unnamed: 0_level_0,total_students,th_total,class_avg_marks,%_class_avg_marks,students_achieved_avg,%_students_achieved_avg,max_marks,attainment_%,attainment_scaled_3
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2022,60.0,2596.0,43.266667,54.083333,31.0,51.67,80.0,54.08,
2023,62.0,2798.0,45.129032,56.41129,29.0,46.77,80.0,56.41,
2024,70.0,3451.0,49.3,61.625,32.0,45.71,80.0,61.62,
Overall Average,64.0,2948.333333,45.898566,57.373208,30.666667,48.05,80.0,57.37,1.7211


In [20]:
# Sheet 2: goal setting.
# Targets come from the 'Overall Average' row of the yearly summary, rounded
# up to whole percentages.
stud_per_achieved_avg = math.ceil(summary.loc['Overall Average', '%_students_achieved_avg'])
class_per_avg_marks = math.ceil(summary.loc['Overall Average', '%_class_avg_marks'])

# Three levels: the student-percentage target steps by 5 points around the
# historical average, while the marks threshold is the same at every level.
data = {
    "level": list(range(1, 4)),
    "% of the student": [stud_per_achieved_avg + offset for offset in (-5, 0, 5)],
    "% of marks": [class_per_avg_marks] * 3
}

goalset = pd.DataFrame(data)
print(goalset)



   level  % of the student  % of marks
0      1                44          58
1      2                49          58
2      3                54          58


In [29]:
import pandas as pd


def _parse_co_number(co_id):
    """Return the CO number encoded in a CO-ID, or None when there is none.

    Dotted IDs such as '2343113.1' use the integer after the last dot;
    prefixed IDs such as 'CO1' use their trailing digits.
    """
    text = str(co_id).strip()
    try:
        return int(text.split(".")[-1])
    except ValueError:
        # Not a dotted numeric ID: fall back to the trailing run of digits.
        trailing_digits = text[len(text.rstrip("0123456789")):]
        return int(trailing_digits) if trailing_digits else None


def calculate_attainment_summary(file_path, threshold=None):
    """Build the per-CO tool attainment table from an Excel workbook.

    For every row of the 'Tool Assignment' sheet, the tool in slot N is mapped
    to the marks column 'CO<number>_tool_<N>' of the 'IAT+tools' sheet. For
    each tool the function counts the students whose score, as a percentage of
    that column's 'Max_Marks' entry, is at least the threshold, and then
    averages the two best tool percentages.

    Args:
        file_path: Path to the workbook containing the 'IAT+tools' and
            'Tool Assignment' sheets.
        threshold: Minimum percentage of the maximum marks a student must
            reach to count as having achieved a tool. When None, the
            module-level ``class_per_avg_marks`` is used, which keeps
            existing single-argument calls working.

    Returns:
        pandas.DataFrame with one row per valid CO: 'CO Statements',
        'Tool <N> Total' / 'Tool <N> %' for every evaluated tool slot N, and
        '% considering best 2 tools avg'. COs that fail validation are
        reported on stdout and omitted.

    Raises:
        ValueError: If the 'Max_Marks' row is missing, or the marks sheet
            contains no student rows.
    """
    if threshold is None:
        # Backward compatibility: earlier callers set this global instead.
        threshold = class_per_avg_marks

    # ============================
    # 1. LOAD MARKS SHEET
    # ============================
    df = pd.read_excel(file_path, sheet_name="IAT+tools")

    # The row whose Roll_no. is 'Max_Marks' holds the maximum for each column.
    max_marks_row = df[df['Roll_no.'] == 'Max_Marks']
    if max_marks_row.empty:
        raise ValueError("'Max_Marks' row is missing in 'IAT+tools' sheet.")

    max_marks = max_marks_row.iloc[0].to_dict()
    max_marks.pop("Roll_no.")

    # Keep only student rows. The explicit copy makes the column assignments
    # below act on an independent frame instead of a slice of the original
    # (avoids SettingWithCopyWarning).
    df = df[df['Roll_no.'] != 'Max_Marks'].copy()
    total_students = len(df)
    if total_students == 0:
        raise ValueError("No student rows found in 'IAT+tools' sheet.")

    # Convert marks safely: anything non-numeric becomes NaN and therefore
    # never counts as achieved.
    for col in max_marks:
        df[col] = pd.to_numeric(df[col], errors="coerce")

    # ============================
    # 2. LOAD TOOL ASSIGNMENT
    # ============================
    tools_df = pd.read_excel(file_path, sheet_name="Tool Assignment")

    output = []

    print("\n--- VALIDATION & DEBUG LOG ---")

    # ============================
    # 3. PROCESS EACH CO
    # ============================
    for _, row in tools_df.iterrows():

        co_id = row.get("CO-ID")
        co_statement = row.get("CO Statement")

        print(f"\nProcessing CO: {co_id}")

        t1 = row.get("Tool1")
        t2 = row.get("Tool2")
        t3 = row.get("Tool3")

        # VALIDATION 1 - tool slots must be filled in order, without gaps,
        # because a tool's slot position decides which marks column it uses.
        if pd.isna(t1) and (not pd.isna(t2) or not pd.isna(t3)):
            print("‚ùå ERROR: Tool1 is empty but Tool2/Tool3 is filled.")
            print("üëâ Fix Excel: Always fill Tool1 first.")
            continue

        if pd.isna(t2) and not pd.isna(t3):
            print("‚ùå ERROR: Tool2 is empty but Tool3 is filled.")
            print("üëâ Fix Excel: Do not skip Tool2.")
            continue

        # VALIDATION 2 - at least two tools are needed for the best-two average.
        tools = [t for t in [t1, t2, t3] if not pd.isna(t)]

        if len(tools) < 2:
            print("‚ùå ERROR: Less than 2 tools provided.")
            print("üëâ At least TWO tools are required per CO.")
            continue

        # Extract the CO number; both formats named in the message are accepted.
        co_number = _parse_co_number(co_id)
        if co_number is None:
            print("‚ùå ERROR: CO-ID format invalid.")
            print("üëâ Expected: format like '2343113.1' or 'CO1'")
            continue

        tool_results = []

        # ============================
        # 4. CALCULATION PER SLOT
        # ============================
        for slot_index, tool_name in enumerate(tools, start=1):

            column_name = f"CO{co_number}_tool_{slot_index}"

            print(f" Mapping: {tool_name} ‚Üí {column_name}")

            # VALIDATION 3 - the mapped column must exist in the marks sheet.
            if column_name not in df.columns:
                print(f"‚ùå ERROR: Column '{column_name}' NOT FOUND in marks sheet.")
                print("üëâ Check column names in 'IAT+tools' sheet.")
                continue

            max_mark = max_marks.get(column_name)

            # VALIDATION 4 - a missing or zero maximum cannot be divided by.
            if pd.isna(max_mark) or max_mark == 0:
                print(f"‚ùå ERROR: Invalid max mark for {column_name}.")
                print("üëâ Fix 'Max_Marks' row.")
                continue

            percent_scores = (df[column_name] / max_mark) * 100
            achieved = (percent_scores >= threshold).sum()
            percent = (achieved / total_students) * 100

            tool_results.append({
                # Remember the slot so the output columns stay aligned with
                # the Excel tool slots even when another slot was skipped.
                "slot": slot_index,
                "tool": tool_name,
                "students": achieved,
                "percent": round(percent, 2)
            })

        # VALIDATION 5 - the best-two average needs two evaluated tools.
        if len(tool_results) < 2:
            print("‚ùå ERROR: Less than 2 valid tools after validation.")
            print("üëâ Check marks sheet & mapping.")
            continue

        # ============================
        # 5. SELECT BEST TWO
        # ============================
        best_two = sorted(tool_results, key=lambda x: x['percent'], reverse=True)[:2]
        best_avg = round(
            (best_two[0]['percent'] + best_two[1]['percent']) / 2,
            2
        )

        # ============================
        # 6. OUTPUT FORMAT
        # ============================
        row_out = {"CO Statements": co_statement}

        for tr in tool_results:
            row_out[f"Tool {tr['slot']} Total"] = tr["students"]
            row_out[f"Tool {tr['slot']} %"] = tr["percent"]

        row_out["% considering best 2 tools avg"] = best_avg
        output.append(row_out)

    # ============================
    # FINAL TABLE
    # ============================
    print("\n--- PROCESS COMPLETED ---")
    return pd.DataFrame(output)


# ============================
# USER INPUT
# ============================
# Workbook that calculate_attainment_summary reads its sheets from.
file_path = "sample_excel.xlsx"
# Pass threshold, in percent of the maximum marks, used by
# calculate_attainment_summary via this module-level name.
# NOTE(review): this hardcoded value replaces the class_per_avg_marks computed
# in the goal-setting cell; keep the two in sync or drop this reassignment.
class_per_avg_marks = 58


# NOTE(review): inside a notebook kernel __name__ is "__main__", so this guard
# is always true and the summary runs whenever the cell executes.
if __name__ == "__main__":
    result = calculate_attainment_summary(file_path)
    # print() emits the plain-text repr, which wraps wide frames across lines.
    print(result)



--- VALIDATION & DEBUG LOG ---

Processing CO: 2343113.1
 Mapping: IAT1 ‚Üí CO1_tool_1
 Mapping: MCQ ‚Üí CO1_tool_2
 Mapping: Assignment ‚Üí CO1_tool_3

Processing CO: 2343113.2
 Mapping: IAT1 ‚Üí CO2_tool_1
 Mapping: MCQ ‚Üí CO2_tool_2
 Mapping: Mind Mapping ‚Üí CO2_tool_3

Processing CO: 2343113.3
 Mapping: IAT1 ‚Üí CO3_tool_1
 Mapping: MCQ ‚Üí CO3_tool_2
 Mapping: Assignment ‚Üí CO3_tool_3

Processing CO: 2343113.4
 Mapping: IAT2 ‚Üí CO4_tool_1
 Mapping: MCQ ‚Üí CO4_tool_2
 Mapping: Assignment ‚Üí CO4_tool_3

Processing CO: 2343113.5
 Mapping: IAT2 ‚Üí CO5_tool_1
 Mapping: MCQ ‚Üí CO5_tool_2
 Mapping: Assignment ‚Üí CO5_tool_3

Processing CO: 2343113.6
 Mapping: IAT2 ‚Üí CO6_tool_1
 Mapping: MCQ ‚Üí CO6_tool_2
 Mapping: Assignment ‚Üí CO6_tool_3

--- PROCESS COMPLETED ---
                                       CO Statements  Tool 1 Total  Tool 1 %  \
0  Students will be able to discuss the need of a...            36     50.00   
1  Students will be able to analyze and design co... 