In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import re
import jinja2
from openpyxl import load_workbook
from openpyxl.drawing.image import Image as XLImage

def analyze_subject(df, subject_name, subject_grade_col):
    """Print a report for one subject and export it to an Excel workbook.

    The function prints the ten highest scorers, the grade distribution and
    the mean marks for the subject. It then saves a pie chart of the grade
    distribution as ``graph_image_<subject_name>.png`` and writes
    ``student_data_<subject_name>.xlsx`` containing four sheets: the full
    table, the subject sorted by marks, the top ten, and the grade counts
    (with the pie chart embedded at cell D2). Both files are written using
    relative paths.

    Args:
        df: DataFrame holding 'Name' and 'Roll Number' columns plus the two
            subject columns named below.
        subject_name: Name of the column holding the subject's marks.
        subject_grade_col: Name of the column holding the subject's grades.

    Raises:
        KeyError: If any of the required columns is missing from ``df``.
    """
    # Validate before printing or writing anything, so a mistyped subject
    # fails immediately with a message that lists the valid column names.
    required_cols = ['Name', 'Roll Number', subject_name, subject_grade_col]
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        raise KeyError(
            f"Column(s) {missing_cols} not found in the data; "
            f"available columns: {list(df.columns)}"
        )

    print(f"\nAnalyzing subject: {subject_name}\n")

    subject_table = df[required_cols]
    subject_sorted = subject_table.sort_values(by=subject_name, ascending=False)
    top_10 = subject_sorted.head(10)
    print("Top 10 Scorers:")
    print(top_10.to_string(index=False))

    grade_counts = df[subject_grade_col].value_counts()
    print("\nGrade Distribution:")
    print(grade_counts)

    mean_marks = df[subject_name].mean()
    print(f"\nMean marks in {subject_name}: {mean_marks:.2f}")

    imagefilename = f"graph_image_{subject_name}.png"

    # Draw on a dedicated figure/axes so the chart never lands on a figure
    # left open elsewhere, and always release the figure afterwards.
    fig, ax = plt.subplots()
    try:
        grade_counts.plot.pie(
            ax=ax,
            autopct='%1.1f%%',
            startangle=90,
            shadow=True,
            legend=True
        )
        ax.set_title(f"Distribution of {subject_name} Grades")
        ax.set_ylabel('')
        fig.savefig(imagefilename, bbox_inches='tight')
    finally:
        plt.close(fig)

    filename = f"student_data_{subject_name}.xlsx"

    with pd.ExcelWriter(filename, engine='openpyxl') as writer:
        df.to_excel(writer, sheet_name='All Subjects', index=False)
        subject_sorted.to_excel(writer, sheet_name=f'{subject_name} Only', index=False)
        top_10.to_excel(writer, sheet_name=f'{subject_name} Top Scorers', index=False)
        grade_counts.to_excel(writer, sheet_name=f'{subject_name} Grade Count', index=True)

    # Re-open the saved workbook to place the chart next to the grade counts.
    wb = load_workbook(filename)
    ws = wb[f'{subject_name} Grade Count']
    img = XLImage(imagefilename)
    ws.add_image(img, 'D2')
    wb.save(filename)

In [None]:
# Collects one dict per parsed student; filled by the parsing loop below.
students = list()

In [None]:
# Read data.txt, keeping only non-blank lines with surrounding whitespace removed.
with open("data.txt", "r") as file:
    lines = []
    for line in file:
        stripped = line.strip()
        if stripped:
            lines.append(stripped)

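`line.strip()` is a Python string method that returns a copy of the string with leading and trailing whitespace characters (spaces, tabs, newlines) removed. The `if line.strip()` filter also discards any line that is empty once it has been stripped.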

In [None]:
# Rebuild the list from scratch on every run so that re-executing this cell
# does not append the same students a second time.
students = []

# First line of a record: roll number, gender (M/F), name, then five numeric
# fields (presumably subject codes -- confirm against data.txt).
header_pattern = re.compile(r"(\d+)\s+([MF])\s+(.+?)\s+(?:\d+\s+){5}")
# Second line of a record: repeated "<3-digit marks> <grade>" pairs.
marks_pattern = re.compile(r"(\d{3})\s+([A-D][1-2])")

# Records are read as consecutive pairs of lines; a trailing unpaired line is
# ignored, exactly as with range(0, len(lines) - 1, 2).
for line1, line2 in zip(lines[0::2], lines[1::2]):
    match = header_pattern.match(line1)
    if not match:
        continue

    roll, gender, name = match.groups()

    marks_grades = marks_pattern.findall(line2)

    student = {
        "Roll Number": roll,
        "Name": name.strip(),
        "Gender": gender,
    }
    for idx, (marks, grade) in enumerate(marks_grades, start=1):
        student[f"Subject{idx}_Marks"] = int(marks)
        student[f"Subject{idx}_Grade"] = grade

    students.append(student)


`continue` ⬅ skips to the next student (the next pair of lines) if the pattern doesn't match.

match.groups() is a method of the match object returned by Python's re module (regular expressions); it returns a tuple containing all the captured groups of the match.
For instance, if the regular expression is r"(\d+)-([a-z]+)" and the matched string is "123-abc", then match.groups() returns ('123', 'abc'). If the pattern does not match, re.match() returns None, so calling .groups() on that result raises an AttributeError; this is why the code checks `if not match` first.

In [None]:
# Map the positional keys produced by the parser to the subject column names.
rename_pairs = [
    ('Subject1_Marks', 'ENG'),
    ('Subject1_Grade', 'ENG GRADE'),
    ('Subject2_Marks', 'LANG II'),
    ('Subject2_Grade', 'LANG II GRADE'),
    ('Subject3_Marks', 'MATH'),
    ('Subject3_Grade', 'MATH GRADE'),
    ('Subject4_Marks', 'SCI'),
    ('Subject4_Grade', 'SCI GRADE'),
    ('Subject5_Marks', 'SOC'),
    ('Subject5_Grade', 'SOC GRADE'),
]

# Build the table and rename its columns in a single chained expression.
df = pd.DataFrame(students).rename(columns=dict(rename_pairs))

print(df.to_string(index=False))


In [None]:
# Centre both the data cells and the header cells of the rendered table.
cell_properties = {'text-align': 'center'}
header_rule = {"selector": "th", "props": [("text-align", "center")]}

df.style.set_properties(**cell_properties).set_table_styles([header_rule])


In [None]:
# Normalise the reply so stray whitespace or lower-case letters still match
# the upper-case subject column names (e.g. " math" -> "MATH").
subject = input("Which subject do you want? (Enter short form, all caps): ").strip().upper()
# Grade columns are named "<SUBJECT> GRADE".
subject_grade = subject + " GRADE"

In [None]:
# Run the report and Excel export for the subject chosen above.
analyze_subject(df=df, subject_name=subject, subject_grade_col=subject_grade)