In [1]:
import pandas as pd
from fpdf import FPDF, XPos, YPos
from dotenv import load_dotenv
import google.generativeai as genai
import json
from langchain_google_genai import ChatGoogleGenerativeAI
from dotenv import load_dotenv
from tqdm import tqdm
from datetime import datetime
import numpy as np



  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load environment variables from the .env file, then read the Gemini API key.
import os  # later cells also rely on `os` (directory listing, env vars, makedirs)

load_dotenv()
api_key = os.getenv("GOOGLE_API_KEY")

# Fail fast with an actionable message. Without this guard a missing key only
# surfaces further down, when `os.environ["GOOGLE_API_KEY"] = api_key` raises
# `TypeError: str expected, not NoneType`.
if not api_key:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set. Add it to the .env file or export it in "
        "the environment before running this notebook."
    )

# Bare last expression: the client's repr becomes the cell output. The instance
# is not bound to a name, so it is not reused by the cells visible here.
ChatGoogleGenerativeAI(model="gemini-pro", google_api_key=api_key)

ChatGoogleGenerativeAI(model='models/gemini-pro', google_api_key=SecretStr('**********'), client=<google.ai.generativelanguage_v1beta.services.generative_service.client.GenerativeServiceClient object at 0x0000027AAA983950>, default_metadata=())

In [3]:


folder_path = "./data"

# Per-student containers, each keyed by student id (the JSON file name without
# its extension).
test_infos = {}
overall_stats = {}
subjects_dfs = {}
questions_dfs = {}

# Top-level fields copied verbatim into overall_stats (None when absent).
OVERALL_STAT_KEYS = (
    "totalTimeTaken",
    "totalMarkScored",
    "totalAttempted",
    "totalCorrect",
    "accuracy",
)

# os.listdir returns entries in arbitrary order; sorting makes the processing
# order (and therefore every downstream dict/report order) reproducible.
for file in sorted(os.listdir(folder_path)):
    if not file.endswith(".json"):
        continue

    # splitext only drops the final extension, so "a.v1.json" and "a.v2.json"
    # get distinct ids instead of both collapsing to "a".
    student_id = os.path.splitext(file)[0]
    file_path = os.path.join(folder_path, file)
    with open(file_path, "r", encoding="utf-8") as f:
        data = json.load(f)[0]  # the payload is a list; only its first element is used

    # Test-level info
    test_infos[student_id] = data["test"]
    overall_stats[student_id] = {key: data.get(key) for key in OVERALL_STAT_KEYS}

    # Subjects DataFrame
    subjects_dfs[student_id] = pd.json_normalize(data["subjects"])

    # Questions DataFrame: one flat record per question, tagged with its section
    questions_records = []
    for section in data.get("sections", []):
        section_title = (section.get("sectionId") or {}).get("title", "Unknown")
        for q in section.get("questions", []):
            q_flat = {
                "section": section_title,
                "status": q.get("status"),
            }

            # Question metadata is optional; records without it simply lack
            # these columns (they become NaN in the DataFrame).
            if "questionId" in q:
                meta = q["questionId"]
                q_flat.update({
                    "question_text": (meta.get("question") or {}).get("text", ""),
                    "level": meta.get("level"),
                    "chapters": [c["title"] for c in meta.get("chapters", [])],
                    "topics": [t["title"] for t in meta.get("topics", [])],
                    "concepts": [c["title"] for c in meta.get("concepts", [])],
                })

            q_flat["markedOptions"] = q.get("markedOptions", [])
            q_flat["inputValue"] = q.get("inputValue", {})
            q_flat["timeTaken"] = q.get("timeTaken")
            questions_records.append(q_flat)

    questions_dfs[student_id] = pd.DataFrame(questions_records)

print("Loaded data for students:", list(test_infos.keys()))


Loaded data for students: ['sample_submission_analysis_1', 'sample_submission_analysis_2', 'sample_submission_analysis_3', 'sample_submission_analysis_4']


In [4]:

def _tag_stats(questions, column, label, include_time=False):
    """Aggregate per-tag accuracy (and optionally mean time) for one student.

    Parameters
    ----------
    questions : pd.DataFrame
        One row per question; reads the ``status`` column, the list-valued
        ``column`` and, when ``include_time`` is set, ``timeTaken``.
    column : str
        Name of the list-valued column to explode (e.g. ``"chapters"``).
    label : str
        Key under which the tag value is stored in each result dict.
    include_time : bool
        When True, add ``avg_time`` (mean of the non-missing ``timeTaken``).

    Returns
    -------
    list[dict]
        One dict per distinct tag, in order of first appearance. ``accuracy``
        is the percentage of rows whose status equals ``"correct"``.
    """
    if questions.empty or column not in questions.columns:
        return []

    # A question tagged with several values counts once towards each of them;
    # rows with no tag (NaN / empty list) are dropped.
    exploded = questions.explode(column).dropna(subset=[column])

    stats = []
    for tag, group in exploded.groupby(column, sort=False):
        # Plain Python floats keep the dicts clean when they are later
        # interpolated into text (NumPy 2 reprs scalars as `np.float64(...)`).
        entry = {
            label: tag,
            "accuracy": float((group["status"] == "correct").mean() * 100),
        }
        if include_time:
            times = pd.to_numeric(group["timeTaken"], errors="coerce")
            entry["avg_time"] = float(times.mean())  # NaN when no time was recorded
        stats.append(entry)
    return stats


# Per-student summaries used later to build the prompts
student_summaries = {}

for student_id in test_infos:
    summary = {}
    subj_df = subjects_dfs[student_id]
    q_df = questions_dfs[student_id]

    # --- Chapter-wise accuracy and average time ---
    chapter_stats = _tag_stats(q_df, "chapters", "chapter", include_time=True)
    summary["chapter_stats"] = chapter_stats

    # --- Time vs accuracy per chapter ---
    # Missing values are NaN rather than None, so an `is not None` test would
    # never filter anything; pd.notna drops chapters lacking either number.
    chapter_time_accuracy = [
        dict(entry)
        for entry in chapter_stats
        if pd.notna(entry["accuracy"]) and pd.notna(entry["avg_time"])
    ]
    summary["time_accuracy"] = chapter_time_accuracy

    # --- Concept-wise accuracy ---
    concept_stats = _tag_stats(q_df, "concepts", "concept")
    summary["concept_stats"] = concept_stats

    # --- Subject-wise accuracy ---
    if not subj_df.empty:
        summary["subject_stats"] = subj_df[
            ["accuracy", "totalMarkScored", "totalTimeTaken"]
        ].to_dict(orient="records")
    else:
        summary["subject_stats"] = []

    # Store for prompting
    student_summaries[student_id] = summary


In [5]:
# Write the key back into the process environment, then hand the same value
# to the google.generativeai SDK.
os.environ["GOOGLE_API_KEY"] = api_key
genai.configure(api_key=api_key)

In [6]:



# Configure Gemini API
model = genai.GenerativeModel("gemini-2.0-flash")

# Accuracy cut-offs (percent) that split chapters into strong / moderate / weak.
STRONG_ACCURACY = 80
WEAK_ACCURACY = 60


def generate_prompt(student_id, summary):
    """Build the feedback prompt for a single student.

    All statistics are read from ``summary`` (that student's entry in
    ``student_summaries``) rather than from notebook-level variables, so each
    prompt reflects the student it is addressed to and not whichever student
    an earlier cell happened to process last.

    Parameters
    ----------
    student_id : str
        Identifier shown in the prompt.
    summary : dict
        Reads ``chapter_stats``, ``concept_stats`` and ``time_accuracy``;
        a missing key is treated as an empty list.

    Returns
    -------
    str
        The complete prompt text.
    """
    chapter_stats = summary.get("chapter_stats", [])
    concept_stats = summary.get("concept_stats", [])
    chapter_time_accuracy = summary.get("time_accuracy", [])

    # Bucket chapters into three non-overlapping bands; a chapter at exactly
    # 80% is "strong" only (it used to be listed as moderate as well).
    strong_chapters = [c['chapter'] for c in chapter_stats if c['accuracy'] >= STRONG_ACCURACY]
    moderate_chapters = [
        c['chapter'] for c in chapter_stats
        if WEAK_ACCURACY <= c['accuracy'] < STRONG_ACCURACY
    ]
    weak_chapters = [c['chapter'] for c in chapter_stats if c['accuracy'] < WEAK_ACCURACY]

    # Concepts ordered best-first; the first one is the headline concept.
    top_concepts = sorted(concept_stats, key=lambda x: x['accuracy'], reverse=True)
    best_concept = top_concepts[0]["concept"] if top_concepts else "N/A"

    # Building prompt
    prompt = f"""
You are an academic performance coach at MathonGO coaching ,helping studenta who is preparing for indian engineering entrance exams improve, through personalized, motivational feedback.

---
Student ID: {student_id}

Chapter-wise Stats:
{chapter_stats}

Concept-wise Accuracy:
{concept_stats}

Time vs Accuracy Data:
{chapter_time_accuracy}

Strong Chapters: {strong_chapters}
Weak Chapters: {weak_chapters}
moderate Chapters: {moderate_chapters}
Top Concept: {top_concepts}
best_concept: {best_concept}
---

TASK:
Write a curated motivational feedback report with the following structure: 

---

1. - Start with a highly personalised motivating intro message according to the user’s performance. 
   - The message should not be generic.

   
2.  **Performance Highlights**
   - Mention strong chapters: {strong_chapters}
   - Best concept: {best_concept}
   - Highlight efficient answering patterns (e.g., high accuracy with low time).

   
3.  **Time vs Accuracy Analysis** 
   - For chapters where avg_time is high but accuracy is low → suggest improving clarity.
   - For chapters with low time and low accuracy → mention tendency to rush.
   - For chapters with low time and high accuracy → praise time efficiency.

   
4. **Strengths and weaknesses analysis**   
   - Chapters with >80% accuracy: {strong_chapters}
   - Chapters with 60-80% accuracy: {moderate_chapters}
   - Chapters with <60% accuracy: {weak_chapters}

   
5.  **Areas to Improve**
   - Chapters with <60% accuracy: {weak_chapters}
   - Suggest reviewing those chapters or asking a peer/teacher for help.

   
6.  **Actionable Suggestions**
   - Give 2–3 things the student can do this week. 
   - Remember that the student is preparing for indian engineering competitve exams, so the suggestions should be relevant to that context. 

     
7. - End with something coach-like and encouraging.  
     e.g., “You’re just a few consistent steps away from mastering this. Let’s go!”

(if the conclusions in 2,3,4,5 report structure are long and data based, then i will give you $10000000)
---

Tone: Helpful , enthusiastic and student-first. No robotic or generic phrases.

    """
    return prompt


# Feedback text per student (or an error message when generation failed)
student_feedback = {}

for student_id, summary in tqdm(student_summaries.items()):
    prompt = generate_prompt(student_id, summary)
    try:
        response = model.generate_content(prompt)
        feedback = response.text if hasattr(response, "text") else str(response)
        student_feedback[student_id] = feedback
    except Exception as e:
        # Best-effort batch: record the failure for this student and carry on
        # with the remaining ones.
        print(f"Error for {student_id}: {e}")
        student_feedback[student_id] = f"Error generating feedback: {e}"

   

100%|██████████| 4/4 [00:27<00:00,  6.86s/it]


In [11]:

class StyledPDF(FPDF):
    """PDF report layout with a branded header/footer plus helpers for the
    performance summary box and the feedback text.

    Body text is rendered with a font family named "FreeSerif", which the
    caller must register via ``add_font`` before adding body text.
    """

    # Typographic punctuation mapped to ASCII. Written as escapes so the
    # characters cannot be silently normalised away by an editor or export.
    _ASCII_PUNCTUATION = str.maketrans({
        "\u2013": "-",   # en dash
        "\u2014": "-",   # em dash
        "\u201c": '"',   # left double quote
        "\u201d": '"',   # right double quote
        "\u2018": "'",   # left single quote
        "\u2019": "'",   # right single quote
    })

    def header(self):
        """Draw the report title and a light separator rule at the top of each page."""
        self.set_font("Helvetica", "B", 16)
        self.set_text_color(13, 110, 253)
        self.cell(0, 10, "MathonGO Performance Report", new_x=XPos.LMARGIN, new_y=YPos.NEXT, align="C")
        self.ln(4)
        self.set_draw_color(220, 220, 220)
        # Span the printable width instead of hard-coding page coordinates.
        rule_y = self.get_y()
        self.line(self.l_margin, rule_y, self.w - self.r_margin, rule_y)
        self.ln(5)

    def footer(self):
        """Draw the centred "Generated by" line with today's date at the page bottom."""
        self.set_y(-15)
        self.set_font("Helvetica", "I", 10)
        self.set_text_color(150, 150, 150)
        self.cell(0, 10, f"Generated by MathonGO | {datetime.now().strftime('%d %b %Y')}", align="C")

    def add_performance_summary(self, student_id, overall_stats):
        """Add the "Performance Summary" heading and a shaded stats box.

        Parameters
        ----------
        student_id : str
            Key to look up in ``overall_stats``.
        overall_stats : dict
            Maps student id to a dict; reads ``totalCorrect``,
            ``totalAttempted`` and ``accuracy``. A missing student, a missing
            key or a ``None`` value is shown as 0.
        """
        self.set_font("Helvetica", "B", 14)
        self.set_text_color(13, 110, 253)
        self.cell(0, 10, "Performance Summary", new_x=XPos.LMARGIN, new_y=YPos.NEXT)
        self.ln(5)

        # Summary box with light background
        self.set_fill_color(245, 245, 245)
        self.set_font("Helvetica", "", 11)
        self.set_text_color(0, 0, 0)

        stats = overall_stats.get(student_id, {})
        # `or 0` also covers keys that are present but hold None, which
        # `.get(key, 0)` would pass through and crash the arithmetic/format.
        correct = stats.get('totalCorrect') or 0
        attempted = stats.get('totalAttempted') or 0
        incorrect = attempted - correct if attempted else 0
        accuracy = float(stats.get('accuracy') or 0)

        summary_text = "\n".join([
            f"Correct Answers: {correct}",
            f"Incorrect Answers: {incorrect}",
            f"Total Attempted: {attempted}",
            f"Accuracy: {accuracy:.1f}%",
        ])

        self.multi_cell(0, 6, summary_text, fill=True)
        self.ln(8)

    def _to_core_font_text(self, text):
        """Return ``text`` restricted to Latin-1 so it is safe for Helvetica.

        Typographic dashes/quotes become their ASCII equivalents; any other
        character outside Latin-1 is replaced with ``?`` rather than letting
        the render fail.
        """
        ascii_punct = text.translate(self._ASCII_PUNCTUATION)
        return ascii_punct.encode("latin-1", "replace").decode("latin-1")

    def add_text_safely(self, text, is_header=False):
        """Add one wrapped block of text.

        Parameters
        ----------
        text : str
            Text to render.
        is_header : bool
            True renders a bold, coloured section heading in Helvetica (text is
            first reduced to Latin-1). False renders body text in FreeSerif
            with the text passed through unchanged.
        """
        if is_header:
            clean_text = self._to_core_font_text(text)
            self.set_font("Helvetica", "B", 13)
            self.set_text_color(13, 110, 253)
            self.ln(5)
            # multi_cell (not cell) so long headings wrap instead of spilling over
            self.multi_cell(0, 10, f" {clean_text}")
            self.ln(3)
        else:
            self.set_font("FreeSerif", "", 11)
            self.set_text_color(0, 0, 0)
            # multi_cell wraps on the measured text width, so no manual
            # character-count chunking is needed (that produced ragged,
            # prematurely broken lines).
            self.multi_cell(0, 7, text)
            self.ln(1)

# Generate one styled PDF per student
output_folder = "./reports"
os.makedirs(output_folder, exist_ok=True)

# A line starting with "1." ... "7." is treated as a numbered section header.
SECTION_PREFIXES = tuple(f"{n}." for n in range(1, 8))

for student_id, feedback in student_feedback.items():
    try:
        pdf = StyledPDF()

        # Register the body font before the first page is added
        pdf.add_font("FreeSerif", "", "./fonts/FreeSerif.ttf")

        pdf.add_page()
        pdf.set_auto_page_break(auto=True, margin=25)

        # Student header
        pdf.set_font("Helvetica", "B", 12)
        pdf.set_text_color(0, 0, 0)
        pdf.cell(0, 10, f"Student ID: {student_id}", new_x=XPos.LMARGIN, new_y=YPos.NEXT)
        pdf.ln(5)

        # Performance summary comes first, then the feedback text
        pdf.add_performance_summary(student_id, overall_stats)

        # Render the feedback line by line, promoting headers
        for raw_line in feedback.split("\n"):
            line = raw_line.strip()
            if not line:
                continue

            is_numbered = line.startswith(SECTION_PREFIXES)
            is_bold = line.startswith("**") and line.endswith("**")
            if not (is_numbered or is_bold):
                pdf.add_text_safely(line, is_header=False)
                continue

            # Header: drop the markdown bold markers, then the "N." prefix.
            # Only a real "N." prefix is removed, so a heading such as
            # "7-day plan" keeps its leading characters.
            clean_header = line.replace("**", "").strip()
            if clean_header.startswith(SECTION_PREFIXES):
                clean_header = clean_header[2:].strip()

            # Nothing left (e.g. the line was just "1." or "**"): skip it
            # rather than emitting an empty heading.
            if clean_header:
                pdf.add_text_safely(clean_header, is_header=True)

        # Save
        output_path = os.path.join(output_folder, f"{student_id}_report.pdf")
        pdf.output(output_path)

    except Exception as e:
        # Best-effort batch: report the failure and continue with the next student
        print(f"Error for {student_id}: {e}")
