Performance visualization graph specifically for student 1323607 (Pratipal Kumar Singh)

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# --- 1. CONFIGURATION ---
TARGET_ROLL_NO = 1323607  # The student you want to visualize
fileData = 'BCA-609_Data_Visualization_Lab - Dr. Chetna Thakur\ModelTraining_ModelBuilding\student_data.csv'
CSV_FILE_NAME = fileData

In [3]:
# --- 2. DATA CLEANING FUNCTION ---
def clean_grade(value):
    """Converts mixed grade data (strings/RE/Absent) into numbers for plotting."""
    value = str(value).upper().strip()
    
    # List of values to treat as Missing or Zero
    if value in ['NAN', 'ABSENT', 'RL(REGN.)', 'RLS', 'RLA', '', 'nan']:
        return None  # Return None so the graph line breaks (showing gap)
    
    # Treat Re-appears (RE) as 0 to show the drop in performance visually
    if 'RE' in value or '[' in value:
        return 0.0 
        
    try:
        return float(value)
    except ValueError:
        return None

In [4]:
# --- 3. MAIN EXECUTION ---
try:
    # Load the CSV
    df = pd.read_csv(CSV_FILE_NAME)
    
    # Convert Roll No column to numbers to ensure matching works
    df['Roll No.'] = pd.to_numeric(df['Roll No.'], errors='coerce')
    
    # Find the specific student
    student_data = df[df['Roll No.'] == TARGET_ROLL_NO]
    
    if student_data.empty:
        print(f"Error: Student with Roll No {TARGET_ROLL_NO} not found.")
    else:
        # Extract Name for the Graph Title
        student_name = student_data['Name'].values[0]
        
        # Define the semesters to look for
        sem_cols = ['1st Sem.', '2nd Sem.', '3rd Sem.', '4th Sem.', '5th Sem.', '6th Sem.']
        
        # Extract and clean grades
        grades = []
        valid_sems = []
        
        for sem in sem_cols:
            if sem in df.columns:
                raw_val = student_data[sem].values[0]
                cleaned_val = clean_grade(raw_val)
                grades.append(cleaned_val)
                valid_sems.append(sem)
        
        # --- 4. VISUALIZATION ---
        plt.figure(figsize=(10, 6))
        
        # Plot the line graph
        plt.plot(valid_sems, grades, marker='o', linestyle='-', color='b', linewidth=2, markersize=8, label='GPA/Score')
        
        # Add labels for the specific values on top of the dots
        for i, txt in enumerate(grades):
            if txt is not None:
                plt.annotate(f'{txt}', (valid_sems[i], grades[i]), textcoords="offset points", xytext=(0,10), ha='center')

        # Formatting the graph
        plt.title(f'Performance Trend: {student_name} (Roll No: {TARGET_ROLL_NO})', fontsize=14, fontweight='bold')
        plt.xlabel('Semester', fontsize=12)
        plt.ylabel('Score / GPA', fontsize=12)
        plt.ylim(0, 10.5)  # Assuming grades are out of 10
        plt.grid(True, linestyle='--', alpha=0.7)
        plt.axhline(y=4.0, color='r', linestyle=':', label='Passing Threshold (Approx)') # Optional reference line
        plt.legend()
        
        # Show the plot
        plt.show()

except FileNotFoundError:
    print(f"Error: The file '{CSV_FILE_NAME}' was not found. Please check the file name.")
except Exception as e:
    print(f"An unexpected error occurred: {e}")

An unexpected error occurred: 'Roll No.'


In [5]:
# --- 3. MAIN EXECUTION ---
try:
    # Load the CSV
    df = pd.read_csv(CSV_FILE_NAME)
    
    # Clean up all column names (remove leading/trailing spaces)
    df.columns = df.columns.str.strip()
    
    # Automatically find the Roll No column
    roll_col_name = find_roll_column(df.columns)
    
    if roll_col_name is None:
        raise ValueError("Could not find a 'Roll No.' column. Please check CSV headers.")
        
    print(f"Found Roll No column: '{roll_col_name}'")

    # Convert Roll No column to numeric, forcing errors to NaN, then drop NaNs
    df[roll_col_name] = pd.to_numeric(df[roll_col_name], errors='coerce')
    
    # Filter for the student
    student_data = df[df[roll_col_name] == TARGET_ROLL_NO]
    
    if student_data.empty:
        print(f"Error: Student with Roll No {TARGET_ROLL_NO} not found.")
    else:
        # Get Name (Assuming 'Name' column exists, otherwise use 'Unknown')
        name_col = 'Name' if 'Name' in df.columns else df.columns[3] # Fallback to 4th col
        student_name = student_data[name_col].values[0]
        
        # Identify Semester Columns (Columns containing 'Sem')
        sem_cols = [col for col in df.columns if 'Sem' in col]
        
        # Extract grades
        grades = []
        valid_sems = []
        
        for sem in sem_cols:
            raw_val = student_data[sem].values[0]
            cleaned_val = clean_grade(raw_val)
            grades.append(cleaned_val)
            valid_sems.append(sem)
        
        # --- 4. VISUALIZATION ---
        if not valid_sems:
             print("Error: No semester columns found.")
        else:
            plt.figure(figsize=(10, 6))
            plt.plot(valid_sems, grades, marker='o', linestyle='-', color='green', linewidth=2, markersize=8)
            
            # Annotate points
            for i, txt in enumerate(grades):
                if txt is not None:
                    plt.annotate(f'{txt}', (valid_sems[i], grades[i]), textcoords="offset points", xytext=(0,10), ha='center')

            plt.title(f'Performance Trend: {student_name} (Roll No: {TARGET_ROLL_NO})', fontsize=14)
            plt.xlabel('Semester')
            plt.ylabel('Score / GPA')
            plt.ylim(0, 10.5)
            plt.grid(True, linestyle='--', alpha=0.5)
            plt.show()

except Exception as e:
    print(f"Error: {e}")

Error: name 'find_roll_column' is not defined
