## Project: Student Performance Analyzer (with Raw Data)

In [105]:
import pandas as pd
import numpy as np

# Load the raw student records from the CSV file.
df = pd.read_csv('students_raw_data.csv')

# Preview the first 10 rows.
print("Head_Data")
print(df.head(10))

# Number of missing values in each column.
print("\nNull_Values")
print(df.isnull().sum())

# Column dtypes and non-null counts.
# df.info() writes its report to stdout itself and returns None, so it is
# called directly; wrapping it in print() would add a stray "None" line.
print("\nINFO")
df.info()


# Replace missing names and genders with the placeholder "Unknown".
# Series.fillna returns a new Series rather than modifying the column,
# so the result must be assigned back for the fill to take effect.
df["Name"] = df["Name"].fillna("Unknown")
df["Gender"] = df["Gender"].fillna("Unknown")


# Clean the Age column: unparseable values become NaN, missing values are
# replaced with the column mean, and the result is rounded and stored as int.
age = pd.to_numeric(df["Age"], errors="coerce")
df["Age"] = age.fillna(age.mean()).round().astype(int)


# Clean the subject mark columns.
# Every subject gets the same treatment: values that cannot be parsed as
# numbers become NaN, then missing marks are filled with the column mean
# and rounded to 2 decimals. Coercing all three columns (not only Science)
# keeps .mean() from failing if Math or English ever contain stray text.
for subject in ["Science", "Math", "English"]:
    df[subject] = pd.to_numeric(df[subject], errors="coerce")
    df[subject] = df[subject].fillna(df[subject].mean()).round(2)

# Derived columns: the row-wise sum of the three subject marks, and that
# sum divided by the number of subjects, rounded to 2 decimals.
mark_columns = ["Math", "English", "Science"]
df["Total_Marks"] = df[mark_columns].sum(axis=1)
df["Percentage"] = df["Total_Marks"].div(len(mark_columns)).round(2)


def grade(score):
    """Map a percentage score to a grade label.

    Returns "A" for scores strictly above 85, "B" for scores from 70 up to
    and including 85, "C" for scores from 60 up to (but not including) 70,
    and "Fail" for everything else (including NaN, which fails every
    comparison).
    """
    if score > 85:
        return "A"
    if score >= 70:
        return "B"
    return "C" if score >= 60 else "Fail"

# Label every student with a grade derived from their percentage.
percentages = df["Percentage"]
df["Grade"] = percentages.apply(grade)

# Mean subject marks and percentage for each gender.
print("\n----- Average Marks by Gender ------")
gender_averages = df.groupby("Gender")[["Math", "English", "Science", "Percentage"]].mean()
print(gender_averages)

# The five rows with the highest percentage.
print("\n------ Top 5 Students ------")
ranked = df.sort_values(by="Percentage", ascending=False)
print(ranked.head())

# Name of the highest scorer in each subject.
# The subscripts inside the f-strings use single quotes: reusing the
# enclosing double quote inside an f-string expression is a SyntaxError
# on Python versions before 3.12.
print("\n------ Subject Toppers ------")
print(f"Math Topper: {df.loc[df['Math'].idxmax(), 'Name']}")
print(f"English Topper: {df.loc[df['English'].idxmax(), 'Name']}")
print(f"Science Topper: {df.loc[df['Science'].idxmax(), 'Name']}")

# Name and percentage of every student whose percentage is below 60.
print("\n------ Students who Failed -------")
below_pass = df["Percentage"] < 60
print(df.loc[below_pass, ["Name", "Percentage"]])

# The subject with the lowest class average is reported as the hardest.
print("\n----- Hardest Subject ------")
subject_means = df[["Math", "Science", "English"]].mean()
hardest_subject, lowest_avg = subject_means.idxmin(), subject_means.min()
print(f"{hardest_subject} with class average {lowest_avg:.2f}")

# For each subject, report the top scorer's name together with their mark.
print("\n----- Highest in each Subject ------")

subjects = df[["Math", "English", "Science"]]
for subject_name in subjects.columns:
    top_row = df[subject_name].idxmax()
    top_name = df.loc[top_row, "Name"]
    top_mark = df.loc[top_row, subject_name]
    print(f"Top in {subject_name}: {top_name} with {top_mark} marks.")

# Write the cleaned DataFrame to disk without the index column.
output_file = "cleaned_students.csv"
df.to_csv(output_file, index=False)
print("✅ Cleaned data saved as cleaned_students.csv")

Head_Data
   ID    Name   Age Gender  Math  English Science
0   1     Ali  20.0      M  78.0     65.0      72
1   2    Sara  22.0      F  88.0     90.0      85
2   3  Hassan  21.0      M  67.0      NaN      75
3   4  Ayesha  19.0      F  92.0     85.0      89
4   5    Omar  20.0      M  56.0     60.0      58
5   6   Maria  21.0      F  81.0     79.0     NaN
6   7   Usman  22.0      M   NaN     68.0      70
7   8    Nida  20.0      F  85.0      NaN      90
8   9   Ahmed   NaN      M  60.0     55.0      65
9  10    Zara  21.0      F  95.0     92.0      96

Nll_Values
ID          0
Name        1
Age         2
Gender      1
Math        8
English    11
Science     4
dtype: int64

INFO
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 7 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   ID       50 non-null     int64  
 1   Name     49 non-null     object 
 2   Age      48 non-null     float64
 3   Gender   49 no