# Student Performance Data Analysis

This notebook analyzes student performance data from the math dataset (student-mat.csv).

## 1. Data Loading & Exploration

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the math-course student dataset.
# sep=None together with the Python engine makes pandas sniff the delimiter
# from the header row, so both comma- and semicolon-delimited copies of the
# file parse into proper columns instead of collapsing into one wide column.
df = pd.read_csv('student-mat.csv', sep=None, engine='python')

# Fail early with a readable message if the columns used by the analysis
# cells are absent, rather than with a bare KeyError several cells later.
required_columns = {'G3', 'studytime', 'sex'}
missing_columns = required_columns - set(df.columns)
if missing_columns:
    raise KeyError(
        "student-mat.csv is missing expected columns: "
        + ", ".join(sorted(missing_columns))
    )

print("Dataset Shape:", df.shape)
print("\nColumn Names and Data Types:")
print(df.dtypes)
print("\nFirst 5 rows:")
# Last expression of the cell: rendered as a rich table by the notebook.
df.head()

## 2. Data Cleaning

In [None]:
# Tally missing entries per column once and reuse the tally for the total.
# Missing values are only reported here; no rows are removed because of them.
missing_per_column = df.isnull().sum()
print("Missing Values:")
print(missing_per_column)
print("\nTotal Missing:", missing_per_column.sum())

# Report fully duplicated rows, then build the de-duplicated frame that
# every later cell works from.
duplicate_count = df.duplicated().sum()
print("\nDuplicate Rows:", duplicate_count)
df_clean = df.drop_duplicates()
print("Shape after cleaning:", df_clean.shape)

## 3. Analysis Questions

In [None]:
# Q1: mean of the final-grade column (G3) over the de-duplicated rows.
# avg_grade is reused later by the histogram cell.
final_grades = df_clean['G3']
avg_grade = final_grades.mean()
print("Average Final Grade (G3):", round(avg_grade, 2))

In [None]:
# Q2: count of students whose final grade is strictly greater than 15,
# and that count as a percentage of all rows in df_clean.
scored_above_15 = df_clean['G3'] > 15
students_above_15 = int(scored_above_15.sum())
share_above_15 = (students_above_15 / len(df_clean)) * 100
print("Students scoring above 15:", students_above_15)
print("Percentage:", round(share_above_15, 2), "%")

In [None]:
# Q3: correlation between the study-time column and the final grade,
# using Series.corr with its default method.
study_time = df_clean['studytime']
grade_g3 = df_clean['G3']
correlation = study_time.corr(grade_g3)
print("Correlation (Study Time vs Grade):", round(correlation, 3))

In [None]:
# Q4: mean final grade for each value of the 'sex' column.
# gender_performance is reused later by the bar-chart cell.
gender_performance = df_clean.groupby('sex')['G3'].mean()
print("Average Grade by Gender:")
for gender_label, gender_code in (("Female:", 'F'), ("Male:", 'M')):
    print(gender_label, round(gender_performance[gender_code], 2))

## 4. Visualizations

In [None]:
# Histogram of final grades, with the mean from Q1 marked by a dashed red line.
mean_label = 'Average: ' + str(round(avg_grade, 2))

fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(df_clean['G3'], bins=10, color='skyblue', edgecolor='black')
ax.axvline(avg_grade, color='red', linestyle='--', label=mean_label)
ax.set_title('Distribution of Final Grades (G3)')
ax.set_xlabel('Grade')
ax.set_ylabel('Frequency')
ax.legend()
# Horizontal grid lines only, to help read off bar heights.
ax.grid(axis='y', alpha=0.7)
plt.show()

In [None]:
# Scatter of study-time category against final grade.
# Points are semi-transparent so that overlapping points show up darker.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(df_clean['studytime'], df_clean['G3'], alpha=0.5, color='green')
ax.set_title('Study Time vs Final Grade')
ax.set_xlabel('Study Time (1=<2h, 2=2-5h, 3=5-10h, 4=>10h)')
ax.set_ylabel('Final Grade (G3)')
ax.grid(True, alpha=0.3)
plt.show()

In [None]:
# Bar chart of the per-gender mean final grade computed in Q4.
fig, ax = plt.subplots(figsize=(8, 6))
gender_performance.plot(
    kind='bar',
    ax=ax,
    rot=0,  # keep the category labels horizontal
    color=['pink', 'lightblue'],
    edgecolor='black',
)
ax.set_title('Average Final Grade by Gender')
ax.set_xlabel('Gender')
ax.set_ylabel('Average Final Grade')
ax.grid(axis='y', alpha=0.7)
plt.show()

## Summary

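The values below are not filled in automatically; each bullet names the expression whose result is printed by the corresponding cell in Section 3.

- Average Grade: `round(avg_grade, 2)` (Q1)
- Students > 15: `students_above_15`, i.e. `round(students_above_15 / len(df_clean) * 100, 2)`% of students (Q2)
- Study Time Correlation: `round(correlation, 3)` (Q3)
- Gender: Female = `round(gender_performance['F'], 2)`, Male = `round(gender_performance['M'], 2)` (Q4)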