In [None]:
import numpy as np
import pandas as pd
import seaborn as sns # pyright: ignore[reportMissingModuleSource]
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
# NOTE(review): with no category or module filter this hides every warning
# raised for the rest of the session, including deprecation notices from the
# libraries imported above. Consider narrowing it to the specific warnings
# that are actually noisy.
warnings.filterwarnings('ignore')

In [4]:
# Load the exam results from a CSV file; the path is relative, so the file
# must sit in the notebook's working directory.
df = pd.read_csv('exams.csv')

In [5]:
# Preview the first ten rows to check that the columns were parsed as expected.
df.head(10)

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,male,group A,high school,standard,completed,67,67,63
1,female,group D,some high school,free/reduced,none,40,59,55
2,male,group E,some college,free/reduced,none,59,60,50
3,male,group B,high school,standard,none,77,78,68
4,male,group E,associate's degree,standard,completed,78,73,68
5,female,group D,high school,standard,none,63,77,76
6,female,group A,bachelor's degree,standard,none,62,59,63
7,male,group E,some college,standard,completed,93,88,84
8,male,group D,high school,standard,none,63,56,65
9,male,group C,some college,free/reduced,none,47,42,45


In [6]:
# Size of the loaded table as (rows, columns).
df.shape

(1000, 8)

In [7]:
# Count the missing values in each column.
df.isnull().sum()

gender                         0
race/ethnicity                 0
parental level of education    0
lunch                          0
test preparation course        0
math score                     0
reading score                  0
writing score                  0
dtype: int64

In [8]:
# Count the rows that exactly repeat an earlier row across all columns.
df.duplicated().sum()

np.int64(1)

In [9]:
# The check above found a duplicated row, so the duplicates need to be dropped before analysing the data.

In [10]:
# Inspect the rows flagged as repeats of an earlier row before removing them.
duplicate_rows = df[df.duplicated()]
print(duplicate_rows)

    gender race/ethnicity parental level of education     lunch  \
825   male        group E          associate's degree  standard   

    test preparation course  math score  reading score  writing score  
825               completed         100            100            100  


In [11]:
# Drop rows that exactly repeat an earlier row. The surviving rows keep their
# original index labels (the index is not reset), so labels are no longer
# contiguous after this step.
df = df.drop_duplicates()

In [12]:
# Sanity check: the duplicate count should now be zero.
df.duplicated().sum()

np.int64(0)

In [13]:
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric columns.
df.describe()

Unnamed: 0,math score,reading score,writing score
count,999.0,999.0,999.0
mean,66.362362,68.970971,67.705706
std,15.373794,14.711935,15.575322
min,13.0,27.0,23.0
25%,56.0,60.0,58.0
50%,66.0,70.0,68.0
75%,77.0,79.0,79.0
max,100.0,100.0,100.0


In [14]:
#exploring the data

In [20]:
# Show the distinct values of every categorical column. The dict maps each
# column name to the wording used for it in the printed label.
category_labels = {
    'gender': 'gender',
    'race/ethnicity': 'race',
    'parental level of education': 'parental level of education',
    'lunch': 'lunch',
    'test preparation course': 'test preparation course',
}

for category_column, category_label in category_labels.items():
    # print joins its two arguments with a single space, which together with
    # the trailing space in the label yields the two-space gap after the colon.
    print(f"Categories in {category_label} variable: ", df[category_column].unique())

Categories in gender variable:  ['male' 'female']
Categories in race variable:  ['group A' 'group D' 'group E' 'group B' 'group C']
Categories in parental level of education variable:  ['high school' 'some high school' 'some college' "associate's degree"
 "bachelor's degree" "master's degree"]
Categories in lunch variable:  ['standard' 'free/reduced']
Categories in test preparation course variable:  ['completed' 'none']


In [21]:
# Split the columns into numeric and non-numeric (categorical) groups.
# select_dtypes is used instead of comparing each dtype with 'O': text columns
# stored with a dedicated string or category dtype are not object dtype, so the
# old comparison would have placed them in the numeric list. Column order is
# preserved and both results are plain Python lists, as before.
numeric_features = df.select_dtypes(include='number').columns.tolist()
categorical_features = df.select_dtypes(exclude='number').columns.tolist()

In [22]:
# Show which columns were classified as numeric.
numeric_features

['math score', 'reading score', 'writing score']

In [24]:
# Show which columns were classified as categorical.
categorical_features

['gender',
 'race/ethnicity',
 'parental level of education',
 'lunch',
 'test preparation course']

In [25]:
#adding columns for total score and average score

In [26]:
# Derive each student's combined score and the mean over the three exams,
# computing the row-wise sum once and reusing it for both new columns.
combined_score = df['math score'] + df['reading score'] + df['writing score']
df['total_score'] = combined_score
df['average'] = combined_score / 3
df.head()

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score,total_score,average
0,male,group A,high school,standard,completed,67,67,63,197,65.666667
1,female,group D,some high school,free/reduced,none,40,59,55,154,51.333333
2,male,group E,some college,free/reduced,none,59,60,50,169,56.333333
3,male,group B,high school,standard,none,77,78,68,223,74.333333
4,male,group E,associate's degree,standard,completed,78,73,68,219,73.0


In [28]:
# Report the highest score in each subject and overall, how many students
# reached it, and who they are. Each filtered frame is built once and reused
# for both the head-count and the detail printout.

# --- Step 1: Highest value recorded in each score column ---
max_math_score = df['math score'].max()
max_reading_score = df['reading score'].max()
max_writing_score = df['writing score'].max()
max_total_score = df['total_score'].max()

# --- Step 2: Rows that reach each maximum (all tied students are kept) ---
top_math_students = df[df['math score'] == max_math_score]
top_reading_students = df[df['reading score'] == max_reading_score]
top_writing_students = df[df['writing score'] == max_writing_score]
top_total_students = df[df['total_score'] == max_total_score]

students_with_max_math = len(top_math_students)
students_with_max_reading = len(top_reading_students)
students_with_max_writing = len(top_writing_students)
students_with_max_total = len(top_total_students)

# --- Step 3: Print the results ---
# The label text is kept exactly as before so the printed report is unchanged.
print("Maximum Scores:")
print(f"  Max Math Score: {max_math_score}")
print(f"  Max Reading Score: {max_reading_score}")
print(f" Max writing Score: {max_writing_score}")
print(f"  Max Total Score: {max_total_score}")

print("\nNumber of Students with Maximum Scores:")
print(f"  Students with max math score: {students_with_max_math}")
print(f"  Students with max reading score: {students_with_max_reading}")
print(f"  Students with max writing score: {students_with_max_writing}")
print(f"  Students with max total score: {students_with_max_total}")

# --- Step 4: Full rows for the students who achieved each maximum ---
print("\nDetails of students with max math score:")
print(top_math_students)

print("\nDetails of students with max reading score:")
print(top_reading_students)

print("\nDetails of students with max writing score:")
print(top_writing_students)

print("\nDetails of students with max total score:")
print(top_total_students)

Maximum Scores:
  Max Math Score: 100
  Max Reading Score: 100
 Max writing Score: 100
  Max Total Score: 300

Number of Students with Maximum Scores:
  Students with max math score: 13
  Students with max reading score: 18
  Students with max writing score: 17
  Students with max total score: 4

Details of students with max math score:
     gender race/ethnicity parental level of education     lunch  \
44     male        group E                 high school  standard   
128    male        group E                some college  standard   
148  female        group E           bachelor's degree  standard   
235    male        group C          associate's degree  standard   
281    male        group E          associate's degree  standard   
550  female        group E                some college  standard   
584    male        group C           bachelor's degree  standard   
593    male        group D           bachelor's degree  standard   
634  female        group D             master's d