In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import plotly.express as px


In [2]:
# Path of the student-performance CSV read into `df`.
# NOTE(review): absolute path looks environment-specific — confirm before running elsewhere.
DATA_PATH = "/content/StudentsPerformance.csv"
df = pd.read_csv(DATA_PATH)

In [3]:
# Preview the first rows of the loaded frame to sanity-check column names and values.
df.head()



Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
3,male,group A,associate's degree,free/reduced,none,47,57,44
4,male,group C,some college,standard,none,76,78,75


In [4]:
# Print per-column dtypes and non-null counts to check for missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 8 columns):
 #   Column                       Non-Null Count  Dtype 
---  ------                       --------------  ----- 
 0   gender                       1000 non-null   object
 1   race/ethnicity               1000 non-null   object
 2   parental level of education  1000 non-null   object
 3   lunch                        1000 non-null   object
 4   test preparation course      1000 non-null   object
 5   math score                   1000 non-null   int64 
 6   reading score                1000 non-null   int64 
 7   writing score                1000 non-null   int64 
dtypes: int64(3), object(5)
memory usage: 62.6+ KB


In [5]:
# (rows, columns) of the raw frame, before any derived columns are added.
df.shape

(1000, 8)

In [6]:
# Add a per-student mean of the three subject scores as a new column on `df`.
# skipna=False keeps the same missing-value propagation as plain `+` would.
df["average score"] = (
    df[["math score", "reading score", "writing score"]].sum(axis=1, skipna=False) / 3
)

In [7]:
# Summary statistics for the numeric columns, including the derived "average score".
df.describe()

Unnamed: 0,math score,reading score,writing score,average score
count,1000.0,1000.0,1000.0,1000.0
mean,66.089,69.169,68.054,67.770667
std,15.16308,14.600192,15.195657,14.257326
min,0.0,17.0,10.0,9.0
25%,57.0,59.0,57.75,58.333333
50%,66.0,70.0,69.0,68.333333
75%,77.0,79.0,79.0,77.666667
max,100.0,100.0,100.0,100.0


In [8]:
# Student count per gender, with one colour per gender category.
fig = px.histogram(
    data_frame=df,
    x="gender",
    color="gender",
    title="Gender Distribution",
)
fig.show()

In [9]:
# Box plot comparing the math-score distribution across genders.
fig = px.box(
    data_frame=df,
    x="gender",
    y="math score",
    color="gender",
    title="Math Score Distribution by Gender",
)
fig.show()

In [10]:
# Same gender box plot for reading and then writing; one figure is shown per subject.
# `fig` is left bound to the last (writing) figure, as before.
for score_column, chart_title in (
    ("reading score", "Reading Score Distribution by Gender"),
    ("writing score", "Writing Score Distribution by Gender"),
):
    fig = px.box(df, x="gender", y=score_column, color="gender", title=chart_title)
    fig.show()

In [11]:
# Reading vs. writing scores, coloured by gender, with an OLS trendline overlay.
# NOTE(review): plotly's "ols" trendline is documented to need statsmodels — confirm it is installed.
fig = px.scatter(
    data_frame=df,
    x="reading score",
    y="writing score",
    color="gender",
    trendline="ols",
    title="Reading vs Writing Score Correlation",
)
fig.show()

In [12]:
# Mean math/reading/writing score for each parental education level,
# flattened back to ordinary columns so plotly can use the level as the x axis.
subject_columns = ["math score", "reading score", "writing score"]
df_avg = (
    df.groupby("parental level of education")[subject_columns]
    .mean()
    .reset_index()
)

# Grouped bars: one cluster per education level, one bar per subject.
fig = px.bar(
    df_avg,
    x="parental level of education",
    y=list(subject_columns),
    barmode="group",
    title="Average Scores by Parental Education Level",
)
fig.show()

In [13]:
# Math-score distribution split by lunch type.
fig = px.box(
    data_frame=df,
    x="lunch",
    y="math score",
    color="lunch",
    title="Lunch Type vs Math Score",
)
fig.show()

In [14]:
# Math-score distribution split by test-preparation course status.
fig = px.box(
    data_frame=df,
    x="test preparation course",
    y="math score",
    color="test preparation course",
    title="Test Preparation vs Math Score",
)
fig.show()

In [15]:
# Pairwise correlation matrix of the three subject scores.
corr = df.loc[:, ["math score", "reading score", "writing score"]].corr()

# Render the matrix as an annotated heatmap (cell values printed via text_auto).
fig = px.imshow(
    corr,
    text_auto=True,
    color_continuous_scale="Blues",
    title="Correlation Heatmap of Scores",
)
fig.show()

In [16]:
# Violin plots (with an embedded box) for two grouping/score pairs, shown in order.
for group_column, score_column, chart_title in (
    ("gender", "math score", "Math Score Violin by Gender"),
    ("race/ethnicity", "reading score", "Reading Score by Ethnicity"),
):
    px.violin(
        df,
        x=group_column,
        y=score_column,
        color=group_column,
        box=True,
        title=chart_title,
    ).show()


In [17]:
# Rebuild the per-education-level subject means; as_index=False keeps the
# grouping key as a regular column, so no reset_index() is needed.
df_avg = df.groupby("parental level of education", as_index=False)[
    ["math score", "reading score", "writing score"]
].mean()

# Grouped bar chart of the three subject means per education level.
px.bar(
    df_avg,
    x="parental level of education",
    y=["math score", "reading score", "writing score"],
    barmode="group",
    title="Average Scores by Parental Education",
).show()


In [18]:
# Student count per race/ethnicity group, broken down by gender.
px.histogram(
    data_frame=df,
    x="race/ethnicity",
    color="gender",
    title="Ethnicity vs Gender Count",
).show()

In [19]:
# Individual math scores as a strip plot, one strip per race/ethnicity group.
px.strip(
    data_frame=df,
    x="race/ethnicity",
    y="math score",
    color="race/ethnicity",
    title="Math Score Strip Plot by Ethnicity",
).show()


In [21]:
# 3D scatter with one axis per subject score, coloured by gender.
px.scatter_3d(
    data_frame=df,
    x="math score",
    y="reading score",
    z="writing score",
    color="gender",
    title="3D Score Relationship",
).show()

In [22]:
# Pie charts of category shares: gender first, then race/ethnicity.
for category_column, chart_title in (
    ("gender", "Gender Percentage"),
    ("race/ethnicity", "Ethnicity Percentage"),
):
    px.pie(df, names=category_column, title=chart_title).show()

# Hierarchical breakdown: race/ethnicity -> gender -> lunch.
px.sunburst(
    df,
    path=["race/ethnicity", "gender", "lunch"],
    title="Sunburst: Demographics Breakdown",
).show()

# Treemap nesting gender inside parental education level.
px.treemap(
    df,
    path=["parental level of education", "gender"],
    title="Treemap: Parents Education & Gender",
).show()

In [23]:
# One line per student across the three score axes; line colour follows the math score.
px.parallel_coordinates(
    data_frame=df,
    dimensions=["math score", "reading score", "writing score"],
    color=df["math score"],
    title="Parallel Coordinates Score Comparison",
).show()

In [24]:
# Flow diagram across four categorical attributes of each student.
px.parallel_categories(
    data_frame=df,
    dimensions=["gender", "race/ethnicity", "lunch", "test preparation course"],
    title="Parallel Categories: Student Profile Flow",
).show()


In [25]:
# Reshape the per-education-level means (df_avg, built in an earlier cell) to long
# form: one row per (education level, subject) pair with its mean score.
radar_df = pd.melt(
    df_avg,
    id_vars="parental level of education",
    var_name="Subject",
    value_name="Score",
)

# Closed polar line per education level, with subjects as the angular axis.
fig = px.line_polar(
    radar_df,
    r="Score",
    theta="Subject",
    color="parental level of education",
    line_close=True,
    title="Radar Chart: Scores by Parent Education Level",
)
fig.show()

In [26]:
# Funnel of how many students clear successively higher math-score cut-offs.
# Stage labels and counts are both derived from this one list so they cannot
# drift apart; comparisons are strict (score > cut-off), matching the labels.
math_thresholds = [60, 80, 90]
math_scores = df["math score"]

df_funnel = pd.DataFrame({
    "Stage": ["All Students"] + [f"Score > {cutoff}" for cutoff in math_thresholds],
    # Vectorised Series.sum() counts the True values without iterating
    # element-by-element in Python, as the builtin sum() would.
    "Count": [len(df)] + [int((math_scores > cutoff).sum()) for cutoff in math_thresholds],
})

px.funnel(df_funnel, x="Count", y="Stage",
          title="Performance Funnel Based on Math Score").show()