In [3]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff

# Read the cleaned CSV and normalise every column header to snake_case
# (lower-cased, spaces replaced by underscores) so the plotting code can
# address columns by names such as "math_score".
file_path = "Cleaned_StudentsPerformance.csv"
data = pd.read_csv(file_path).rename(
    columns=lambda header: header.lower().replace(" ", "_")
)

# Histogram of scores: the three score columns are drawn as overlaid
# distributions on a shared axis.
score_columns = ["math_score", "reading_score", "writing_score"]
fig_hist = px.histogram(
    data,
    x=score_columns,
    barmode="overlay",
    title="Distribution of Math, Reading, and Writing Scores",
).update_layout(bargap=0.1)  # update_layout returns the same figure object
fig_hist.show()

# Box plot of math scores, one box per gender value.
box_options = {
    "x": "gender",
    "y": "math_score",
    "points": "all",  # draw the individual observations alongside each box
    "title": "Math Score Distribution by Gender",
}
fig_box = px.box(data, **box_options)
fig_box.show()

# Correlation heatmap: pairwise correlations between the three score
# columns, rounded to two decimals and printed inside each cell.
score_frame = data[["math_score", "reading_score", "writing_score"]]
corr = score_frame.corr().round(2)
corr_matrix = corr.values  # the same matrix supplies the colours and the cell text
fig_heatmap = ff.create_annotated_heatmap(
    z=corr_matrix,
    x=list(corr.columns),
    y=list(corr.index),
    annotation_text=corr_matrix,
    colorscale="Viridis",
).update_layout(title="Correlation Heatmap of Scores")
fig_heatmap.show()

# Grouped bar chart: mean of each score column per parental education level.
subject_columns = ["math_score", "reading_score", "writing_score"]
scores_by_education = data.groupby("parental_level_of_education")[subject_columns]
# reset_index turns the group key back into a regular column for px.bar.
mean_scores = scores_by_education.mean().reset_index()

fig_bar = px.bar(
    mean_scores,
    x="parental_level_of_education",
    y=subject_columns,
    barmode="group",
    title="Average Scores by Parental Level of Education",
)
fig_bar.update_layout(
    xaxis_title="Parental Level of Education",
    yaxis_title="Average Score",
)
fig_bar.show()

# Scatter plot of reading score against math score with an OLS trendline.
# Markers are blue; writing score and gender are added to the hover box.
# NOTE: Plotly's "ols" trendline is computed with statsmodels, so that
# package must be installed for this cell to run.
fig_scatter = px.scatter(
    data,
    x="math_score",
    y="reading_score",
    trendline="ols",
    # Colour only the trendline red. The previous blanket
    # update_traces(line=...) call also patched the marker trace.
    trendline_color_override="red",
    title="Math Score vs. Reading Score",
    labels={"math_score": "Math Score", "reading_score": "Reading Score"},
    hover_data=["writing_score", "gender"],
    color_discrete_sequence=["Blue"],
)
fig_scatter.show()