<a href="https://www.kaggle.com/code/timothyokoroafor001/student-habits-and-performance-data-analysis?scriptVersionId=247540568" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from pandasql import sqldf
from IPython.display import display
import plotly.express as px
import plotly.graph_objects as go
import plotly.graph_objects as go
from plotly.offline import iplot
from plotly.subplots import make_subplots
from plotly.offline import init_notebook_mode, iplot

import os
# Walk the Kaggle input directory tree and print the full path of every file in it.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        full_path = os.path.join(root, name)
        print(full_path)

/kaggle/input/student-habits-vs-academic-performance/student_habits_performance.csv


In [2]:
# Location of the student habits CSV inside the Kaggle input directory.
filepath = "/kaggle/input/student-habits-vs-academic-performance/student_habits_performance.csv"

# Load the CSV into the DataFrame that every later cell reads from.
df = pd.read_csv(filepath_or_buffer=filepath)

In [3]:
# Initialise Plotly's offline notebook mode (imported from plotly.offline).
# NOTE(review): connected=True presumably loads plotly.js from the CDN instead
# of embedding it in the notebook — confirm against the plotly.offline docs.
init_notebook_mode(connected=True)

In [4]:
# Structural overview: column names, dtypes and non-null counts.
# DataFrame.info() writes its report straight to stdout and returns None, so
# wrapping it in print() would append a stray "None" line to the output.
df.info()

# Summary statistics from describe(), shown as a rich table rather than
# plain printed text.
display(df.describe())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 16 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   student_id                     1000 non-null   object 
 1   age                            1000 non-null   int64  
 2   gender                         1000 non-null   object 
 3   study_hours_per_day            1000 non-null   float64
 4   social_media_hours             1000 non-null   float64
 5   netflix_hours                  1000 non-null   float64
 6   part_time_job                  1000 non-null   object 
 7   attendance_percentage          1000 non-null   float64
 8   sleep_hours                    1000 non-null   float64
 9   diet_quality                   1000 non-null   object 
 10  exercise_frequency             1000 non-null   int64  
 11  parental_education_level       909 non-null    object 
 12  internet_quality               1000 non-null   ob

In [5]:
# Histogram of exam scores across all students, using 30 bins.
score_axis_label = 'Exam Score'

fig = px.histogram(
    data_frame=df,
    x='exam_score',
    nbins=30,
    color_discrete_sequence=['#636EFA'],
    labels={'exam_score': score_axis_label},
    title='Distribution of Exam Scores',
    height=500,
)

# Explicit axis titles plus a small gap between neighbouring bars.
histogram_layout = dict(
    xaxis_title=score_axis_label,
    yaxis_title='Number of Students',
    bargap=0.1,
)
fig.update_layout(**histogram_layout)

fig.show(renderer='iframe')

In [6]:
# Daily study hours against exam score, coloured by gender, with a LOWESS trendline.
study_axis_label = 'Study Hours Per Day'
score_axis_label = 'Exam Score'

scatter_labels = {
    'study_hours_per_day': study_axis_label,
    'exam_score': score_axis_label,
    'gender': 'Gender',
}

fig = px.scatter(
    data_frame=df,
    x='study_hours_per_day',
    y='exam_score',
    color='gender',
    trendline='lowess',
    labels=scatter_labels,
    title='Study Hours vs Exam Score by Gender',
    height=600,
)

fig.update_layout(xaxis_title=study_axis_label, yaxis_title=score_axis_label)

fig.show(renderer='iframe')

In [7]:
# Pairwise correlations, restricted to the numeric columns of the frame.
corr = df.corr(numeric_only=True)
axis_names = corr.columns

# Colour range is pinned to [-1, 1], the full span a correlation can take.
correlation_trace = go.Heatmap(
    x=axis_names,
    y=axis_names,
    z=corr.values,
    zmin=-1,
    zmax=1,
    colorscale='RdBu',
    colorbar={'title': 'Correlation'},
)
fig = go.Figure(data=correlation_trace)

# Square canvas, no grid lines, and a reversed y axis.
heatmap_layout = dict(
    title='Correlation Matrix of Student Habits and Performance',
    width=800,
    height=800,
    xaxis_showgrid=False,
    yaxis_showgrid=False,
    yaxis_autorange='reversed',
)
fig.update_layout(**heatmap_layout)

fig.show(renderer='iframe')

In [8]:
# Box plot of exam scores grouped by whole hours of sleep.
#
# sleep_hours is a continuous float column, so using it directly as the x
# value would draw a separate box for every distinct value. Flooring to whole
# hours (7.0-7.9 -> 7, and so on) yields one box per hour instead. The binned
# values go into a new column of a copy, so `df` itself is left untouched.
sleep_binned = df.assign(sleep_hours_whole=df['sleep_hours'] // 1)

sleep_axis_label = 'Sleep Hours (rounded down to whole hours)'

fig = px.box(
    sleep_binned,
    x='sleep_hours_whole',
    y='exam_score',
    title='Exam Score Distribution by Sleep Hours',
    labels={
        'sleep_hours_whole': sleep_axis_label,
        'exam_score': 'Exam Score'
    },
    height=600
)

# boxmode='group' is omitted: there is no colour grouping in this figure.
fig.update_layout(
    xaxis_title=sleep_axis_label,
    yaxis_title='Exam Score'
)

fig.show(renderer='iframe')

In [9]:
# Box plot of exam scores by parental education level, split by gender.
#
# df.info() reports only 909 non-null values out of 1000 for
# parental_education_level. Rows with a missing level have no category to be
# placed in, so they are given an explicit 'Not reported' category here to
# keep those students visible. This works on a copy; `df` is not modified.
education_filled = df.assign(
    parental_education_level=df['parental_education_level'].fillna('Not reported')
)

fig = px.box(
    education_filled,
    x='parental_education_level',
    y='exam_score',
    color='gender',
    title='Exam Scores by Parental Education Level and Gender',
    labels={
        'parental_education_level': 'Parental Education Level',
        'exam_score': 'Exam Score',
        'gender': 'Gender'
    },
    height=600
)

# boxmode='group' places the per-gender boxes side by side within each level.
fig.update_layout(
    xaxis_title='Parental Education Level',
    yaxis_title='Exam Score',
    boxmode='group'
)

fig.show(renderer='iframe')

In [10]:
# 3D scatter of study hours (x), sleep hours (y) and exam score (z), coloured by gender.
scatter_3d_labels = {
    'study_hours_per_day': 'Study Hours',
    'sleep_hours': 'Sleep Hours',
    'exam_score': 'Exam Score',
    'gender': 'Gender',
}

fig = px.scatter_3d(
    data_frame=df,
    x='study_hours_per_day',
    y='sleep_hours',
    z='exam_score',
    color='gender',
    labels=scatter_3d_labels,
    title='3D Relationship: Study Hours, Sleep Hours, and Exam Scores',
    height=700,
)

# Axis titles for a 3D figure are set on the scene rather than the top-level layout.
scene_axis_titles = {
    'xaxis_title': 'Study Hours/Day',
    'yaxis_title': 'Sleep Hours',
    'zaxis_title': 'Exam Score',
}
fig.update_layout(scene=scene_axis_titles)

fig.show(renderer='iframe')

In [11]:
# Mental health rating against exam score; marker colour encodes gender and
# marker size encodes daily study hours.
rating_axis_label = 'Mental Health Rating'
score_axis_label = 'Exam Score'

bubble_labels = {
    'mental_health_rating': rating_axis_label,
    'exam_score': score_axis_label,
    'gender': 'Gender',
    'study_hours_per_day': 'Study Hours',
}

fig = px.scatter(
    data_frame=df,
    x='mental_health_rating',
    y='exam_score',
    color='gender',
    size='study_hours_per_day',
    labels=bubble_labels,
    title='Mental Health Rating vs Exam Score',
    height=600,
)

fig.update_layout(xaxis_title=rating_axis_label, yaxis_title=score_axis_label)

fig.show(renderer='iframe')

Key Insights:
Study Hours: There's a positive correlation between study hours and exam scores, but with diminishing returns after about 4-5 hours per day.

Sleep: Students who get 7-8 hours of sleep tend to perform better than those who sleep less or more.

Entertainment: Heavy social media and Netflix use (more than 2-3 hours daily) is negatively correlated with exam scores.

Parental Education: Higher parental education levels are associated with better student performance.

Health Factors: Better diet, regular exercise, and good mental health ratings correlate with higher exam scores.

Gender Differences: The analysis reveals some gender-based patterns in study habits and performance.