# Data Visualisation of the Students Performance Dataset

## Importing data and checking it

In [4]:
import os

import pandas as pd

# Path to the dataset. Set the STUDENTS_PERFORMANCE_CSV environment variable to
# point at your own copy of the CSV; when it is unset, the original local path
# below is used so existing setups keep working unchanged.
dataset_path = os.environ.get(
    'STUDENTS_PERFORMANCE_CSV',
    'C:/Users/simen/Prosjekter/dataVisualizationPython/StudentsPerformance.csv',
)

# Load the dataset as a Pandas DataFrame
data = pd.read_csv(dataset_path)

# Display the first five rows to ensure that the data is loaded correctly
data.head()


Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
3,male,group A,associate's degree,free/reduced,none,47,57,44
4,male,group C,some college,standard,none,76,78,75


In [5]:
# Count the missing (NaN/None) entries in each column
missing_values = data.isna().sum()
missing_values

gender                         0
race/ethnicity                 0
parental level of education    0
lunch                          0
test preparation course        0
math score                     0
reading score                  0
writing score                  0
dtype: int64

In [6]:
# Show the dtype pandas inferred for each column when reading the CSV
data.dtypes

gender                         object
race/ethnicity                 object
parental level of education    object
lunch                          object
test preparation course        object
math score                      int64
reading score                   int64
writing score                   int64
dtype: object

In [7]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
data.describe()

Unnamed: 0,math score,reading score,writing score
count,1000.0,1000.0,1000.0
mean,66.089,69.169,68.054
std,15.16308,14.600192,15.195657
min,0.0,17.0,10.0
25%,57.0,59.0,57.75
50%,66.0,70.0,69.0
75%,77.0,79.0,79.0
max,100.0,100.0,100.0


In [8]:
# Number of rows per gender
data.value_counts(subset='gender')

gender
female    518
male      482
dtype: int64

In [9]:
# Number of rows per race/ethnicity group
data.value_counts(subset='race/ethnicity')

race/ethnicity
group C    319
group D    262
group B    190
group E    140
group A     89
dtype: int64

In [10]:
# Number of rows per parental level of education
data.value_counts(subset='parental level of education')

parental level of education
some college          226
associate's degree    222
high school           196
some high school      179
bachelor's degree     118
master's degree        59
dtype: int64

In [11]:
# Number of rows per lunch type
data.value_counts(subset='lunch')

lunch
standard        645
free/reduced    355
dtype: int64

In [12]:
# Number of rows per test preparation course status
data.value_counts(subset='test preparation course')

test preparation course
none         642
completed    358
dtype: int64

In [13]:
# Flag rows that repeat an earlier row in every column, then count them
duplicate_mask = data.duplicated()
duplicates = duplicate_mask.sum()
print(f"Number of duplicates: {duplicates}")

Number of duplicates: 0


## Interactive Dashboard

In [14]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.express as px

# Create the Dash application
app = dash.Dash(__name__)

# Dropdown for choosing which score column the graphs display
subject_dropdown = dcc.Dropdown(
    id='subject-dropdown',
    options=[
        dict(label=option_label, value=score_column)
        for option_label, score_column in (
            ('Math Score', 'math score'),
            ('Reading Score', 'reading score'),
            ('Writing Score', 'writing score'),
        )
    ],
    value='math score',  # default selection
)

# Page structure: heading, subject selector, then one tab per comparison
app.layout = html.Div(children=[
    html.H1("Student Performance Dashboard"),

    # Subject selector placed above the tabs
    html.Div(children=[
        html.Label("Select Subject:"),
        subject_dropdown,
    ]),

    # Every tab wraps a single graph, identified by its component id
    dcc.Tabs([
        dcc.Tab(label=tab_label, children=[dcc.Graph(id=graph_id)])
        for tab_label, graph_id in (
            ('Gender Differences', 'gender-differences-graph'),
            ('Lunch Impact', 'lunch-impact-graph'),
            ('Parental Education Level', 'parental-education-graph'),
            ('Test Preparation Course', 'test-preparation-graph'),
        )
    ]),
])

# Callback: rebuild the four tab figures whenever the selected subject changes
@app.callback(
    [Output('gender-differences-graph', 'figure'),
     Output('lunch-impact-graph', 'figure'),
     Output('parental-education-graph', 'figure'),
     Output('test-preparation-graph', 'figure')],
    [Input('subject-dropdown', 'value')])
def update_graphs(selected_subject):
    """Build the figure for every dashboard tab from the chosen score column.

    Parameters
    ----------
    selected_subject : str or None
        Current value of the 'subject-dropdown' component, used as the column
        of ``data`` to plot. It is ``None`` when the user clears the dropdown.

    Returns
    -------
    tuple
        ``(gender_fig, lunch_fig, parental_education_fig, test_preparation_fig)``,
        in the same order as the callback's declared outputs.

    Raises
    ------
    dash.exceptions.PreventUpdate
        When no subject is selected, so the graphs keep their current figures.
    """
    # A dcc.Dropdown can be cleared by the user, which delivers None here.
    # With no column to plot, skip the update instead of building figures.
    if not selected_subject:
        raise dash.exceptions.PreventUpdate

    # Score distributions, with bars grouped side by side per category
    gender_fig = px.histogram(data, x=selected_subject, color='gender', barmode='group', title="Gender Differences")
    lunch_fig = px.histogram(data, x=selected_subject, color='lunch', barmode='group', title="Lunch Impact")

    # Score spread per category shown as box plots
    parental_education_fig = px.box(data, x='parental level of education', y=selected_subject, title="Parental Education Level")
    test_preparation_fig = px.box(data, x='test preparation course', y=selected_subject, title="Test Preparation Course")
    return gender_fig, lunch_fig, parental_education_fig, test_preparation_fig

# Run the app's development server when this code executes as the main module
if __name__ == '__main__':
    # `run_server` is the legacy entry point and is no longer available in
    # recent Dash releases (NOTE(review): believed removed in Dash 3 - confirm
    # against the installed version). Prefer `run` when the app provides it and
    # only fall back to `run_server` on older releases that lack `run`.
    start_server = getattr(app, 'run', None) or app.run_server
    start_server(debug=True)
