In [10]:
pip install dash

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [18]:
import os

import dash
import pandas as pd
import plotly.express as px
from dash import dash_table
from dash import dcc, html
from dash.dependencies import Input, Output

In [12]:
# Location of the students CSV. Set the STUDENTS_CSV environment variable to
# load a copy stored elsewhere; when it is unset the original local path is used.
STUDENTS_CSV = os.environ.get(
    "STUDENTS_CSV",
    r"C:\Users\EWURA\Desktop\Vacation Project\archive (3)\students.csv",
)
students_data = pd.read_csv(STUDENTS_CSV)

In [13]:
# Display basic information
print("Dataset Preview:")
print(students_data.head())
print("\nDataset Info:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would append a stray "None" line.
students_data.info()
print("\nMissing Values Summary:")
print(students_data.isnull().sum())

Dataset Preview:
   StudentID               Name  Age                          Email  \
0       3336       David Palmer   19             sean43@hotmail.com   
1       8774       Andrew Roach   23             vbecker@harvey.com   
2       1396  Jonathan Gonzalez   22           hollydavis@gmail.com   
3       6716     Kenneth Morrow   24  ganderson@wheeler-atkins.info   
4       8830   Kaitlyn Martinez   18           hayesdiane@gmail.com   

    Department   GPA  GraduationYear  
0  Mathematics  3.16            2026  
1    Chemistry  3.75            2027  
2      Physics  2.95            2027  
3      Physics  3.55            2029  
4    Chemistry  2.29            2025  

Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 7 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   StudentID       200 non-null    int64  
 1   Name            200 non-null    object 
 2   Age             20

In [14]:
# Clean and preprocess data
# Check for duplicate StudentIDs
duplicates = students_data.duplicated(subset="StudentID").sum()
print(f"Duplicate Student IDs: {duplicates}")

# Ensure GraduationYear and GPA are valid
# Rows whose graduation year is earlier than 2023 are treated as invalid.
invalid_graduation = students_data[students_data['GraduationYear'] < 2023]
print(f"Invalid Graduation Years: {len(invalid_graduation)}")

# Rows whose GPA falls outside the inclusive 0.0-4.0 range are treated as
# invalid; a missing GPA also fails the range test and is counted here.
# NOTE(review): assumes a 4.0 grading scale - confirm for this dataset.
invalid_gpa = students_data[~students_data['GPA'].between(0.0, 4.0)]
print(f"Invalid GPAs: {len(invalid_gpa)}")

# Basic statistics
summary_stats = {
    "Total Students": students_data.shape[0],
    "Average GPA": students_data['GPA'].mean(),
    "Median GPA": students_data['GPA'].median(),
    "GPA Range": (students_data['GPA'].min(), students_data['GPA'].max())
}
print("Summary Statistics:", summary_stats)


Duplicate Student IDs: 0
Invalid Graduation Years: 0
Summary Statistics: {'Total Students': 200, 'Average GPA': 3.027849999999999, 'Median GPA': 3.08, 'GPA Range': (2.0, 3.99)}


In [19]:
# Create the Dash application instance
app = dash.Dash(__name__)


def _table_styles():
    """Return the style keyword arguments shared by both DataTables.

    A fresh dictionary is built on every call so the two tables never share
    the same mutable style objects.
    """
    return {
        'style_table': {'overflowX': 'auto'},
        'style_cell': {'textAlign': 'left', 'padding': '10px'},
        'style_header': {
            'backgroundColor': 'rgb(230, 230, 230)',
            'fontWeight': 'bold',
        },
    }


# Dropdown offering one entry per distinct value of the Department column
department_filter = html.Div(
    children=[
        html.Label("Filter by Department:"),
        dcc.Dropdown(
            id='department-filter',
            options=[
                {'label': name, 'value': name}
                for name in students_data['Department'].unique()
            ],
            value=None,  # nothing is selected initially
            placeholder="Select a department",
        ),
    ],
    style={'width': '50%', 'margin': 'auto'},
)

# Top performers table: created without data, five rows per page
top_performers_section = html.Div(
    children=[
        html.H3("Top Performing Students"),
        dash_table.DataTable(
            id='top-performers',
            columns=[
                {'name': field, 'id': field}
                for field in ('Name', 'Department', 'GPA')
            ],
            page_size=5,
            **_table_styles(),
        ),
    ],
    style={'margin': '20px'},
)

# Full dataset table: ten rows per page with native filtering and sorting
data_table_section = html.Div(
    children=[
        html.H3("Student Data Table"),
        dash_table.DataTable(
            id='data-table',
            columns=[
                {'name': column, 'id': column}
                for column in students_data.columns
            ],
            data=students_data.to_dict('records'),
            page_size=10,
            filter_action='native',
            sort_action='native',
            **_table_styles(),
        ),
    ],
    style={'margin': '20px'},
)

# Page layout, top to bottom: title, filter, three graphs, two tables
app.layout = html.Div([
    html.H1("Student Performance Dashboard", style={'textAlign': 'center'}),
    department_filter,
    dcc.Graph(id='gpa-distribution'),
    dcc.Graph(id='graduation-trends'),
    dcc.Graph(id='avg-gpa-department'),
    top_performers_section,
    data_table_section,
])

# Callback: rebuilds the three figures and the top-performers rows whenever
# the department dropdown value changes
@app.callback(
    [Output('gpa-distribution', 'figure'),
     Output('graduation-trends', 'figure'),
     Output('avg-gpa-department', 'figure'),
     Output('top-performers', 'data')],
    [Input('department-filter', 'value')]
)
def update_graphs_and_table(selected_department):
    """Build the dashboard figures and top-student rows for one department.

    Args:
        selected_department: Current value of the department dropdown. A
            falsy value (such as None) means no filter, so every student
            is included.

    Returns:
        A 4-tuple in the same order as the callback outputs: the GPA
        histogram, the graduation-year bar chart, the average-GPA-by-
        department bar chart, and a list of record dicts (Name, Department,
        GPA) for the five highest-GPA students in the selection.
    """
    # Narrow the dataset only when a department has actually been chosen
    if selected_department:
        in_department = students_data['Department'] == selected_department
        subset = students_data[in_department]
    else:
        subset = students_data

    # Aggregations that feed the two bar charts
    students_per_year = (
        subset.groupby('GraduationYear').size().reset_index(name='Count')
    )
    mean_gpa_per_department = (
        subset.groupby('Department')['GPA'].mean().reset_index()
    )

    # Histogram of individual GPAs
    histogram = px.histogram(
        subset,
        x='GPA',
        nbins=20,
        title='GPA Distribution',
        labels={'GPA': 'GPA'},
        color_discrete_sequence=['blue'],
    )

    # Number of students per graduation year
    year_bars = px.bar(
        students_per_year,
        x='GraduationYear',
        y='Count',
        title='Graduation Year Trends',
    )

    # Mean GPA for each department present in the selection
    department_bars = px.bar(
        mean_gpa_per_department,
        x='Department',
        y='GPA',
        title='Average GPA by Department',
        labels={'GPA': 'Average GPA'},
    )

    # Five highest GPAs, reduced to the columns the table displays
    best_five = subset.nlargest(5, 'GPA')
    top_rows = best_five[['Name', 'Department', 'GPA']].to_dict('records')

    return histogram, year_bars, department_bars, top_rows

# Run the app
# Start the development server through app.run(); the older app.run_server()
# spelling is deprecated and no longer available in current Dash releases.
if __name__ == '__main__':
    app.run(debug=True)
