### Import Libraries

In [29]:
import dash
# from jupyter_dash import JupyterDash
from dash import dcc
from dash import html
from dash.dependencies import Output, Input, State

import numpy as np
import pandas as pd

import plotly.express as px
import plotly.graph_objects as go

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier

### Data:

1. `Gender` - student's gender (nominal: 'Male' or 'Female’)
2. `Nationality`- student's nationality (nominal:’ Kuwait’,’ Lebanon’,’ Egypt’,’ SaudiArabia’,’ USA’,’ Jordan’,’
   Venezuela’,’ Iran’,’ Tunis’,’ Morocco’,’ Syria’,’ Palestine’,’ Iraq’,’ Lybia’)
3. `Place of birth`- student's Place of birth (nominal:’ Kuwait’,’ Lebanon’,’ Egypt’,’ SaudiArabia’,’ USA’,’ Jordan’,’
   Venezuela’,’ Iran’,’ Tunis’,’ Morocco’,’ Syria’,’ Palestine’,’ Iraq’,’ Lybia’)
4. `Educational Stages`- educational level student belongs (nominal: ‘lowerlevel’,’MiddleSchool’,’HighSchool’)
5. `Grade Levels`- grade student belongs (nominal: ‘G-01’, ‘G-02’, ‘G-03’, ‘G-04’, ‘G-05’, ‘G-06’, ‘G-07’, ‘G-08’, ‘G-09’,      ‘G-10’, ‘G-11’, ‘G-12 ‘)
6. `Section ID`- classroom student belongs (nominal:’A’,’B’,’C’)
7. `Topic`- course topic (nominal:’ English’,’ Spanish’, ‘French’,’ Arabic’,’ IT’,’ Math’,’ Chemistry’, ‘Biology’,              ‘Science’,’ History’,’ Quran’,’ Geology’)
8. `Semester`- school year semester (nominal:’ First’,’ Second’)
9. `Parent responsible` for student (nominal:’mom’,’father’)
10. `Raised hand` - how many times the student raises his/her hand in the classroom (numeric: 0-100)
11. `Visited resources`- how many times the student visits a course content(numeric:0-100)
12. `Viewing announcements`-how many times the student checks the new announcements(numeric:0-100)
13. `Discussion groups` - how many times the student participates in discussion groups (numeric: 0-100)
14. `Parent Answering Survey`- parent answered the surveys which are provided from school or not
    (nominal:’Yes’,’No’)
15. `Parent School Satisfaction` - the degree of parent satisfaction with the school (nominal: 'Good', 'Bad')
16. `Student Absence Days`-the number of absence days for each student (nominal: above-7, under-7)

Each student is assigned a `Class` label according to which of three numerical intervals their total grade/mark falls into:
1. Low-Level: interval includes values from 0 to 69,
2. Middle-Level: interval includes values from 70 to 89,
3. High-Level: interval includes values from 90-100.

### Load data and Explore it:

In [30]:
# Load the xAPI-Edu-Data student-performance dataset from the working directory.
df = pd.read_csv('xAPI-Edu-Data.csv')

# Report (rows, columns). A bare `df` placed before this line would not be
# displayed, because a notebook only renders a cell's final expression.
print('Shape of xAPI-Edu-Data data is ',df.shape)

Shape of xAPI-Edu-Data data is  (480, 17)


In [31]:
# Preview the first five rows of the raw frame.
df.head(5)

Unnamed: 0,gender,NationalITy,PlaceofBirth,StageID,GradeID,SectionID,Topic,Semester,Relation,raisedhands,VisITedResources,AnnouncementsView,Discussion,ParentAnsweringSurvey,ParentschoolSatisfaction,StudentAbsenceDays,Class
0,M,KW,KuwaIT,lowerlevel,G-04,A,IT,F,Father,15,16,2,20,Yes,Good,Under-7,M
1,M,KW,KuwaIT,lowerlevel,G-04,A,IT,F,Father,20,20,3,25,Yes,Good,Under-7,M
2,M,KW,KuwaIT,lowerlevel,G-04,A,IT,F,Father,10,7,0,30,No,Bad,Above-7,L
3,M,KW,KuwaIT,lowerlevel,G-04,A,IT,F,Father,30,25,5,35,No,Bad,Above-7,L
4,M,KW,KuwaIT,lowerlevel,G-04,A,IT,F,Father,40,50,12,50,No,Bad,Above-7,M


In [32]:
# Print the column names, non-null counts and dtypes of the raw frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 480 entries, 0 to 479
Data columns (total 17 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   gender                    480 non-null    object
 1   NationalITy               480 non-null    object
 2   PlaceofBirth              480 non-null    object
 3   StageID                   480 non-null    object
 4   GradeID                   480 non-null    object
 5   SectionID                 480 non-null    object
 6   Topic                     480 non-null    object
 7   Semester                  480 non-null    object
 8   Relation                  480 non-null    object
 9   raisedhands               480 non-null    int64 
 10  VisITedResources          480 non-null    int64 
 11  AnnouncementsView         480 non-null    int64 
 12  Discussion                480 non-null    int64 
 13  ParentAnsweringSurvey     480 non-null    object
 14  ParentschoolSatisfaction  

In [33]:
# Drop rows that exactly repeat an earlier row; the first occurrence is kept.
df = df.drop_duplicates(keep='first')

### Class Labels levels:

In [34]:
# Number of students in each performance class.
counts = df['Class'].value_counts()

# Single-colour bar chart: one bar per class label.
Class_count = px.bar(
    x=counts.index,
    y=counts,
    color_discrete_sequence=['rgb(29, 105, 150)'],
)

# Axis titles and figure-wide font; as the cell's last expression this also
# renders the figure.
Class_count.update_layout(
    title_x=0.5,
    title_font_color='rgb(42, 1, 52)',
    xaxis_title="Performance Class",
    yaxis_title="Performance Count",
    font={
        "family": "Courier New, monospace",
        "size": 18,
        "color": "Black",
    },
)

### Male/Female Performance Levels:

In [35]:
# Histogram of students per gender, with one colour per performance class.
male_female_count = px.histogram(
    data_frame=df,
    x='gender',
    color='Class',
    color_discrete_sequence=[
        'rgb(29, 105, 150)',
        'rgb(47, 138, 196)',
        'rgb(136, 204, 238)',
    ],
)

# Axis titles and figure-wide font; as the cell's last expression this also
# renders the figure.
male_female_count.update_layout(
    title_x=0.5,
    title_font_color='rgb(42, 1, 52)',
    xaxis_title="Gender",
    yaxis_title="Performance Count",
    font={
        "family": "Courier New, monospace",
        "size": 18,
        "color": "Black",
    },
)

### Father/Mother Performance Levels:

In [36]:
# Histogram of students per responsible parent (`Relation`), with one colour
# per performance class.
mother_father_count = px.histogram(
    data_frame=df,
    x='Relation',
    color='Class',
    color_discrete_sequence=[
        'rgb(29, 105, 150)',
        'rgb(47, 138, 196)',
        'rgb(136, 204, 238)',
    ],
)

# Axis titles and figure-wide font; as the cell's last expression this also
# renders the figure.
mother_father_count.update_layout(
    title_x=0.5,
    title_font_color='rgb(42, 1, 52)',
    xaxis_title="Relation",
    yaxis_title="Performance Count",
    font={
        "family": "Courier New, monospace",
        "size": 18,
        "color": "Black",
    },
)

### Place-of-birth effect on Performance Levels (low-performing students):

In [37]:
# Count low-performing ('L') students per place of birth.
counts = df[df['Class']=='L']['PlaceofBirth'].value_counts()
place_grades = px.bar(x=counts.index, 
                      y=counts,
                      color_discrete_sequence=['rgb(29, 105, 150)'])

# The bars come from the `PlaceofBirth` column, so the x-axis is labelled
# accordingly (the `NationalITy` column is not used here).
place_grades.update_layout(title_x=0.5,
                           title_font_color='rgb(42, 1, 52)',
                           xaxis_title="Place of Birth",
                           yaxis_title="Performance Count",
                           font=dict(
                               family="Courier New, monospace",
                               size=18,
                               color="Black"
                           ))

### Low-performance counts for each Topic:

In [38]:
# Count low-performing ('L') students per course topic.
is_low = df['Class'] == 'L'
counts = df.loc[is_low, 'Topic'].value_counts()

place_grades = px.bar(
    x=counts.index,
    y=counts,
    color_discrete_sequence=['rgb(29, 105, 150)'],
)

# Axis titles and figure-wide font; as the cell's last expression this also
# renders the figure.
place_grades.update_layout(
    title_x=0.5,
    title_font_color='rgb(42, 1, 52)',
    xaxis_title="Topic",
    yaxis_title="Performance Count",
    font={
        "family": "Courier New, monospace",
        "size": 18,
        "color": "Black",
    },
)

### Creating DashBoard:

In [39]:
# Ordinal encoding of the target: Low -> 0, Medium -> 1, High -> 2.
# Series.map builds an integer column directly; Series.replace on an object
# column depends on implicit downcasting, which recent pandas versions deprecate.
# NOTE: any label other than 'L', 'M' or 'H' would become NaN here.
df['Class_encoded'] = df['Class'].map({'L': 0, 'M': 1, 'H': 2})

In [40]:
# Create the Dash application and attach the external CSS stylesheet.
app = dash.Dash(
    __name__,
    external_stylesheets=['https://codepen.io/chriddyp/pen/bWLwgP.css'],
)

### Low performance figure:

In [41]:
# Number-only indicator: percentage of students whose class is 'L'
# (rows labelled 'L' divided by all non-null class labels, times 100).
Low_Performance = go.Figure(
    go.Indicator(
        mode="number",
        value=((df['Class'] == 'L').sum() / df['Class'].count()) * 100,
        number=dict(prefix="%"),
        title=dict(text="Low Performance Percentage"),
        domain=dict(x=[0.5, 0.5], y=[0.15, 0.35]),
    )
)

### Medium performance figure:

In [42]:
# Number-only indicator: percentage of students whose class is 'M'
# (rows labelled 'M' divided by all non-null class labels, times 100).
Medium_Performance = go.Figure(
    go.Indicator(
        mode="number",
        value=((df['Class'] == 'M').sum() / df['Class'].count()) * 100,
        number=dict(prefix="%"),
        title=dict(text="Medium Performance Percentage"),
        domain=dict(x=[0.5, 0.5], y=[0.15, 0.35]),
    )
)


### High performance figure:

In [43]:
# Number-only indicator: percentage of students whose class is 'H'
# (rows labelled 'H' divided by all non-null class labels, times 100).
High_Performance = go.Figure(
    go.Indicator(
        mode="number",
        value=((df['Class'] == 'H').sum() / df['Class'].count()) * 100,
        number=dict(prefix="%"),
        title=dict(text="High Performance Percentage"),
        domain=dict(x=[0.5, 0.5], y=[0.15, 0.35]),
    )
)


### Preparing Data for Training:

In [44]:
# Encode a copy so that `df` keeps its original string values, which the
# dashboard callbacks filter and count on ('PlaceofBirth', 'Topic').
df_copy = df.copy()
num_col = ['raisedhands', 'VisITedResources', 'AnnouncementsView', 'Discussion']
cat_col = ['gender', 'NationalITy', 'PlaceofBirth', 'StageID', 'GradeID','SectionID', 'Topic', 'Semester', 
           'Relation','ParentAnsweringSurvey', 'ParentschoolSatisfaction', 'StudentAbsenceDays']

# Encode Categorical Columns
# DataFrame.apply calls le.fit_transform once per column, refitting the same
# encoder each time, so afterwards `le` only remembers the last column's classes.
le = LabelEncoder()
df_copy[cat_col] = df_copy[cat_col].apply(le.fit_transform)

# DataFrame.info() prints its report itself and returns None; wrapping it in
# print() would add a stray "None" line after the report.
df_copy.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 478 entries, 0 to 479
Data columns (total 18 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   gender                    478 non-null    int32 
 1   NationalITy               478 non-null    int32 
 2   PlaceofBirth              478 non-null    int32 
 3   StageID                   478 non-null    int32 
 4   GradeID                   478 non-null    int32 
 5   SectionID                 478 non-null    int32 
 6   Topic                     478 non-null    int32 
 7   Semester                  478 non-null    int32 
 8   Relation                  478 non-null    int32 
 9   raisedhands               478 non-null    int64 
 10  VisITedResources          478 non-null    int64 
 11  AnnouncementsView         478 non-null    int64 
 12  Discussion                478 non-null    int64 
 13  ParentAnsweringSurvey     478 non-null    int32 
 14  ParentschoolSatisfaction  

### Split data:

In [45]:
# Features are every column except the target in its raw ('Class') and
# encoded ('Class_encoded') forms; the encoded form is the label.
X = df_copy.drop(columns=['Class', 'Class_encoded'])
y = df_copy['Class_encoded']

# 80/20 split, stratified on the label, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Report the shape of each split.
for split_name, split_X, split_y in (('Train set:', X_train, y_train),
                                     ('Test set:', X_test, y_test)):
    print(split_name, split_X.shape, split_y.shape)

Train set: (382, 16) (382,)
Test set: (96, 16) (96,)


### RandomForestClassifier

In [46]:
# RandomForestClassifier is imported in the notebook's import cell, so every
# dependency is visible in one place and this cell can be re-run on its own.

# Baseline random forest: trees limited to depth 6, fixed seed.
rfc = RandomForestClassifier(max_depth=6, random_state=42)
rfc.fit(X_train, y_train)

# Test-set accuracy: fraction of held-out rows whose predicted class equals y_test.
rfc_accuracy = (rfc.predict(X_test)==y_test).sum() / len(y_test)
rfc_accuracy

0.8020833333333334

#### RandomForestClassifier accuracy

In [47]:
# This cell rebinds `rfc_accuracy` from a float to a Figure. The displayed value
# is therefore recomputed from the fitted model rather than read from the old
# `rfc_accuracy`; otherwise a second run of this cell would multiply a Figure by 100.
rfc_accuracy = go.Figure(go.Indicator(
                        mode = "number",
                        value = ((rfc.predict(X_test) == y_test).sum() / len(y_test)) * 100,
                        number = {'prefix': "%"},
                        title = {"text": "RandomForestClassifier accuracy"},
                        domain = {'row': 0, 'column': 0}))

#### RandomForestClassifier feature importance graph

In [48]:
# Bar chart of the fitted random forest's importance score for each feature column.
rfc_feature_importance = px.bar(
    x=X_train.columns,
    y=rfc.feature_importances_,
    color_discrete_sequence=['rgb(29, 105, 150)'],
)

# Axis titles and figure-wide font; as the cell's last expression this also
# renders the figure.
rfc_feature_importance.update_layout(
    title_x=0.5,
    title_font_color='rgb(42, 1, 52)',
    xaxis_title="Features",
    yaxis_title="Feature Importances",
    font={
        "family": "Courier New, monospace",
        "size": 15,
        "color": "Black",
    },
)

### DecisionTreeClassifier

In [49]:
# DecisionTreeClassifier is imported in the notebook's import cell, so every
# dependency is visible in one place and this cell can be re-run on its own.

# Baseline decision tree: depth limited to 4, fixed seed.
dt = DecisionTreeClassifier(max_depth=4, random_state=42)
dt.fit(X_train, y_train)

# Test-set accuracy: fraction of held-out rows whose predicted class equals y_test.
dt_accuracy = (dt.predict(X_test)==y_test).sum() / len(y_test)
dt_accuracy

0.7708333333333334

#### DecisionTreeClassifier accuracy

In [50]:
# This cell rebinds `dt_accuracy` from a float to a Figure (the layout uses it as
# the initial figure of graph_3). The displayed value is therefore recomputed from
# the fitted model rather than read from the old `dt_accuracy`; otherwise a second
# run of this cell would multiply a Figure by 100.
dt_accuracy = go.Figure(go.Indicator(
                        mode = "number",
                        value = ((dt.predict(X_test) == y_test).sum() / len(y_test)) * 100,
                        number = {'prefix': "%"},
                        title = {"text": "DecisionTreeClassifier accuracy"},
                        domain = {'row': 0, 'column': 0}))

#### DecisionTreeClassifier feature importance graph

In [51]:
# Bar chart of the fitted decision tree's importance score for each feature column.
dt_feature_importance = px.bar(
    x=X_train.columns,
    y=dt.feature_importances_,
    color_discrete_sequence=['rgb(29, 105, 150)'],
)

# Axis titles and figure-wide font; as the cell's last expression this also
# renders the figure.
dt_feature_importance.update_layout(
    title_x=0.5,
    title_font_color='rgb(42, 1, 52)',
    xaxis_title="Features",
    yaxis_title="Feature Importances",
    font={
        "family": "Courier New, monospace",
        "size": 15,
        "color": "Black",
    },
)

### Create App Layout:

In [52]:
def _section_heading(text, padding='5px'):
    """Return the grey, bordered, centred H1 banner shown above a chart.

    Dash style dictionaries take camelCased CSS property names
    (`textAlign`, `fontSize`, `backgroundColor`), so those are used here
    instead of the hyphenated CSS spellings.
    """
    return html.H1(text, style={
        'padding': padding,
        'border': '1px black solid',
        'color': 'rgb(47, 138, 196)',
        'textAlign': 'center',
        'fontSize': 30,
        'backgroundColor': 'lightgray',
    })


def _class_slider(slider_id):
    """Return a three-stop slider (Low/Medium/High) with values 0, 1 and 2.

    `step=None` restricts the handle to the marked positions; the slider
    starts on 0 ('Low').
    """
    return dcc.Slider(id=slider_id,
                      min=0,
                      max=2,
                      step=None,
                      marks={"0": 'Low', "1": 'Medium', "2": 'High'},
                      value=0)


app.layout = html.Div([

    # Title Dashboard
    html.H1(
        "Students' Academic Performance",
        style={
            'color': 'rgb(29, 105, 150)',
            'border': '1px black solid',
            'backgroundColor': 'lightgray',
            # Must be camelCased: an all-lowercase 'fontsize' key is not a
            # style property, so the title size would not be applied.
            'fontSize': 40,
            'textAlign': 'center',
        },
    ),

    # Indicators: share of students in each performance class
    html.Div([
        html.Div([dcc.Graph(figure=Low_Performance)], className='four columns'),
        html.Div([dcc.Graph(figure=Medium_Performance)], className='four columns'),
        html.Div([dcc.Graph(figure=High_Performance)], className='four columns'),
    ], style={'size': '50px', 'color': 'rgb(29, 105, 150)'}),

    # Bar_1 Charts: static figures built earlier in the notebook
    html.Div([
        html.Div([
            _section_heading('Performance levels count'),
            dcc.Graph(figure=Class_count),
        ], className='four columns'),

        html.Div([
            _section_heading('Male/Female Performance levels'),
            dcc.Graph(figure=male_female_count),
        ], className='four columns'),

        html.Div([
            _section_heading('Father/Mother Performance levels', padding='3px'),
            dcc.Graph(figure=mother_father_count),
        ], className='four columns'),
    ]),

    # Bar_2 Charts: figures are filled in by the slider callbacks
    html.Div([
        html.Div([
            _section_heading('Nationality Performance levels'),
            _class_slider('my_slider_1'),
            dcc.Graph(id="graph_1", figure={}),
        ], className='six columns'),

        html.Div([
            _section_heading('Topic Performance levels'),
            _class_slider('my_slider_2'),
            dcc.Graph(id="graph_2", figure={}),
        ], className='six columns'),
    ]),

    # Model Charts: model picker, depth input and Train button on the left,
    # feature importances on the right
    html.Div([
        html.Div([
            dcc.Dropdown(id='models_dropdown', options=[
                {'value': 'RFC', 'label': 'RandomForestClassifier'},
                {'value': 'DT', 'label': 'DecisionTreeClassifier'},
            ], value='DT'),

            dcc.Input(id='max_depth', type='number', placeholder="Max depth",
                      min=1, max=100, step=1,
                      style=dict(width='220px', size='100px')),
            html.Button('Train', id='model_train', n_clicks=0,
                        style=dict(width='220px', size='100px')),

            dcc.Graph(id='graph_3', figure=dt_accuracy),
        ], className='six columns'),

        html.Div([
            _section_heading('Model Feature Importance'),
            dcc.Graph(id='graph_4', figure=dt_feature_importance),
        ], className='six columns'),
    ]),

])

### Nationality Callback:

In [53]:
@app.callback(
    Output('graph_1', 'figure'),
    Input('my_slider_1', 'value'),
)
def update_My_Div(value):
    """Redraw graph_1 for the performance class selected on my_slider_1.

    `value` is the slider position and is compared against `Class_encoded`.
    Returns a bar chart of the number of students in that class for each
    `PlaceofBirth` value.
    """
    in_selected_class = df['Class_encoded'] == value
    counts = df.loc[in_selected_class, 'PlaceofBirth'].value_counts()

    fig = px.bar(
        x=counts.index,
        y=counts,
        color_discrete_sequence=['rgb(29, 105, 150)'],
    )
    fig.update_layout(
        title_x=0.5,
        title_font_color='rgb(42, 1, 52)',
        xaxis_title="PlaceofBirth",
        yaxis_title="Performance Count",
        font={
            "family": "Courier New, monospace",
            "size": 18,
            "color": "Black",
        },
    )
    return fig

### Topic Callback:

In [54]:
@app.callback(
    Output('graph_2', 'figure'),
    Input('my_slider_2', 'value'),
)
def update_My_Div(value):
    """Redraw graph_2 for the performance class selected on my_slider_2.

    `value` is the slider position and is compared against `Class_encoded`.
    Returns a bar chart of the number of students in that class for each
    `Topic` value.
    """
    in_selected_class = df['Class_encoded'] == value
    counts = df.loc[in_selected_class, 'Topic'].value_counts()

    fig = px.bar(
        x=counts.index,
        y=counts,
        color_discrete_sequence=['rgb(29, 105, 150)'],
    )
    fig.update_layout(
        title_x=0.5,
        title_font_color='rgb(42, 1, 52)',
        xaxis_title="Topic",
        yaxis_title="Performance Count",
        font={
            "family": "Courier New, monospace",
            "size": 18,
            "color": "Black",
        },
    )
    return fig

### Model Callback:

In [55]:
def train_evaluate_selected_model(model, depth, X_train, y_train, X_test, y_test):
    """Fit a fresh estimator and score it on the evaluation split.

    Parameters
    ----------
    model : estimator class (not an instance); it is called as
        ``model(max_depth=depth, random_state=42)``.
    depth : value forwarded to the estimator as ``max_depth``.
    X_train, y_train : data passed to ``fit``.
    X_test, y_test : data used to compute the accuracy.

    Returns
    -------
    tuple
        ``(accuracy, feature_importances)``: the fraction of ``X_test``
        predictions equal to ``y_test``, and the fitted estimator's
        ``feature_importances_`` attribute.
    """
    estimator = model(max_depth=depth, random_state=42)
    estimator.fit(X_train, y_train)

    # Element-wise comparison of predictions against the true labels.
    hits = estimator.predict(X_test) == y_test
    accuracy = hits.sum() / len(y_test)

    return accuracy, estimator.feature_importances_

@app.callback(
    
    Output(component_id='graph_3',component_property='figure'),
    Output(component_id='graph_4',component_property='figure'),
    
    Input(component_id='model_train',component_property='n_clicks'),
        
    State(component_id='models_dropdown',component_property='value'),
    State(component_id='max_depth',component_property='value'),
)
def update_My_Div(n_clicks, model_to_train, depth):
    """Retrain the chosen model and refresh the accuracy and importance figures.

    Triggered by the 'Train' button; the dropdown and depth input are read as
    State, so changing them alone does not retrain.

    Parameters
    ----------
    n_clicks : click count of the 'Train' button (only used as the trigger).
    model_to_train : dropdown value, 'DT' or 'RFC'.
    depth : value of the 'max_depth' input, or None when it is empty.

    Returns
    -------
    tuple
        ``(accuracy_fig, model_feature_importance_fig)`` for graph_3 and
        graph_4, or ``dash.no_update`` for both when no known model is selected.
    """
    # Dropdown value -> (estimator class, label shown in the indicator title).
    available_models = {
        'DT': (DecisionTreeClassifier, 'DecisionTreeClassifier'),
        'RFC': (RandomForestClassifier, 'RandomForestClassifier'),
    }

    # The dropdown can be cleared, which yields None. Leave both graphs as they
    # are rather than failing later on unbound `label` / `model_accuracy`.
    if model_to_train not in available_models:
        return dash.no_update, dash.no_update

    # An empty depth input falls back to a depth of 4.
    if depth is None:
        depth = 4

    model_class, label = available_models[model_to_train]
    model_accuracy, model_feature_importances = train_evaluate_selected_model(
        model_class, depth, X_train, y_train, X_test, y_test)

    accuracy_fig = go.Figure(go.Indicator(
                             mode = "number",
                             value = model_accuracy * 100,
                             number = {'prefix': "%"},
                             title = {"text": label + " accuracy"},
                             domain = {'row': 0, 'column': 0}))
    
    model_feature_importance_fig = px.bar(x=X_train.columns, 
                                          y=model_feature_importances,
                                          color_discrete_sequence=['rgb(29, 105, 150)'])
    
    model_feature_importance_fig.update_layout(title_x=0.5,
                                               title_font_color='rgb(42, 1, 52)',
                                               xaxis_title="Features",
                                               yaxis_title="Feature Importances",
                                               font=dict(
                                                   family="Courier New, monospace",
                                                   size=15,
                                                   color="Black"
                                               ))
    
    return accuracy_fig, model_feature_importance_fig

### Run the Server:

In [None]:
# Initiating the server
# Start serving the dashboard with debug mode off. The captured output below
# shows it listening on http://127.0.0.1:8050/ until interrupted (CTRL+C).
app.run_server(debug=False)

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:8050/ (Press CTRL+C to quit)
127.0.0.1 - - [04/Nov/2021 09:16:58] "[37mGET / HTTP/1.1[0m" 200 -
127.0.0.1 - - [04/Nov/2021 09:16:59] "[37mGET /_dash-layout HTTP/1.1[0m" 200 -
127.0.0.1 - - [04/Nov/2021 09:16:59] "[37mGET /_dash-dependencies HTTP/1.1[0m" 200 -
127.0.0.1 - - [04/Nov/2021 09:17:00] "[37mGET /_dash-component-suites/dash/dcc/async-plotlyjs.js HTTP/1.1[0m" 200 -
127.0.0.1 - - [04/Nov/2021 09:17:00] "[37mGET /_dash-component-suites/dash/dcc/async-slider.js HTTP/1.1[0m" 200 -
127.0.0.1 - - [04/Nov/2021 09:17:00] "[37mGET /_dash-component-suites/dash/dcc/async-graph.js HTTP/1.1[0m" 200 -
127.0.0.1 - - [04/Nov/2021 09:17:00] "[37mGET /_dash-component-suites/dash/dcc/async-dropdown.js HTTP/1.1[0m" 200 -
127.0.0.1 - - [04/Nov/2021 09:17:00] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
127.0.0.1 - - [04/Nov/2021 09:17:00] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
127.0.0.1 - - [04/Nov/2021 09:17:00] "[37mPOST /_da