In [None]:
import os

import numpy as np
import pandas as pd
from sqlalchemy import create_engine

# Control Sub-Tree Analysis

## 0. Get necessary data from Database

In [None]:
# Connection URL for the GDE MySQL database. It is taken from the GDE_DB_URL
# environment variable when that is set, so real credentials do not have to be
# written into the notebook; otherwise the local-development URL is used.
db_connection_str = os.environ.get('GDE_DB_URL', 'mysql+pymysql://root:db@localhost/GDE')
# Engine object handed to every pd.read_sql call in this notebook.
db_connection = create_engine(db_connection_str)

In [None]:
# Codes of the subjects that belong to the control sub-tree.
control_subtree_subjects = [
    'EM306', 'EM404', 'ES601', 'EM607', 'ES710',
    'ES626', 'ES827', 'ES728', 'ES828',
]
# One "<code>.*" alternative per subject code.
control_subtree_regex = ['{}.*'.format(subject_codename) for subject_codename in control_subtree_subjects]

# The alternatives are joined with "|" so a single REGEXP condition selects
# every Subject row whose SubjectName matches any of the codes.
subject_name_pattern = "|".join(control_subtree_regex)
control_subtree_query = "SELECT * FROM Subject WHERE SubjectName REGEXP '" + subject_name_pattern + "'"
control_subtree_df = pd.read_sql(control_subtree_query, db_connection)
control_subtree_df

In [None]:
# Comma-separated list of the database IDs of the selected subjects, used to
# fill the IN (...) clause below.
target_subject_ids = ','.join(map(str, control_subtree_df['ID'].to_list()))

# Fetch the three score columns of every ranking row that refers to one of
# those subjects.
subjects_scores_query = (
    "SELECT ProfessorID, SubjectID, Coerente, ExplicaBem, Facilidade "
    "FROM ProfessorRankings WHERE SubjectID IN ({ids})"
).format(ids=target_subject_ids)
subjects_scores = pd.read_sql(subjects_scores_query, db_connection)
subjects_scores

## 1. Determining Criteria Weights - Fuzzy SWARA

![](../docs/imgs/Fuzzy-SWARA.png)

In [98]:
# Aggregated decision-maker opinion for each criterion, stored as a
# three-component array (presumably the l/m/u of a triangular fuzzy number,
# matching the column labels assigned below).
decision_makers_aggregated_opinions = {
    'Explanation Score': np.array([0, 0, 0]),
    'Coherence Score': np.array([2/9, 1/4, 2/7]),
    'Easiness Score': np.array([2/7, 1/3, 2/5]),
}

# One row per criterion, with the three components in columns l, m and u.
decision_makers_aggregated_optinions_df = (
    pd.DataFrame(decision_makers_aggregated_opinions)
    .transpose()
    .rename_axis('Criteria')
    .rename(columns={0: 'l', 1: 'm', 2: 'u'})
)
decision_makers_aggregated_optinions_df

Unnamed: 0_level_0,l,m,u
Criteria,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Explanation Score,0.0,0.0,0.0
Coherence Score,0.222222,0.25,0.285714
Easiness Score,0.285714,0.333333,0.4


## 2. Subject Ranking - Fuzzy TOPSIS

The first analysis uses fuzzy numbers to represent the 5-star scores, aggregates all the professors' scores for each subject, and compares the result with other related subjects (in our case, the subjects that are part of the control systems subtree).

This analysis might give us deeper insight into which subjects students find the most challenging (be it due to the subject itself or to the professors' teaching methods), and identify benchmark subjects that can help improve other subjects' or professors' techniques.

### 2.1 Cleaning Data

Some professors' history in a specific subject may not have the minimum number of entries for scores, so those scores appear as NaN in the scores dataframe and the corresponding rows need to be removed.

In [None]:
# Discard every row (axis=0) that has a missing value in any column, so only
# complete score entries remain.
subjects_scores = subjects_scores.dropna(axis=0, how='any')
subjects_scores

### 2.2 Expanding 5-Star Review to Triangular Fuzzy Numbers

![](../docs/imgs/Linguistic-five-Likert-scale-using-triangular-fuzzy-number.png)

[Reference](https://www.researchgate.net/figure/Linguistic-five-Likert-scale-using-triangular-fuzzy-number_tbl1_365957303)

In [None]:
# Lookup table from a crisp star rating (1.0 .. 5.0) to its three-component
# fuzzy number; the triples are listed in rating order, starting at one star.
crisp_to_fuzzy = {
    float(stars): np.array(triangle)
    for stars, triangle in enumerate(
        [(1, 1, 2), (1, 2, 3), (2, 3, 4), (3, 4, 5), (4, 5, 5)],
        start=1,
    )
}

In [None]:
# Build a new frame in which each of the three score columns holds the fuzzy
# number looked up in crisp_to_fuzzy instead of the crisp rating; all other
# columns are carried over unchanged and subjects_scores itself is not modified.
fuzzy_subjects_scores = subjects_scores.assign(**{
    col: subjects_scores[col].apply(lambda val: crisp_to_fuzzy[val])
    for col in ['Coerente', 'ExplicaBem', 'Facilidade']
})

In [None]:
# Display the scores frame whose rating columns now hold fuzzy-number arrays.
fuzzy_subjects_scores

### 2.3 Aggregate All the Scores for a Subject

In [51]:
def _mean_fuzzy_number(fuzzy_scores):
    """Return the element-wise mean of a Series of fuzzy-number arrays.

    Parameters
    ----------
    fuzzy_scores : pd.Series
        Non-empty Series whose values are NumPy arrays of equal length.

    Returns
    -------
    np.ndarray
        ``(1 / n) * sum`` of the arrays, where ``n`` is the number of entries.
    """
    return (1 / len(fuzzy_scores)) * np.sum(np.array(fuzzy_scores), axis=0)


# Work on a copy so control_subtree_df keeps its original columns.
fuzzy_control_subtree_df = control_subtree_df.copy()
easy_values = []
coherent_values = []
explanation_values = []

# For every subject, average the fuzzy scores of all its rows, one criterion
# at a time. The lists end up in the same order as fuzzy_control_subtree_df['ID'].
for subject_id in fuzzy_control_subtree_df['ID']:
    subject_fuzzy_scores = fuzzy_subjects_scores.loc[fuzzy_subjects_scores['SubjectID'] == subject_id]

    n_scores = len(subject_fuzzy_scores)
    if n_scores == 0:
        # Without this guard the averaging below would fail with a bare
        # ZeroDivisionError that does not say which subject is affected.
        raise ValueError(
            "Subject ID {} has no scores to aggregate; remove it from the "
            "subject list or relax the data cleaning".format(subject_id)
        )

    easy_values.append(_mean_fuzzy_number(subject_fuzzy_scores['Facilidade']))
    coherent_values.append(_mean_fuzzy_number(subject_fuzzy_scores['Coerente']))
    explanation_values.append(_mean_fuzzy_number(subject_fuzzy_scores['ExplicaBem']))


In [55]:
# Attach the aggregated fuzzy scores to the subject frame, one new column per
# criterion, in the order Easiness, Coherence, Explanation.
aggregated_columns = (
    ('Easiness Score', easy_values),
    ('Coherence Score', coherent_values),
    ('Explanation Score', explanation_values),
)
for column_name, column_values in aggregated_columns:
    fuzzy_control_subtree_df[column_name] = column_values

In [56]:
# Display the subject frame with the three aggregated score columns added.
fuzzy_control_subtree_df

Unnamed: 0,ID,SubjectName,Easiness Score,Coherence Score,Explanation Score
0,2093,EM306 - Estática,"[1.526315789473684, 2.4210526315789473, 3.4210...","[2.2105263157894735, 3.2105263157894735, 4.210...","[2.0, 3.0, 4.0]"
1,2099,EM404 - Dinâmica,"[1.0909090909090908, 2.090909090909091, 3.0909...","[2.1818181818181817, 3.181818181818182, 4.1818...","[1.8181818181818183, 2.7272727272727275, 3.727..."
2,2120,EM607 - Vibrações de Sistemas Mecânicos,"[1.5, 2.375, 3.375]","[2.375, 3.375, 4.375]","[1.875, 2.875, 3.875]"
3,2593,ES601 - Análise Linear de Sistemas,"[1.3333333333333333, 2.0, 3.0]","[1.6666666666666665, 2.6666666666666665, 3.666...","[1.0, 2.0, 3.0]"
4,2596,ES626 - Modelagem de Dispositivos Eletromecânicos,"[1.75, 2.5, 3.5]","[1.75, 2.5, 3.5]","[1.5, 2.0, 3.0]"
5,2605,ES710 - Controle de Sistemas Mecânicos,"[1.5714285714285714, 2.4285714285714284, 3.428...","[2.4285714285714284, 3.4285714285714284, 4.428...","[2.2857142857142856, 3.2857142857142856, 4.285..."
6,2612,ES728 - Controle Avançado de Sistemas,"[1.25, 1.75, 2.75]","[1.25, 2.25, 3.25]","[1.5, 2.5, 3.5]"
7,2621,ES827 - Robótica Industrial,"[2.0, 3.0, 4.0]","[2.0, 3.0, 4.0]","[1.25, 1.75, 2.75]"
8,2622,ES828 - Laboratório de Controle de Sistemas,"[1.6666666666666665, 2.6666666666666665, 3.666...","[2.333333333333333, 3.333333333333333, 4.33333...","[2.333333333333333, 3.333333333333333, 4.33333..."


### 2.4 Normalize Aggregate Judgement Matrix

In [75]:
# Normalise each criterion column: divide every fuzzy number by the largest
# third component (index 2) found among the subjects for that criterion.
for criteria in ('Coherence Score', 'Easiness Score', 'Explanation Score'):
    criterion_scores = fuzzy_control_subtree_df[criteria]
    # Rows are subjects, columns are the components of the fuzzy number.
    scores_matrix = np.stack(criterion_scores)
    u_max = scores_matrix[:, 2].max()
    fuzzy_control_subtree_df[criteria] = criterion_scores / u_max

In [76]:
# Display the subject frame after the score columns were normalised.
fuzzy_control_subtree_df

Unnamed: 0,ID,SubjectName,Easiness Score,Coherence Score,Explanation Score
0,2093,EM306 - Estática,"[0.381578947368421, 0.6052631578947368, 0.8552...","[0.499151103565365, 0.7249575551782683, 0.9507...","[0.46153846153846156, 0.6923076923076924, 0.92..."
1,2099,EM404 - Dinâmica,"[0.2727272727272727, 0.5227272727272727, 0.772...","[0.49266862170087977, 0.7184750733137831, 0.94...","[0.41958041958041964, 0.6293706293706295, 0.86..."
2,2120,EM607 - Vibrações de Sistemas Mecânicos,"[0.375, 0.59375, 0.84375]","[0.5362903225806452, 0.7620967741935485, 0.987...","[0.4326923076923077, 0.6634615384615385, 0.894..."
3,2593,ES601 - Análise Linear de Sistemas,"[0.3333333333333333, 0.5, 0.75]","[0.3763440860215054, 0.6021505376344086, 0.827...","[0.23076923076923078, 0.46153846153846156, 0.6..."
4,2596,ES626 - Modelagem de Dispositivos Eletromecânicos,"[0.4375, 0.625, 0.875]","[0.3951612903225807, 0.5645161290322581, 0.790...","[0.3461538461538462, 0.46153846153846156, 0.69..."
5,2605,ES710 - Controle de Sistemas Mecânicos,"[0.39285714285714285, 0.6071428571428571, 0.85...","[0.5483870967741936, 0.7741935483870969, 1.0]","[0.5274725274725275, 0.7582417582417582, 0.989..."
6,2612,ES728 - Controle Avançado de Sistemas,"[0.3125, 0.4375, 0.6875]","[0.28225806451612906, 0.5080645161290324, 0.73...","[0.3461538461538462, 0.576923076923077, 0.8076..."
7,2621,ES827 - Robótica Industrial,"[0.5, 0.75, 1.0]","[0.4516129032258065, 0.6774193548387097, 0.903...","[0.2884615384615385, 0.40384615384615385, 0.63..."
8,2622,ES828 - Laboratório de Controle de Sistemas,"[0.41666666666666663, 0.6666666666666666, 0.91...","[0.5268817204301075, 0.7526881720430108, 0.978...","[0.5384615384615384, 0.7692307692307692, 1.0]"
