In [57]:
from sqlalchemy import create_engine
import pandas as pd
import numpy as np

# Control Sub-Tree Analysis

## 0. Get necessary data from Database

In [58]:
db_connection_str = 'mysql+pymysql://root:db@localhost/GDE'
db_connection = create_engine(db_connection_str)

In [59]:
control_subtree_subjects = ['EM306', 'EM404', 'ES601', 'EM607', 'ES710', 'ES626', 'ES827', 'ES728', 'ES828']
control_subtree_regex = [subject_codename + '.*' for subject_codename in control_subtree_subjects]

control_subtree_df = pd.read_sql("SELECT * FROM Subject WHERE SubjectName REGEXP '"+"|".join(control_subtree_regex) + "'", db_connection)
control_subtree_df

Unnamed: 0,ID,SubjectName
0,2093,EM306 - Estática
1,2099,EM404 - Dinâmica
2,2120,EM607 - Vibrações de Sistemas Mecânicos
3,2593,ES601 - Análise Linear de Sistemas
4,2596,ES626 - Modelagem de Dispositivos Eletromecânicos
5,2605,ES710 - Controle de Sistemas Mecânicos
6,2612,ES728 - Controle Avançado de Sistemas
7,2621,ES827 - Robótica Industrial
8,2622,ES828 - Laboratório de Controle de Sistemas


In [60]:
target_subject_ids = ','.join(str(subject_db_id) for subject_db_id in control_subtree_df['ID'].to_list())
subjects_scores = pd.read_sql("SELECT ProfessorID, SubjectID, Coerente, ExplicaBem, Facilidade FROM ProfessorRankings WHERE SubjectID IN ({ids})".format(ids = target_subject_ids), db_connection)
subjects_scores

Unnamed: 0,ProfessorID,SubjectID,Coerente,ExplicaBem,Facilidade
0,35,2093,3.0,2.0,3.0
1,408,2093,3.0,4.0,2.0
2,410,2093,4.0,2.0,4.0
3,405,2093,2.0,2.0,1.0
4,411,2093,4.0,4.0,3.0
...,...,...,...,...,...
71,1332,2622,,,
72,3014,2622,4.0,4.0,2.0
73,3111,2622,,,
74,3288,2622,,,


## 1. Determining Criteria Weights - Fuzzy SWARA

![](../docs/imgs/Fuzzy-SWARA.png)

In [110]:
decision_makers_aggregated_opinions = {'Quality Of Explanation Score' : np.array([0,0,0]), 'Coherence Score' : np.array([2/9, 1/4, 2/7]), 'Easiness Score' : np.array([1/4, 2/7, 1/3])}
decision_makers_aggregated_optinions_df = pd.DataFrame(decision_makers_aggregated_opinions).transpose()
decision_makers_aggregated_optinions_df = decision_makers_aggregated_optinions_df.rename_axis('Criteria').rename(columns={0:'l',1: 'm',2: 'u'})

# Calculate the k matrix
k_matrix = decision_makers_aggregated_optinions_df+1

# Calculate q Matrix
l = k_matrix.columns.get_loc('l')
m = k_matrix.columns.get_loc('m')
u = k_matrix.columns.get_loc('u')

q_matrix = k_matrix.copy()
for criteria_index in range(k_matrix.shape[0]):
    q_matrix.iloc[criteria_index, l] = 1 if criteria_index == 0 else q_matrix.iloc[criteria_index - 1, l] / k_matrix.iloc[criteria_index, u] 
    q_matrix.iloc[criteria_index, m] = 1 if criteria_index == 0 else q_matrix.iloc[criteria_index - 1, m] / k_matrix.iloc[criteria_index, m] 
    q_matrix.iloc[criteria_index, u] = 1 if criteria_index == 0 else q_matrix.iloc[criteria_index - 1, u] / k_matrix.iloc[criteria_index, l] 

# Calculate weights by normalizing the q matrix
decision_makers_aggregated_optinions_df['l'] = q_matrix['l'] / q_matrix['u'].sum()
decision_makers_aggregated_optinions_df['m'] = q_matrix['m'] / q_matrix['m'].sum()
decision_makers_aggregated_optinions_df['u'] = q_matrix['u'] / q_matrix['l'].sum()

                                     l         m         u
Criteria                                                  
Quality Of Explanation Score  0.000000  0.000000  0.000000
Coherence Score               0.222222  0.250000  0.285714
Easiness Score                0.250000  0.285714  0.333333
i 0
u 1
l 1.0
i 1
u 1.0
l 1.2222222222222223
i 2
u 0.8181818181818181
l 1.25


In [111]:
decision_makers_aggregated_optinions_df

Unnamed: 0_level_0,l,m,u
Criteria,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Quality Of Explanation Score,0.404412,0.412844,0.423529
Coherence Score,0.314542,0.330275,0.346524
Easiness Score,0.235907,0.256881,0.277219


In [91]:
decision_makers_aggregated_optinions_df

Unnamed: 0_level_0,l,m,u
Criteria,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Quality Of Explanation Score,0.222727,0.412844,1.901561
Coherence Score,0.173232,0.330275,2.444864
Easiness Score,0.129924,0.256881,4.191195


## 2. Professor Ranking - Fuzzy Topsis

The first analysis we will do is, through the use of fuzzy numbers to represent the 5-star scores, aggregate all the scores for the subjects the professor teaches (in our case subjects that are part of the control systems subtree). 

### 2.1 Cleaning Data

Some professors history in a specific subject may not have the minimum number of entries for scores, so they appear as NaN on the scores dataframe and need to be removed.

In [63]:
subjects_scores = subjects_scores.dropna()
subjects_scores

Unnamed: 0,ProfessorID,SubjectID,Coerente,ExplicaBem,Facilidade
0,35,2093,3.0,2.0,3.0
1,408,2093,3.0,4.0,2.0
2,410,2093,4.0,2.0,4.0
3,405,2093,2.0,2.0,1.0
4,411,2093,4.0,4.0,3.0
...,...,...,...,...,...
66,3275,2621,2.0,1.0,2.0
67,4131,2621,4.0,3.0,3.0
69,443,2622,3.0,3.0,3.0
70,467,2622,3.0,3.0,3.0


### 2.2 Expanding 5-Star Review to Triangular Fuzzy Numbers

![](../docs/imgs/Linguistic-five-Likert-scale-using-triangular-fuzzy-number.png)

[Reference](https://www.researchgate.net/figure/Linguistic-five-Likert-scale-using-triangular-fuzzy-number_tbl1_365957303)

In [64]:
crisp_to_fuzzy = {1.0 : np.array([1, 1, 2]), 2.0: np.array([1, 2, 3]), 3.0: np.array([2, 3, 4]), 4.0:  np.array([3, 4, 5]), 5.0: np.array([4, 5, 5])}

In [65]:
fuzzy_subjects_scores = subjects_scores.copy()

for col in ['Coerente', 'ExplicaBem', 'Facilidade']:
    fuzzy_subjects_scores[col] = subjects_scores[col].apply(lambda val: crisp_to_fuzzy[val])

In [66]:
fuzzy_subjects_scores

Unnamed: 0,ProfessorID,SubjectID,Coerente,ExplicaBem,Facilidade
0,35,2093,"[2, 3, 4]","[1, 2, 3]","[2, 3, 4]"
1,408,2093,"[2, 3, 4]","[3, 4, 5]","[1, 2, 3]"
2,410,2093,"[3, 4, 5]","[1, 2, 3]","[3, 4, 5]"
3,405,2093,"[1, 2, 3]","[1, 2, 3]","[1, 1, 2]"
4,411,2093,"[3, 4, 5]","[3, 4, 5]","[2, 3, 4]"
...,...,...,...,...,...
66,3275,2621,"[1, 2, 3]","[1, 1, 2]","[1, 2, 3]"
67,4131,2621,"[3, 4, 5]","[2, 3, 4]","[2, 3, 4]"
69,443,2622,"[2, 3, 4]","[2, 3, 4]","[2, 3, 4]"
70,467,2622,"[2, 3, 4]","[2, 3, 4]","[2, 3, 4]"


### 2.3 Aggregate All the scores for a professor - Aggregated Judgement Matrix

In [85]:
professors_scores_fuzzy_aggregation = []

for professor_id in fuzzy_subjects_scores['ProfessorID'].unique():
    subject_fuzzy_scores = fuzzy_subjects_scores.loc[fuzzy_subjects_scores['ProfessorID'] == professor_id]

    n_scores = len(subject_fuzzy_scores)
    easy_values = ((1/n_scores) * np.sum(np.array(subject_fuzzy_scores['Facilidade']), axis=0))
    coherent_values = ((1/n_scores) * np.sum(np.array(subject_fuzzy_scores['Coerente']), axis=0))
    explanation_values = ((1/n_scores) * np.sum(np.array(subject_fuzzy_scores['ExplicaBem']), axis=0))

    professors_scores_fuzzy_aggregation.append({'Professor ID': professor_id, 'Easiness Score': easy_values, 'Coherence Score': coherent_values, 'Quality Of Explanation Score': explanation_values})


In [86]:
professors_scores_fuzzy_aggregation_df = pd.DataFrame(professors_scores_fuzzy_aggregation)
professors_scores_fuzzy_aggregation_df

Unnamed: 0,Professor ID,Easiness Score,Coherence Score,Quality Of Explanation Score
0,35,"[2.0, 3.0, 4.0]","[2.0, 3.0, 4.0]","[1.0, 2.0, 3.0]"
1,408,"[1.0, 2.0, 3.0]","[2.0, 3.0, 4.0]","[2.333333333333333, 3.333333333333333, 4.33333..."
2,410,"[3.0, 4.0, 5.0]","[3.0, 4.0, 5.0]","[1.0, 2.0, 3.0]"
3,405,"[1.0, 1.0, 2.0]","[1.0, 2.0, 3.0]","[1.0, 2.0, 3.0]"
4,411,"[1.5, 2.5, 3.5]","[3.0, 4.0, 5.0]","[3.0, 4.0, 5.0]"
5,412,"[3.0, 4.0, 5.0]","[3.0, 4.0, 5.0]","[2.0, 3.0, 4.0]"
6,419,"[1.0, 2.0, 3.0]","[3.0, 4.0, 5.0]","[3.0, 4.0, 5.0]"
7,425,"[1.0, 2.0, 3.0]","[2.0, 3.0, 4.0]","[1.5, 2.5, 3.5]"
8,429,"[1.0, 2.0, 3.0]","[1.0, 2.0, 3.0]","[1.0, 2.0, 3.0]"
9,432,"[1.0, 2.0, 3.0]","[2.0, 3.0, 4.0]","[2.0, 3.0, 4.0]"


### 2.4 Normalize Aggregated Judgement Matrix

In [87]:
normalized_professors_scores_fuzzy_aggregation_df = professors_scores_fuzzy_aggregation_df.copy()
for criteria in ['Coherence Score', 'Easiness Score', 'Quality Of Explanation Score']:
    scores_matrix = np.stack(professors_scores_fuzzy_aggregation_df[criteria])
    u_max = np.max(scores_matrix, axis=0)[2]
    normalized_professors_scores_fuzzy_aggregation_df[criteria] = professors_scores_fuzzy_aggregation_df[criteria] / u_max

In [88]:
normalized_professors_scores_fuzzy_aggregation_df

Unnamed: 0,Professor ID,Easiness Score,Coherence Score,Quality Of Explanation Score
0,35,"[0.4, 0.6, 0.8]","[0.4, 0.6, 0.8]","[0.2, 0.4, 0.6]"
1,408,"[0.2, 0.4, 0.6]","[0.4, 0.6, 0.8]","[0.4666666666666666, 0.6666666666666666, 0.866..."
2,410,"[0.6, 0.8, 1.0]","[0.6, 0.8, 1.0]","[0.2, 0.4, 0.6]"
3,405,"[0.2, 0.2, 0.4]","[0.2, 0.4, 0.6]","[0.2, 0.4, 0.6]"
4,411,"[0.3, 0.5, 0.7]","[0.6, 0.8, 1.0]","[0.6, 0.8, 1.0]"
5,412,"[0.6, 0.8, 1.0]","[0.6, 0.8, 1.0]","[0.4, 0.6, 0.8]"
6,419,"[0.2, 0.4, 0.6]","[0.6, 0.8, 1.0]","[0.6, 0.8, 1.0]"
7,425,"[0.2, 0.4, 0.6]","[0.4, 0.6, 0.8]","[0.3, 0.5, 0.7]"
8,429,"[0.2, 0.4, 0.6]","[0.2, 0.4, 0.6]","[0.2, 0.4, 0.6]"
9,432,"[0.2, 0.4, 0.6]","[0.4, 0.6, 0.8]","[0.4, 0.6, 0.8]"


### 2.5 Weighted and Normalized Aggregated Judgement Matrix

In [71]:
weighted_professors_scores = normalized_professors_scores_fuzzy_aggregation_df.copy()

for criteria in ['Coherence Score', 'Easiness Score', 'Quality Of Explanation Score']:
    fuzzy_weight = np.stack(decision_makers_aggregated_optinions_df.loc[criteria])
    criteria_matrix = np.stack(professors_scores_fuzzy_aggregation_df[criteria])
    weighted_criteria_matrix = np.multiply(criteria_matrix, fuzzy_weight)

    for row_index, value in enumerate(weighted_criteria_matrix):
        weighted_professors_scores.at[row_index, criteria] = value

In [72]:
weighted_professors_scores

Unnamed: 0,Professor ID,Easiness Score,Coherence Score,Quality Of Explanation Score
0,35,"[0.49411764705882355, 0.7706422018348624, 1.96...","[0.6588235294117647, 0.9908256880733943, 1.145...","[0.4235294117647058, 0.8256880733944953, 0.668..."
1,408,"[0.24705882352941178, 0.5137614678899083, 1.47...","[0.6588235294117647, 0.9908256880733943, 1.145...","[0.9882352941176468, 1.3761467889908254, 0.965..."
2,410,"[0.7411764705882353, 1.0275229357798166, 2.454...","[0.9882352941176471, 1.3211009174311925, 1.431...","[0.4235294117647058, 0.8256880733944953, 0.668..."
3,405,"[0.24705882352941178, 0.25688073394495414, 0.9...","[0.32941176470588235, 0.6605504587155963, 0.85...","[0.4235294117647058, 0.8256880733944953, 0.668..."
4,411,"[0.37058823529411766, 0.6422018348623854, 1.71...","[0.9882352941176471, 1.3211009174311925, 1.431...","[1.2705882352941176, 1.6513761467889907, 1.113..."
5,412,"[0.7411764705882353, 1.0275229357798166, 2.454...","[0.9882352941176471, 1.3211009174311925, 1.431...","[0.8470588235294116, 1.238532110091743, 0.8909..."
6,419,"[0.24705882352941178, 0.5137614678899083, 1.47...","[0.9882352941176471, 1.3211009174311925, 1.431...","[1.2705882352941176, 1.6513761467889907, 1.113..."
7,425,"[0.24705882352941178, 0.5137614678899083, 1.47...","[0.6588235294117647, 0.9908256880733943, 1.145...","[0.6352941176470588, 1.0321100917431192, 0.779..."
8,429,"[0.24705882352941178, 0.5137614678899083, 1.47...","[0.32941176470588235, 0.6605504587155963, 0.85...","[0.4235294117647058, 0.8256880733944953, 0.668..."
9,432,"[0.24705882352941178, 0.5137614678899083, 1.47...","[0.6588235294117647, 0.9908256880733943, 1.145...","[0.8470588235294116, 1.238532110091743, 0.8909..."


### 2.6 Define Classes

#### 2.6.1 Class Definiction

In [73]:
classes_description = {'Easiness Score': [crisp_to_fuzzy[5.0], crisp_to_fuzzy[4.0], crisp_to_fuzzy[2.0]], 'Coherence Score':[crisp_to_fuzzy[5.0], crisp_to_fuzzy[4.0], crisp_to_fuzzy[2.0]], 'Quality Of Explanation Score':[crisp_to_fuzzy[5.0], crisp_to_fuzzy[4.0], crisp_to_fuzzy[2.0]]}
classes_df = pd.DataFrame(classes_description)
classes_df = classes_df.set_axis(['Otimo Aproveitamento', 'Aproveitamento Mediano', 'Baixo Aproveitamento'])
classes_df

Unnamed: 0,Easiness Score,Coherence Score,Quality Of Explanation Score
Otimo Aproveitamento,"[4, 5, 5]","[4, 5, 5]","[4, 5, 5]"
Aproveitamento Mediano,"[3, 4, 5]","[3, 4, 5]","[3, 4, 5]"
Baixo Aproveitamento,"[1, 2, 3]","[1, 2, 3]","[1, 2, 3]"


#### 2.6.2 Class Normalization

In [74]:
for criteria in ['Coherence Score', 'Easiness Score', 'Quality Of Explanation Score']:
    scores_matrix = np.stack(professors_scores_fuzzy_aggregation_df[criteria])
    u_max = np.max(scores_matrix, axis=0)[2]
    classes_df[criteria] = classes_df[criteria] / u_max

In [75]:
classes_df

Unnamed: 0,Easiness Score,Coherence Score,Quality Of Explanation Score
Otimo Aproveitamento,"[0.8, 1.0, 1.0]","[0.8, 1.0, 1.0]","[0.8, 1.0, 1.0]"
Aproveitamento Mediano,"[0.6, 0.8, 1.0]","[0.6, 0.8, 1.0]","[0.6, 0.8, 1.0]"
Baixo Aproveitamento,"[0.2, 0.4, 0.6]","[0.2, 0.4, 0.6]","[0.2, 0.4, 0.6]"


#### 2.6.3 Weighted And Normalized Class Matrix

In [76]:
weighted_classes_df = classes_df.copy()
for criteria in ['Coherence Score', 'Easiness Score', 'Quality Of Explanation Score']:
    fuzzy_weight = np.stack(decision_makers_aggregated_optinions_df.loc[criteria])
    class_matrix = np.stack(classes_df[criteria])
    weighted_criteria_matrix = np.multiply(class_matrix, fuzzy_weight)

    for row_index, value in enumerate(weighted_criteria_matrix):
        weighted_classes_df.at[weighted_classes_df.index[row_index], criteria] = value

In [77]:
weighted_classes_df

Unnamed: 0,Easiness Score,Coherence Score,Quality Of Explanation Score
Otimo Aproveitamento,"[0.19764705882352943, 0.25688073394495414, 0.4...","[0.2635294117647059, 0.33027522935779813, 0.28...","[0.3388235294117647, 0.4128440366972477, 0.222..."
Aproveitamento Mediano,"[0.14823529411764705, 0.20550458715596331, 0.4...","[0.1976470588235294, 0.2642201834862385, 0.286...","[0.2541176470588235, 0.3302752293577982, 0.222..."
Baixo Aproveitamento,"[0.049411764705882356, 0.10275229357798166, 0....","[0.06588235294117648, 0.13211009174311925, 0.1...","[0.08470588235294117, 0.1651376146788991, 0.13..."


#### 2.6.4 Define Ideal Solutions

**Ideal Solutions For "otimo Aproveitamento" Class**

In [78]:
great_class_ideal_solutions_df = pd.DataFrame(columns=weighted_classes_df.columns)
great_class_ideal_solutions_df.loc[0] = weighted_classes_df.loc['Otimo Aproveitamento']
great_class_ideal_solutions_df.loc[1] = weighted_classes_df.loc['Baixo Aproveitamento']
great_class_ideal_solutions_df = great_class_ideal_solutions_df.set_axis(['A+', 'A-'])
great_class_ideal_solutions_df

Unnamed: 0,Easiness Score,Coherence Score,Quality Of Explanation Score
A+,"[0.19764705882352943, 0.25688073394495414, 0.4...","[0.2635294117647059, 0.33027522935779813, 0.28...","[0.3388235294117647, 0.4128440366972477, 0.222..."
A-,"[0.049411764705882356, 0.10275229357798166, 0....","[0.06588235294117648, 0.13211009174311925, 0.1...","[0.08470588235294117, 0.1651376146788991, 0.13..."


**Ideal Solutions For "Aproveitamento Mediano" Class**

In [79]:
medium_class_ideal_solutions_df = pd.DataFrame(columns=weighted_classes_df.columns)
medium_class_ideal_solutions_df.loc[0] = weighted_classes_df.loc['Aproveitamento Mediano']
medium_class_ideal_solutions_df.loc[1] = weighted_classes_df.loc['Baixo Aproveitamento']
medium_class_ideal_solutions_df = medium_class_ideal_solutions_df.set_axis(['A+', 'A-'])
medium_class_ideal_solutions_df

Unnamed: 0,Easiness Score,Coherence Score,Quality Of Explanation Score
A+,"[0.14823529411764705, 0.20550458715596331, 0.4...","[0.1976470588235294, 0.2642201834862385, 0.286...","[0.2541176470588235, 0.3302752293577982, 0.222..."
A-,"[0.049411764705882356, 0.10275229357798166, 0....","[0.06588235294117648, 0.13211009174311925, 0.1...","[0.08470588235294117, 0.1651376146788991, 0.13..."


**Ideal Solutions For "Baixo Aproveitamento" Class**

In [80]:
low_class_ideal_solutions_df = pd.DataFrame(columns=weighted_classes_df.columns)
low_class_ideal_solutions_df.loc[0] = weighted_classes_df.loc['Baixo Aproveitamento']
low_class_ideal_solutions_df.loc[1] = weighted_classes_df.loc['Otimo Aproveitamento']
low_class_ideal_solutions_df = low_class_ideal_solutions_df.set_axis(['A+', 'A-'])
low_class_ideal_solutions_df

Unnamed: 0,Easiness Score,Coherence Score,Quality Of Explanation Score
A+,"[0.049411764705882356, 0.10275229357798166, 0....","[0.06588235294117648, 0.13211009174311925, 0.1...","[0.08470588235294117, 0.1651376146788991, 0.13..."
A-,"[0.19764705882352943, 0.25688073394495414, 0.4...","[0.2635294117647059, 0.33027522935779813, 0.28...","[0.3388235294117647, 0.4128440366972477, 0.222..."


#### 2.6.5 Calculate Distance from option to Ideal solution and Closness Coeficient

In [81]:
closeness_coeficientes = {}
ideal_cases = {'Otimo Aproveitamento':great_class_ideal_solutions_df, 'Aproveitamento Mediano': medium_class_ideal_solutions_df, 'Baixo Aproveitamento':low_class_ideal_solutions_df}
distance_calculation = lambda ideal_solution, alternative: ( (1/3) * np.sum((alternative-ideal_solution)**2) )**(1/2)

for ideal_case_name, ideal_case_df in ideal_cases.items():

    closeness_coeficientes[ideal_case_name] = []

    for professor_entry in normalized_professors_scores_fuzzy_aggregation_df.iterrows():
        entry_index = professor_entry[0]
        professor_id = professor_entry[1].iloc[0]
        professor_scores = professor_entry[1].iloc[1:]

        d_plus = []
        d_minus = []

        for criteria in professor_scores.index:
            d_plus.append(distance_calculation(ideal_case_df.loc['A+', criteria], professor_scores[criteria]))
            d_minus.append(distance_calculation(ideal_case_df.loc['A-', criteria], professor_scores[criteria]))

        information = {'ProfessID': professor_id, 'Overall Score':sum(d_minus) / (sum(d_minus) + sum(d_plus))}
        closeness_coeficientes[ideal_case_name].append(information)

In [82]:
pd.DataFrame(closeness_coeficientes['Otimo Aproveitamento']).sort_values(by='Overall Score', ascending=False)
# closeness_coeficientes['Otimo Aproveitamento']

Unnamed: 0,ProfessID,Overall Score
28,467,0.639771
10,458,0.632741
8,429,0.623946
11,1323,0.621148
12,1332,0.620949
20,428,0.612506
7,425,0.612506
9,432,0.609101
22,450,0.609101
21,443,0.608189


In [83]:
pd.DataFrame(closeness_coeficientes['Aproveitamento Mediano']).sort_values(by='Overall Score', ascending=False)

Unnamed: 0,ProfessID,Overall Score
28,467,0.614695
36,3275,0.613771
8,429,0.61169
10,458,0.61077
11,1323,0.605675
3,405,0.602727
12,1332,0.599209
19,418,0.59557
27,469,0.594927
7,425,0.590569


In [84]:
pd.DataFrame(closeness_coeficientes['Baixo Aproveitamento']).sort_values(by='Overall Score', ascending=False)

Unnamed: 0,ProfessID,Overall Score
35,470,0.434704
2,410,0.431058
5,412,0.427412
31,5179,0.423341
16,4435,0.423341
15,3534,0.423341
24,3421,0.420011
4,411,0.418604
26,4130,0.417345
17,4593,0.417345
