In [2]:
import os

import numpy as np
import pandas as pd
from sqlalchemy import create_engine

# Control Sub-Tree Analysis

## 0. Get necessary data from Database

In [3]:
# Connection URL of the GDE MySQL database (pymysql driver). It can be overridden
# through the GDE_DB_URL environment variable so that real credentials do not have
# to be stored in the notebook; when the variable is unset, the local URL used so
# far is kept as the fallback.
db_connection_str = os.environ.get('GDE_DB_URL', 'mysql+pymysql://root:db@localhost/GDE')
db_connection = create_engine(db_connection_str)

In [4]:
# Course codes of the subjects that belong to the control sub-tree.
control_subtree_subjects = [
    'EM306', 'EM404', 'ES601',
    'EM607', 'ES710', 'ES626',
    'ES827', 'ES728', 'ES828',
]
# One "<code>.*" pattern per subject; joined with "|" below they form a single alternation.
control_subtree_regex = [code + '.*' for code in control_subtree_subjects]

# SubjectName values look like "<code> - <title>", so matching on the code selects the subject row.
subject_name_pattern = "|".join(control_subtree_regex)
subjects_query = "SELECT * FROM Subject WHERE SubjectName REGEXP '" + subject_name_pattern + "'"
control_subtree_df = pd.read_sql(subjects_query, db_connection)
control_subtree_df

Unnamed: 0,ID,SubjectName
0,2093,EM306 - Estática
1,2099,EM404 - Dinâmica
2,2120,EM607 - Vibrações de Sistemas Mecânicos
3,2593,ES601 - Análise Linear de Sistemas
4,2596,ES626 - Modelagem de Dispositivos Eletromecânicos
5,2605,ES710 - Controle de Sistemas Mecânicos
6,2612,ES728 - Controle Avançado de Sistemas
7,2621,ES827 - Robótica Industrial
8,2622,ES828 - Laboratório de Controle de Sistemas


In [5]:
# Comma-separated database IDs of the control sub-tree subjects, used to fill the SQL IN (...) list.
target_subject_ids = ','.join(map(str, control_subtree_df['ID'].to_list()))

# Fetch the three score columns of every professor ranking recorded for those subjects.
scores_query = "SELECT ProfessorID, SubjectID, Coerente, ExplicaBem, Facilidade FROM ProfessorRankings WHERE SubjectID IN ({ids})".format(ids=target_subject_ids)
subjects_scores = pd.read_sql(scores_query, db_connection)
subjects_scores

Unnamed: 0,ProfessorID,SubjectID,Coerente,ExplicaBem,Facilidade
0,35,2093,3.0,2.0,3.0
1,408,2093,3.0,4.0,2.0
2,410,2093,4.0,2.0,4.0
3,405,2093,2.0,2.0,1.0
4,411,2093,4.0,4.0,3.0
...,...,...,...,...,...
71,1332,2622,,,
72,3014,2622,4.0,4.0,2.0
73,3111,2622,,,
74,3288,2622,,,


## 1. Determining Criteria Weights - Fuzzy SWARA

![](../docs/imgs/Fuzzy-SWARA.png)

In [None]:
# Aggregated decision-maker opinions: one (l, m, u) triple per criterion.
decision_makers_aggregated_opinions = {
    'Explanation Score': np.array([0, 0, 0]),
    'Coherence Score': np.array([2/9, 1/4, 2/7]),
    'Easiness Score': np.array([2/7, 1/3, 2/5]),
}

# Rows are the criteria, columns are the three components l, m and u.
decision_makers_aggregated_optinions_df = (
    pd.DataFrame(decision_makers_aggregated_opinions)
    .transpose()
    .rename_axis('Criteria')
    .rename(columns={0: 'l', 1: 'm', 2: 'u'})
)

# Calculate the k matrix: every component is shifted by one.
decision_makers_aggregated_optinions_df = 1 + decision_makers_aggregated_optinions_df
decision_makers_aggregated_optinions_df

## 2. Professor Ranking - Fuzzy TOPSIS

The first analysis represents the 5-star scores as fuzzy numbers and aggregates, for each professor, all the scores of the subjects they teach (in our case, the subjects that are part of the control systems sub-tree).

### 2.1 Cleaning Data

Some professors' history in a specific subject may not have the minimum number of score entries, so those scores appear as NaN in the scores dataframe and the corresponding rows need to be removed.

In [6]:
# Keep only the rows in which no value is missing.
subjects_scores = subjects_scores.dropna(axis=0, how='any')
subjects_scores

Unnamed: 0,ProfessorID,SubjectID,Coerente,ExplicaBem,Facilidade
0,35,2093,3.0,2.0,3.0
1,408,2093,3.0,4.0,2.0
2,410,2093,4.0,2.0,4.0
3,405,2093,2.0,2.0,1.0
4,411,2093,4.0,4.0,3.0
...,...,...,...,...,...
66,3275,2621,2.0,1.0,2.0
67,4131,2621,4.0,3.0,3.0
69,443,2622,3.0,3.0,3.0
70,467,2622,3.0,3.0,3.0


### 2.2 Expanding 5-Star Review to Triangular Fuzzy Numbers

![](../docs/imgs/Linguistic-five-Likert-scale-using-triangular-fuzzy-number.png)

[Reference](https://www.researchgate.net/figure/Linguistic-five-Likert-scale-using-triangular-fuzzy-number_tbl1_365957303)

In [7]:
# Triangular fuzzy number (l, m, u) assigned to each crisp star score from 1 to 5.
crisp_to_fuzzy = {
    1.0: np.array([1, 1, 2]),
    2.0: np.array([1, 2, 3]),
    3.0: np.array([2, 3, 4]),
    4.0: np.array([3, 4, 5]),
    5.0: np.array([4, 5, 5]),
}

In [8]:
# Build a new frame in which every crisp score is swapped for its triangular
# fuzzy number; the crisp frame itself is left untouched.
fuzzy_subjects_scores = subjects_scores.assign(**{
    score_column: subjects_scores[score_column].apply(lambda score: crisp_to_fuzzy[score])
    for score_column in ['Coerente', 'ExplicaBem', 'Facilidade']
})

In [9]:
# Show the scores after the conversion to triangular fuzzy numbers.
fuzzy_subjects_scores

Unnamed: 0,ProfessorID,SubjectID,Coerente,ExplicaBem,Facilidade
0,35,2093,"[2, 3, 4]","[1, 2, 3]","[2, 3, 4]"
1,408,2093,"[2, 3, 4]","[3, 4, 5]","[1, 2, 3]"
2,410,2093,"[3, 4, 5]","[1, 2, 3]","[3, 4, 5]"
3,405,2093,"[1, 2, 3]","[1, 2, 3]","[1, 1, 2]"
4,411,2093,"[3, 4, 5]","[3, 4, 5]","[2, 3, 4]"
...,...,...,...,...,...
66,3275,2621,"[1, 2, 3]","[1, 1, 2]","[1, 2, 3]"
67,4131,2621,"[3, 4, 5]","[2, 3, 4]","[2, 3, 4]"
69,443,2622,"[2, 3, 4]","[2, 3, 4]","[2, 3, 4]"
70,467,2622,"[2, 3, 4]","[2, 3, 4]","[2, 3, 4]"


### 2.3 Aggregate All the Scores for a Professor - Aggregated Judgement Matrix

In [13]:
# Output label -> source score column, listed in the order the labels appear in each record.
criteria_columns = {
    'Easiness Score': 'Facilidade',
    'Coherence Score': 'Coerente',
    'Quality Of Explanation Score': 'ExplicaBem',
}

professors_scores_fuzzy_aggregation = []

for professor_id in fuzzy_subjects_scores['ProfessorID'].unique():
    # All the fuzzified score rows that belong to this professor.
    professor_rows = fuzzy_subjects_scores[fuzzy_subjects_scores['ProfessorID'] == professor_id]
    weight = 1 / len(professor_rows)

    # Component-wise average: sum the (l, m, u) triples and scale by 1/n.
    record = {'Professor ID': professor_id}
    for score_label, source_column in criteria_columns.items():
        record[score_label] = weight * np.sum(np.array(professor_rows[source_column]), axis=0)

    professors_scores_fuzzy_aggregation.append(record)


In [14]:
# One row per professor; each score column holds the aggregated fuzzy number
# computed for that criterion.
professors_scores_fuzzy_aggregation_df = pd.DataFrame(professors_scores_fuzzy_aggregation)
professors_scores_fuzzy_aggregation_df

Unnamed: 0,Professor ID,Easiness Score,Coherence Score,Quality Of Explanation Score
0,35,"[2.0, 3.0, 4.0]","[2.0, 3.0, 4.0]","[1.0, 2.0, 3.0]"
1,408,"[1.0, 2.0, 3.0]","[2.0, 3.0, 4.0]","[2.333333333333333, 3.333333333333333, 4.33333..."
2,410,"[3.0, 4.0, 5.0]","[3.0, 4.0, 5.0]","[1.0, 2.0, 3.0]"
3,405,"[1.0, 1.0, 2.0]","[1.0, 2.0, 3.0]","[1.0, 2.0, 3.0]"
4,411,"[1.5, 2.5, 3.5]","[3.0, 4.0, 5.0]","[3.0, 4.0, 5.0]"
5,412,"[3.0, 4.0, 5.0]","[3.0, 4.0, 5.0]","[2.0, 3.0, 4.0]"
6,419,"[1.0, 2.0, 3.0]","[3.0, 4.0, 5.0]","[3.0, 4.0, 5.0]"
7,425,"[1.0, 2.0, 3.0]","[2.0, 3.0, 4.0]","[1.5, 2.5, 3.5]"
8,429,"[1.0, 2.0, 3.0]","[1.0, 2.0, 3.0]","[1.0, 2.0, 3.0]"
9,432,"[1.0, 2.0, 3.0]","[2.0, 3.0, 4.0]","[2.0, 3.0, 4.0]"


### 2.4 Normalize Aggregated Judgement Matrix

In [18]:
# Scale every criterion by the largest upper bound (third component) found
# among all professors. The aggregated frame is overwritten column by column.
for criterion in ('Coherence Score', 'Easiness Score', 'Quality Of Explanation Score'):
    criterion_scores = professors_scores_fuzzy_aggregation_df[criterion]
    u_max = np.max(np.stack(criterion_scores)[:, 2])
    professors_scores_fuzzy_aggregation_df[criterion] = criterion_scores / u_max

In [20]:
# Show the judgement matrix after the normalization.
professors_scores_fuzzy_aggregation_df

Unnamed: 0,Professor ID,Easiness Score,Coherence Score,Quality Of Explanation Score
0,35,"[0.4, 0.6, 0.8]","[0.4, 0.6, 0.8]","[0.2, 0.4, 0.6]"
1,408,"[0.2, 0.4, 0.6]","[0.4, 0.6, 0.8]","[0.4666666666666666, 0.6666666666666666, 0.866..."
2,410,"[0.6, 0.8, 1.0]","[0.6, 0.8, 1.0]","[0.2, 0.4, 0.6]"
3,405,"[0.2, 0.2, 0.4]","[0.2, 0.4, 0.6]","[0.2, 0.4, 0.6]"
4,411,"[0.3, 0.5, 0.7]","[0.6, 0.8, 1.0]","[0.6, 0.8, 1.0]"
5,412,"[0.6, 0.8, 1.0]","[0.6, 0.8, 1.0]","[0.4, 0.6, 0.8]"
6,419,"[0.2, 0.4, 0.6]","[0.6, 0.8, 1.0]","[0.6, 0.8, 1.0]"
7,425,"[0.2, 0.4, 0.6]","[0.4, 0.6, 0.8]","[0.3, 0.5, 0.7]"
8,429,"[0.2, 0.4, 0.6]","[0.2, 0.4, 0.6]","[0.2, 0.4, 0.6]"
9,432,"[0.2, 0.4, 0.6]","[0.4, 0.6, 0.8]","[0.4, 0.6, 0.8]"
