In [None]:
from sqlalchemy import create_engine
import pandas as pd
import numpy as np

# Control Sub-Tree Analysis

## 0. Get necessary data from Database

In [None]:
db_connection_str = 'mysql+pymysql://root:db@localhost/GDE'
db_connection = create_engine(db_connection_str)

In [None]:
control_subtree_subjects = ['EM306', 'EM404', 'ES601', 'EM607', 'ES710', 'ES626', 'ES827', 'ES728', 'ES828']
control_subtree_regex = [subject_codename + '.*' for subject_codename in control_subtree_subjects]

control_subtree_df = pd.read_sql("SELECT * FROM Subject WHERE SubjectName REGEXP '"+"|".join(control_subtree_regex) + "'", db_connection)
control_subtree_df

In [None]:
target_subject_ids = ','.join(str(subject_db_id) for subject_db_id in control_subtree_df['ID'].to_list())
subjects_scores = pd.read_sql("SELECT ProfessorID, SubjectID, Coerente, ExplicaBem, Facilidade FROM ProfessorRankings WHERE SubjectID IN ({ids})".format(ids = target_subject_ids), db_connection)
subjects_scores

## 1. Determining Criteria Weights - Fuzzy SWARA

![](../docs/imgs/Fuzzy-SWARA.png)

In [None]:
decision_makers_aggregated_opinions = {'Quality Of Explanation Score' : np.array([0,0,0]), 'Coherence Score' : np.array([2/9, 1/4, 2/7]), 'Easiness Score' : np.array([2/7, 1/3, 2/5])}
decision_makers_aggregated_optinions_df = pd.DataFrame(decision_makers_aggregated_opinions).transpose()
decision_makers_aggregated_optinions_df = decision_makers_aggregated_optinions_df.rename_axis('Criteria').rename(columns={0:'l',1: 'm',2: 'u'})

# Calculate the k matrix
decision_makers_aggregated_optinions_df = decision_makers_aggregated_optinions_df+1
decision_makers_aggregated_optinions_df

# Calculate q Matrix
l = decision_makers_aggregated_optinions_df.columns.get_loc('l')
m = decision_makers_aggregated_optinions_df.columns.get_loc('m')
u = decision_makers_aggregated_optinions_df.columns.get_loc('u')

for criteria_index in range(decision_makers_aggregated_optinions_df.shape[0]):
    decision_makers_aggregated_optinions_df.iloc[criteria_index, l] = 1 if criteria_index == 0 else decision_makers_aggregated_optinions_df.iloc[criteria_index - 1, l] / decision_makers_aggregated_optinions_df.iloc[criteria_index, u] 
    decision_makers_aggregated_optinions_df.iloc[criteria_index, m] = 1 if criteria_index == 0 else decision_makers_aggregated_optinions_df.iloc[criteria_index - 1, m] / decision_makers_aggregated_optinions_df.iloc[criteria_index, m] 
    decision_makers_aggregated_optinions_df.iloc[criteria_index, u] = 1 if criteria_index == 0 else decision_makers_aggregated_optinions_df.iloc[criteria_index - 1, u] / decision_makers_aggregated_optinions_df.iloc[criteria_index, l] 

# Calculate weights by normalizing the q matrix
decision_makers_aggregated_optinions_df['l'] = decision_makers_aggregated_optinions_df['l'] / decision_makers_aggregated_optinions_df['l'].sum()
decision_makers_aggregated_optinions_df['m'] = decision_makers_aggregated_optinions_df['m'] / decision_makers_aggregated_optinions_df['m'].sum()
decision_makers_aggregated_optinions_df['u'] = decision_makers_aggregated_optinions_df['u'] / decision_makers_aggregated_optinions_df['u'].sum()

In [None]:
decision_makers_aggregated_optinions_df

## 2. Professor Ranking - Fuzzy Topsis

The first analysis we will do is, through the use of fuzzy numbers to represent the 5-star scores, aggregate all the scores for the subjects the professor teaches (in our case subjects that are part of the control systems subtree). 

### 2.1 Cleaning Data

Some professors history in a specific subject may not have the minimum number of entries for scores, so they appear as NaN on the scores dataframe and need to be removed.

In [None]:
subjects_scores = subjects_scores.dropna()
subjects_scores

### 2.2 Expanding 5-Star Review to Triangular Fuzzy Numbers

![](../docs/imgs/Linguistic-five-Likert-scale-using-triangular-fuzzy-number.png)

[Reference](https://www.researchgate.net/figure/Linguistic-five-Likert-scale-using-triangular-fuzzy-number_tbl1_365957303)

In [None]:
crisp_to_fuzzy = {1.0 : np.array([1, 1, 2]), 2.0: np.array([1, 2, 3]), 3.0: np.array([2, 3, 4]), 4.0:  np.array([3, 4, 5]), 5.0: np.array([4, 5, 5])}

In [None]:
fuzzy_subjects_scores = subjects_scores.copy()

for col in ['Coerente', 'ExplicaBem', 'Facilidade']:
    fuzzy_subjects_scores[col] = subjects_scores[col].apply(lambda val: crisp_to_fuzzy[val])

In [None]:
fuzzy_subjects_scores

### 2.3 Aggregate All the scores for a professor - Aggregated Judgement Matrix

In [None]:
professors_scores_fuzzy_aggregation = []

for professor_id in fuzzy_subjects_scores['ProfessorID'].unique():
    subject_fuzzy_scores = fuzzy_subjects_scores.loc[fuzzy_subjects_scores['ProfessorID'] == professor_id]

    n_scores = len(subject_fuzzy_scores)
    easy_values = ((1/n_scores) * np.sum(np.array(subject_fuzzy_scores['Facilidade']), axis=0))
    coherent_values = ((1/n_scores) * np.sum(np.array(subject_fuzzy_scores['Coerente']), axis=0))
    explanation_values = ((1/n_scores) * np.sum(np.array(subject_fuzzy_scores['ExplicaBem']), axis=0))

    professors_scores_fuzzy_aggregation.append({'Professor ID': professor_id, 'Easiness Score': easy_values, 'Coherence Score': coherent_values, 'Quality Of Explanation Score': explanation_values})


In [None]:
professors_scores_fuzzy_aggregation_df = pd.DataFrame(professors_scores_fuzzy_aggregation)
professors_scores_fuzzy_aggregation_df

### 2.4 Normalize Aggregated Judgement Matrix

In [None]:
normalized_professors_scores_fuzzy_aggregation_df = professors_scores_fuzzy_aggregation_df.copy()
for criteria in ['Coherence Score', 'Easiness Score', 'Quality Of Explanation Score']:
    scores_matrix = np.stack(professors_scores_fuzzy_aggregation_df[criteria])
    u_max = np.max(scores_matrix, axis=0)[2]
    normalized_professors_scores_fuzzy_aggregation_df[criteria] = professors_scores_fuzzy_aggregation_df[criteria] / u_max

In [165]:
normalized_professors_scores_fuzzy_aggregation_df

Unnamed: 0,Professor ID,Easiness Score,Coherence Score,Quality Of Explanation Score
0,35,"[0.4, 0.6, 0.8]","[0.4, 0.6, 0.8]","[0.2, 0.4, 0.6]"
1,408,"[0.2, 0.4, 0.6]","[0.4, 0.6, 0.8]","[0.4666666666666666, 0.6666666666666666, 0.866..."
2,410,"[0.6, 0.8, 1.0]","[0.6, 0.8, 1.0]","[0.2, 0.4, 0.6]"
3,405,"[0.2, 0.2, 0.4]","[0.2, 0.4, 0.6]","[0.2, 0.4, 0.6]"
4,411,"[0.3, 0.5, 0.7]","[0.6, 0.8, 1.0]","[0.6, 0.8, 1.0]"
5,412,"[0.6, 0.8, 1.0]","[0.6, 0.8, 1.0]","[0.4, 0.6, 0.8]"
6,419,"[0.2, 0.4, 0.6]","[0.6, 0.8, 1.0]","[0.6, 0.8, 1.0]"
7,425,"[0.2, 0.4, 0.6]","[0.4, 0.6, 0.8]","[0.3, 0.5, 0.7]"
8,429,"[0.2, 0.4, 0.6]","[0.2, 0.4, 0.6]","[0.2, 0.4, 0.6]"
9,432,"[0.2, 0.4, 0.6]","[0.4, 0.6, 0.8]","[0.4, 0.6, 0.8]"


### 2.5 Weighted and Normalized Aggregated Judgement Matrix

In [None]:
weighted_professors_scores = normalized_professors_scores_fuzzy_aggregation_df.copy()

for criteria in ['Coherence Score', 'Easiness Score', 'Quality Of Explanation Score']:
    fuzzy_weight = np.stack(decision_makers_aggregated_optinions_df.loc[criteria])
    criteria_matrix = np.stack(professors_scores_fuzzy_aggregation_df[criteria])
    weighted_criteria_matrix = np.multiply(criteria_matrix, fuzzy_weight)

    for row_index, value in enumerate(weighted_criteria_matrix):
        weighted_professors_scores.at[row_index, criteria] = value

In [None]:
weighted_professors_scores

### 2.6 Define Classes

#### 2.6.1 Class Definiction

In [None]:
classes_description = {'Easiness Score': [crisp_to_fuzzy[5.0], crisp_to_fuzzy[3.0], crisp_to_fuzzy[1.0]], 'Coherence Score':[crisp_to_fuzzy[5.0], crisp_to_fuzzy[3.0], crisp_to_fuzzy[1.0]], 'Quality Of Explanation Score':[crisp_to_fuzzy[5.0], crisp_to_fuzzy[3.0], crisp_to_fuzzy[1.0]]}
classes_df = pd.DataFrame(classes_description)
classes_df = classes_df.set_axis(['Otimo Aproveitamento', 'Aproveitamento Mediano', 'Baixo Aproveitamento'])
classes_df

#### 2.6.2 Class Normalization

In [None]:
for criteria in ['Coherence Score', 'Easiness Score', 'Quality Of Explanation Score']:
    scores_matrix = np.stack(professors_scores_fuzzy_aggregation_df[criteria])
    u_max = np.max(scores_matrix, axis=0)[2]
    classes_df[criteria] = classes_df[criteria] / u_max

In [None]:
classes_df

#### 2.6.3 Weighted And Normalized Class Matrix

In [None]:
weighted_classes_df = classes_df.copy()
for criteria in ['Coherence Score', 'Easiness Score', 'Quality Of Explanation Score']:
    fuzzy_weight = np.stack(decision_makers_aggregated_optinions_df.loc[criteria])
    class_matrix = np.stack(classes_df[criteria])
    weighted_criteria_matrix = np.multiply(class_matrix, fuzzy_weight)

    for row_index, value in enumerate(weighted_criteria_matrix):
        weighted_classes_df.at[weighted_classes_df.index[row_index], criteria] = value

In [163]:
weighted_classes_df

Unnamed: 0,Easiness Score,Coherence Score,Quality Of Explanation Score
Otimo Aproveitamento,"[0.19047619047619052, 0.25, 0.5031055900621116]","[0.2666666666666667, 0.3333333333333333, 0.279...","[0.34285714285714286, 0.41666666666666663, 0.2..."
Aproveitamento Mediano,"[0.09523809523809526, 0.15, 0.40248447204968935]","[0.13333333333333336, 0.19999999999999998, 0.2...","[0.17142857142857143, 0.24999999999999997, 0.1..."
Baixo Aproveitamento,"[0.04761904761904763, 0.05, 0.20124223602484467]","[0.06666666666666668, 0.06666666666666667, 0.1...","[0.08571428571428572, 0.08333333333333333, 0.0..."


#### 2.6.4 Define Ideal Solutions

**Ideal Solutions For "otimo Aproveitamento" Class**

In [187]:
great_class_ideal_solutions_df = pd.DataFrame(columns=weighted_classes_df.columns)
great_class_ideal_solutions_df.loc[0] = weighted_classes_df.loc['Otimo Aproveitamento']
great_class_ideal_solutions_df.loc[1] = weighted_classes_df.loc['Baixo Aproveitamento']
great_class_ideal_solutions_df = great_class_ideal_solutions_df.set_axis(['A+', 'A-'])
great_class_ideal_solutions_df

Unnamed: 0,Easiness Score,Coherence Score,Quality Of Explanation Score
A+,"[0.19047619047619052, 0.25, 0.5031055900621116]","[0.2666666666666667, 0.3333333333333333, 0.279...","[0.34285714285714286, 0.41666666666666663, 0.2..."
A-,"[0.04761904761904763, 0.05, 0.20124223602484467]","[0.06666666666666668, 0.06666666666666667, 0.1...","[0.08571428571428572, 0.08333333333333333, 0.0..."


**Ideal Solutions For "Aproveitamento Mediano" Class**

In [None]:
medium_class_ideal_solutions_df = pd.DataFrame(columns=weighted_classes_df.columns)
medium_class_ideal_solutions_df.loc[0] = weighted_classes_df.loc['Aproveitamento Mediano']
medium_class_ideal_solutions_df.loc[1] = weighted_classes_df.loc['Baixo Aproveitamento']
medium_class_ideal_solutions_df = medium_class_ideal_solutions_df.set_axis(['A+', 'A-'])
medium_class_ideal_solutions_df

**Ideal Solutions For "Baixo Aproveitamento" Class**

In [164]:
low_class_ideal_solutions_df = pd.DataFrame(columns=weighted_classes_df.columns)
low_class_ideal_solutions_df.loc[0] = weighted_classes_df.loc['Baixo Aproveitamento']
low_class_ideal_solutions_df.loc[1] = weighted_classes_df.loc['Otimo Aproveitamento']
low_class_ideal_solutions_df = low_class_ideal_solutions_df.set_axis(['A+', 'A-'])
low_class_ideal_solutions_df

Unnamed: 0,Easiness Score,Coherence Score,Quality Of Explanation Score
A+,"[0.04761904761904763, 0.05, 0.20124223602484467]","[0.06666666666666668, 0.06666666666666667, 0.1...","[0.08571428571428572, 0.08333333333333333, 0.0..."
A-,"[0.19047619047619052, 0.25, 0.5031055900621116]","[0.2666666666666667, 0.3333333333333333, 0.279...","[0.34285714285714286, 0.41666666666666663, 0.2..."


#### 2.6.5 Calculate Distance from option to Ideal solution and Closness Coeficient

In [192]:
closeness_coeficientes = {}
distance_calculation = lambda ideal_solution, alternative: ( (1/3) * np.sum((alternative-ideal_solution)**2) )**(1/2)
for professor_entry in normalized_professors_scores_fuzzy_aggregation_df.iterrows():
    entry_index = professor_entry[0]
    professor_id = professor_entry[1].iloc[0]
    professor_scores = professor_entry[1].iloc[1:]

    d_plus = []
    # print(professor_scores)
    for criteria in professor_scores.index:
        d_p =  distance_calculation(great_class_ideal_solutions_df.loc['A+', criteria], professor_scores[criteria])
        d_m = distance_calculation(great_class_ideal_solutions_df.loc['A-', criteria], professor_scores[criteria])
        print(criteria)
        print(great_class_ideal_solutions_df.loc['A+', criteria])
        print(professor_scores[criteria])
        print(d_p)
        ...
    break

Easiness Score
[0.19047619 0.25       0.50310559]
[0.4 0.6 0.8]
0.29128824064245856
Coherence Score
[0.26666667 0.33333333 0.27950311]
[0.4 0.6 0.8]
0.34631676539418793
Quality Of Explanation Score
[0.34285714 0.41666667 0.2173913 ]
[0.2 0.4 0.6]
0.23599106976871087
