In [None]:
import os

import numpy as np
import pandas as pd
from sqlalchemy import create_engine

# Control Sub-Tree Analysis

## 0. Get necessary data from Database

In [None]:
# The database URL is read from the GDE_DB_URL environment variable so that real
# credentials do not have to be committed with the notebook. The fallback is the
# original local-development URL, kept so existing setups keep working unchanged.
db_connection_str = os.environ.get('GDE_DB_URL', 'mysql+pymysql://root:db@localhost/GDE')
db_connection = create_engine(db_connection_str)

In [None]:
# Subject codes whose rows are fetched from the Subject table.
control_subtree_subjects = [
    'EM306', 'EM404', 'ES601', 'EM607', 'ES710',
    'ES626', 'ES827', 'ES728', 'ES828',
]
# One regex alternative per code: the code followed by any trailing characters.
control_subtree_regex = [f"{code}.*" for code in control_subtree_subjects]

# All alternatives are OR-ed into a single pattern for the REGEXP filter.
subject_name_pattern = "|".join(control_subtree_regex)
control_subtree_query = f"SELECT * FROM Subject WHERE SubjectName REGEXP '{subject_name_pattern}'"
control_subtree_df = pd.read_sql(control_subtree_query, db_connection)
control_subtree_df

In [None]:
# Comma-separated database IDs of the selected subjects, used as the SQL IN (...) list.
target_subject_ids = ','.join(map(str, control_subtree_df['ID'].to_list()))
subjects_scores_query = (
    "SELECT ProfessorID, SubjectID, Coerente, ExplicaBem, Facilidade "
    "FROM ProfessorRankings "
    f"WHERE SubjectID IN ({target_subject_ids})"
)
subjects_scores = pd.read_sql(subjects_scores_query, db_connection)
subjects_scores

## 1. Determining Criteria Weights - Fuzzy SWARA

![](../docs/imgs/Fuzzy-SWARA.png)

In [None]:
# Aggregated fuzzy opinion (l, m, u) per criterion, in the order the recurrence
# below walks them (the first row is the reference criterion).
decision_makers_aggregated_opinions = {
    'Quality Of Explanation Score': np.array([0, 0, 0]),
    'Coherence Score': np.array([2/9, 1/4, 2/7]),
    'Easiness Score': np.array([0, 0, 0]),
}
decision_makers_aggregated_optinions_df = (
    pd.DataFrame(decision_makers_aggregated_opinions)
    .transpose()
    .rename_axis('Criteria')
    .rename(columns={0: 'l', 1: 'm', 2: 'u'})
)

# k matrix: every opinion component shifted by one.
k_matrix = decision_makers_aggregated_optinions_df + 1

# q matrix: the first row is 1; each following row is the previous q row divided
# by the current k row. Fuzzy division pairs the lower bound with the divisor's
# upper bound and vice versa, hence the crossed l/u columns.
l_pos, m_pos, u_pos = (k_matrix.columns.get_loc(bound) for bound in ('l', 'm', 'u'))
# (column written in q, column of k used as divisor)
target_and_divisor = ((l_pos, u_pos), (m_pos, m_pos), (u_pos, l_pos))

q_matrix = k_matrix.copy()
q_matrix.iloc[0, [l_pos, m_pos, u_pos]] = 1
for row in range(1, k_matrix.shape[0]):
    for target, divisor in target_and_divisor:
        q_matrix.iloc[row, target] = q_matrix.iloc[row - 1, target] / k_matrix.iloc[row, divisor]

# Weights: q normalized by its column sums, again with l and u crossed.
l_total, m_total, u_total = (q_matrix[bound].sum() for bound in ('l', 'm', 'u'))
decision_makers_aggregated_optinions_df['l'] = q_matrix['l'] / u_total
decision_makers_aggregated_optinions_df['m'] = q_matrix['m'] / m_total
decision_makers_aggregated_optinions_df['u'] = q_matrix['u'] / l_total

In [None]:
decision_makers_aggregated_optinions_df

## 2. Professor Ranking - Fuzzy TOPSIS

The first analysis represents the 5-star scores as fuzzy numbers and aggregates all the scores for the subjects each professor teaches (in our case, subjects that are part of the control systems subtree).

### 2.1 Cleaning Data

Some professors' history in a specific subject may not have the minimum number of score entries, so their scores appear as NaN in the scores dataframe and those rows need to be removed.

In [None]:
# Keep only the rows in which every column has a value; a NaN in any column drops the row.
subjects_scores = subjects_scores.dropna(axis='index', how='any')
subjects_scores

### 2.2 Expanding 5-Star Review to Triangular Fuzzy Numbers

![](../docs/imgs/Linguistic-five-Likert-scale-using-triangular-fuzzy-number.png)

[Reference](https://www.researchgate.net/figure/Linguistic-five-Likert-scale-using-triangular-fuzzy-number_tbl1_365957303)

In [None]:
# Triangular fuzzy number (l, m, u) assigned to each crisp star score from 1.0 to 5.0.
crisp_to_fuzzy = {
    float(stars): np.array(triangle)
    for stars, triangle in enumerate(
        ([1, 1, 2], [1, 2, 3], [2, 3, 4], [3, 4, 5], [4, 5, 5]),
        start=1,
    )
}

In [None]:
score_columns = ['Coerente', 'ExplicaBem', 'Facilidade']

# Work on a copy so the crisp scores in subjects_scores stay untouched.
fuzzy_subjects_scores = subjects_scores.copy()
for score_column in score_columns:
    # Each crisp score is replaced by its entry in crisp_to_fuzzy; a score that is
    # not a key of that mapping raises KeyError.
    fuzzy_subjects_scores[score_column] = subjects_scores[score_column].apply(crisp_to_fuzzy.__getitem__)

In [None]:
fuzzy_subjects_scores

### 2.3 Aggregate All the scores for a professor - Aggregated Judgement Matrix

In [None]:
def _mean_fuzzy_score(fuzzy_values):
    """Return the component-wise mean of a column of fuzzy-number arrays."""
    return (1 / len(fuzzy_values)) * np.sum(np.array(fuzzy_values), axis=0)


def _rows_of_professor(professor_id):
    """Select the fuzzy score rows that belong to one professor."""
    return fuzzy_subjects_scores[fuzzy_subjects_scores['ProfessorID'] == professor_id]


# One record per professor, in order of first appearance, holding the averaged
# fuzzy score for each criterion.
professors_scores_fuzzy_aggregation = [
    {
        'Professor ID': professor_id,
        'Easiness Score': _mean_fuzzy_score(professor_rows['Facilidade']),
        'Coherence Score': _mean_fuzzy_score(professor_rows['Coerente']),
        'Quality Of Explanation Score': _mean_fuzzy_score(professor_rows['ExplicaBem']),
    }
    for professor_id, professor_rows in (
        (pid, _rows_of_professor(pid))
        for pid in fuzzy_subjects_scores['ProfessorID'].unique()
    )
]



In [None]:
# Aggregated judgement matrix: one row per professor record, one column per record key.
professors_scores_fuzzy_aggregation_df = pd.DataFrame(data=professors_scores_fuzzy_aggregation)
professors_scores_fuzzy_aggregation_df

### 2.4 Normalize Aggregated Judgement Matrix

In [None]:
criteria_to_normalize = ['Coherence Score', 'Easiness Score', 'Quality Of Explanation Score']

normalized_professors_scores_fuzzy_aggregation_df = professors_scores_fuzzy_aggregation_df.copy()
for criteria in criteria_to_normalize:
    # Stack the per-professor (l, m, u) arrays into an (n_professors, 3) matrix.
    scores_matrix = np.stack(professors_scores_fuzzy_aggregation_df[criteria])
    # Largest upper bound (third component) among all professors for this criterion.
    u_max = scores_matrix[:, 2].max()
    normalized_professors_scores_fuzzy_aggregation_df[criteria] = (
        professors_scores_fuzzy_aggregation_df[criteria] / u_max
    )

In [None]:
normalized_professors_scores_fuzzy_aggregation_df

### 2.5 Weighted and Normalized Aggregated Judgement Matrix

In [None]:
# Weighted normalized judgement matrix: every professor's normalized fuzzy score
# is multiplied component-wise by the fuzzy weight of its criterion.
weighted_professors_scores = normalized_professors_scores_fuzzy_aggregation_df.copy()

for criteria in ['Coherence Score', 'Easiness Score', 'Quality Of Explanation Score']:
    # (l, m, u) weight of this criterion as a length-3 array.
    fuzzy_weight = np.stack(decision_makers_aggregated_optinions_df.loc[criteria])
    # (n_professors, 3) matrix of normalized scores; the weight broadcasts over rows.
    criteria_matrix = np.stack(normalized_professors_scores_fuzzy_aggregation_df[criteria])
    weighted_criteria_matrix = np.multiply(criteria_matrix, fuzzy_weight)

    # Write back under each row's real index label. Using the enumerate() position
    # as the label is only correct for a 0..n-1 index; with any other index `.at`
    # would append new rows instead of updating the existing ones.
    for row_label, value in zip(weighted_professors_scores.index, weighted_criteria_matrix):
        weighted_professors_scores.at[row_label, criteria] = value

In [None]:
weighted_professors_scores

### 2.6 Define Classes

#### 2.6.1 Class Definition

In [None]:
class_labels = ['Otimo Aproveitamento', 'Aproveitamento Mediano', 'Baixo Aproveitamento']
# Star level whose fuzzy number serves as the reference profile of each class,
# listed in the same order as class_labels.
class_star_levels = [4.0, 3.0, 2.0]

# Every criterion uses the same three reference profiles.
classes_description = {
    criterion: [crisp_to_fuzzy[stars] for stars in class_star_levels]
    for criterion in ('Easiness Score', 'Coherence Score', 'Quality Of Explanation Score')
}
classes_df = pd.DataFrame(classes_description).set_axis(class_labels)
classes_df

#### 2.6.2 Class Normalization

In [None]:
# Normalize the class profiles with the same per-criterion divisor used for the
# professors' scores (the largest upper bound among the aggregated scores).
# The profiles are rebuilt from the unmodified classes_description on every run,
# so re-executing this cell does not divide classes_df a second time.
raw_classes_df = pd.DataFrame(classes_description, index=classes_df.index)
classes_df = raw_classes_df.copy()
for criteria in ['Coherence Score', 'Easiness Score', 'Quality Of Explanation Score']:
    scores_matrix = np.stack(professors_scores_fuzzy_aggregation_df[criteria])
    u_max = np.max(scores_matrix, axis=0)[2]
    classes_df[criteria] = raw_classes_df[criteria] / u_max

In [None]:
classes_df

#### 2.6.3 Weighted And Normalized Class Matrix

In [None]:
# Weighted class matrix: each class profile multiplied component-wise by the
# fuzzy weight of its criterion.
weighted_classes_df = classes_df.copy()
for criteria in ('Coherence Score', 'Easiness Score', 'Quality Of Explanation Score'):
    fuzzy_weight = np.stack(decision_makers_aggregated_optinions_df.loc[criteria])
    class_matrix = np.stack(classes_df[criteria])
    weighted_criteria_matrix = class_matrix * fuzzy_weight

    # Store each weighted (l, m, u) array under its class label.
    for class_label, value in zip(weighted_classes_df.index, weighted_criteria_matrix):
        weighted_classes_df.at[class_label, criteria] = value

In [None]:
weighted_classes_df

#### 2.6.4 Define Ideal Solutions

**Ideal Solutions For "Otimo Aproveitamento" Class**

In [None]:
# A+ is the weighted 'Otimo Aproveitamento' profile, A- the weighted 'Baixo Aproveitamento' profile.
great_class_ideal_solutions_df = (
    weighted_classes_df
    .loc[['Otimo Aproveitamento', 'Baixo Aproveitamento']]
    .set_axis(['A+', 'A-'])
)
great_class_ideal_solutions_df

**Ideal Solutions For "Aproveitamento Mediano" Class**

In [None]:
# A+ is the weighted 'Aproveitamento Mediano' profile, A- the weighted 'Baixo Aproveitamento' profile.
medium_class_ideal_solutions_df = (
    weighted_classes_df
    .loc[['Aproveitamento Mediano', 'Baixo Aproveitamento']]
    .set_axis(['A+', 'A-'])
)
medium_class_ideal_solutions_df

**Ideal Solutions For "Baixo Aproveitamento" Class**

In [None]:
# A+ is the weighted 'Baixo Aproveitamento' profile, A- the weighted 'Otimo Aproveitamento' profile.
low_class_ideal_solutions_df = (
    weighted_classes_df
    .loc[['Baixo Aproveitamento', 'Otimo Aproveitamento']]
    .set_axis(['A+', 'A-'])
)
low_class_ideal_solutions_df

#### 2.6.5 Calculate Distance from Option to Ideal Solution and Closeness Coefficient

In [None]:
def distance_calculation(ideal_solution, alternative):
    """Return the square root of one third of the summed squared differences between the two arrays."""
    return ((1/3) * np.sum((alternative - ideal_solution)**2))**(1/2)


# For every class: its frame holding the ideal (A+) and anti-ideal (A-) rows.
ideal_cases = {
    'Otimo Aproveitamento': great_class_ideal_solutions_df,
    'Aproveitamento Mediano': medium_class_ideal_solutions_df,
    'Baixo Aproveitamento': low_class_ideal_solutions_df,
}
closeness_coeficientes = {}

for ideal_case_name, ideal_case_df in ideal_cases.items():
    class_results = []

    for _, professor_row in weighted_professors_scores.iterrows():
        # The first column is the professor identifier; the remaining ones are criteria.
        professor_id = professor_row.iloc[0]
        professor_scores = professor_row.iloc[1:]

        # Per-criterion distance to the ideal (A+) and to the anti-ideal (A-) solution.
        d_plus = [
            distance_calculation(ideal_case_df.loc['A+', criterion], professor_scores[criterion])
            for criterion in professor_scores.index
        ]
        d_minus = [
            distance_calculation(ideal_case_df.loc['A-', criterion], professor_scores[criterion])
            for criterion in professor_scores.index
        ]

        # Closeness coefficient: share of the total distance that lies on the anti-ideal side.
        class_results.append({
            'ProfessorID': professor_id,
            'Overall Score': sum(d_minus) / (sum(d_minus) + sum(d_plus)),
        })

    closeness_coeficientes[ideal_case_name] = class_results

In [None]:
# Closeness coefficients for the 'Otimo Aproveitamento' class, highest first.
great_df = (
    pd.DataFrame(closeness_coeficientes['Otimo Aproveitamento'])
    .rename(columns={'Overall Score': 'Otimo Aproveitamento'})
    .sort_values(by='Otimo Aproveitamento', ascending=False)
)

In [None]:
# Closeness coefficients for the 'Aproveitamento Mediano' class, highest first.
medium_df = (
    pd.DataFrame(closeness_coeficientes['Aproveitamento Mediano'])
    .rename(columns={'Overall Score': 'Aproveitamento Mediano'})
    .sort_values(by='Aproveitamento Mediano', ascending=False)
)

In [None]:
# Closeness coefficients for the 'Baixo Aproveitamento' class, highest first.
# The score column is published as 'Aproveitamento Baixo'.
low_df = (
    pd.DataFrame(closeness_coeficientes['Baixo Aproveitamento'])
    .rename(columns={'Overall Score': 'Aproveitamento Baixo'})
    .sort_values(by='Aproveitamento Baixo', ascending=False)
)

In [None]:
# Join the three per-class score tables on the professor identifier; row order follows great_df.
overall_ranking_df = (
    great_df
    .merge(medium_df, on='ProfessorID')
    .merge(low_df, on='ProfessorID')
)

In [None]:
# Each professor's category is the class column holding the largest closeness coefficient.
class_score_columns = ['Otimo Aproveitamento', 'Aproveitamento Mediano', 'Aproveitamento Baixo']
overall_ranking_df['Category'] = overall_ranking_df[class_score_columns].idxmax(axis='columns')

In [32]:
overall_ranking_df

Unnamed: 0,ProfessorID,Otimo Aproveitamento,Aproveitamento Mediano,Aproveitamento Baixo,Category
0,5179,0.84621,0.709658,0.15379,Otimo Aproveitamento
1,4435,0.84621,0.709658,0.15379,Otimo Aproveitamento
2,3534,0.84621,0.709658,0.15379,Otimo Aproveitamento
3,412,0.80758,0.724183,0.19242,Otimo Aproveitamento
4,411,0.769315,0.645171,0.230685,Otimo Aproveitamento
5,3421,0.75,0.75,0.25,Aproveitamento Mediano
6,3303,0.69242,0.580685,0.30758,Otimo Aproveitamento
7,4968,0.69242,0.580685,0.30758,Otimo Aproveitamento
8,419,0.69242,0.580685,0.30758,Otimo Aproveitamento
9,4130,0.65379,0.809567,0.34621,Aproveitamento Mediano


In [35]:
# Comma-separated IDs of the ranked professors, used as the SQL IN (...) list.
professors_ids = ','.join(map(str, overall_ranking_df['ProfessorID'].to_list()))
professors_query = f"SELECT * FROM Professor WHERE ID IN ({professors_ids})"
professors_ids_df = pd.read_sql(professors_query, db_connection)
professors_ids_df

Unnamed: 0,ID,NAME
0,35,Eduardo Paiva Okabe
1,405,Paulo Roberto Zampieri
2,408,Milton Dias Junior
3,410,Paulo Roberto Mei
4,411,Robson Pederiva
5,412,Eder Lima De Albuquerque
6,418,Paulo Roberto Gardel Kurka
7,419,Renato Pavanello
8,425,Marco Lucio Bittencourt
9,428,Franco Giuseppe Dedini


In [36]:
# Attach the Professor table columns to each ranked professor (ProfessorID matched against ID).
overall_ranking_df.merge(professors_ids_df, left_on='ProfessorID', right_on='ID')

Unnamed: 0,ProfessorID,Otimo Aproveitamento,Aproveitamento Mediano,Aproveitamento Baixo,Category,ID,NAME
0,5179,0.84621,0.709658,0.15379,Otimo Aproveitamento,5179,Marcelo Vinicius De Paula
1,4435,0.84621,0.709658,0.15379,Otimo Aproveitamento,4435,Vinícius Gabriel Segala Simionatto
2,3534,0.84621,0.709658,0.15379,Otimo Aproveitamento,3534,William Martins Vicente
3,412,0.80758,0.724183,0.19242,Otimo Aproveitamento,412,Eder Lima De Albuquerque
4,411,0.769315,0.645171,0.230685,Otimo Aproveitamento,411,Robson Pederiva
5,3421,0.75,0.75,0.25,Aproveitamento Mediano,3421,Tiago Henrique Machado
6,3303,0.69242,0.580685,0.30758,Otimo Aproveitamento,3303,Freddy Franco
7,4968,0.69242,0.580685,0.30758,Otimo Aproveitamento,4968,Thales Freitas Peixoto
8,419,0.69242,0.580685,0.30758,Otimo Aproveitamento,419,Renato Pavanello
9,4130,0.65379,0.809567,0.34621,Aproveitamento Mediano,4130,Hugo Heidy Miyasato
