[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1QPsU3xbgoY25jiMgoWamYhxKLT_iMHYA#scrollTo=XDspGgUS55Xo)

# Codice per montare GDrive su GColab

In [1]:
from google.colab import drive
drive.mount("/content/gdrive")

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [2]:
%cd gdrive/MyDrive/ProjectAI/

/content/gdrive/.shortcut-targets-by-id/1A0bC_-pQ-oWUI2c5-EqQVAZ6051G0tKg/ProjectAI


In [3]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

# Import delle librerire fondamentali per l'analisi dei dati

In [4]:
import pandas as pd
import numpy as np
import re

# Utility per tipi e conversioni

In [24]:
from typing import List, Dict, Tuple
string_list = List[str]
one_hot_list = Tuple[int]
one_hot_encoding = Dict[str, one_hot_list]

## Mappe e funzioni per conversioni valori del dataset in valori numerici o booleani

In [25]:
def from_categorical_to_one_hot(categorical_data: string_list) -> one_hot_encoding:
    dictionary_to_return = {}
    indexes = range(len(categorical_data))
    for index, key in enumerate(categorical_data):
        dictionary_to_return[key] = tuple([1 if index == i else 0 for i in indexes])
    
    return dictionary_to_return

#Features categorica -> One Hot Encoding
list_MESI = ["Gennaio", "Febbraio", "Marzo", "Aprile", "Maggio", "Giugno", "Luglio", "Agosto", "Settembre", "Ottobre", "Novembre", "Dicembre", "Non disponibile"]
MESI = from_categorical_to_one_hot(list_MESI)

#Features categorica -> One Hot Encoding
list_ANNI = ["2000", "2001", "1999", "1998", "<=1997", ">=2002", "Non disponibile"]
ANNI = from_categorical_to_one_hot(list_ANNI)

def convert_question_result(result: str):
    return result == "Corretta" # alternativamente result == "Errata"

#Features categorica -> One Hot Encoding
list_REGOLARITA = ['Regolare', 'Posticipatario', 'Anticipatario', 'Dato mancante']
REGOLARITA = from_categorical_to_one_hot(list_REGOLARITA)

#Features categorica -> One Hot Encoding
list_AREA_GEOGRAFICA_5_ISTAT = ['Sud', 'Nord est', 'Centro', 'Nord ovest', 'Isole']
AREA_GEOGRAFICA_5_ISTAT = from_categorical_to_one_hot(list_AREA_GEOGRAFICA_5_ISTAT)

#Features categorica -> One Hot Encoding
list_AREA_GEOGRAFICA_5 = ['Sud', 'Nord est', 'Centro', 'Nord ovest', 'Sud e isole']
AREA_GEOGRAFICA_5 = from_categorical_to_one_hot(list_AREA_GEOGRAFICA_5)

#Features categorica -> One Hot Encoding
list_AREA_GEOGRAFICA_4 = ['Mezzogiorno', 'Nord est', 'Centro', 'Nord ovest']
AREA_GEOGRAFICA_4 = from_categorical_to_one_hot(list_AREA_GEOGRAFICA_4)

#Features categorica -> One Hot Encoding
list_AREA_GEOGRAFICA_3 = ['Mezzogiorno', 'Nord', 'Centro']
AREA_GEOGRAFICA_3 = from_categorical_to_one_hot(list_AREA_GEOGRAFICA_3)

#Features categorica -> One Hot Encoding
list_REGIONI = ['Campania', 'Emilia-Romagna', 'Lazio', 'Piemonte', 'Puglia', 'Lombardia', 'Veneto', 'Sicilia', 'Prov. Aut. Trento', 'Friuli-Venezia Giulia', 'Abruzzo', 'Liguria', 'Toscana', 'Sardegna', 'Calabria', 'Molise', 'Marche', 'Umbria', 'Basilicata', 'Prov. Aut. Bolzano (l. it.)']
REGIONI = from_categorical_to_one_hot(list_REGIONI)

#Features categorica -> One Hot Encoding
list_PROVINCE = ['', 'RE', 'FR', 'TO', 'BA', 'CO', 'LE', 'RO', 'CT', 'RM', 'TA', 'BS', 'SA', 'TN', 'UD', 'FG', 'LT', 'AG', 'CH', 'PC', 'TS', 'SR', 'SP', 'PD', 'SI', 'PA', 'TP', 'BO', 'CA', 'CN', 'RC', 'TE', 'MI', 'LC', 'LU', 'FI', 'AQ', 'TV', 'RG', 'VA', 'GO', 'MO', 'GE', 'AL', 'CB', 'PR', 'OR', 'VE', 'MC', 'NO', 'PT', 'MN', 'VR', 'PI', 'AP', 'LO', 'VI', 'SV', 'PU', 'BG', 'AR', 'VT', 'LI', 'SS', 'BR', 'RA', 'TR', 'SO', 'IM', 'PZ', 'GR', 'AN', 'PN', 'ME', 'CR', 'FE', 'BI', 'PV', 'PG', 'VB', 'BL', 'PE', 'CS', 'CZ', 'AV', 'RN', 'CL', 'AT', 'MS', 'KR', 'RI', 'EN', 'CE', 'MT', 'VV', 'VC', 'NU', 'FC', 'PO', 'BZ', 'BN', 'IS', 
            'NA', # presente in cod_provincia_ISTAT ma non in sigla_provincia_istat
            'PS', # presente in cod_provincia_ISTAT ma non in sigla_provincia_istat
            'FO', # presente in cod_provincia_ISTAT ma non in sigla_provincia_istat
            'LB', # presente in cod_provincia_ISTAT ma non in sigla_provincia_istat
]
PROVINCE = from_categorical_to_one_hot(list_PROVINCE)

#Features categorica -> One Hot Encoding
list_CITTADINANZA = ['Italiano', 'Straniero II generazione', 'Straniero I generazione', 'Dato mancante']
CITTADINANZA = from_categorical_to_one_hot(list_CITTADINANZA)

list_VOTI_NUMERICI = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10']
def voto_orale_decode(voto_orale: str):
    if voto_orale in list_VOTI_NUMERICI:
        return int(voto_orale)
    elif voto_orale == 'Non disponibile':
        return np.nan
    elif voto_orale == 'Non classificato': 
        return 0

list_VOTI_NAN = ['Non disponibile', 'Senza voto scritto']
def voto_scritto_decode(voto_scritto: str):
    if voto_scritto in list_VOTI_NUMERICI:
        return int(voto_scritto)
    elif voto_scritto in list_VOTI_NAN:
        return np.nan
    elif voto_scritto == 'Non classificato': 
        return 0
    
#Features categorica -> One Hot Encoding
list_PROFESSIONI = ['1. Disoccupato/a', '2. Casalingo/a', '3. Dirigente, docente universitario, funzionario o ufficiale militare', '4. Imprenditore/proprietario agricolo', '5. Professionista dipendente, sottuff. militare o libero profession. (medico, av', '6. Lavoratore in proprio (commerciante, coltivatore diretto, artigiano, meccanic', '7. Insegnante, impiegato, militare graduato', '8. Operaio, addetto ai servizi/socio di cooperativa', '9. Pensionato/a', '10. Non disponibile']
PROFESSIONI = from_categorical_to_one_hot(list_PROFESSIONI)

#Features categorica -> One Hot Encoding
list_TITOLI = ['1. Licenza elementare', '2. Licenza media', '3. Qualifica professionale triennale', '4. Diploma di maturità', '5. Altro titolo di studio superiore al diploma (I.S.E.F., Accademia di Belle Art', '6. Laurea o titolo superiore (ad esempio Dottorato di Ricerca)', '7. Non disponibile']
TITOLI = from_categorical_to_one_hot(list_TITOLI)

#Features categorica -> One Hot Encoding
list_LUOGHI_GENITORI = ['Italia (o Repubblica di San Marino)', 'Unione Europea', 'Paese europeo Non UE', 'Altro', 'Non disponibile']
LUOGHI_GENITORI = from_categorical_to_one_hot(list_LUOGHI_GENITORI)

#Features categorica -> One Hot Encoding
list_FREQUENZA_SCUOLA = ['No', 'Sì', 'Non disponibile']
FREQUENZA_SCUOLA = from_categorical_to_one_hot(list_FREQUENZA_SCUOLA)

#Features categorica -> One Hot Encoding
list_LUOGO_DI_NASCITA = ['Italia (o Repubblica di San Marino)', 'Unione Europea', 'Paese europeo Non UE', 'Altro', 'Non disponibile']
LUOGO_DI_NASCITA = from_categorical_to_one_hot(list_LUOGO_DI_NASCITA)

## Gestione conversione valori del dataset in tipi più facili da utilizzare

In [26]:
columns_converters = {
    "CODICE_SCUOLA": str, #identificativo della scuola
    "CODICE_PLESSO": str, #identificativo del plesso
    "CODICE_CLASSE": str, #identificato della classe
    "macrotipologia": str, #categoria di scuola
    "campione": int, #campione di riferimento
    "livello": int,
    "prog": int,
    "CODICE_STUDENTE": str, #codice dello studente
    "sesso": str, #sesso dello studente
    "mese": lambda month: MESI[month], #mese di nascita
    "anno": lambda year: ANNI[year], #anno di nascita
    "luogo": lambda luogo: LUOGO_DI_NASCITA[luogo],
    "eta": str, # cosa vuol dire eta?
    "codice_orario": lambda _: np.nan, # unico dato: Mancante di sistema
    "freq_asilo_nido": lambda frequenza: FREQUENZA_SCUOLA[frequenza],
    "freq_scuola_materna": lambda frequenza: FREQUENZA_SCUOLA[frequenza],
    "luogo_padre": lambda luogo: LUOGHI_GENITORI[luogo],
    "titolo_padre": lambda titolo: TITOLI[titolo],
    "prof_padre": lambda professione: PROFESSIONI[professione],
    "luogo_madre": lambda luogo: LUOGHI_GENITORI[luogo],
    "titolo_madre": lambda titolo: TITOLI[titolo],
    "prof_madre": lambda professione: PROFESSIONI[professione],
    "voto_scritto_ita": lambda voto: voto_scritto_decode(voto),
    "voto_orale_ita": lambda voto: voto_orale_decode(voto),
    "voto_scritto_mat": lambda voto: voto_scritto_decode(voto),
    "voto_orale_mat": lambda voto: voto_orale_decode(voto),
    "D1": lambda result: convert_question_result(result),
    "D2": lambda result: convert_question_result(result),
    "D3_a": lambda result: convert_question_result(result),
    "D3_b": lambda result: convert_question_result(result),
    "D4_a": lambda result: convert_question_result(result),
    "D4_b": lambda result: convert_question_result(result),
    "D4_c": lambda result: convert_question_result(result),
    "D4_d": lambda result: convert_question_result(result),
    "D5_a": lambda result: convert_question_result(result),
    "D5_b": lambda result: convert_question_result(result),
    "D6": lambda result: convert_question_result(result),
    "D7_a": lambda result: convert_question_result(result),
    "D7_b": lambda result: convert_question_result(result),
    "D8": lambda result: convert_question_result(result),
    "D9": lambda result: convert_question_result(result),
    "D10_a": lambda result: convert_question_result(result),
    "D10_b1": lambda result: convert_question_result(result),
    "D10_b2": lambda result: convert_question_result(result),
    "D10_b3": lambda result: convert_question_result(result),
    "D11_a": lambda result: convert_question_result(result),
    "D11_b": lambda result: convert_question_result(result),
    "D12_a": lambda result: convert_question_result(result),
    "D12_b": lambda result: convert_question_result(result),
    "D13_a": lambda result: convert_question_result(result),
    "D13_b": lambda result: convert_question_result(result),
    "D13_c": lambda result: convert_question_result(result),
    "D14": lambda result: convert_question_result(result),
    "D15": lambda result: convert_question_result(result),
    "D16_a": lambda result: convert_question_result(result),
    "D16_b": lambda result: convert_question_result(result),
    "D16_c": lambda result: convert_question_result(result),
    "D16_d": lambda result: convert_question_result(result),
    "D17_a": lambda result: convert_question_result(result),
    "D17_b": lambda result: convert_question_result(result),
    "D18": lambda result: convert_question_result(result),
    "D19_a": lambda result: convert_question_result(result),
    "D19_b": lambda result: convert_question_result(result),
    "D20": lambda result: convert_question_result(result),
    "D21": lambda result: convert_question_result(result),
    "D22": lambda result: convert_question_result(result),
    "D23_a": lambda result: convert_question_result(result),
    "D23_b": lambda result: convert_question_result(result),
    "D23_c": lambda result: convert_question_result(result),
    "D23_d": lambda result: convert_question_result(result),
    "D24_a": lambda result: convert_question_result(result),
    "D24_b": lambda result: convert_question_result(result),
    "D25": lambda result: convert_question_result(result),
    "D26_a": lambda result: convert_question_result(result),
    "D26_b": lambda result: convert_question_result(result),
    "D26_c": lambda result: convert_question_result(result),
    "D26_d": lambda result: convert_question_result(result),
    "regolarità": lambda regular: REGOLARITA[regular],
    "cittadinanza": lambda cittadinanza: CITTADINANZA[cittadinanza],
    "cod_provincia_ISTAT": lambda province_istat: PROVINCE[province_istat.upper()],
    "sigla_provincia_istat": lambda province: PROVINCE[province],
    "Nome_reg": lambda regione: REGIONI[" ".join(regione.split())],
    "Cod_reg": lambda codice: REGIONI[codice],
    "Areageo_3": lambda area: AREA_GEOGRAFICA_3[area],
    "Areageo_4": lambda area: AREA_GEOGRAFICA_4[area],
    "Areageo_5": lambda area: AREA_GEOGRAFICA_5[area],
    "Areageo_5_Istat": lambda area: AREA_GEOGRAFICA_5_ISTAT[area],
    "Pon": lambda pon: True if pon == "Area_Pon" else False, # lo studente appartiene all'aera Pon oppure no
    "pu_ma_gr": int,
    "pu_ma_no": float,
    "Fattore_correzione_new": float,
    "Cheating": float,
    "PesoClasse": lambda val: float(val) if val != "" else np.nan,
    "PesoScuola": lambda val: float(val) if val != "" else np.nan,
    "PesoTotale_Matematica": lambda val: float(val) if val != "" else np.nan,
    "WLE_MAT": float,
    "WLE_MAT_200": float,
    "WLE_MAT_200_CORR": float,
    "pu_ma_no_corr": float,
    "n_stud_prev": lambda val: int(float(val)),
    "n_classi_prev": lambda val: int(float(val)),
    "LIVELLI": int,
    "DROPOUT": bool,
}

# Lettura del dataset

In [27]:
dataset = pd.read_csv("Data/invalsi_mat_2014.csv", 
                      sep=';', 
                      converters=columns_converters)

# Matrici correlazione tra domande

In [29]:
questions_columns = [col for col in columns_converters.keys() if re.search("^D\d", col)]
questions_dataset = dataset[questions_columns]

## Standard correlation coefficient (pearson method)

In [30]:
questions_correlate_matrix_pearson = questions_dataset.corr(method='pearson')
questions_correlate_matrix_pearson.style.background_gradient(cmap='coolwarm')

Unnamed: 0,D1,D2,D3_a,D3_b,D4_a,D4_b,D4_c,D4_d,D5_a,D5_b,D6,D7_a,D7_b,D8,D9,D10_a,D10_b1,D10_b2,D10_b3,D11_a,D11_b,D12_a,D12_b,D13_a,D13_b,D13_c,D14,D15,D16_a,D16_b,D16_c,D16_d,D17_a,D17_b,D18,D19_a,D19_b,D20,D21,D22,D23_a,D23_b,D23_c,D23_d,D24_a,D24_b,D25,D26_a,D26_b,D26_c,D26_d
D1,1.0,0.159962,0.117898,0.122366,0.110744,0.120688,0.111168,0.090119,0.141095,0.13284,0.092501,0.16226,0.156937,0.114144,0.137984,0.05174,0.072593,0.090781,0.074167,0.100242,0.11005,0.155488,0.145865,0.104897,0.112098,0.148164,0.109666,0.153948,0.059813,0.049402,0.105747,0.041508,0.152072,0.153062,0.134426,0.13477,0.166495,0.163874,0.128623,0.120701,0.094943,0.07036,0.106159,0.092624,0.148165,0.155592,0.100861,0.05854,0.112296,0.043786,0.045914
D2,0.159962,1.0,0.127355,0.115453,0.107185,0.123625,0.116068,0.085145,0.156427,0.144796,0.097521,0.167598,0.165464,0.099342,0.135203,0.063148,0.084418,0.104845,0.067833,0.097236,0.120287,0.159201,0.151826,0.114048,0.12417,0.151979,0.116963,0.141717,0.065317,0.06203,0.113806,0.04756,0.139755,0.141667,0.132132,0.136285,0.155686,0.167342,0.109348,0.113168,0.084681,0.062102,0.107689,0.086031,0.143441,0.155675,0.114016,0.055231,0.111418,0.033378,0.038129
D3_a,0.117898,0.127355,1.0,0.16435,0.093999,0.102824,0.100124,0.075344,0.115609,0.114272,0.100369,0.138519,0.157017,0.088901,0.128263,0.070578,0.063826,0.08364,0.072489,0.089704,0.13326,0.130668,0.135893,0.136484,0.111671,0.140222,0.115144,0.117141,0.067264,0.047743,0.09249,0.043972,0.130433,0.131943,0.123215,0.113853,0.135972,0.144457,0.103196,0.103436,0.073166,0.056038,0.085141,0.072087,0.14267,0.133937,0.123459,0.055894,0.091226,0.034187,0.039141
D3_b,0.122366,0.115453,0.16435,1.0,0.080663,0.08281,0.079639,0.06444,0.102201,0.090864,0.09522,0.142141,0.142324,0.099218,0.105829,0.050737,0.047643,0.076841,0.055321,0.077192,0.097252,0.126303,0.135465,0.076703,0.094614,0.1071,0.107128,0.120877,0.060221,0.037456,0.076204,0.036649,0.118962,0.131351,0.115114,0.107417,0.137407,0.126343,0.097087,0.086762,0.07087,0.046162,0.071725,0.05869,0.120678,0.117574,0.077571,0.038398,0.078307,0.025828,0.031192
D4_a,0.110744,0.107185,0.093999,0.080663,1.0,0.456729,0.54146,0.201024,0.100737,0.10701,0.099606,0.117602,0.135622,0.068639,0.108041,0.044718,0.077009,0.081148,0.076499,0.087598,0.119104,0.124756,0.111296,0.093088,0.099104,0.131657,0.102371,0.103647,0.058078,0.049274,0.10231,0.037122,0.117073,0.145262,0.138158,0.105413,0.161411,0.129432,0.092179,0.101252,0.084268,0.059309,0.092728,0.084023,0.12438,0.134125,0.105965,0.072902,0.110437,0.040425,0.044465
D4_b,0.120688,0.123625,0.102824,0.08281,0.456729,1.0,0.322752,0.464205,0.109857,0.109942,0.114791,0.123361,0.143548,0.067163,0.126853,0.043393,0.090215,0.082088,0.085947,0.0941,0.123554,0.138308,0.126969,0.101312,0.10574,0.146649,0.113778,0.116214,0.072831,0.04864,0.118705,0.042031,0.133976,0.16882,0.148576,0.115849,0.183918,0.147872,0.105762,0.109053,0.084958,0.055979,0.109084,0.095962,0.133955,0.14693,0.109042,0.076773,0.117818,0.042556,0.045142
D4_c,0.111168,0.116068,0.100124,0.079639,0.54146,0.322752,1.0,0.290724,0.112901,0.115076,0.10755,0.131348,0.148456,0.063705,0.108592,0.050076,0.088481,0.079896,0.068339,0.081483,0.127304,0.128313,0.115412,0.105587,0.11546,0.141836,0.113321,0.10543,0.060873,0.06131,0.113643,0.044599,0.116458,0.151262,0.140219,0.107099,0.165851,0.131161,0.08894,0.096646,0.079245,0.055508,0.108046,0.090688,0.12922,0.133364,0.119196,0.068319,0.113168,0.046992,0.047882
D4_d,0.090119,0.085145,0.075344,0.06444,0.201024,0.464205,0.290724,1.0,0.056138,0.060351,0.08633,0.064151,0.079615,0.066743,0.111214,0.015591,0.056401,0.059031,0.07235,0.089018,0.070252,0.099486,0.095921,0.053631,0.047977,0.092677,0.069109,0.090329,0.062125,0.025824,0.074612,0.030113,0.113551,0.129508,0.094959,0.087818,0.136913,0.114976,0.103952,0.096069,0.066441,0.043613,0.0715,0.055009,0.094804,0.112197,0.04829,0.055956,0.087019,0.044523,0.036568
D5_a,0.141095,0.156427,0.115609,0.102201,0.100737,0.109857,0.112901,0.056138,1.0,0.246935,0.088119,0.208602,0.196014,0.102484,0.108142,0.08041,0.099544,0.128741,0.073748,0.092682,0.137126,0.169236,0.165489,0.123806,0.154307,0.168529,0.130108,0.138934,0.05502,0.077723,0.139551,0.055823,0.12615,0.134514,0.135548,0.136257,0.146343,0.157701,0.079157,0.100498,0.082491,0.061871,0.12216,0.087926,0.123548,0.145367,0.152409,0.053531,0.119586,0.01312,0.026196
D5_b,0.13284,0.144796,0.114272,0.090864,0.10701,0.109942,0.115076,0.060351,0.246935,1.0,0.092956,0.171029,0.178149,0.08247,0.105408,0.08044,0.0957,0.106401,0.070168,0.086166,0.141502,0.13564,0.119319,0.13442,0.148389,0.163288,0.128379,0.11889,0.057226,0.070607,0.12223,0.055341,0.123041,0.13566,0.132823,0.114968,0.146091,0.137408,0.088519,0.099215,0.086817,0.07163,0.11238,0.101806,0.13357,0.141657,0.1454,0.071285,0.112515,0.037611,0.047535


## Kendall Tau correlation coefficient

In [31]:
questions_correlate_matrix_pearson = questions_dataset.corr(method='kendall')
questions_correlate_matrix_pearson.style.background_gradient(cmap='coolwarm')

Unnamed: 0,D1,D2,D3_a,D3_b,D4_a,D4_b,D4_c,D4_d,D5_a,D5_b,D6,D7_a,D7_b,D8,D9,D10_a,D10_b1,D10_b2,D10_b3,D11_a,D11_b,D12_a,D12_b,D13_a,D13_b,D13_c,D14,D15,D16_a,D16_b,D16_c,D16_d,D17_a,D17_b,D18,D19_a,D19_b,D20,D21,D22,D23_a,D23_b,D23_c,D23_d,D24_a,D24_b,D25,D26_a,D26_b,D26_c,D26_d
D1,1.0,0.159962,0.117898,0.122366,0.110744,0.120688,0.111168,0.090119,0.141095,0.13284,0.092501,0.16226,0.156937,0.114144,0.137984,0.05174,0.072593,0.090781,0.074167,0.100242,0.11005,0.155488,0.145865,0.104897,0.112098,0.148164,0.109666,0.153948,0.059813,0.049402,0.105747,0.041508,0.152072,0.153062,0.134426,0.13477,0.166495,0.163874,0.128623,0.120701,0.094943,0.07036,0.106159,0.092624,0.148165,0.155592,0.100861,0.05854,0.112296,0.043786,0.045914
D2,0.159962,1.0,0.127355,0.115453,0.107185,0.123625,0.116068,0.085145,0.156427,0.144796,0.097521,0.167598,0.165464,0.099342,0.135203,0.063148,0.084418,0.104845,0.067833,0.097236,0.120287,0.159201,0.151826,0.114048,0.12417,0.151979,0.116963,0.141717,0.065317,0.06203,0.113806,0.04756,0.139755,0.141667,0.132132,0.136285,0.155686,0.167342,0.109348,0.113168,0.084681,0.062102,0.107689,0.086031,0.143441,0.155675,0.114016,0.055231,0.111418,0.033378,0.038129
D3_a,0.117898,0.127355,1.0,0.16435,0.093999,0.102824,0.100124,0.075344,0.115609,0.114272,0.100369,0.138519,0.157017,0.088901,0.128263,0.070578,0.063826,0.08364,0.072489,0.089704,0.13326,0.130668,0.135893,0.136484,0.111671,0.140222,0.115144,0.117141,0.067264,0.047743,0.09249,0.043972,0.130433,0.131943,0.123215,0.113853,0.135972,0.144457,0.103196,0.103436,0.073166,0.056038,0.085141,0.072087,0.14267,0.133937,0.123459,0.055894,0.091226,0.034187,0.039141
D3_b,0.122366,0.115453,0.16435,1.0,0.080663,0.08281,0.079639,0.06444,0.102201,0.090864,0.09522,0.142141,0.142324,0.099218,0.105829,0.050737,0.047643,0.076841,0.055321,0.077192,0.097252,0.126303,0.135465,0.076703,0.094614,0.1071,0.107128,0.120877,0.060221,0.037456,0.076204,0.036649,0.118962,0.131351,0.115114,0.107417,0.137407,0.126343,0.097087,0.086762,0.07087,0.046162,0.071725,0.05869,0.120678,0.117574,0.077571,0.038398,0.078307,0.025828,0.031192
D4_a,0.110744,0.107185,0.093999,0.080663,1.0,0.456729,0.54146,0.201024,0.100737,0.10701,0.099606,0.117602,0.135622,0.068639,0.108041,0.044718,0.077009,0.081148,0.076499,0.087598,0.119104,0.124756,0.111296,0.093088,0.099104,0.131657,0.102371,0.103647,0.058078,0.049274,0.10231,0.037122,0.117073,0.145262,0.138158,0.105413,0.161411,0.129432,0.092179,0.101252,0.084268,0.059309,0.092728,0.084023,0.12438,0.134125,0.105965,0.072902,0.110437,0.040425,0.044465
D4_b,0.120688,0.123625,0.102824,0.08281,0.456729,1.0,0.322752,0.464205,0.109857,0.109942,0.114791,0.123361,0.143548,0.067163,0.126853,0.043393,0.090215,0.082088,0.085947,0.0941,0.123554,0.138308,0.126969,0.101312,0.10574,0.146649,0.113778,0.116214,0.072831,0.04864,0.118705,0.042031,0.133976,0.16882,0.148576,0.115849,0.183918,0.147872,0.105762,0.109053,0.084958,0.055979,0.109084,0.095962,0.133955,0.14693,0.109042,0.076773,0.117818,0.042556,0.045142
D4_c,0.111168,0.116068,0.100124,0.079639,0.54146,0.322752,1.0,0.290724,0.112901,0.115076,0.10755,0.131348,0.148456,0.063705,0.108592,0.050076,0.088481,0.079896,0.068339,0.081483,0.127304,0.128313,0.115412,0.105587,0.11546,0.141836,0.113321,0.10543,0.060873,0.06131,0.113643,0.044599,0.116458,0.151262,0.140219,0.107099,0.165851,0.131161,0.08894,0.096646,0.079245,0.055508,0.108046,0.090688,0.12922,0.133364,0.119196,0.068319,0.113168,0.046992,0.047882
D4_d,0.090119,0.085145,0.075344,0.06444,0.201024,0.464205,0.290724,1.0,0.056138,0.060351,0.08633,0.064151,0.079615,0.066743,0.111214,0.015591,0.056401,0.059031,0.07235,0.089018,0.070252,0.099486,0.095921,0.053631,0.047977,0.092677,0.069109,0.090329,0.062125,0.025824,0.074612,0.030113,0.113551,0.129508,0.094959,0.087818,0.136913,0.114976,0.103952,0.096069,0.066441,0.043613,0.0715,0.055009,0.094804,0.112197,0.04829,0.055956,0.087019,0.044523,0.036568
D5_a,0.141095,0.156427,0.115609,0.102201,0.100737,0.109857,0.112901,0.056138,1.0,0.246935,0.088119,0.208602,0.196014,0.102484,0.108142,0.08041,0.099544,0.128741,0.073748,0.092682,0.137126,0.169236,0.165489,0.123806,0.154307,0.168529,0.130108,0.138934,0.05502,0.077723,0.139551,0.055823,0.12615,0.134514,0.135548,0.136257,0.146343,0.157701,0.079157,0.100498,0.082491,0.061871,0.12216,0.087926,0.123548,0.145367,0.152409,0.053531,0.119586,0.01312,0.026196
D5_b,0.13284,0.144796,0.114272,0.090864,0.10701,0.109942,0.115076,0.060351,0.246935,1.0,0.092956,0.171029,0.178149,0.08247,0.105408,0.08044,0.0957,0.106401,0.070168,0.086166,0.141502,0.13564,0.119319,0.13442,0.148389,0.163288,0.128379,0.11889,0.057226,0.070607,0.12223,0.055341,0.123041,0.13566,0.132823,0.114968,0.146091,0.137408,0.088519,0.099215,0.086817,0.07163,0.11238,0.101806,0.13357,0.141657,0.1454,0.071285,0.112515,0.037611,0.047535


## Spearman rank correlation

In [32]:
questions_correlate_matrix_pearson = questions_dataset.corr(method='spearman')
questions_correlate_matrix_pearson.style.background_gradient(cmap='coolwarm')

Unnamed: 0,D1,D2,D3_a,D3_b,D4_a,D4_b,D4_c,D4_d,D5_a,D5_b,D6,D7_a,D7_b,D8,D9,D10_a,D10_b1,D10_b2,D10_b3,D11_a,D11_b,D12_a,D12_b,D13_a,D13_b,D13_c,D14,D15,D16_a,D16_b,D16_c,D16_d,D17_a,D17_b,D18,D19_a,D19_b,D20,D21,D22,D23_a,D23_b,D23_c,D23_d,D24_a,D24_b,D25,D26_a,D26_b,D26_c,D26_d
D1,1.0,0.159962,0.117898,0.122366,0.110744,0.120688,0.111168,0.090119,0.141095,0.13284,0.092501,0.16226,0.156937,0.114144,0.137984,0.05174,0.072593,0.090781,0.074167,0.100242,0.11005,0.155488,0.145865,0.104897,0.112098,0.148164,0.109666,0.153948,0.059813,0.049402,0.105747,0.041508,0.152072,0.153062,0.134426,0.13477,0.166495,0.163874,0.128623,0.120701,0.094943,0.07036,0.106159,0.092624,0.148165,0.155592,0.100861,0.05854,0.112296,0.043786,0.045914
D2,0.159962,1.0,0.127355,0.115453,0.107185,0.123625,0.116068,0.085145,0.156427,0.144796,0.097521,0.167598,0.165464,0.099342,0.135203,0.063148,0.084418,0.104845,0.067833,0.097236,0.120287,0.159201,0.151826,0.114048,0.12417,0.151979,0.116963,0.141717,0.065317,0.06203,0.113806,0.04756,0.139755,0.141667,0.132132,0.136285,0.155686,0.167342,0.109348,0.113168,0.084681,0.062102,0.107689,0.086031,0.143441,0.155675,0.114016,0.055231,0.111418,0.033378,0.038129
D3_a,0.117898,0.127355,1.0,0.16435,0.093999,0.102824,0.100124,0.075344,0.115609,0.114272,0.100369,0.138519,0.157017,0.088901,0.128263,0.070578,0.063826,0.08364,0.072489,0.089704,0.13326,0.130668,0.135893,0.136484,0.111671,0.140222,0.115144,0.117141,0.067264,0.047743,0.09249,0.043972,0.130433,0.131943,0.123215,0.113853,0.135972,0.144457,0.103196,0.103436,0.073166,0.056038,0.085141,0.072087,0.14267,0.133937,0.123459,0.055894,0.091226,0.034187,0.039141
D3_b,0.122366,0.115453,0.16435,1.0,0.080663,0.08281,0.079639,0.06444,0.102201,0.090864,0.09522,0.142141,0.142324,0.099218,0.105829,0.050737,0.047643,0.076841,0.055321,0.077192,0.097252,0.126303,0.135465,0.076703,0.094614,0.1071,0.107128,0.120877,0.060221,0.037456,0.076204,0.036649,0.118962,0.131351,0.115114,0.107417,0.137407,0.126343,0.097087,0.086762,0.07087,0.046162,0.071725,0.05869,0.120678,0.117574,0.077571,0.038398,0.078307,0.025828,0.031192
D4_a,0.110744,0.107185,0.093999,0.080663,1.0,0.456729,0.54146,0.201024,0.100737,0.10701,0.099606,0.117602,0.135622,0.068639,0.108041,0.044718,0.077009,0.081148,0.076499,0.087598,0.119104,0.124756,0.111296,0.093088,0.099104,0.131657,0.102371,0.103647,0.058078,0.049274,0.10231,0.037122,0.117073,0.145262,0.138158,0.105413,0.161411,0.129432,0.092179,0.101252,0.084268,0.059309,0.092728,0.084023,0.12438,0.134125,0.105965,0.072902,0.110437,0.040425,0.044465
D4_b,0.120688,0.123625,0.102824,0.08281,0.456729,1.0,0.322752,0.464205,0.109857,0.109942,0.114791,0.123361,0.143548,0.067163,0.126853,0.043393,0.090215,0.082088,0.085947,0.0941,0.123554,0.138308,0.126969,0.101312,0.10574,0.146649,0.113778,0.116214,0.072831,0.04864,0.118705,0.042031,0.133976,0.16882,0.148576,0.115849,0.183918,0.147872,0.105762,0.109053,0.084958,0.055979,0.109084,0.095962,0.133955,0.14693,0.109042,0.076773,0.117818,0.042556,0.045142
D4_c,0.111168,0.116068,0.100124,0.079639,0.54146,0.322752,1.0,0.290724,0.112901,0.115076,0.10755,0.131348,0.148456,0.063705,0.108592,0.050076,0.088481,0.079896,0.068339,0.081483,0.127304,0.128313,0.115412,0.105587,0.11546,0.141836,0.113321,0.10543,0.060873,0.06131,0.113643,0.044599,0.116458,0.151262,0.140219,0.107099,0.165851,0.131161,0.08894,0.096646,0.079245,0.055508,0.108046,0.090688,0.12922,0.133364,0.119196,0.068319,0.113168,0.046992,0.047882
D4_d,0.090119,0.085145,0.075344,0.06444,0.201024,0.464205,0.290724,1.0,0.056138,0.060351,0.08633,0.064151,0.079615,0.066743,0.111214,0.015591,0.056401,0.059031,0.07235,0.089018,0.070252,0.099486,0.095921,0.053631,0.047977,0.092677,0.069109,0.090329,0.062125,0.025824,0.074612,0.030113,0.113551,0.129508,0.094959,0.087818,0.136913,0.114976,0.103952,0.096069,0.066441,0.043613,0.0715,0.055009,0.094804,0.112197,0.04829,0.055956,0.087019,0.044523,0.036568
D5_a,0.141095,0.156427,0.115609,0.102201,0.100737,0.109857,0.112901,0.056138,1.0,0.246935,0.088119,0.208602,0.196014,0.102484,0.108142,0.08041,0.099544,0.128741,0.073748,0.092682,0.137126,0.169236,0.165489,0.123806,0.154307,0.168529,0.130108,0.138934,0.05502,0.077723,0.139551,0.055823,0.12615,0.134514,0.135548,0.136257,0.146343,0.157701,0.079157,0.100498,0.082491,0.061871,0.12216,0.087926,0.123548,0.145367,0.152409,0.053531,0.119586,0.01312,0.026196
D5_b,0.13284,0.144796,0.114272,0.090864,0.10701,0.109942,0.115076,0.060351,0.246935,1.0,0.092956,0.171029,0.178149,0.08247,0.105408,0.08044,0.0957,0.106401,0.070168,0.086166,0.141502,0.13564,0.119319,0.13442,0.148389,0.163288,0.128379,0.11889,0.057226,0.070607,0.12223,0.055341,0.123041,0.13566,0.132823,0.114968,0.146091,0.137408,0.088519,0.099215,0.086817,0.07163,0.11238,0.101806,0.13357,0.141657,0.1454,0.071285,0.112515,0.037611,0.047535


# Nota sulle matrici di correlazione ottenute
Le **matrici di correlazione sulle domande** non evidenziano significative dipendenze lineari tra di essse: i valori più elevati appaiono in corrispondenza di domande consecutive, il più delle volte parti della stessa domanda (e.g. D7, D3 e D4).  
Conseguentemente, **non è possibile realizzare alcuna riduzione di dimensionalità**.  
Di seguito, verifichiamo se anche filtrando per area geografica la stessa situazione si verifica.

## Standard correlation coefficient (pearson method) filtered by Areageo_3 ( == [0, 1, 0])

In [43]:
questions_dataset_areageo_3 = dataset.query('Areageo_3 == (0, 1, 0)')
print(questions_dataset_areageo_3['Areageo_3'])
questions_dataset_areageo_3 = questions_dataset_areageo_3[questions_columns]

Series([], Name: Areageo_3, dtype: object)


In [36]:
questions_correlate_matrix_pearson_areageo_3 = questions_dataset_areageo_3.corr(method='pearson')
questions_correlate_matrix_pearson_areageo_3.style.background_gradient(cmap='coolwarm')

  smin = np.nanmin(s.to_numpy()) if vmin is None else vmin
  smax = np.nanmax(s.to_numpy()) if vmax is None else vmax


Unnamed: 0,D1,D2,D3_a,D3_b,D4_a,D4_b,D4_c,D4_d,D5_a,D5_b,D6,D7_a,D7_b,D8,D9,D10_a,D10_b1,D10_b2,D10_b3,D11_a,D11_b,D12_a,D12_b,D13_a,D13_b,D13_c,D14,D15,D16_a,D16_b,D16_c,D16_d,D17_a,D17_b,D18,D19_a,D19_b,D20,D21,D22,D23_a,D23_b,D23_c,D23_d,D24_a,D24_b,D25,D26_a,D26_b,D26_c,D26_d
D1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
D2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
D3_a,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
D3_b,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
D4_a,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
D4_b,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
D4_c,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
D4_d,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
D5_a,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
D5_b,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


# ESEGUI QUEST'ULTIMA CELLA PER SMONTARE GDRIVE

In [None]:
drive.flush_and_unmount()
print('All changes made in this colab session should now be visible in Drive.')

All changes made in this colab session should now be visible in Drive.
