<a href="https://colab.research.google.com/github/MickPerl/MachineLearningProject/blob/main/ProjectAI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1QPsU3xbgoY25jiMgoWamYhxKLT_iMHYA#scrollTo=XDspGgUS55Xo)

# Codice per montare GDrive su GColab

In [2]:
# Mount Google Drive inside the Colab runtime at /content/gdrive.
from google.colab import drive
drive.mount("/content/gdrive")

Mounted at /content/gdrive


In [3]:
# IPython magic: change the working directory to the project folder on the
# mounted Drive (the path is relative to the current directory).
%cd gdrive/MyDrive/ProjectAI/

/content/gdrive/MyDrive/ProjectAI


# Import delle librerie fondamentali per l'analisi dei dati

In [4]:
import pandas as pd
import numpy as np
import re

# Utility per tipi e conversioni

## Mappe e funzioni per conversioni valori del dataset in valori numerici o booleani

In [5]:
# Italian month name -> month number (1..12).
# The "Non disponibile" placeholder is mapped to NaN.
MESI = {
    month_name: month_number
    for month_number, month_name in enumerate(
        (
            "Gennaio", "Febbraio", "Marzo", "Aprile", "Maggio", "Giugno",
            "Luglio", "Agosto", "Settembre", "Ottobre", "Novembre", "Dicembre",
        ),
        start=1,
    )
}
MESI["Non disponibile"] = np.nan

# Birth-year label -> integer category code.
# NOTE: codes follow the listing order below, not chronological order
# ("2000" is 1 while "1999" is 3). "Non disponibile" is mapped to NaN.
ANNI = dict(
    zip(
        ("2000", "2001", "1999", "1998", "<=1997", ">=2002"),
        range(1, 7),
    )
)
ANNI["Non disponibile"] = np.nan

def convert_question_result(result: str):
    """Translate the textual outcome of a question into a boolean.

    Returns ``True`` for "Corretta", ``False`` for "Errata" and NaN for any
    other value.
    """
    if result == "Corretta":
        return True
    if result == "Errata":
        return False
    return np.nan
    
# Schooling-regularity label -> integer category code; 'Dato mancante' -> NaN.
REGOLARITA = {
    label: code
    for code, label in enumerate(
        ('Regolare', 'Posticipatario', 'Anticipatario'),
        start=1,
    )
}
REGOLARITA['Dato mancante'] = np.nan

# Geographic-area label -> integer category code, one table per partition
# of the territory (5 areas ISTAT, 5 areas, 4 areas, 3 areas).
# In every table the codes follow the listing order, starting from 1.
AREA_GEOGRAFICA_5_ISTAT = dict(
    zip(('Sud', 'Nord est', 'Centro', 'Nord ovest', 'Isole'), range(1, 6))
)

AREA_GEOGRAFICA_5 = dict(
    zip(('Sud', 'Nord est', 'Centro', 'Nord ovest', 'Sud e isole'), range(1, 6))
)

AREA_GEOGRAFICA_4 = dict(
    zip(('Mezzogiorno', 'Nord est', 'Centro', 'Nord ovest'), range(1, 5))
)

AREA_GEOGRAFICA_3 = dict(
    zip(('Mezzogiorno', 'Nord', 'Centro'), range(1, 4))
)

# Region name -> integer category code (codes follow the listing order,
# starting from 1).
REGIONI = {
    region: code
    for code, region in enumerate(
        (
            'Campania',
            'Emilia-Romagna',
            'Lazio',
            'Piemonte',
            'Puglia',
            'Lombardia',
            'Veneto',
            'Sicilia',
            'Prov. Aut. Trento',
            'Friuli-Venezia Giulia',
            'Abruzzo',
            'Liguria',
            'Toscana',
            'Sardegna',
            'Calabria',
            'Molise',
            'Marche',
            'Umbria',
            'Basilicata',
            'Prov. Aut. Bolzano (l. it.)',
        ),
        start=1,
    )
}

# Province abbreviation -> integer category code; the empty string (no
# province given) maps to NaN.
#
# Every province must have its own code. 'TV' used to share code 36 with 'AQ',
# which merged the two provinces into a single category. 'TV' now gets the
# first unused code (105) so that all other codes keep their previous value.
PROVINCE = {
    '': np.nan,
    'RE': 1, 'FR': 2, 'TO': 3, 'BA': 4, 'CO': 5,
    'LE': 6, 'RO': 7, 'CT': 8, 'RM': 9, 'TA': 10,
    'BS': 11, 'SA': 12, 'TN': 13, 'UD': 14, 'FG': 15,
    'LT': 16, 'AG': 17, 'CH': 18, 'PC': 19, 'TS': 20,
    'SR': 21, 'SP': 22, 'PD': 23, 'SI': 24, 'PA': 25,
    'TP': 26, 'BO': 27, 'CA': 28, 'CN': 29, 'RC': 30,
    'TE': 31, 'MI': 32, 'LC': 33, 'LU': 34, 'FI': 35,
    'AQ': 36,
    'TV': 105,  # was 36, colliding with 'AQ'
    'RG': 37, 'VA': 38, 'GO': 39, 'MO': 40, 'GE': 41,
    'AL': 42, 'CB': 43, 'PR': 44, 'OR': 45, 'VE': 46,
    'MC': 47, 'NO': 48, 'PT': 49, 'MN': 50, 'VR': 51,
    'PI': 52, 'AP': 53, 'LO': 54, 'VI': 55, 'SV': 56,
    'PU': 57, 'BG': 58, 'AR': 59, 'VT': 60, 'LI': 61,
    'SS': 62, 'BR': 63, 'RA': 64, 'TR': 65, 'SO': 66,
    'IM': 67, 'PZ': 68, 'GR': 69, 'AN': 70, 'PN': 71,
    'ME': 72, 'CR': 73, 'FE': 74, 'BI': 75, 'PV': 76,
    'PG': 77, 'VB': 78, 'BL': 79, 'PE': 80, 'CS': 81,
    'CZ': 82, 'AV': 83, 'RN': 84, 'CL': 85, 'AT': 86,
    'MS': 87, 'KR': 88, 'RI': 89, 'EN': 90, 'CE': 91,
    'MT': 92, 'VV': 93, 'VC': 94, 'NU': 95, 'FC': 96,
    'PO': 97, 'BZ': 98, 'BN': 99, 'IS': 100,
    'NA': 101,  # present in cod_provincia_ISTAT but not in sigla_provincia_istat
    'PS': 102,  # present in cod_provincia_ISTAT but not in sigla_provincia_istat
    'FO': 103,  # present in cod_provincia_ISTAT but not in sigla_provincia_istat
    'LB': 104,  # present in cod_provincia_ISTAT but not in sigla_provincia_istat
}

# Citizenship label -> integer category code; 'Dato mancante' -> NaN.
CITTADINANZA = dict(
    zip(
        ('Italiano', 'Straniero II generazione', 'Straniero I generazione'),
        (1, 2, 3),
    )
)
CITTADINANZA['Dato mancante'] = np.nan

def voto_orale_decode(voto_orale: str):
    """Decode the textual oral grade.

    Returns the grade as an ``int`` for the labels '1'..'10', ``0`` for
    'Non classificato', NaN for 'Non disponibile' and ``None`` for any other
    label.
    """
    if voto_orale == 'Non classificato':
        return 0
    if voto_orale == 'Non disponibile':
        return np.nan
    numeric_labels = tuple(str(grade) for grade in range(1, 11))
    if voto_orale in numeric_labels:
        return int(voto_orale)
    # Unrecognised label: no value is produced.
    return None
    
def voto_scritto_decode(voto_scritto: str):
    """Decode the textual written grade.

    Returns the grade as an ``int`` for the labels '1'..'10', ``0`` for
    'Non classificato', NaN for 'Non disponibile' and 'Senza voto scritto',
    and ``None`` for any other label.
    """
    if voto_scritto == 'Non classificato':
        return 0
    if voto_scritto == 'Non disponibile' or voto_scritto == 'Senza voto scritto':
        return np.nan
    if voto_scritto in tuple(str(grade) for grade in range(1, 11)):
        return int(voto_scritto)
    # Unrecognised label: no value is produced.
    return None
    
# Parent occupation label (as it appears in the dataset) -> integer category
# code; '10. Non disponibile' -> NaN.
# NOTE: the code is not the number that prefixes the label.
PROFESSIONI = dict(
    (
        ('10. Non disponibile', np.nan),
        ('7. Insegnante, impiegato, militare graduato', 1),
        ('2. Casalingo/a', 2),
        ('5. Professionista dipendente, sottuff. militare o libero profession. (medico, av', 3),
        ('8. Operaio, addetto ai servizi/socio di cooperativa', 4),
        ('6. Lavoratore in proprio (commerciante, coltivatore diretto, artigiano, meccanic', 5),
        ('1. Disoccupato/a', 6),
        ('4. Imprenditore/proprietario agricolo', 7),
        ('3. Dirigente, docente universitario, funzionario o ufficiale militare', 8),
        ('9. Pensionato/a', 9),
    )
)

# Parent education-title label (as it appears in the dataset) -> integer
# category code; '7. Non disponibile' -> NaN.
# NOTE: the code is not the number that prefixes the label.
TITOLI = dict(
    (
        ('7. Non disponibile', np.nan),
        ('4. Diploma di maturità', 1),
        ('2. Licenza media', 2),
        ('6. Laurea o titolo superiore (ad esempio Dottorato di Ricerca)', 3),
        ('5. Altro titolo di studio superiore al diploma (I.S.E.F., Accademia di Belle Art', 4),
        ('1. Licenza elementare', 5),
        ('3. Qualifica professionale triennale', 6),
    )
)

# Parent birthplace label -> integer category code; 'Non disponibile' -> NaN.
LUOGHI_GENITORI = {
    place: code
    for code, place in enumerate(
        (
            'Italia (o Repubblica di San Marino)',
            'Unione Europea',
            'Paese europeo Non UE',
            'Altro',
        ),
        start=1,
    )
}
LUOGHI_GENITORI['Non disponibile'] = np.nan

# Attendance answer -> boolean ('Sì' is True, 'No' is False);
# 'Non disponibile' -> NaN.
FREQUENZA_SCUOLA = dict(
    zip(
        ('No', 'Sì', 'Non disponibile'),
        (False, True, np.nan),
    )
)

# Student birthplace label -> integer category code; 'Non disponibile' -> NaN.
# NOTE: the codes differ from LUOGHI_GENITORI ('Altro' is 2 here).
LUOGO_DI_NASCITA = {
    place: code
    for code, place in enumerate(
        (
            'Italia (o Repubblica di San Marino)',
            'Altro',
            'Unione Europea',
            'Paese europeo Non UE',
        ),
        start=1,
    )
}
LUOGO_DI_NASCITA['Non disponibile'] = np.nan

## Gestione conversione valori del dataset in tipi più facili da utilizzare

In [6]:
# Names of the question (item) columns, in the order in which they must
# appear among the converter keys: the list of question columns used by the
# correlation cells is derived from the key order of `columns_converters`.
_QUESTION_COLUMNS = (
    "D1", "D2", "D3_a", "D3_b", "D4_a", "D4_b", "D4_c", "D4_d",
    "D5_a", "D5_b", "D6", "D7_a", "D7_b", "D8", "D9",
    "D10_a", "D10_b1", "D10_b2", "D10_b3", "D11_a", "D11_b",
    "D12_a", "D12_b", "D13_a", "D13_b", "D13_c", "D14", "D15",
    "D16_a", "D16_b", "D16_c", "D16_d", "D17_a", "D17_b", "D18",
    "D19_a", "D19_b", "D20", "D21", "D22",
    "D23_a", "D23_b", "D23_c", "D23_d", "D24_a", "D24_b", "D25",
    "D26_a", "D26_b", "D26_c", "D26_d",
)


def _float_or_nan(val):
    """Parse `val` as a float; the empty string becomes NaN."""
    return float(val) if val != "" else np.nan


def _parse_dropout(val):
    """Convert the raw text of a DROPOUT cell into a boolean.

    Converters receive the cell as text, and ``bool`` of any non-empty string
    is ``True`` -- including "False" and "0". The textual spellings of a false
    value are therefore recognised explicitly; every other value keeps the
    plain ``bool(val)`` result, so nothing that used to parse starts failing.
    """
    if isinstance(val, str) and val.strip().lower() in ("false", "0", "no"):
        return False
    return bool(val)


# Column name -> converter applied by `pd.read_csv` to every cell of that
# column. Lookups in the mapping tables raise KeyError on labels that are not
# listed in them.
columns_converters = {
    "CODICE_SCUOLA": str,  # school identifier
    "CODICE_PLESSO": str,  # school building (plesso) identifier
    "CODICE_CLASSE": str,  # class identifier
    "macrotipologia": str,  # school category
    "campione": int,  # reference sample
    "livello": int,
    "prog": int,
    "CODICE_STUDENTE": str,  # student code
    "sesso": str,  # student sex
    "mese": lambda month: MESI[month],  # month of birth
    "anno": lambda year: ANNI[year],  # year of birth
    "luogo": lambda luogo: LUOGO_DI_NASCITA[luogo],
    "eta": str,  # TODO: clarify what 'eta' means
    # The value is always discarded (original note: the only value found in
    # the data is "Mancante di sistema").
    "codice_orario": lambda _: np.nan,
    "freq_asilo_nido": lambda frequenza: FREQUENZA_SCUOLA[frequenza],
    "freq_scuola_materna": lambda frequenza: FREQUENZA_SCUOLA[frequenza],
    "luogo_padre": lambda luogo: LUOGHI_GENITORI[luogo],
    "titolo_padre": lambda titolo: TITOLI[titolo],
    "prof_padre": lambda professione: PROFESSIONI[professione],
    "luogo_madre": lambda luogo: LUOGHI_GENITORI[luogo],
    "titolo_madre": lambda titolo: TITOLI[titolo],
    "prof_madre": lambda professione: PROFESSIONI[professione],
    "voto_scritto_ita": lambda voto: voto_scritto_decode(voto),
    "voto_orale_ita": lambda voto: voto_orale_decode(voto),
    "voto_scritto_mat": lambda voto: voto_scritto_decode(voto),
    "voto_orale_mat": lambda voto: voto_orale_decode(voto),
    # One identical converter per question column, inserted here so that the
    # key order stays: student data, questions, geography, scores.
    **{
        question: (lambda result: convert_question_result(result))
        for question in _QUESTION_COLUMNS
    },
    "regolarità": lambda regular: REGOLARITA[regular],
    "cittadinanza": lambda cittadinanza: CITTADINANZA[cittadinanza],
    # This column is upper-cased before the lookup; sigla_provincia_istat is not.
    "cod_provincia_ISTAT": lambda province_istat: PROVINCE[province_istat.upper()],
    "sigla_provincia_istat": lambda province: PROVINCE[province],
    # Runs of whitespace are collapsed to single spaces before the lookup.
    "Nome_reg": lambda regione: REGIONI[" ".join(regione.split())],
    "Cod_reg": lambda codice: REGIONI[codice],
    "Areageo_3": lambda area: AREA_GEOGRAFICA_3[area],
    "Areageo_4": lambda area: AREA_GEOGRAFICA_4[area],
    "Areageo_5": lambda area: AREA_GEOGRAFICA_5[area],
    "Areageo_5_Istat": lambda area: AREA_GEOGRAFICA_5_ISTAT[area],
    "Pon": lambda pon: pon == "Area_Pon",  # whether the student belongs to the PON area
    "pu_ma_gr": int,
    "pu_ma_no": float,
    "Fattore_correzione_new": float,
    "Cheating": float,
    "PesoClasse": _float_or_nan,
    "PesoScuola": _float_or_nan,
    "PesoTotale_Matematica": _float_or_nan,
    "WLE_MAT": float,
    "WLE_MAT_200": float,
    "WLE_MAT_200_CORR": float,
    "pu_ma_no_corr": float,
    # Parsed through float first so that text such as "12.0" is accepted.
    "n_stud_prev": lambda val: int(float(val)),
    "n_classi_prev": lambda val: int(float(val)),
    "LIVELLI": int,
    "DROPOUT": _parse_dropout,
}

# Lettura del dataset

In [7]:
# Load Data/invalsi_mat_2014.csv (semicolon-separated), converting every
# column listed in `columns_converters` while the file is parsed.
dataset = pd.read_csv(
    filepath_or_buffer="Data/invalsi_mat_2014.csv",
    converters=columns_converters,
    sep=";",
)

# Matrici correlazione tra domande

In [None]:
# Question columns are the converter keys that start with "D" followed by a
# digit (D1, D2, D3_a, ...). The pattern is a raw string so that "\d" reaches
# the regex engine as written instead of relying on an invalid string escape.
questions_columns = [col for col in columns_converters if re.search(r"^D\d", col)]
# Restrict the dataset to the question columns only.
questions_dataset = dataset[questions_columns]

## Standard correlation coefficient (pearson method)

In [None]:
# Pairwise Pearson correlation between the question columns.
questions_correlate_matrix_pearson = questions_dataset.corr(method='pearson')
# Last expression of the cell: displays the matrix styled with a 'coolwarm'
# background gradient.
questions_correlate_matrix_pearson.style.background_gradient(cmap='coolwarm')

Unnamed: 0,D1,D2,D3_a,D3_b,D4_a,D4_b,D4_c,D4_d,D5_a,D5_b,D6,D7_a,D7_b,D8,D9,D10_a,D10_b1,D10_b2,D10_b3,D11_a,D11_b,D12_a,D12_b,D13_a,D13_b,D13_c,D14,D15,D16_a,D16_b,D16_c,D16_d,D17_a,D17_b,D18,D19_a,D19_b,D20,D21,D22,D23_a,D23_b,D23_c,D23_d,D24_a,D24_b,D25,D26_a,D26_b,D26_c,D26_d
D1,1.0,0.159962,0.117898,0.122366,0.110744,0.120688,0.111168,0.090119,0.141095,0.13284,0.092501,0.16226,0.156937,0.114144,0.137984,0.05174,0.072593,0.090781,0.074167,0.100242,0.11005,0.155488,0.145865,0.104897,0.112098,0.148164,0.109666,0.153948,0.059813,0.049402,0.105747,0.041508,0.152072,0.153062,0.134426,0.13477,0.166495,0.163874,0.128623,0.120701,0.094943,0.07036,0.106159,0.092624,0.148165,0.155592,0.100861,0.05854,0.112296,0.043786,0.045914
D2,0.159962,1.0,0.127355,0.115453,0.107185,0.123625,0.116068,0.085145,0.156427,0.144796,0.097521,0.167598,0.165464,0.099342,0.135203,0.063148,0.084418,0.104845,0.067833,0.097236,0.120287,0.159201,0.151826,0.114048,0.12417,0.151979,0.116963,0.141717,0.065317,0.06203,0.113806,0.04756,0.139755,0.141667,0.132132,0.136285,0.155686,0.167342,0.109348,0.113168,0.084681,0.062102,0.107689,0.086031,0.143441,0.155675,0.114016,0.055231,0.111418,0.033378,0.038129
D3_a,0.117898,0.127355,1.0,0.16435,0.093999,0.102824,0.100124,0.075344,0.115609,0.114272,0.100369,0.138519,0.157017,0.088901,0.128263,0.070578,0.063826,0.08364,0.072489,0.089704,0.13326,0.130668,0.135893,0.136484,0.111671,0.140222,0.115144,0.117141,0.067264,0.047743,0.09249,0.043972,0.130433,0.131943,0.123215,0.113853,0.135972,0.144457,0.103196,0.103436,0.073166,0.056038,0.085141,0.072087,0.14267,0.133937,0.123459,0.055894,0.091226,0.034187,0.039141
D3_b,0.122366,0.115453,0.16435,1.0,0.080663,0.08281,0.079639,0.06444,0.102201,0.090864,0.09522,0.142141,0.142324,0.099218,0.105829,0.050737,0.047643,0.076841,0.055321,0.077192,0.097252,0.126303,0.135465,0.076703,0.094614,0.1071,0.107128,0.120877,0.060221,0.037456,0.076204,0.036649,0.118962,0.131351,0.115114,0.107417,0.137407,0.126343,0.097087,0.086762,0.07087,0.046162,0.071725,0.05869,0.120678,0.117574,0.077571,0.038398,0.078307,0.025828,0.031192
D4_a,0.110744,0.107185,0.093999,0.080663,1.0,0.456729,0.54146,0.201024,0.100737,0.10701,0.099606,0.117602,0.135622,0.068639,0.108041,0.044718,0.077009,0.081148,0.076499,0.087598,0.119104,0.124756,0.111296,0.093088,0.099104,0.131657,0.102371,0.103647,0.058078,0.049274,0.10231,0.037122,0.117073,0.145262,0.138158,0.105413,0.161411,0.129432,0.092179,0.101252,0.084268,0.059309,0.092728,0.084023,0.12438,0.134125,0.105965,0.072902,0.110437,0.040425,0.044465
D4_b,0.120688,0.123625,0.102824,0.08281,0.456729,1.0,0.322752,0.464205,0.109857,0.109942,0.114791,0.123361,0.143548,0.067163,0.126853,0.043393,0.090215,0.082088,0.085947,0.0941,0.123554,0.138308,0.126969,0.101312,0.10574,0.146649,0.113778,0.116214,0.072831,0.04864,0.118705,0.042031,0.133976,0.16882,0.148576,0.115849,0.183918,0.147872,0.105762,0.109053,0.084958,0.055979,0.109084,0.095962,0.133955,0.14693,0.109042,0.076773,0.117818,0.042556,0.045142
D4_c,0.111168,0.116068,0.100124,0.079639,0.54146,0.322752,1.0,0.290724,0.112901,0.115076,0.10755,0.131348,0.148456,0.063705,0.108592,0.050076,0.088481,0.079896,0.068339,0.081483,0.127304,0.128313,0.115412,0.105587,0.11546,0.141836,0.113321,0.10543,0.060873,0.06131,0.113643,0.044599,0.116458,0.151262,0.140219,0.107099,0.165851,0.131161,0.08894,0.096646,0.079245,0.055508,0.108046,0.090688,0.12922,0.133364,0.119196,0.068319,0.113168,0.046992,0.047882
D4_d,0.090119,0.085145,0.075344,0.06444,0.201024,0.464205,0.290724,1.0,0.056138,0.060351,0.08633,0.064151,0.079615,0.066743,0.111214,0.015591,0.056401,0.059031,0.07235,0.089018,0.070252,0.099486,0.095921,0.053631,0.047977,0.092677,0.069109,0.090329,0.062125,0.025824,0.074612,0.030113,0.113551,0.129508,0.094959,0.087818,0.136913,0.114976,0.103952,0.096069,0.066441,0.043613,0.0715,0.055009,0.094804,0.112197,0.04829,0.055956,0.087019,0.044523,0.036568
D5_a,0.141095,0.156427,0.115609,0.102201,0.100737,0.109857,0.112901,0.056138,1.0,0.246935,0.088119,0.208602,0.196014,0.102484,0.108142,0.08041,0.099544,0.128741,0.073748,0.092682,0.137126,0.169236,0.165489,0.123806,0.154307,0.168529,0.130108,0.138934,0.05502,0.077723,0.139551,0.055823,0.12615,0.134514,0.135548,0.136257,0.146343,0.157701,0.079157,0.100498,0.082491,0.061871,0.12216,0.087926,0.123548,0.145367,0.152409,0.053531,0.119586,0.01312,0.026196
D5_b,0.13284,0.144796,0.114272,0.090864,0.10701,0.109942,0.115076,0.060351,0.246935,1.0,0.092956,0.171029,0.178149,0.08247,0.105408,0.08044,0.0957,0.106401,0.070168,0.086166,0.141502,0.13564,0.119319,0.13442,0.148389,0.163288,0.128379,0.11889,0.057226,0.070607,0.12223,0.055341,0.123041,0.13566,0.132823,0.114968,0.146091,0.137408,0.088519,0.099215,0.086817,0.07163,0.11238,0.101806,0.13357,0.141657,0.1454,0.071285,0.112515,0.037611,0.047535


## Kendall Tau correlation coefficient

In [None]:
# Pairwise Kendall Tau correlation between the question columns.
# The result is stored under its own name: it used to be assigned to
# `questions_correlate_matrix_pearson`, which mislabelled it and overwrote
# the Pearson matrix.
questions_correlate_matrix_kendall = questions_dataset.corr(method='kendall')
# Last expression of the cell: displays the matrix styled with a 'coolwarm'
# background gradient.
questions_correlate_matrix_kendall.style.background_gradient(cmap='coolwarm')

Unnamed: 0,D1,D2,D3_a,D3_b,D4_a,D4_b,D4_c,D4_d,D5_a,D5_b,D6,D7_a,D7_b,D8,D9,D10_a,D10_b1,D10_b2,D10_b3,D11_a,D11_b,D12_a,D12_b,D13_a,D13_b,D13_c,D14,D15,D16_a,D16_b,D16_c,D16_d,D17_a,D17_b,D18,D19_a,D19_b,D20,D21,D22,D23_a,D23_b,D23_c,D23_d,D24_a,D24_b,D25,D26_a,D26_b,D26_c,D26_d
D1,1.0,0.159962,0.117898,0.122366,0.110744,0.120688,0.111168,0.090119,0.141095,0.13284,0.092501,0.16226,0.156937,0.114144,0.137984,0.05174,0.072593,0.090781,0.074167,0.100242,0.11005,0.155488,0.145865,0.104897,0.112098,0.148164,0.109666,0.153948,0.059813,0.049402,0.105747,0.041508,0.152072,0.153062,0.134426,0.13477,0.166495,0.163874,0.128623,0.120701,0.094943,0.07036,0.106159,0.092624,0.148165,0.155592,0.100861,0.05854,0.112296,0.043786,0.045914
D2,0.159962,1.0,0.127355,0.115453,0.107185,0.123625,0.116068,0.085145,0.156427,0.144796,0.097521,0.167598,0.165464,0.099342,0.135203,0.063148,0.084418,0.104845,0.067833,0.097236,0.120287,0.159201,0.151826,0.114048,0.12417,0.151979,0.116963,0.141717,0.065317,0.06203,0.113806,0.04756,0.139755,0.141667,0.132132,0.136285,0.155686,0.167342,0.109348,0.113168,0.084681,0.062102,0.107689,0.086031,0.143441,0.155675,0.114016,0.055231,0.111418,0.033378,0.038129
D3_a,0.117898,0.127355,1.0,0.16435,0.093999,0.102824,0.100124,0.075344,0.115609,0.114272,0.100369,0.138519,0.157017,0.088901,0.128263,0.070578,0.063826,0.08364,0.072489,0.089704,0.13326,0.130668,0.135893,0.136484,0.111671,0.140222,0.115144,0.117141,0.067264,0.047743,0.09249,0.043972,0.130433,0.131943,0.123215,0.113853,0.135972,0.144457,0.103196,0.103436,0.073166,0.056038,0.085141,0.072087,0.14267,0.133937,0.123459,0.055894,0.091226,0.034187,0.039141
D3_b,0.122366,0.115453,0.16435,1.0,0.080663,0.08281,0.079639,0.06444,0.102201,0.090864,0.09522,0.142141,0.142324,0.099218,0.105829,0.050737,0.047643,0.076841,0.055321,0.077192,0.097252,0.126303,0.135465,0.076703,0.094614,0.1071,0.107128,0.120877,0.060221,0.037456,0.076204,0.036649,0.118962,0.131351,0.115114,0.107417,0.137407,0.126343,0.097087,0.086762,0.07087,0.046162,0.071725,0.05869,0.120678,0.117574,0.077571,0.038398,0.078307,0.025828,0.031192
D4_a,0.110744,0.107185,0.093999,0.080663,1.0,0.456729,0.54146,0.201024,0.100737,0.10701,0.099606,0.117602,0.135622,0.068639,0.108041,0.044718,0.077009,0.081148,0.076499,0.087598,0.119104,0.124756,0.111296,0.093088,0.099104,0.131657,0.102371,0.103647,0.058078,0.049274,0.10231,0.037122,0.117073,0.145262,0.138158,0.105413,0.161411,0.129432,0.092179,0.101252,0.084268,0.059309,0.092728,0.084023,0.12438,0.134125,0.105965,0.072902,0.110437,0.040425,0.044465
D4_b,0.120688,0.123625,0.102824,0.08281,0.456729,1.0,0.322752,0.464205,0.109857,0.109942,0.114791,0.123361,0.143548,0.067163,0.126853,0.043393,0.090215,0.082088,0.085947,0.0941,0.123554,0.138308,0.126969,0.101312,0.10574,0.146649,0.113778,0.116214,0.072831,0.04864,0.118705,0.042031,0.133976,0.16882,0.148576,0.115849,0.183918,0.147872,0.105762,0.109053,0.084958,0.055979,0.109084,0.095962,0.133955,0.14693,0.109042,0.076773,0.117818,0.042556,0.045142
D4_c,0.111168,0.116068,0.100124,0.079639,0.54146,0.322752,1.0,0.290724,0.112901,0.115076,0.10755,0.131348,0.148456,0.063705,0.108592,0.050076,0.088481,0.079896,0.068339,0.081483,0.127304,0.128313,0.115412,0.105587,0.11546,0.141836,0.113321,0.10543,0.060873,0.06131,0.113643,0.044599,0.116458,0.151262,0.140219,0.107099,0.165851,0.131161,0.08894,0.096646,0.079245,0.055508,0.108046,0.090688,0.12922,0.133364,0.119196,0.068319,0.113168,0.046992,0.047882
D4_d,0.090119,0.085145,0.075344,0.06444,0.201024,0.464205,0.290724,1.0,0.056138,0.060351,0.08633,0.064151,0.079615,0.066743,0.111214,0.015591,0.056401,0.059031,0.07235,0.089018,0.070252,0.099486,0.095921,0.053631,0.047977,0.092677,0.069109,0.090329,0.062125,0.025824,0.074612,0.030113,0.113551,0.129508,0.094959,0.087818,0.136913,0.114976,0.103952,0.096069,0.066441,0.043613,0.0715,0.055009,0.094804,0.112197,0.04829,0.055956,0.087019,0.044523,0.036568
D5_a,0.141095,0.156427,0.115609,0.102201,0.100737,0.109857,0.112901,0.056138,1.0,0.246935,0.088119,0.208602,0.196014,0.102484,0.108142,0.08041,0.099544,0.128741,0.073748,0.092682,0.137126,0.169236,0.165489,0.123806,0.154307,0.168529,0.130108,0.138934,0.05502,0.077723,0.139551,0.055823,0.12615,0.134514,0.135548,0.136257,0.146343,0.157701,0.079157,0.100498,0.082491,0.061871,0.12216,0.087926,0.123548,0.145367,0.152409,0.053531,0.119586,0.01312,0.026196
D5_b,0.13284,0.144796,0.114272,0.090864,0.10701,0.109942,0.115076,0.060351,0.246935,1.0,0.092956,0.171029,0.178149,0.08247,0.105408,0.08044,0.0957,0.106401,0.070168,0.086166,0.141502,0.13564,0.119319,0.13442,0.148389,0.163288,0.128379,0.11889,0.057226,0.070607,0.12223,0.055341,0.123041,0.13566,0.132823,0.114968,0.146091,0.137408,0.088519,0.099215,0.086817,0.07163,0.11238,0.101806,0.13357,0.141657,0.1454,0.071285,0.112515,0.037611,0.047535


## Spearman rank correlation

In [None]:
# Pairwise Spearman rank correlation between the question columns.
# The result is stored under its own name: it used to be assigned to
# `questions_correlate_matrix_pearson`, which mislabelled it and overwrote
# the Pearson matrix.
questions_correlate_matrix_spearman = questions_dataset.corr(method='spearman')
# Last expression of the cell: displays the matrix styled with a 'coolwarm'
# background gradient.
questions_correlate_matrix_spearman.style.background_gradient(cmap='coolwarm')

Unnamed: 0,D1,D2,D3_a,D3_b,D4_a,D4_b,D4_c,D4_d,D5_a,D5_b,D6,D7_a,D7_b,D8,D9,D10_a,D10_b1,D10_b2,D10_b3,D11_a,D11_b,D12_a,D12_b,D13_a,D13_b,D13_c,D14,D15,D16_a,D16_b,D16_c,D16_d,D17_a,D17_b,D18,D19_a,D19_b,D20,D21,D22,D23_a,D23_b,D23_c,D23_d,D24_a,D24_b,D25,D26_a,D26_b,D26_c,D26_d
D1,1.0,0.159962,0.117898,0.122366,0.110744,0.120688,0.111168,0.090119,0.141095,0.13284,0.092501,0.16226,0.156937,0.114144,0.137984,0.05174,0.072593,0.090781,0.074167,0.100242,0.11005,0.155488,0.145865,0.104897,0.112098,0.148164,0.109666,0.153948,0.059813,0.049402,0.105747,0.041508,0.152072,0.153062,0.134426,0.13477,0.166495,0.163874,0.128623,0.120701,0.094943,0.07036,0.106159,0.092624,0.148165,0.155592,0.100861,0.05854,0.112296,0.043786,0.045914
D2,0.159962,1.0,0.127355,0.115453,0.107185,0.123625,0.116068,0.085145,0.156427,0.144796,0.097521,0.167598,0.165464,0.099342,0.135203,0.063148,0.084418,0.104845,0.067833,0.097236,0.120287,0.159201,0.151826,0.114048,0.12417,0.151979,0.116963,0.141717,0.065317,0.06203,0.113806,0.04756,0.139755,0.141667,0.132132,0.136285,0.155686,0.167342,0.109348,0.113168,0.084681,0.062102,0.107689,0.086031,0.143441,0.155675,0.114016,0.055231,0.111418,0.033378,0.038129
D3_a,0.117898,0.127355,1.0,0.16435,0.093999,0.102824,0.100124,0.075344,0.115609,0.114272,0.100369,0.138519,0.157017,0.088901,0.128263,0.070578,0.063826,0.08364,0.072489,0.089704,0.13326,0.130668,0.135893,0.136484,0.111671,0.140222,0.115144,0.117141,0.067264,0.047743,0.09249,0.043972,0.130433,0.131943,0.123215,0.113853,0.135972,0.144457,0.103196,0.103436,0.073166,0.056038,0.085141,0.072087,0.14267,0.133937,0.123459,0.055894,0.091226,0.034187,0.039141
D3_b,0.122366,0.115453,0.16435,1.0,0.080663,0.08281,0.079639,0.06444,0.102201,0.090864,0.09522,0.142141,0.142324,0.099218,0.105829,0.050737,0.047643,0.076841,0.055321,0.077192,0.097252,0.126303,0.135465,0.076703,0.094614,0.1071,0.107128,0.120877,0.060221,0.037456,0.076204,0.036649,0.118962,0.131351,0.115114,0.107417,0.137407,0.126343,0.097087,0.086762,0.07087,0.046162,0.071725,0.05869,0.120678,0.117574,0.077571,0.038398,0.078307,0.025828,0.031192
D4_a,0.110744,0.107185,0.093999,0.080663,1.0,0.456729,0.54146,0.201024,0.100737,0.10701,0.099606,0.117602,0.135622,0.068639,0.108041,0.044718,0.077009,0.081148,0.076499,0.087598,0.119104,0.124756,0.111296,0.093088,0.099104,0.131657,0.102371,0.103647,0.058078,0.049274,0.10231,0.037122,0.117073,0.145262,0.138158,0.105413,0.161411,0.129432,0.092179,0.101252,0.084268,0.059309,0.092728,0.084023,0.12438,0.134125,0.105965,0.072902,0.110437,0.040425,0.044465
D4_b,0.120688,0.123625,0.102824,0.08281,0.456729,1.0,0.322752,0.464205,0.109857,0.109942,0.114791,0.123361,0.143548,0.067163,0.126853,0.043393,0.090215,0.082088,0.085947,0.0941,0.123554,0.138308,0.126969,0.101312,0.10574,0.146649,0.113778,0.116214,0.072831,0.04864,0.118705,0.042031,0.133976,0.16882,0.148576,0.115849,0.183918,0.147872,0.105762,0.109053,0.084958,0.055979,0.109084,0.095962,0.133955,0.14693,0.109042,0.076773,0.117818,0.042556,0.045142
D4_c,0.111168,0.116068,0.100124,0.079639,0.54146,0.322752,1.0,0.290724,0.112901,0.115076,0.10755,0.131348,0.148456,0.063705,0.108592,0.050076,0.088481,0.079896,0.068339,0.081483,0.127304,0.128313,0.115412,0.105587,0.11546,0.141836,0.113321,0.10543,0.060873,0.06131,0.113643,0.044599,0.116458,0.151262,0.140219,0.107099,0.165851,0.131161,0.08894,0.096646,0.079245,0.055508,0.108046,0.090688,0.12922,0.133364,0.119196,0.068319,0.113168,0.046992,0.047882
D4_d,0.090119,0.085145,0.075344,0.06444,0.201024,0.464205,0.290724,1.0,0.056138,0.060351,0.08633,0.064151,0.079615,0.066743,0.111214,0.015591,0.056401,0.059031,0.07235,0.089018,0.070252,0.099486,0.095921,0.053631,0.047977,0.092677,0.069109,0.090329,0.062125,0.025824,0.074612,0.030113,0.113551,0.129508,0.094959,0.087818,0.136913,0.114976,0.103952,0.096069,0.066441,0.043613,0.0715,0.055009,0.094804,0.112197,0.04829,0.055956,0.087019,0.044523,0.036568
D5_a,0.141095,0.156427,0.115609,0.102201,0.100737,0.109857,0.112901,0.056138,1.0,0.246935,0.088119,0.208602,0.196014,0.102484,0.108142,0.08041,0.099544,0.128741,0.073748,0.092682,0.137126,0.169236,0.165489,0.123806,0.154307,0.168529,0.130108,0.138934,0.05502,0.077723,0.139551,0.055823,0.12615,0.134514,0.135548,0.136257,0.146343,0.157701,0.079157,0.100498,0.082491,0.061871,0.12216,0.087926,0.123548,0.145367,0.152409,0.053531,0.119586,0.01312,0.026196
D5_b,0.13284,0.144796,0.114272,0.090864,0.10701,0.109942,0.115076,0.060351,0.246935,1.0,0.092956,0.171029,0.178149,0.08247,0.105408,0.08044,0.0957,0.106401,0.070168,0.086166,0.141502,0.13564,0.119319,0.13442,0.148389,0.163288,0.128379,0.11889,0.057226,0.070607,0.12223,0.055341,0.123041,0.13566,0.132823,0.114968,0.146091,0.137408,0.088519,0.099215,0.086817,0.07163,0.11238,0.101806,0.13357,0.141657,0.1454,0.071285,0.112515,0.037611,0.047535


Le matrici di correlazione non evidenziano significative dipendenze lineari tra le domande: i valori più elevati appaiono in corrispondenza di domande consecutive, il più delle volte parti della stessa domanda (e.g. D7, D3 e D4). Conseguentemente, non è possibile realizzare alcuna riduzione di dimensionalità.

## Standard correlation coefficient (pearson method) in Areageo_3 == 2 (Nord)

In [12]:
# Question columns are the converter keys that start with "D" followed by a
# digit. The pattern is a raw string so that "\d" reaches the regex engine as
# written instead of relying on an invalid string escape.
questions_columns = [col for col in columns_converters if re.search(r"^D\d", col)]
# Keep only the rows whose Areageo_3 code is 2 ('Nord' in AREA_GEOGRAFICA_3) ...
questions_dataset = dataset.query('Areageo_3 == 2')
# ... and, of those rows, only the question columns.
questions_dataset = questions_dataset[questions_columns]

In [13]:
# Pairwise Pearson correlation between the question columns of the current
# `questions_dataset`.
questions_correlate_matrix_pearson = questions_dataset.corr(method='pearson')
# Last expression of the cell: displays the matrix styled with a 'coolwarm'
# background gradient.
questions_correlate_matrix_pearson.style.background_gradient(cmap='coolwarm')

Unnamed: 0,D1,D2,D3_a,D3_b,D4_a,D4_b,D4_c,D4_d,D5_a,D5_b,D6,D7_a,D7_b,D8,D9,D10_a,D10_b1,D10_b2,D10_b3,D11_a,D11_b,D12_a,D12_b,D13_a,D13_b,D13_c,D14,D15,D16_a,D16_b,D16_c,D16_d,D17_a,D17_b,D18,D19_a,D19_b,D20,D21,D22,D23_a,D23_b,D23_c,D23_d,D24_a,D24_b,D25,D26_a,D26_b,D26_c,D26_d
D1,1.0,0.139418,0.100961,0.101778,0.115849,0.130827,0.113153,0.103342,0.127963,0.119773,0.109024,0.162314,0.157559,0.082701,0.119667,0.043125,0.076436,0.084519,0.075713,0.083302,0.101555,0.15269,0.143442,0.098595,0.110269,0.141875,0.117757,0.141299,0.06282,0.043654,0.109351,0.041699,0.143732,0.177865,0.137098,0.122782,0.181798,0.150857,0.113332,0.100234,0.098323,0.072677,0.108491,0.099258,0.14478,0.143512,0.090086,0.059718,0.118191,0.03983,0.03715
D2,0.139418,1.0,0.11742,0.097379,0.111939,0.139734,0.122739,0.103207,0.142251,0.132192,0.112526,0.1552,0.160224,0.068293,0.126487,0.050455,0.089791,0.095683,0.073601,0.083315,0.116104,0.160641,0.148803,0.103511,0.125213,0.15106,0.123236,0.12654,0.075532,0.051909,0.115418,0.04705,0.132628,0.160515,0.135876,0.124126,0.171696,0.158067,0.096875,0.097978,0.082599,0.061623,0.111175,0.088515,0.141977,0.152212,0.103693,0.054899,0.114727,0.024192,0.023904
D3_a,0.100961,0.11742,1.0,0.121466,0.094422,0.111686,0.100112,0.080986,0.111012,0.103205,0.108894,0.127267,0.145019,0.059111,0.116096,0.04948,0.065581,0.080055,0.069308,0.074917,0.109772,0.132082,0.134475,0.103136,0.112537,0.134013,0.113034,0.098825,0.075322,0.040318,0.098128,0.040677,0.114579,0.144196,0.121071,0.097119,0.145056,0.139071,0.092371,0.086031,0.063684,0.047652,0.082505,0.071686,0.118423,0.124403,0.097918,0.051261,0.09263,0.024124,0.022813
D3_b,0.101778,0.097379,0.121466,1.0,0.079477,0.090975,0.078076,0.071143,0.091342,0.086242,0.097216,0.113045,0.124287,0.066938,0.083645,0.037498,0.053094,0.067068,0.052204,0.059247,0.086034,0.111898,0.096104,0.067577,0.08274,0.100552,0.102619,0.101497,0.071368,0.034829,0.078043,0.034729,0.102038,0.129908,0.103777,0.08522,0.123916,0.109568,0.081963,0.067471,0.065572,0.039903,0.067976,0.05776,0.100184,0.100107,0.063142,0.034018,0.071468,0.022738,0.022013
D4_a,0.115849,0.111939,0.094422,0.079477,1.0,0.454178,0.558265,0.213346,0.110073,0.104036,0.113888,0.124553,0.136338,0.06322,0.117067,0.041507,0.079866,0.083419,0.083294,0.090928,0.116076,0.136123,0.125518,0.090218,0.104358,0.138379,0.105585,0.110193,0.061634,0.043806,0.102342,0.033662,0.126123,0.165413,0.149023,0.112725,0.184056,0.144816,0.098697,0.10219,0.075169,0.05922,0.087341,0.077845,0.140623,0.148416,0.095214,0.06546,0.116134,0.028369,0.025755
D4_b,0.130827,0.139734,0.111686,0.090975,0.454178,1.0,0.34484,0.477495,0.127791,0.112596,0.137271,0.139584,0.152804,0.064741,0.147409,0.038778,0.094073,0.082464,0.095851,0.102608,0.125338,0.160537,0.148837,0.101641,0.117669,0.163371,0.122055,0.132428,0.088133,0.040673,0.123869,0.045705,0.152788,0.197677,0.160963,0.131132,0.213997,0.169213,0.117712,0.112769,0.081631,0.060139,0.10758,0.094358,0.156917,0.171826,0.105256,0.07574,0.131779,0.03107,0.032377
D4_c,0.113153,0.122739,0.100112,0.078076,0.558265,0.34484,1.0,0.291795,0.116422,0.109598,0.114693,0.135863,0.145619,0.059073,0.125826,0.038099,0.089714,0.07753,0.074185,0.08571,0.11768,0.137361,0.128869,0.096379,0.112624,0.148266,0.110037,0.113788,0.067889,0.049263,0.107494,0.040364,0.130017,0.169095,0.13873,0.109528,0.181156,0.146771,0.097556,0.094512,0.073944,0.052661,0.100232,0.084031,0.142863,0.148719,0.10211,0.059702,0.115453,0.032446,0.032591
D4_d,0.103342,0.103207,0.080986,0.071143,0.213346,0.477495,0.291795,1.0,0.077953,0.073372,0.114824,0.095142,0.107258,0.065066,0.127117,0.018294,0.070759,0.067646,0.088672,0.094471,0.080704,0.126349,0.122391,0.058109,0.07588,0.108714,0.096678,0.10525,0.072264,0.029944,0.095309,0.031462,0.132189,0.171022,0.122803,0.100382,0.180073,0.13753,0.11397,0.099498,0.073302,0.05237,0.083568,0.067536,0.115536,0.133262,0.062597,0.06395,0.106564,0.039404,0.035606
D5_a,0.127963,0.142251,0.111012,0.091342,0.110073,0.127791,0.116422,0.077953,1.0,0.217491,0.09246,0.191704,0.185048,0.088242,0.110352,0.060827,0.101783,0.124374,0.083171,0.085763,0.13074,0.176372,0.168371,0.116633,0.141662,0.17027,0.123422,0.134191,0.063187,0.063847,0.137228,0.047657,0.130835,0.145088,0.134157,0.13599,0.157278,0.161833,0.073483,0.094341,0.076925,0.059926,0.124309,0.077766,0.129171,0.150972,0.136849,0.049488,0.126983,-0.005503,0.010104
D5_b,0.119773,0.132192,0.103205,0.086242,0.104036,0.112596,0.109598,0.073372,0.217491,1.0,0.095013,0.154999,0.161125,0.061574,0.102083,0.059922,0.08421,0.091944,0.065594,0.078348,0.124796,0.134008,0.117346,0.115611,0.132322,0.153661,0.112911,0.108496,0.060586,0.051111,0.11086,0.044401,0.115054,0.135091,0.119648,0.106628,0.148562,0.131751,0.081105,0.085656,0.079536,0.064707,0.101378,0.088539,0.128644,0.133805,0.111335,0.057272,0.106671,0.02085,0.02725


# ESEGUI QUEST'ULTIMA CELLA PER SMONTARE GDRIVE

In [None]:
# Flush and unmount the Drive mounted at the start of the notebook, then
# print a confirmation message.
drive.flush_and_unmount()
print('All changes made in this colab session should now be visible in Drive.')

All changes made in this colab session should now be visible in Drive.
