<a href="https://colab.research.google.com/github/Gus-1003/ENEM_2021-Data_analysis/blob/main/Sistema_Enem.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Bibliotecas

In [1]:
# Biblioteca Pandas: usada para manipulação de dados em formato tabular, facilitando a análise e processamento desses dados;
import pandas as pd

# NumPy oferece operações matemáticas poderosas para arrays e matrizes;
import numpy as np

# A biblioteca Matplotlib é uma das mais usadas para visualização de dados, oferecendo diversas ferramentas e opções de plotagem;
import matplotlib.pyplot as plt

# A biblioteca Seaborn é construída em cima do Matplotlib, adicionando ainda mais recursos para criar gráficos mais sofisticados e visualmente atraentes.
import seaborn as sns

In [2]:
# importe da biblioteca drive do Google Colab, que é usada para fazer a conexão com o Google Drive.
from google.colab import drive

# o método mount() para montar o Google Drive na sessão atual do Colab. Isso significa que a sessão do Colab agora terá acesso aos arquivos armazenados no Google Drive.
drive.mount('/content/drive/')

''' comando %cd é usado para mudar o diretório atual para a pasta MyDrive dentro do Google Drive, para que os arquivos dentro dessa
pasta possam ser facilmente acessados pelo código em Python que será executado na sessão do Colab.'''

%cd /content/drive/MyDrive/

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).
/content/drive/MyDrive


# Leitura dataset:

In [3]:
"""Para carregar os dados em Python, você pode usar a função read_csv() do pandas para carregar dados de um arquivo CSV,
ou outras funções semelhantes para carregar dados de outros tipos de arquivos:"""

# Read the microdata CSV from Google Drive: Latin-1 text, ';' as the field
# separator and '"' as the quoting character.
dados = pd.read_csv(
    '/content/drive/MyDrive/Pesquisa_Enem2021/Base de Dados/MICRODADOS_ENEM_2021.csv',
    encoding='latin-1',
    delimiter=";",
    quotechar='"',
)

# Pré-processamento:

## Limpeza dos dados nulos:

In [4]:
# Remove sparsely populated COLUMNS (rows are not dropped here): every column
# holding more than LIMITE_NULOS missing values is discarded, and the reduced
# frame is stored in `dados_sem_nulos`. `dados` itself is left untouched.
LIMITE_NULOS = 1_500_000

# Number of NaN values in each column.
nulos = dados.isnull().sum()

# Labels of the columns whose NaN count exceeds the threshold.
colunas_remover = nulos[nulos > LIMITE_NULOS].index

# New frame without the selected columns.
dados_sem_nulos = dados.drop(columns=colunas_remover)

dados_sem_nulos.shape

(3389832, 68)

## Filtragem dos atributos:

In [5]:
# Work only with the four *_CN columns used below: presence flag, test code,
# score and the string of marked answers.
colunas_cn = ['TP_PRESENCA_CN', 'CO_PROVA_CN', 'NU_NOTA_CN', 'TX_RESPOSTAS_CN']
df = dados_sem_nulos[colunas_cn]

## Garantia da Presença:

In [6]:
# Discard candidates who did not validly sit the test. In TP_PRESENCA_CN the
# value 0 marks absent candidates and 2 marks eliminated ones; their index
# labels are collected with one mask and handed to drop(index=...).
ausente_ou_eliminado = df['TP_PRESENCA_CN'].isin([0, 2])
df = df.drop(index=df.index[ausente_ou_eliminado])

df.shape

(2245844, 4)

## Remoção da coluna de presença:

In [7]:
# The presence flag was only needed for the filter above; drop the column.
df = df.drop(columns='TP_PRESENCA_CN')
df.head()

Unnamed: 0,CO_PROVA_CN,NU_NOTA_CN,TX_RESPOSTAS_CN
1,912.0,505.9,DCBCCBDBCCCCCCDDCDCCCACCABCCECCCCAADCCCBBCADE
3,911.0,580.7,CCABBDAEDBEBEDDCAAABBDAADBDCACACEDAABCEABAEBC
4,912.0,497.7,BABCDCCBEBCD.DEDCABCBDEDAABEDADBDBCDECCAEDBCB
8,910.0,487.4,BBBDCCBDCCEDBBBCACEBCDADEEADCDEBEDDEBBEBEEECB
9,909.0,507.6,BAEDEBDECCEAEDDCBDEDAEECEEEDBDECBDCECBCCDABED


## Reset index:

In [8]:
# Renumber the rows 0..n-1 after the row removals above; drop=True discards
# the old index instead of keeping it as a column.
df = df.reset_index(drop=True)

# Processamento:

## Separando as questões:

In [9]:
# Collect every candidate's answer string (one string per row of
# TX_RESPOSTAS_CN) into a plain Python list, in row order.
# Series.tolist() does this directly; no index counter or manual append is needed.
questoes = df['TX_RESPOSTAS_CN'].tolist()

## Listando as respostas:

In [10]:
# Transpose the answer strings: respostas[i] lists, for every candidate, the
# character found at position i of that candidate's answer string.
respostas = [
    [marcacoes[posicao] for marcacoes in questoes]
    for posicao in range(45)
]

## Renomeando as questões:

In [11]:
# Add one column per question (q1..q45) to the DataFrame, filled with the
# matching per-position answer list built earlier.
for numero in range(1, 46):
    df.loc[:, f'q{numero}'] = respostas[numero - 1]

# Pré-processamento 2:

## Removendo a coluna de respostas concatenadas (TX_RESPOSTAS_CN):

In [12]:
# The concatenated answer string has been split into q1..q45, so the
# original column is no longer needed.
df = df.drop(columns='TX_RESPOSTAS_CN')
df

Unnamed: 0,CO_PROVA_CN,NU_NOTA_CN,q1,q2,q3,q4,q5,q6,q7,q8,...,q36,q37,q38,q39,q40,q41,q42,q43,q44,q45
0,912.0,505.9,D,C,B,C,C,B,D,B,...,D,C,C,C,B,B,C,A,D,E
1,911.0,580.7,C,C,A,B,B,D,A,E,...,A,B,C,E,A,B,A,E,B,C
2,912.0,497.7,B,A,B,C,D,C,C,B,...,D,E,C,C,A,E,D,B,C,B
3,910.0,487.4,B,B,B,D,C,C,B,D,...,E,B,B,E,B,E,E,E,C,B
4,909.0,507.6,B,A,E,D,E,B,D,E,...,E,C,B,C,C,D,A,B,E,D
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2245839,1048.0,506.0,A,D,B,D,A,D,A,C,...,E,D,B,A,A,D,C,B,E,D
2245840,1048.0,435.6,C,B,A,C,A,C,C,D,...,C,A,D,C,A,C,C,D,C,D
2245841,1045.0,576.9,B,E,C,E,E,E,E,D,...,B,A,A,C,C,C,B,E,E,A
2245842,1045.0,449.9,D,A,A,D,C,E,A,C,...,C,A,E,D,A,C,D,E,E,D


## Apagando outros tipos de prova:

In [13]:
# Remove every row whose test code is greater than 912, then count how many
# rows remain for each code.
codigo_acima_de_912 = df['CO_PROVA_CN'] > 912
df = df.drop(index=df.index[codigo_acima_de_912])
df['CO_PROVA_CN'].value_counts()

910.0    540520
911.0    534778
912.0    534535
909.0    533925
Name: CO_PROVA_CN, dtype: int64

## Reset index:

In [14]:
# Renumber the rows 0..n-1 after the filtering above, storing the result
# under a new name so `df` keeps its current index.
dfResposta = df.reset_index(drop=True)
# Render the resulting frame as the cell output.
display(dfResposta)

Unnamed: 0,CO_PROVA_CN,NU_NOTA_CN,q1,q2,q3,q4,q5,q6,q7,q8,...,q36,q37,q38,q39,q40,q41,q42,q43,q44,q45
0,912.0,505.9,D,C,B,C,C,B,D,B,...,D,C,C,C,B,B,C,A,D,E
1,911.0,580.7,C,C,A,B,B,D,A,E,...,A,B,C,E,A,B,A,E,B,C
2,912.0,497.7,B,A,B,C,D,C,C,B,...,D,E,C,C,A,E,D,B,C,B
3,910.0,487.4,B,B,B,D,C,C,B,D,...,E,B,B,E,B,E,E,E,C,B
4,909.0,507.6,B,A,E,D,E,B,D,E,...,E,C,B,C,C,D,A,B,E,D
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2143753,911.0,387.9,D,E,B,D,A,C,E,B,...,B,C,D,D,A,C,E,E,D,B
2143754,910.0,452.4,B,A,C,E,C,D,A,E,...,E,B,A,D,C,E,B,D,E,A
2143755,912.0,407.2,C,A,D,D,D,D,D,E,...,E,D,D,A,D,B,E,D,C,D
2143756,911.0,540.8,C,C,C,C,A,B,A,D,...,A,C,E,A,D,A,E,C,B,B


## Separando os tipos de prova:

In [15]:
# Split the answers by test colour. Codes 909, 910, 911 and 912 are bound, in
# that order, to the blue, yellow, grey and pink frames; each frame gets a
# fresh 0..n-1 index.
enemAzul, enemAmarela, enemCinza, enemRosa = (
    dfResposta.loc[dfResposta['CO_PROVA_CN'].eq(codigo)].reset_index(drop=True)
    for codigo in (909, 910, 911, 912)
)

## Removendo o código da prova:

In [16]:
# Every row of the yellow frame carries the same test code, so the column
# adds no information; drop it and show the result.
enemAmarela = enemAmarela.drop(columns='CO_PROVA_CN')
enemAmarela

Unnamed: 0,NU_NOTA_CN,q1,q2,q3,q4,q5,q6,q7,q8,q9,...,q36,q37,q38,q39,q40,q41,q42,q43,q44,q45
0,487.4,B,B,B,D,C,C,B,D,C,...,E,B,B,E,B,E,E,E,C,B
1,378.5,A,C,E,E,D,B,C,C,B,...,A,E,A,C,B,D,C,E,B,D
2,516.3,A,A,E,C,C,D,A,B,C,...,E,C,C,D,B,B,E,E,D,E
3,595.2,A,D,D,C,C,A,C,A,C,...,A,E,C,B,C,E,D,A,C,E
4,386.9,B,B,E,C,E,C,D,A,D,...,B,D,C,E,D,D,A,D,E,D
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
540515,399.6,C,A,C,C,E,E,B,D,E,...,C,E,C,A,C,B,C,A,A,C
540516,389.3,C,C,E,D,E,C,B,A,B,...,E,C,D,B,E,B,B,D,A,C
540517,563.2,A,B,E,D,C,C,E,B,D,...,A,B,B,A,E,B,C,D,A,E
540518,452.4,B,A,C,E,C,D,A,E,B,...,E,B,A,D,C,E,B,D,E,A


## Renomeando a coluna "Nota":

In [17]:
# Rename the score column to 'nota'. The yellow frame's q1..q45 columns are
# left in their existing order (no reordering is applied to this colour).
enemAmarela = enemAmarela.rename(columns={'NU_NOTA_CN': 'nota'})

# Ordenando as questões:

## Azul:

### Organizando:

In [18]:
# Column order for the blue test: the score first, then the question columns
# rearranged. Entry k of `ordem_azul` is the blue-test question number placed
# at position k + 1.
ordem_azul = [
    42, 43, 44, 45, 37, 38, 39, 40, 41, 31,
    32, 33, 16, 17, 36, 34, 35, 26, 27, 28,
    9, 10, 11, 29, 30, 18, 19, 20, 13, 14,
    15, 21, 22, 23, 24, 25, 12, 4, 5, 6,
    7, 8, 1, 2, 3,
]
enemAzul = enemAzul.loc[:, ['NU_NOTA_CN'] + [f'q{numero}' for numero in ordem_azul]]

enemAzul.head()  # columns now in the corrected order

Unnamed: 0,NU_NOTA_CN,q42,q43,q44,q45,q37,q38,q39,q40,q41,...,q25,q12,q4,q5,q6,q7,q8,q1,q2,q3
0,507.6,A,B,E,D,C,B,C,C,D,...,E,A,D,E,B,D,E,B,A,E
1,650.9,A,C,A,D,C,D,B,C,D,...,C,D,C,A,B,E,C,E,C,E
2,388.1,C,B,C,E,A,C,C,D,C,...,D,E,C,D,D,A,C,D,C,A
3,521.0,A,B,C,D,C,C,B,B,D,...,C,D,B,D,C,D,C,B,A,C
4,531.9,D,D,D,B,E,C,D,C,C,...,A,D,D,A,E,E,A,E,B,E


### Renomeando:

In [19]:
# Relabel the reordered blue-test columns: the column holding original
# question `antiga` becomes q<position>, and the score column becomes 'nota'.
ordem_original_azul = [
    42, 43, 44, 45, 37, 38, 39, 40, 41, 31,
    32, 33, 16, 17, 36, 34, 35, 26, 27, 28,
    9, 10, 11, 29, 30, 18, 19, 20, 13, 14,
    15, 21, 22, 23, 24, 25, 12, 4, 5, 6,
    7, 8, 1, 2, 3,
]
novos_nomes_azul = {
    f'q{antiga}': f'q{nova}'
    for nova, antiga in enumerate(ordem_original_azul, start=1)
}
novos_nomes_azul['NU_NOTA_CN'] = 'nota'

enemAzul = enemAzul.rename(columns=novos_nomes_azul).reset_index(drop=True)

enemAzul.head()  # columns renamed

Unnamed: 0,nota,q1,q2,q3,q4,q5,q6,q7,q8,q9,...,q36,q37,q38,q39,q40,q41,q42,q43,q44,q45
0,507.6,A,B,E,D,C,B,C,C,D,...,E,A,D,E,B,D,E,B,A,E
1,650.9,A,C,A,D,C,D,B,C,D,...,C,D,C,A,B,E,C,E,C,E
2,388.1,C,B,C,E,A,C,C,D,C,...,D,E,C,D,D,A,C,D,C,A
3,521.0,A,B,C,D,C,C,B,B,D,...,C,D,B,D,C,D,C,B,A,C
4,531.9,D,D,D,B,E,C,D,C,C,...,A,D,D,A,E,E,A,E,B,E


## Cinza:

### Organizando:

In [20]:
# Grey test: the score first, then the question columns rearranged. Entry k
# of `ordem_cinza` is the grey-test question number placed at position k + 1.
ordem_cinza = [
    36, 37, 38, 39, 1, 2, 3, 4, 5, 33,
    34, 35, 19, 20, 32, 30, 31, 21, 22, 23,
    24, 25, 26, 14, 15, 27, 28, 29, 16, 17,
    18, 41, 42, 43, 44, 45, 40, 6, 7, 8,
    9, 10, 11, 12, 13,
]
enemCinza = enemCinza.loc[:, ['NU_NOTA_CN'] + [f'q{numero}' for numero in ordem_cinza]]

enemCinza.head()

Unnamed: 0,NU_NOTA_CN,q36,q37,q38,q39,q1,q2,q3,q4,q5,...,q45,q40,q6,q7,q8,q9,q10,q11,q12,q13
0,580.7,A,B,C,E,C,C,A,B,B,...,C,A,D,A,E,D,B,E,B,E
1,462.2,B,B,C,B,C,D,B,A,D,...,B,D,C,B,A,B,D,B,B,E
2,487.9,A,D,E,A,C,D,C,D,D,...,A,A,A,E,B,D,E,B,A,B
3,447.4,B,C,E,D,E,A,C,D,C,...,B,B,B,D,C,E,D,A,E,E
4,548.1,C,D,E,A,C,C,A,E,C,...,E,D,C,A,D,E,A,A,D,D


### Renomeando:

In [21]:
# Relabel the reordered grey-test columns: the column holding original
# question `antiga` becomes q<position>, and the score column becomes 'nota'.
ordem_original_cinza = [
    36, 37, 38, 39, 1, 2, 3, 4, 5, 33,
    34, 35, 19, 20, 32, 30, 31, 21, 22, 23,
    24, 25, 26, 14, 15, 27, 28, 29, 16, 17,
    18, 41, 42, 43, 44, 45, 40, 6, 7, 8,
    9, 10, 11, 12, 13,
]
novos_nomes_cinza = {
    f'q{antiga}': f'q{nova}'
    for nova, antiga in enumerate(ordem_original_cinza, start=1)
}
novos_nomes_cinza['NU_NOTA_CN'] = 'nota'

enemCinza = enemCinza.rename(columns=novos_nomes_cinza).reset_index(drop=True)

enemCinza.head()

Unnamed: 0,nota,q1,q2,q3,q4,q5,q6,q7,q8,q9,...,q36,q37,q38,q39,q40,q41,q42,q43,q44,q45
0,580.7,A,B,C,E,C,C,A,B,B,...,C,A,D,A,E,D,B,E,B,E
1,462.2,B,B,C,B,C,D,B,A,D,...,B,D,C,B,A,B,D,B,B,E
2,487.9,A,D,E,A,C,D,C,D,D,...,A,A,A,E,B,D,E,B,A,B
3,447.4,B,C,E,D,E,A,C,D,C,...,B,B,B,D,C,E,D,A,E,E
4,548.1,C,D,E,A,C,C,A,E,C,...,E,D,C,A,D,E,A,A,D,D


## Rosa:

### Organizando:

In [22]:
# Pink test: the score first, then the question columns rearranged. Entry k
# of `ordem_rosa` is the pink-test question number placed at position k + 1.
ordem_rosa = [
    26, 27, 28, 29, 37, 38, 39, 40, 41, 23,
    24, 25, 35, 36, 1, 21, 22, 32, 33, 34,
    2, 3, 4, 30, 31, 18, 19, 20, 42, 43,
    44, 8, 9, 10, 11, 12, 45, 13, 14, 15,
    16, 17, 5, 6, 7,
]
enemRosa = enemRosa.loc[:, ['NU_NOTA_CN'] + [f'q{numero}' for numero in ordem_rosa]]

enemRosa.head()

Unnamed: 0,NU_NOTA_CN,q26,q27,q28,q29,q37,q38,q39,q40,q41,...,q12,q45,q13,q14,q15,q16,q17,q5,q6,q7
0,505.9,B,C,C,E,C,C,C,B,B,...,C,E,C,C,D,D,C,C,B,D
1,497.7,A,B,E,D,E,C,C,A,E,...,D,B,.,D,E,D,C,D,C,C
2,509.6,A,A,E,A,C,D,C,E,B,...,E,B,C,D,D,B,C,D,B,D
3,439.6,D,D,E,B,C,C,A,B,B,...,E,A,D,C,B,D,D,E,A,C
4,426.2,A,B,A,C,E,C,D,A,A,...,C,D,C,A,D,B,E,E,A,B


### Renomeando:

In [23]:
# Relabel the reordered pink-test columns: the column holding original
# question `antiga` becomes q<position>, and the score column becomes 'nota'.
ordem_original_rosa = [
    26, 27, 28, 29, 37, 38, 39, 40, 41, 23,
    24, 25, 35, 36, 1, 21, 22, 32, 33, 34,
    2, 3, 4, 30, 31, 18, 19, 20, 42, 43,
    44, 8, 9, 10, 11, 12, 45, 13, 14, 15,
    16, 17, 5, 6, 7,
]
novos_nomes_rosa = {
    f'q{antiga}': f'q{nova}'
    for nova, antiga in enumerate(ordem_original_rosa, start=1)
}
novos_nomes_rosa['NU_NOTA_CN'] = 'nota'

enemRosa = enemRosa.rename(columns=novos_nomes_rosa).reset_index(drop=True)

enemRosa.head()

Unnamed: 0,nota,q1,q2,q3,q4,q5,q6,q7,q8,q9,...,q36,q37,q38,q39,q40,q41,q42,q43,q44,q45
0,505.9,B,C,C,E,C,C,C,B,B,...,C,E,C,C,D,D,C,C,B,D
1,497.7,A,B,E,D,E,C,C,A,E,...,D,B,.,D,E,D,C,D,C,C
2,509.6,A,A,E,A,C,D,C,E,B,...,E,B,C,D,D,B,C,D,B,D
3,439.6,D,D,E,B,C,C,A,B,B,...,E,A,D,C,B,D,D,E,A,C
4,426.2,A,B,A,C,E,C,D,A,A,...,C,D,C,A,D,B,E,E,A,B


# Processamento 2:

## Concatenando respostas:

In [24]:
# Stack the four per-colour frames (yellow, blue, grey, pink, in that order)
# into one frame; ignore_index=True gives the result a fresh 0..n-1 index.
provaEnem = pd.concat([enemAmarela, enemAzul, enemCinza, enemRosa], ignore_index=True)
# Render the combined frame as the cell output.
display(provaEnem)

Unnamed: 0,nota,q1,q2,q3,q4,q5,q6,q7,q8,q9,...,q36,q37,q38,q39,q40,q41,q42,q43,q44,q45
0,487.4,B,B,B,D,C,C,B,D,C,...,E,B,B,E,B,E,E,E,C,B
1,378.5,A,C,E,E,D,B,C,C,B,...,A,E,A,C,B,D,C,E,B,D
2,516.3,A,A,E,C,C,D,A,B,C,...,E,C,C,D,B,B,E,E,D,E
3,595.2,A,D,D,C,C,A,C,A,C,...,A,E,C,B,C,E,D,A,C,E
4,386.9,B,B,E,C,E,C,D,A,D,...,B,D,C,E,D,D,A,D,E,D
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2143753,482.0,C,E,C,B,C,B,C,C,B,...,E,D,D,B,B,D,A,A,C,B
2143754,380.9,A,D,D,A,A,D,D,B,E,...,B,E,D,A,C,C,C,D,E,A
2143755,388.8,B,A,C,D,E,C,E,A,B,...,D,D,C,C,E,D,*,E,E,D
2143756,516.3,A,A,C,B,C,C,E,D,B,...,D,D,C,E,A,E,E,D,B,D


## Dados das respostas:

In [28]:
# Names of the 45 answer columns to summarise.
colunas = ['q' + str(i) for i in range(1, 46)]

# Relative frequency of each marked answer, computed once per question.
proporcoes = [provaEnem[coluna].value_counts(normalize=True) for coluna in colunas]

# Join the per-question Series side by side, one '<question>_porcentagens'
# column each. Concatenating (instead of assigning into an empty DataFrame)
# keeps the union of all answer labels: with column-by-column assignment the
# index is fixed by the first question, so a label that never occurs in q1
# would be silently dropped from every later question.
# A label absent from a question has a share of 0, hence fillna(0.0).
resultado = pd.concat(
    proporcoes,
    axis=1,
    keys=[coluna + '_porcentagens' for coluna in colunas],
).fillna(0.0)

# Show the final table.
resultado

Unnamed: 0,q1_porcentagens,q2_porcentagens,q3_porcentagens,q4_porcentagens,q5_porcentagens,q6_porcentagens,q7_porcentagens,q8_porcentagens,q9_porcentagens,q10_porcentagens,...,q36_porcentagens,q37_porcentagens,q38_porcentagens,q39_porcentagens,q40_porcentagens,q41_porcentagens,q42_porcentagens,q43_porcentagens,q44_porcentagens,q45_porcentagens
A,0.419966,0.239376,0.129798,0.31747,0.050302,0.056202,0.209428,0.177025,0.095646,0.186849,...,0.254368,0.095238,0.12735,0.30766,0.116643,0.135765,0.222139,0.219404,0.185449,0.10159
B,0.229326,0.361371,0.15452,0.15888,0.032968,0.184382,0.245463,0.208454,0.381271,0.156122,...,0.217085,0.255617,0.224719,0.190811,0.373344,0.1712,0.154653,0.293934,0.173642,0.189509
D,0.160552,0.17011,0.170522,0.16979,0.0888,0.267491,0.14746,0.30434,0.270614,0.105466,...,0.215367,0.346232,0.294871,0.191058,0.179723,0.207562,0.163581,0.14054,0.207858,0.245192
C,0.151723,0.148156,0.249857,0.130529,0.649087,0.410078,0.32146,0.153139,0.206175,0.301873,...,0.145073,0.158175,0.252225,0.168545,0.173787,0.119361,0.165599,0.097123,0.276312,0.119201
E,0.036847,0.077789,0.292025,0.220168,0.177376,0.077907,0.073934,0.154348,0.043789,0.247736,...,0.164831,0.142074,0.096619,0.139122,0.152918,0.364106,0.291754,0.245345,0.154077,0.342366
.,0.000944,0.00218,0.001951,0.002052,0.000665,0.003463,0.001302,0.00176,0.001627,0.001113,...,0.00245,0.001891,0.003485,0.001594,0.002486,0.001035,0.001358,0.002741,0.00174,0.000842
*,0.000643,0.001018,0.001329,0.001111,0.000801,0.000478,0.000953,0.000935,0.000878,0.000842,...,0.000827,0.000773,0.00073,0.00121,0.001099,0.00097,0.000916,0.000911,0.000921,0.0013


# Salvando:

In [None]:
# Ask the user for the dataset year; it becomes part of the output file name.
ano = input("Digite o ano do dataset: ").strip()

# The answer is interpolated into a file path, so accept plain ASCII digits
# only. This rejects empty input and anything containing path separators.
if not (ano.isascii() and ano.isdigit()):
    raise ValueError(f"Ano inválido: {ano!r}. Digite apenas dígitos, por exemplo 2021.")

# Save the result table as a CSV named after the year.
nome_arquivo = f"resultados_{ano}.csv"
resultado.to_csv("/content/drive/MyDrive/Pesquisa_Enem2021/Base de Dados/" + nome_arquivo)

print(f"O dataset foi salvo como {nome_arquivo}.")