<a href="https://colab.research.google.com/github/MarianaAlves01/ProjetoIA/blob/main/ProjetoFinal_MotivosEndividamento.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Importar bibliotecas
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import numpy as np
from openpyxl import Workbook
from openpyxl.chart import BarChart, Reference
from openpyxl.utils.dataframe import dataframe_to_rows

# Load the training data. low_memory=False makes pandas parse the file in a
# single pass rather than in chunks.
data = pd.read_csv('train.csv', low_memory=False)

# Missing-value handling.
# Gaps in these text columns are replaced by the placeholder 'Desconhecido'.
categorical_cols_to_fill = ['Type_of_Loan', 'Credit_Mix', 'Payment_Behaviour']
for column in categorical_cols_to_fill:
    data[column] = data[column].fillna('Desconhecido')

# These columns are forced to numeric: anything that cannot be parsed becomes
# NaN (errors='coerce'), and every NaN is then replaced by the column median,
# computed after the coercion.
numerical_cols_to_fill = [
    'Monthly_Inhand_Salary',
    'Num_of_Delayed_Payment',
    'Num_Credit_Inquiries',
    'Amount_invested_monthly',
    'Monthly_Balance',
]
for column in numerical_cols_to_fill:
    as_number = pd.to_numeric(data[column], errors='coerce')
    data[column] = as_number.fillna(as_number.median())

# Remove the Name/SSN/ID/Customer_ID columns so they never reach the model.
cols_to_drop = ['Name', 'SSN', 'ID', 'Customer_ID']
data = data.drop(columns=cols_to_drop)

# Integer-encode every object-dtype column except the target. Each fitted
# encoder is stored by column name so the codes can be mapped back to the
# original labels later.
label_encoders = {}
categorical_cols = data.select_dtypes(include='object').columns
for column in categorical_cols:
    if column == 'Credit_Score':
        # The target column keeps its original labels.
        continue
    encoder = LabelEncoder()
    data[column] = encoder.fit_transform(data[column])
    label_encoders[column] = encoder

# Separate the target (Credit_Score) from the feature matrix.
y = data['Credit_Score']
X = data.drop(columns=['Credit_Score'])

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a random forest with default hyper-parameters and a fixed seed.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Accuracy on the held-out rows.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

# One row per feature, ordered from most to least important.
importance_table = pd.DataFrame({
    'Característica': X.columns,
    'Importância': model.feature_importances_,
})
feature_importances = importance_table.sort_values(
    by='Importância',
    ascending=False,
)

# Build the Excel report: one sheet with the model accuracy, and a second
# sheet with the feature-importance table plus a bar chart of it.
wb = Workbook()
ws1 = wb.active
ws1.title = 'Acurácia do Modelo'

# First sheet: a header cell followed by the accuracy value.
ws1.append(['Acurácia'])
ws1.append([accuracy])

# Second sheet: the feature-importance table, header in row 1 and one
# feature per row below it.
ws2 = wb.create_sheet(title='Importância das Características')
for r in dataframe_to_rows(feature_importances, index=False, header=True):
    ws2.append(r)

# Bar chart of the importances.
chart = BarChart()
chart.title = "Importância das Características"
chart.x_axis.title = "Características"
chart.y_axis.title = "Importância"

# The data reference starts at the header row (row 1) and
# titles_from_data=True tells openpyxl to use that first cell as the series
# title. The legend then reads 'Importância' rather than a placeholder name
# for an untitled series. Categories still start at row 2, the first feature.
data_range = Reference(ws2, min_col=2, min_row=1, max_col=2, max_row=ws2.max_row)
categories_range = Reference(ws2, min_col=1, min_row=2, max_row=ws2.max_row)
chart.add_data(data_range, titles_from_data=True)
chart.set_categories(categories_range)

ws2.add_chart(chart, "E5")  # Anchor the chart's top-left corner at cell E5

# Write the workbook to disk.
wb.save('resultados_modelo_com_grafico.xlsx')

print("Resultados exportados para 'resultados_modelo_com_grafico.xlsx'")


Resultados exportados para 'resultados_modelo_com_grafico.xlsx'
