<a href="https://colab.research.google.com/github/Rivianee/Rivianee/blob/main/C%C3%B3pia_de_ste_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import streamlit as st
import pandas as pd
import numpy as np
import pickle
from sklearn.preprocessing import OneHotEncoder
from lightgbm import LGBMClassifier
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load the spreadsheet and encode its features
def load_and_process_data(filepath):
    """Read the Excel dataset and one-hot encode its categorical features.

    Spaces in column names are replaced with underscores (they are not
    removed). The ``fez_portabilidade`` column is taken as the target and
    every other column as a feature. Feature columns of dtype ``object``
    are one-hot encoded; the remaining columns are passed through as-is.

    Args:
        filepath: Location of the Excel file, handed to ``pd.read_excel``.

    Returns:
        Tuple ``(X_encoded, y, transformer, data)``: the encoded feature
        matrix, the target column, the fitted ``ColumnTransformer`` and the
        full DataFrame (target included) with the renamed columns.
    """
    target_column = 'fez_portabilidade'

    data = pd.read_excel(filepath)
    data.columns = data.columns.str.replace(' ', '_')

    y = data[target_column]
    X = data.drop(columns=target_column)

    # Only object-dtype columns are treated as categorical.
    object_columns = X.select_dtypes(include=['object']).columns
    one_hot = OneHotEncoder(drop='first', handle_unknown='ignore')
    transformer = ColumnTransformer(
        transformers=[('encoder', one_hot, object_columns)],
        remainder='passthrough',
    )
    X_encoded = transformer.fit_transform(X)

    return X_encoded, y, transformer, data

# Train the model and persist it together with the transformer
def train_and_save_model(X, y, transformer, model_path='model.pkl', transformer_path='transformer.pkl'):
    """Fit an ``LGBMClassifier``, score it on a hold-out split and pickle it.

    The data is split 70/30 (stratified on ``y``, ``random_state=42``), the
    classifier is fitted on the training part, and accuracy, precision,
    recall and F1 are computed on the test part with each metric's default
    arguments. The model and the given transformer are then written with
    ``pickle`` to ``model_path`` and ``transformer_path``.

    Args:
        X: Encoded feature matrix.
        y: Target labels aligned with ``X``.
        transformer: Fitted transformer to persist next to the model.
        model_path: Destination file for the pickled model.
        transformer_path: Destination file for the pickled transformer.

    Returns:
        Tuple ``(model, transformer, accuracy, precision, recall, f1)``.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42, stratify=y
    )

    model = LGBMClassifier(random_state=42)
    model.fit(X_train, y_train)

    # Evaluate on the hold-out split: accuracy, precision, recall, F1 (in that order).
    y_pred = model.predict(X_test)
    metric_functions = (accuracy_score, precision_score, recall_score, f1_score)
    scores = tuple(metric(y_test, y_pred) for metric in metric_functions)

    # Persist the model first, then the transformer.
    for destination, artifact in ((model_path, model), (transformer_path, transformer)):
        with open(destination, 'wb') as handle:
            pickle.dump(artifact, handle)

    return (model, transformer) + scores

# Load the trained model and the transformer from disk
def load_model_and_transformer(model_path='model.pkl', transformer_path='transformer.pkl'):
    """Unpickle a previously saved model and transformer.

    NOTE: ``pickle.load`` can execute arbitrary code embedded in the file;
    only point this at files produced by a trusted source.

    Args:
        model_path: Path of the pickled model.
        transformer_path: Path of the pickled transformer.

    Returns:
        Tuple ``(model, transformer)``.
    """
    def _unpickle(path):
        # Read one pickled object and close the file right away.
        with open(path, 'rb') as handle:
            return pickle.load(handle)

    # The model file is read before the transformer file.
    return _unpickle(model_path), _unpickle(transformer_path)

# Make a prediction with the trained model
def predict(model, transformer, params):
    """Run the model on a single record.

    ``params`` is turned into a one-row DataFrame (index ``0``), passed
    through ``transformer.transform`` and then fed to the model.

    Args:
        model: Object exposing ``predict`` and ``predict_proba``.
        transformer: Object exposing ``transform``.
        params: Mapping of column name to value for the record.

    Returns:
        Tuple ``(prediction, probability)`` where ``probability`` is
        column 1 of ``predict_proba`` (labelled by the original author as
        the probability of success).
    """
    single_row = pd.DataFrame(params, index=[0])
    features = transformer.transform(single_row)
    return model.predict(features), model.predict_proba(features)[:, 1]

# Streamlit app configuration
@st.cache_resource
def _load_data_and_train(data_file_path):
    """Load the dataset, fit the transformer and train the model, cached.

    Streamlit re-executes the whole script on every widget interaction.
    Caching this step means the spreadsheet is read, the model trained and
    the pickle files written only once per ``data_file_path`` instead of on
    every rerun. The cache is not invalidated when the file's contents
    change; clear the Streamlit cache to retrain.

    Returns:
        Tuple ``(model, transformer, original_data, metrics)`` where
        ``metrics`` is ``(accuracy, precision, recall, f1)``.
    """
    X, y, transformer, original_data = load_and_process_data(data_file_path)
    model, transformer, accuracy, precision, recall, f1 = train_and_save_model(X, y, transformer)
    return model, transformer, original_data, (accuracy, precision, recall, f1)


def main():
    """Render the portability prediction app.

    Builds one sidebar input per feature column (a select box for
    object-dtype columns, a numeric input otherwise) and, when the
    'Prever' button is pressed, shows the prediction, its probability and
    the model's hold-out metrics.
    """
    st.title('Previsão de Portabilidade')

    # Path of the Excel file holding the training data.
    data_file_path = 'PLANILHA_ATUALIZADA_MODELAGEM_28_06_2024.xlsx'
    model, transformer, original_data, metrics = _load_data_and_train(data_file_path)
    accuracy, precision, recall, f1 = metrics

    st.sidebar.header('Insira os dados do cliente')
    user_params = {}
    for column in original_data.columns:
        if column == 'fez_portabilidade':
            continue  # the target is predicted, not entered by the user
        if original_data[column].dtype == 'object':
            unique_values = original_data[column].unique()
            user_params[column] = st.sidebar.selectbox(column, unique_values)
        else:
            user_params[column] = st.sidebar.number_input(column)

    if not st.sidebar.button('Prever'):
        return

    # Only a missing selection (None) counts as "not filled in". A plain
    # truthiness test would also reject legitimate values such as 0, which
    # is the default of every numeric input.
    if any(value is None for value in user_params.values()):
        st.write('Preencha todos os parâmetros para fazer a previsão.')
        return

    prediction, probability = predict(model, transformer, user_params)
    # predict() returns one-element arrays; compare the scalar, not the array.
    outcome = 'fará' if prediction[0] == 1 else 'não fará'
    st.write('### Resultado da Previsão:')
    st.write(f'A pessoa provavelmente {outcome} portabilidade.')
    st.write(f'Probabilidade de sucesso: {probability[0]:.2%}')

    # Show the model's metrics
    st.subheader('Métricas do Modelo')
    st.write(f'Acurácia: {accuracy:.2%}')
    st.write(f'Precisão: {precision:.2%}')
    st.write(f'Recall: {recall:.2%}')
    st.write(f'F1 Score: {f1:.2%}')

# Start the app only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()

[LightGBM] [Info] Number of positive: 5341, number of negative: 5341
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002758 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1855
[LightGBM] [Info] Number of data points in the train set: 10682, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
