In [1]:
from pathlib import Path

import numpy as np
import pandas as pd

from utils.feature_engineering import create_features

In [2]:
# Keep the CSV exactly as read under its own name so the split below can be
# re-derived without touching the file again.
test_raw = pd.read_csv('./database/test.csv')

# 'id' is set aside for labelling the submission rows and removed from the
# frame that goes through feature engineering and the models.
test_id = test_raw['id'].copy()
test_data = test_raw.drop(columns=['id'])

* Criando as Mesmas Features

In [3]:
# Rebind test_data to whatever create_features returns for the current frame.
# NOTE(review): re-running this cell on its own feeds the already-engineered
# frame back into create_features; re-run from the loading cell instead.
test_data = test_data.pipe(create_features)

## Etapa 1 - Clustering

* Importando os artefatos - Pipeline

In [4]:
import pickle

# Pipeline object saved under the 01_std_dataprep artefact folder.
with open('./artefatos/01_std_dataprep/std_pipeline.pkl', 'rb') as pipeline_fh:
    std_pipeline = pickle.load(pipeline_fh)

# Feature list saved next to it; used later as the column names of the
# pipeline's output.
with open('./artefatos/01_std_dataprep/std_pipeline_features.pkl', 'rb') as features_fh:
    std_pipeline_feature_list = pickle.load(features_fh)

* Aplicando a pipeline

In [5]:
# Run the clustering-stage pipeline, then wrap its output in a DataFrame
# labelled with the saved feature list.
std_transformed = std_pipeline.transform(test_data)
test_data_dp = pd.DataFrame(std_transformed, columns=std_pipeline_feature_list)

* Importando os Artefatos - PCA e GMM

In [6]:
# PCA object saved under the 02_pca artefact folder.
with open('./artefatos/02_pca/pca.pkl', 'rb') as pca_fh:
    pca = pickle.load(pca_fh)

# Trained GMM saved under the 03_gmm artefact folder.
with open('./artefatos/03_gmm/gmm_model.pkl', 'rb') as gmm_fh:
    gmm = pickle.load(gmm_fh)

* Aplicando o PCA e o GMM

In [7]:
# Transform the pipeline output (test_data_dp) with the object unpickled from
# pca.pkl; the result is the input the GMM predicts on in the next cell.
test_data_reduced = pca.transform(test_data_dp)

In [8]:
# Attach the GMM's prediction for each row as a 'Cluster' column; the frame
# with this column is what the final-stage pipeline receives.
test_data = test_data.assign(Cluster=gmm.predict(test_data_reduced))

## Etapa 2 - Modelo Final

* Importando os Artefatos: Pipeline, Lista de Features do Feature Selection e Modelos Treinados

In [9]:
def _load_artifact(path):
    """Return the object unpickled from the file at ``path``."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Pipeline applied to the test frame before the final models.
pipeline_final = _load_artifact('./artefatos/05_modelo_final/pipeline.pkl')

# Feature list used as the column names of that pipeline's output.
pipeline_final_feature_list = _load_artifact('./artefatos/05_modelo_final/pipeline_features_out.pkl')

# Feature list saved after the feature-selection step.
feature_selection_feature_list = _load_artifact('./artefatos/05_modelo_final/feature_selection_step2_rfecv.pkl')

# Trained models: gradient boosting and LGBM.
grad_boosting = _load_artifact('./artefatos/05_modelo_final/grad_boosting_trained_model_70perc_data.pkl')
lgbm = _load_artifact('./artefatos/05_modelo_final/lgbm_trained_model_70perc_data.pkl')

* Aplicando o Pipeline

In [10]:
# Run the final-stage pipeline on the frame that now carries 'Cluster', then
# label the output with the saved feature list.
final_transformed = pipeline_final.transform(test_data)
test_data_final = pd.DataFrame(final_transformed, columns=pipeline_final_feature_list)

* Aplicando o Feature Selection

In [11]:
# Narrow test_data_final using the object unpickled from
# feature_selection_step2_rfecv.pkl, rebinding the same name.
# NOTE(review): assumes feature_selection_feature_list holds column labels
# (a boolean mask here would filter rows instead) — confirm.
test_data_final = test_data_final[feature_selection_feature_list]

* Aplicando o Modelo I - Grad Boosting

In [12]:
# Predict with the gradient boosting model on the feature-selected frame.
# NOTE(review): these values are written to the submission unchanged; if the
# model was trained on a transformed target, invert it here — confirm.
grad_boosting_pred = grad_boosting.predict(test_data_final)

* Aplicando o Modelo II - LGBM

In [13]:
# Predict with the LGBM model on the same feature-selected frame.
# NOTE(review): these values are written to the submission unchanged; if the
# model was trained on a transformed target, invert it here — confirm.
lgbm_pred = lgbm.predict(test_data_final)



## Etapa 3 - Gerando os Arquivos Para Submissão

In [25]:
# Show the first row of the reference file to check the layout the
# submissions written below are expected to follow.
sample_submission = pd.read_csv('./database/sample_submission.csv')
sample_submission.iloc[:1]

Unnamed: 0,id,Rings
0,90615,10


* Grad Boosting

In [26]:
# Pair each saved test id with its gradient boosting prediction, then move
# 'id' into the index.
grad_boosting_columns = {'id': test_id, 'Rings': grad_boosting_pred}
sub_grad_boosting = pd.DataFrame(grad_boosting_columns).set_index('id')

In [27]:
# to_csv does not create missing parent folders, so make sure the output
# directory exists before writing (e.g. on a fresh checkout).
Path('./submission').mkdir(parents=True, exist_ok=True)

# Write the 'Rings' column with 'id' as the index column of the CSV.
sub_grad_boosting['Rings'].to_csv('./submission/model1.csv')

* LGBM

In [28]:
# Pair each saved test id with its LGBM prediction, then move 'id' into the
# index.
lgbm_columns = {'id': test_id, 'Rings': lgbm_pred}
sub_lgbm = pd.DataFrame(lgbm_columns).set_index('id')

In [29]:
# to_csv does not create missing parent folders, so make sure the output
# directory exists before writing (e.g. on a fresh checkout).
Path('./submission').mkdir(parents=True, exist_ok=True)

# Write the 'Rings' column with 'id' as the index column of the CSV.
sub_lgbm['Rings'].to_csv('./submission/model2.csv')