In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive so files stored there can be read and
# written through ordinary filesystem paths.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install lightgbm  # instalando modelo



In [None]:
import pandas as pd    # trabalhar com tabelas
import numpy as np     # trabalhar com arrays numéricos
from sklearn.model_selection import train_test_split, GridSearchCV # divisão treino/val e otimização
from sklearn.metrics import accuracy_score # medir acurácia
from sklearn.impute import SimpleImputer # tratar valores faltantes
from sklearn.metrics import classification_report # relatório completo
import lightgbm as lgb # modelo usado

In [None]:
# Folder on the mounted Drive that holds the project's CSV files. Both dataset
# paths are derived from it, so relocating the data only requires editing this line.
PASTA_PROJETO = '/content/drive/MyDrive/Projeto IA'

CAMINHO_TREINO = f'{PASTA_PROJETO}/train.csv'  # training data
CAMINHO_TESTE = f'{PASTA_PROJETO}/test.csv'    # test data to predict on

In [None]:
# Load the training and test CSV files into DataFrames (training file first).
train_df, test_df = pd.read_csv(CAMINHO_TREINO), pd.read_csv(CAMINHO_TESTE)

In [None]:
# Upper-case the column labels of both frames so that the later lookups by name
# ('ID', 'TARGET') do not depend on the casing used in the CSV headers.
for _frame in (train_df, test_df):
    _frame.columns = _frame.columns.str.upper()

In [None]:
# Report the (rows, columns) shape of the training frame.
dimensoes_treino = train_df.shape
print(f"Dimensões treino: {dimensoes_treino}")

# Preview only the first five column names; the trailing "...TARGET" is literal
# text appended to the message, not something read from the frame.
primeiras_colunas = train_df.columns[:5].tolist()
print(f"\nColunas treino: {primeiras_colunas}...TARGET")

Dimensões treino: (28992, 526)

Colunas treino: ['ID', 'F0', 'F1', 'F2', 'F3']...TARGET


In [None]:
# Print the column index to check that every label is now upper case.
print(train_df.columns)


Index(['ID', 'F0', 'F1', 'F2', 'F3', 'F4', 'F5', 'F6', 'F7', 'F8',
       ...
       'F515', 'F516', 'F517', 'F518', 'F519', 'F520', 'F521', 'F522', 'F523',
       'TARGET'],
      dtype='object', length=526)


In [None]:
# Split the training frame into the label and the feature matrix.
y = train_df['TARGET']                # label column
X = train_df.drop(columns='TARGET')   # every other column (ID is still included here)

In [None]:
# Hold out 20% of the rows for validation. Stratifying on y keeps the class
# proportions approximately the same in both partitions, and the fixed seed makes
# the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [None]:
# Replace missing values with the per-column median. The medians are learned from
# the training split only and then re-used unchanged for validation and test data.
imputer = SimpleImputer(strategy='median')
imputer.fit(X_train)

X_train_imputed = imputer.transform(X_train)
X_val_imputed = imputer.transform(X_val)
test_df_imputed = imputer.transform(test_df)

In [None]:
# Remove the ID column from the imputed arrays; the model is not meant to use it.
#
# The column is located by name in the imputer's output instead of being assumed to
# sit at position 0, and arrays that no longer have the imputer's full output width
# are left untouched. That makes the cell safe to re-run: repeating a plain
# `[:, 1:]` slice would silently strip a real feature column each time.
_colunas_imputadas = list(imputer.get_feature_names_out())
_posicao_id = _colunas_imputadas.index('ID')
_largura_com_id = len(_colunas_imputadas)


def _sem_coluna_id(matriz):
    """Return `matriz` without the ID column, or unchanged if it was already removed."""
    if matriz.shape[1] != _largura_com_id:
        return matriz
    return np.delete(matriz, _posicao_id, axis=1)


X_train_imputed = _sem_coluna_id(X_train_imputed)
X_val_imputed = _sem_coluna_id(X_val_imputed)
test_df_imputed = _sem_coluna_id(test_df_imputed)

In [None]:
# Hyper-parameters for the LightGBM classifier, kept together so they are easy to tune.
parametros_lgbm = {
    'n_estimators': 500,     # number of boosting rounds
    'learning_rate': 0.05,
    'random_state': 42,      # fixed seed for repeatable training
    'n_jobs': -1,
    'verbose': -1,           # keep LightGBM's logging quiet
}

# Build the (still unfitted) model.
modelo_lgbm = lgb.LGBMClassifier(**parametros_lgbm)

In [None]:
# Announce the start of training, then fit the model on the imputed training split.
print("\nTreinando LightGBM...")
modelo_lgbm.fit(X=X_train_imputed, y=y_train)


Treinando LightGBM...


In [None]:
# Predict class labels for the validation split with the fitted model.
y_pred_val = modelo_lgbm.predict(X_val_imputed)



In [None]:
# Fraction of validation rows whose predicted label matches the true label.
acuracia_final = accuracy_score(y_true=y_val, y_pred=y_pred_val)
print(f"Acurácia no conjunto de validação (LightGBM): {acuracia_final:.4f}")


Acurácia no conjunto de validação (LightGBM): 0.9160


In [None]:


# Per-class precision / recall / F1 on the validation split.
relatorio_validacao = classification_report(y_true=y_val, y_pred=y_pred_val)
print("\nRelatório de Classificação:\n\n", relatorio_validacao)



Relatório de Classificação:

               precision    recall  f1-score   support

      attack       1.00      0.84      0.91      2900
      normal       0.86      1.00      0.92      2899

    accuracy                           0.92      5799
   macro avg       0.93      0.92      0.92      5799
weighted avg       0.93      0.92      0.92      5799



In [None]:
# Predict class labels for the test set; these predictions go into the submission file.
previsoes_finais = modelo_lgbm.predict(test_df_imputed)



In [None]:
# Assemble the submission table: one row per test sample, pairing its ID with the
# label predicted for it.
colunas_submissao = {'id': test_df['ID'], 'target': previsoes_finais}
submission_df_final = pd.DataFrame(colunas_submissao)

In [None]:
# Destination of the submission CSV on the mounted Drive.
CAMINHO_FINAL = '/content/drive/MyDrive/Projeto IA/submissao_lightgbm_melhorada.csv'

# Write the table without the DataFrame index, then confirm where it was saved.
submission_df_final.to_csv(path_or_buf=CAMINHO_FINAL, index=False)
print(f"\nArquivo de submissão salvo com sucesso em: {CAMINHO_FINAL}")


Arquivo de submissão salvo com sucesso em: /content/drive/MyDrive/Projeto IA/submissao_lightgbm_melhorada.csv


In [None]:
# NOTE(review): this rebinds X_train_imputed to a fresh imputer output WITHOUT the
# ID-column removal applied earlier in the notebook, so the array no longer matches
# what the model was fitted on (nor X_val_imputed / test_df_imputed). Confirm this is
# intended before reusing the name in any later cell.
X_train_imputed = imputer.transform(X_train)
# Bare expression: displays the validation feature frame as the cell output.
X_val

Unnamed: 0,ID,F0,F1,F2,F3,F4,F5,F6,F7,F8,...,F514,F515,F516,F517,F518,F519,F520,F521,F522,F523
28573,37695,54,56,98,51,52,100,56,97,98,...,52,55,49,52,51,98,97,101,56,54
17109,11683,48,53,55,51,48,48,54,55,55,...,48,48,48,48,48,48,48,48,48,48
24122,21471,49,51,54,53,52,54,54,54,54,...,53,55,51,50,50,97,53,98,50,50
3354,4804,56,51,51,54,52,51,50,48,48,...,48,48,48,48,48,48,48,48,48,48
4540,6720,57,57,48,54,57,56,48,100,57,...,48,57,100,51,97,54,50,98,51,55
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12812,20554,48,48,48,48,48,48,48,48,48,...,48,48,48,48,48,48,48,48,48,48
10473,3554,49,50,54,99,48,48,54,49,48,...,48,48,48,48,48,48,48,48,48,48
23340,41599,48,48,48,48,48,48,48,48,48,...,48,48,48,48,48,48,48,48,48,48
5911,23056,100,97,54,51,100,54,51,51,51,...,50,51,48,51,49,50,55,100,50,99
