## 구글 드라이브 마운트

In [None]:
import os

from google.colab import drive

# Mount Google Drive, then make the project folder the working directory so
# that relative paths (e.g. 'data.csv') resolve inside it.
MOUNT_POINT = '/content/drive'
PROJECT_DIR = '/content/drive/MyDrive/BigContest'

drive.mount(MOUNT_POINT)
os.chdir(PROJECT_DIR)

Mounted at /content/drive


FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/BigContest'

## 라이브러리

In [None]:
pip install pycaret

In [None]:
import pandas as pd
import datetime as dt
import numpy as np
from sklearn.model_selection import train_test_split
from pycaret.classification import *
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import classification_report
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler

## 데이터 불러오기

In [None]:
# Read the dataset from the working directory.
data = pd.read_csv('data.csv')
# Remove the two 'Unnamed' columns (presumably stale index columns left by
# earlier CSV exports -- confirm against the file).
data = data.drop(columns=['Unnamed: 0.1', 'Unnamed: 0'])
data

## 결측치 확인

In [None]:
# Count the missing values in each column.
data.isna().sum()

## 전처리

In [None]:
# Split the column names into numeric and categorical groups.
num_features = [
    'age',
    'od_dist_avg',
    'od_duration_avg',
    'od_cnts',
]
cat_features = [
    'gender',
    'modal',
    'dest_purpose',
    'origin_cd',
    'dest_cd',
]

In [None]:
# Convert the categorical columns in two steps: first to int (the original
# note says they are loaded as float), then to the pandas category dtype.
cat_as_int = data[cat_features].astype('int')
data[cat_features] = cat_as_int.astype('category')

In [None]:
# Separate the data into X (independent variables) and y (dependent variable).
# Every modelling column except the target 'modal' is a feature.
feature_cols = [
    'age',
    'od_dist_avg',
    'od_duration_avg',
    'od_cnts',
    'gender',
    'dest_purpose',
    'origin_cd',
    'dest_cd',
]
X_data = data[feature_cols]
y_data = data['modal']

## Pycaret

In [None]:
col = X_data.columns

# Split into train and test sets (70% / 30%) with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X_data, y_data, test_size=0.3, random_state=42
)

# Attach the target to the training features. drop=True discards the old row
# labels: a plain reset_index() would add them as an 'index' column, which
# setup() would then treat as a feature that X_test does not have.
train_data = X_train.assign(modal=y_train).reset_index(drop=True)

# Initialise the PyCaret experiment. session_id fixes the random seed so that
# repeated runs give the same result, matching the seeded split above.
exp = setup(data=train_data, target='modal', session_id=42)
# Compare the candidate models.
best_model = compare_models()
# Create a model with the best-performing algorithm
# (original note: the default evaluation metric is Accuracy).
model = create_model(best_model)
# Finalize the model.
final_model = finalize_model(model)

# Evaluation on the held-out test split (left disabled, as in the original):
# y_pred = predict_model(final_model, data = X_test)
# accuracy = accuracy_score(y_test, y_pred['prediction_label'])
# print(f"Accuracy: {accuracy}")

In [None]:
# Inspect the finalized model with PyCaret's evaluate_model.
# NOTE(review): final_model is the output of finalize_model(); if that step
# refits on PyCaret's internal hold-out data, the scores shown here may be
# optimistic -- confirm against the PyCaret documentation.
evaluate_model(final_model)

In [None]:
# Tune the model's hyperparameters with PyCaret's tune_model.
# NOTE(review): this tunes the already finalized model; the usual order is
# create_model -> tune_model -> finalize_model -- confirm this is intended.
tuned_model = tune_model(final_model)

In [None]:
# Plot the confusion matrix of the tuned model.
plot_model(tuned_model, plot = 'confusion_matrix')