In [2]:
# nnictl create --config config.yaml --port 8080

from src.data import CostomerDataset, CostomerDataModule
from src.utils import convert_category_into_integer
from src.model.mlp import Model
from src.training import CostomerModule

import pandas as pd
import numpy as np
import random
import json
import nni
from tqdm import tqdm

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

import torch

import lightning as L
from lightning.pytorch.trainer import Trainer
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.loggers import TensorBoardLogger

import seaborn as sns

In [33]:
from src.data import CostomerDataset, CostomerDataModule
from src.model.mlp import Model
from src.training import CostomerModule

import pandas as pd
import numpy as np
import random
import json
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler



import torch
from torch.utils.data import DataLoader

def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator`` as a float, or 0.0 when the denominator is zero."""
    return float(numerator) / float(denominator) if denominator else 0.0


def main(configs):
    """Evaluate the saved MLP checkpoint on the held-out test split and print metrics.

    The function loads ``./data/train.csv``, drops rows with missing values,
    integer-encodes the categorical columns, splits the data 60/20/20 into
    train/validation/test, standardises the numeric columns with statistics
    fitted on the train split only, loads ``./model/mlp.pth`` into ``Model``
    and prints confusion-matrix counts, precision/recall/F1/accuracy and the
    scikit-learn classification report for the test split.

    Args:
        configs: dict of hyperparameters. The keys read here are ``'seed'``
            (random state for both splits), ``'device'`` (``map_location`` for
            the checkpoint; falls back to ``'cpu'`` when absent) and
            ``'batch_size'``. The key ``'input_dim'`` is written into the dict
            before it is passed to ``Model``.

    Returns:
        None. All results are printed.
    """
    # Take both values from configs rather than from module-level globals so
    # that main() also works when it is called outside the __main__ guard.
    seed = configs.get('seed')
    device = configs.get('device', 'cpu')

    # Load the data and drop incomplete rows.
    costomer = pd.read_csv('./data/train.csv')
    costomer = costomer.dropna()

    # Convert the categorical columns to integer codes.
    costomer, _ = convert_category_into_integer(costomer, (
        'Churn', 'ServiceArea', 'ChildrenInHH', 'HandsetRefurbished',
        'HandsetWebCapable', 'TruckOwner', 'RVOwner', 'Homeownership',
        'BuysViaMailOrder', 'RespondsToMailOffers', 'OptOutMailings',
        'NonUSTravel', 'OwnsComputer', 'HasCreditCard', 'NewCellphoneUser',
        'NotNewCellphoneUser', 'OwnsMotorcycle', 'HandsetPrice',
        'MadeCallToRetentionTeam', 'CreditRating', 'PrizmCode',
        'Occupation', 'MaritalStatus'
    ))
    costomer = costomer.astype(np.float32)

    # Train/validation/test split (60/20/20). The validation part is not
    # needed for evaluation, but the two-step split is kept so the test rows
    # are the same ones the seed selects in the original pipeline.
    train, temp = train_test_split(costomer, test_size=0.4, random_state=seed)
    _, test = train_test_split(temp, test_size=0.5, random_state=seed)

    # Standardisation: fit on the train split only, apply to the test split.
    other_columns = ['MonthlyRevenue', 'MonthlyMinutes', 'TotalRecurringCharge',
                     'DirectorAssistedCalls', 'OverageMinutes', 'RoamingCalls',
                     'PercChangeMinutes', 'PercChangeRevenues', 'DroppedCalls',
                     'BlockedCalls', 'UnansweredCalls', 'CustomerCareCalls',
                     'ThreewayCalls', 'ReceivedCalls', 'OutboundCalls',
                     'InboundCalls', 'PeakCallsInOut', 'OffPeakCallsInOut',
                     'DroppedBlockedCalls', 'CallForwardingCalls',
                     'CallWaitingCalls', 'MonthsInService', 'UniqueSubs',
                     'ActiveSubs', 'Handsets', 'HandsetModels',
                     'CurrentEquipmentDays', 'AgeHH1', 'AgeHH2', 'RetentionCalls',
                     'RetentionOffersAccepted', 'ReferralsMadeBySubscriber',
                     'IncomeGroup', 'AdjustmentsToCreditRating']

    standard_scaler = StandardScaler()
    standard_scaler.fit(train.loc[:, other_columns])

    # Work on an explicit copy so the assignment below never writes into a
    # slice of the parent frame (avoids SettingWithCopyWarning).
    test = test.copy()
    test.loc[:, other_columns] = standard_scaler.transform(test.loc[:, other_columns])

    # Dataset and DataLoader for the test split.
    test_dataset = CostomerDataset(test)
    test_dataloader = DataLoader(
        test_dataset,
        batch_size=configs.get('batch_size'),
    )

    # Every column except one is treated as a model input.
    configs.update({'input_dim': len(costomer.columns)-1})

    # Load the model weights.
    # NOTE(review): torch.load unpickles the file; only load checkpoints from
    # a trusted source (or consider weights_only=True if the installed torch
    # version supports it).
    model = Model(configs)
    model_state_dict = torch.load("./model/mlp.pth", map_location=device)
    model.load_state_dict(model_state_dict)
    model.eval()  # switch to evaluation mode

    # Prediction: gradients are not needed for inference.
    preds = []
    gts = []
    with torch.no_grad():
        for batch in test_dataloader:
            X = batch.get('X')
            y = batch.get('y')

            pred = model(X)
            preds.append(pred.argmax(dim=-1))
            gts.append(y)

    # Concatenate the per-batch tensors and convert once to NumPy so that
    # every scikit-learn call below receives the same array type.
    preds = torch.cat(preds).cpu().numpy()
    gts = torch.cat(gts).cpu().numpy()

    # Confusion matrix and derived metrics.
    # scikit-learn lays the matrix out as [true label, predicted label], so
    # the indexing below treats class 0 as the positive class.
    # NOTE(review): confirm that convert_category_into_integer encodes the
    # churn-positive label as 0; otherwise TP/TN and FP/FN are swapped.
    confusion_matrix_result = confusion_matrix(gts, preds)
    TP = confusion_matrix_result[0, 0]
    FN = confusion_matrix_result[0, 1]
    FP = confusion_matrix_result[1, 0]
    TN = confusion_matrix_result[1, 1]

    # Guard every ratio against an empty denominator (e.g. a class that is
    # never predicted) instead of producing NaN and a RuntimeWarning.
    precision = _safe_ratio(TP, TP + FP)
    recall = _safe_ratio(TP, TP + FN)
    f1 = _safe_ratio(2 * precision * recall, precision + recall)
    accuracy = _safe_ratio(TP + TN, TP + TN + FP + FN)

    print(f'TP: {TP: .2f}, TN: {TN: .2f}, FP: {FP: .2f}, FN: {FN: .2f}')

    print(f'precision: {precision: .2f}, recall: {recall: .2f}, f1: {f1: .2f}, accuracy: {accuracy: .2f}')
    print(classification_report(gts, preds))

if __name__ == '__main__':
    # Run on the GPU when torch can see one; otherwise fall back to the CPU.
    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'

    # Hyperparameters are read from the JSON file; the selected device is
    # stored next to them under the 'device' key.
    with open('./configs.json', 'r') as file:
        configs = json.load(file)
    configs['device'] = device

    # Seed the Python, NumPy and torch random number generators.
    seed = configs.get('seed')
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    # CUDA-specific seeding and cuDNN settings.
    if device == 'cuda':
        for seed_fn in (torch.cuda.manual_seed, torch.cuda.manual_seed_all):
            seed_fn(seed)
        # cuDNN autotuning is enabled and deterministic mode is disabled.
        torch.backends.cudnn.benchmark = True
        torch.backends.cudnn.deterministic = False

    main(configs)


OrderedDict([('linear1.weight', tensor([[-0.0056,  0.0779, -0.1004,  ...,  0.0955, -0.0782,  0.0196],
        [-0.1303, -0.0461,  0.0092,  ...,  0.1051, -0.0775,  0.1237],
        [ 0.0529, -0.0741,  0.1527,  ..., -0.1182, -0.0077,  0.1043],
        ...,
        [-0.0472, -0.0940,  0.0614,  ..., -0.0549, -0.0798,  0.0079],
        [-0.0975,  0.1754,  0.1000,  ...,  0.0449, -0.0328,  0.0720],
        [ 0.0918,  0.0134,  0.1293,  ...,  0.0811, -0.0604, -0.0498]])), ('linear1.bias', tensor([ 0.0753, -0.0510,  0.0788,  0.1159,  0.0539,  0.0817, -0.1026,  0.1222,
        -0.0799,  0.0745,  0.0413,  0.0097,  0.0343, -0.1300,  0.1265, -0.0085,
         0.1183,  0.0032, -0.0540,  0.0829, -0.0025, -0.0841,  0.0166, -0.1318,
        -0.0818, -0.1076,  0.0068, -0.0729, -0.0029, -0.1007, -0.1141, -0.0153,
         0.0156,  0.0953, -0.0453,  0.1260, -0.0947,  0.0459, -0.1192,  0.1196,
        -0.0500,  0.0736,  0.0064, -0.0533, -0.0316, -0.0495,  0.0810,  0.0967,
         0.1261, -0.0315, -0.0399, 

  checkpoint = torch.load("./model/mlp.pth")


KeyError: 'State_dict'

In [22]:
# Read the raw customer table and discard every row with a missing value.
costomer = pd.read_csv('./data/train.csv').dropna()
# Encode the churn label: 1 where the value is "Yes", 0 for anything else.
is_churned = costomer['Churn'] == "Yes"
costomer['Churn'] = np.where(is_churned, 1, 0)
# Show the encoded column as the cell output.
costomer['Churn']

0        1
1        1
2        0
3        0
4        1
        ..
51035    0
51037    0
51040    0
51041    1
51043    0
Name: Churn, Length: 49752, dtype: int32