In [57]:
import pandas as pd
from sklearn.datasets import make_classification
from imblearn.over_sampling import RandomOverSampler, SMOTE, ADASYN, BorderlineSMOTE, SMOTENC
from imblearn.under_sampling import RandomUnderSampler, TomekLinks, NearMiss, ClusterCentroids, EditedNearestNeighbours

import pandas as pd 
import numpy as np


from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression, LinearRegression

from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

from sklearn.feature_selection import SelectFromModel, SequentialFeatureSelector
from sklearn.naive_bayes import GaussianNB

from sklearn.metrics import f1_score,accuracy_score,recall_score,precision_score,confusion_matrix,roc_auc_score

import warnings
warnings.simplefilter('ignore')



In [58]:
def model_basic(x_train, y_train, x_test, y_test): 
    models = [
        LogisticRegression(),
        LinearDiscriminantAnalysis(),
        GaussianNB(),
        RandomForestClassifier(),
        XGBClassifier()
    ]

    rdict={'model':[],'acc_train':[], 'auc_train':[], 'acc_test':[],'precision':[],'recall':[],'f1_score':[], 'AUC_test':[]}


    for clf in models:
        clf = clf.fit(x_train, y_train)
    #1열:Train
        y_hat = clf.predict(x_train)
        results_train  = (round(accuracy_score(y_train,y_hat),2),round(roc_auc_score(y_train,y_hat),2))
    #2열:Test
        y_hat = clf.predict(x_test)
        results = (round(accuracy_score(y_test,y_hat),2),
                        round(precision_score(y_test,y_hat),2),
                        round(recall_score(y_test,y_hat),2),
                        round(f1_score(y_test,y_hat),2),
                        round(roc_auc_score(y_test,y_hat),2))

        rdict['model'].append(clf); 
        rdict['acc_train'].append(results_train[0])
        rdict['auc_train'].append(results_train[1])
        
        rdict['acc_test'].append(results[0])
        rdict['precision'].append(results[1])
        rdict['recall'].append(results[2])
        rdict['f1_score'].append(results[3])
        rdict['AUC_test'].append(results[4])   

        confusion = confusion_matrix(y_test, y_hat)

        print(confusion)

    rdf_final = pd.DataFrame(data=rdict)
    return rdf_final

In [59]:
# def model_basic(x_train, y_train, x_test, y_test):
#     models = [
#         LogisticRegression(),
#         LinearDiscriminantAnalysis(),
#         GaussianNB(),
#         RandomForestClassifier(class_weight={0:1,1:10}),
#         XGBClassifier(),
        
#     ]

#     rdict = {'model': [], 'accuracy': [], 'precision': [], 'recall': [], 'f1_score': [], 'auc_score': []}

#     for clf in models:
#         clf = clf.fit(x_train, y_train)
#         pred = clf.predict(x_test)
#         pred_prob_rf = clf.predict_proba(x_test)[:, 1].reshape(-1, 1)
#         auc_score = roc_auc_score(y_test, pred_prob_rf)
#         results = (
#             round(accuracy_score(y_test, pred), 4),
#             round(precision_score(y_test, pred), 4),
#             round(recall_score(y_test, pred), 4),
#             round(f1_score(y_test, pred), 4),
#             round(auc_score, 4)
#         )

#         rdict['model'].append(clf)
#         rdict['accuracy'].append(results[0])
#         rdict['precision'].append(results[1])
#         rdict['recall'].append(results[2])
#         rdict['f1_score'].append(results[3])
#         rdict['auc_score'].append(results[4])

#         confusion = confusion_matrix(y_test, pred)
#         print(confusion)

#     rdf = pd.DataFrame(data=rdict)
#     return rdf

In [60]:
def over(df, target, method='random', sampling_strategy=1.0):
    
    # 타겟 변수와 특성 변수를 분리
    X = df
    y = target
    
    if method == 'random':
        # RandomOverSampler 객체 생성, sampling_strategy 값 설정
        oversampler = RandomOverSampler(sampling_strategy=sampling_strategy)

    elif method == 'smote':
        # SMOTE 객체 생성, sampling_strategy 값 설정
        oversampler = SMOTE(sampling_strategy=sampling_strategy)

    elif method == 'adasyn':
        # adasyn 객체 생성, sampling_strategy 값 설정
        oversampler = ADASYN(sampling_strategy=sampling_strategy)

    elif method == 'Borderline-SMOTE':
        # Borderline-SMOTE 객체 생성, sampling_strategy 값 설정
        oversampler = BorderlineSMOTE(sampling_strategy=sampling_strategy)

    elif method == 'SMOTENC':
        # SMOTENC 객체 생성, sampling_strategy 값 설정
        oversampler = SMOTENC(sampling_strategy=sampling_strategy)

    
    # 오버샘플링 수행
    X_resampled, y_resampled = oversampler.fit_resample(X, y)

    # 오버샘플링된 데이터 프레임 생성
    df_over = pd.concat([X_resampled, y_resampled], axis=1)

    return df_over


In [61]:
def under(df, target, method='random', sampling_strategy=1.0):
    
    # 타겟 변수와 특성 변수를 분리
    X = df
    y = target

    if method == 'random':
        # RandomUnderSampler 객체 생성, sampling_strategy 값 설정
        undersampler = RandomUnderSampler(sampling_strategy=sampling_strategy)

    elif method == 'tomek':
        # TomekLinks 객체 생성, sampling_strategy 값 설정
        undersampler = TomekLinks(sampling_strategy=sampling_strategy)

    elif method == 'NearMiss':
        # NearMiss 객체 생성, sampling_strategy 값 설정
        undersampler = NearMiss(sampling_strategy=sampling_strategy)

    elif method == 'cluster_centroids':
        # ClusterCentroids 객체 생성, sampling_strategy 값 설정
        undersampler = ClusterCentroids(sampling_strategy=sampling_strategy)

    elif method == 'edited_nn':
        # EditedNearestNeighbours 객체 생성, sampling_strategy 값 설정
        undersampler = EditedNearestNeighbours(sampling_strategy=sampling_strategy)

    # 언더샘플링 수행
    X_resampled, y_resampled = undersampler.fit_resample(X, y)

    # 언더샘플링된 데이터 프레임 생성
    df_under = pd.concat([X_resampled, y_resampled], axis=1)

    return df_under

In [62]:
def evaluate_resampling_model(x_train, y_train, x_test, y_test, method, num_iterations=10, over_ratio = 0.5 ,under_ratio=0.5,random_state=None):
    

    # 모델 성능 저장을 위한 리스트
    score = {"accuracy_scores_mean" : [] ,"f1_scores_mean" :[], "recall_scores_mean" : [] , "precision_scores_mean" : [],  "auc_mean" : []}
    std_score = {"accuracy_scores_std" : [] ,"f1_scores_std" :[], "recall_scores_std" : [] , "precision_scores_std" : [],  "auc_std" : []}  

    recall_scores=[]
    accuracy_scores=[]
    f1_scores=[]
    precision_scores=[]
    auc_scores=[]
    
    
    # 랜덤샘플러 생성
    sampler_over = BorderlineSMOTE(sampling_strategy=over_ratio, random_state=random_state)
    sampler_under = RandomUnderSampler(sampling_strategy=under_ratio, random_state=random_state)

    

    # 랜덤 샘플링 반복 수행
    for i in range(num_iterations):
        
        x_train_over, y_train_over = sampler_over.fit_resample(x_train, y_train)
        x_test_under, y_test_under = sampler_under.fit_resample(x_test, y_test)

        if method == "Logistic":
            model = LogisticRegression().fit(x_train_over, y_train_over)
            y_pred = model.predict(x_test_under)
            y_pred_proba = model.predict_proba(x_test_under)[:,1].reshape(-1,1)

        if method == "LDA":
            model = LinearDiscriminantAnalysis().fit(x_train_over, y_train_over)
            y_pred = model.predict(x_test_under)
            y_pred_proba = model.predict_proba(x_test_under)[:,1].reshape(-1,1)
        
        if method == "Gaussian":
            model = GaussianNB().fit(x_train_over, y_train_over)
            y_pred = model.predict(x_test_under)
            y_pred_proba = model.predict_proba(x_test_under)[:,1].reshape(-1,1)
        
        #모델 성능 평가 
        accuracy = accuracy_score(y_test_under, y_pred)
        f1 = f1_score(y_test_under, y_pred)
        recall = recall_score(y_test_under, y_pred)
        precision = precision_score(y_test_under, y_pred)
        auc = roc_auc_score(y_test_under, y_pred_proba)
        
        # 리스트에 결과값 담아주기
        accuracy_scores.append(accuracy)
        f1_scores.append(f1)
        recall_scores.append(recall)
        precision_scores.append(precision)
        auc_scores.append(auc)

    

    # 모델 성능 평균 및 표준편차 계산
    accuracy_mean = np.mean(accuracy_scores)
    f1_mean = np.mean(f1_scores)
    recall_mean = np.mean(recall_scores)
    precision_mean = np.mean(precision_scores)
    auc_scores_mean = np.mean(auc_scores)


    score["accuracy_scores_mean"].append(accuracy_mean)
    score["f1_scores_mean"].append(f1_mean)
    score["recall_scores_mean"].append(recall_mean)
    score["precision_scores_mean"].append(precision_mean)
    score["auc_mean"].append(auc_scores_mean)

    accuracy_std = np.std(accuracy_scores)
    f1_std = np.std(f1_scores)
    recall_std = np.std(recall_scores)
    precision_std = np.std(precision_scores)
    auc_scores_std = np.std(auc_scores)

    std_score["accuracy_scores_std"].append(accuracy_std)
    std_score["f1_scores_std"].append(f1_std)
    std_score["recall_scores_std"].append(recall_std)
    std_score["precision_scores_std"].append(precision_std)
    std_score["auc_std"].append(auc_scores_std)
    
    print(score)
    print(std_score)

---
# target1

In [63]:
train = pd.read_csv("./datasets/통계검증완료/코스닥_power_train.csv", index_col=0)
test = pd.read_csv("./datasets/통계검증완료/코스닥_power_test.csv", index_col=0)

최종_col_1= ['EBIT/총자산', '자기자본순이익률', '누적수익성비율', '순운전자본비율', '당기전기영업손익', '시총/총자산',
       '총자본증가율', '매출액총이익률', '외국인지분율']


x_train = train[최종_col_1]
y_train = train["target_1"]

x_test = test[최종_col_1]
y_test = test["target_1"]



In [64]:
y_train.value_counts()

0    5086
1     174
Name: target_1, dtype: int64

In [65]:
y_test.value_counts()

0    3503
1      89
Name: target_1, dtype: int64

In [66]:
## 불균형 처리 안한 적합
model_basic(x_train, y_train, x_test, y_test)

[[3467   36]
 [  52   37]]
[[3425   78]
 [  44   45]]
[[3126  377]
 [  28   61]]
[[3443   60]
 [  43   46]]
[[3454   49]
 [  51   38]]


Unnamed: 0,model,acc_train,auc_train,acc_test,precision,recall,f1_score,AUC_test
0,LogisticRegression(),0.98,0.67,0.98,0.51,0.42,0.46,0.7
1,LinearDiscriminantAnalysis(),0.97,0.68,0.97,0.37,0.51,0.42,0.74
2,GaussianNB(),0.95,0.77,0.89,0.14,0.69,0.23,0.79
3,"(DecisionTreeClassifier(max_features='sqrt', r...",1.0,1.0,0.97,0.43,0.52,0.47,0.75
4,"XGBClassifier(base_score=None, booster=None, c...",1.0,1.0,0.97,0.44,0.43,0.43,0.71


In [67]:
## train 데이터만 1:2 오버샘플링후 적합

train_1 =over(train[최종_col_1],train["target_1"], method='Borderline-SMOTE', sampling_strategy=0.5)

model_basic(train_1[최종_col_1], train_1["target_1"], x_test, y_test)

[[3181  322]
 [  19   70]]
[[3127  376]
 [  18   71]]
[[3019  484]
 [  13   76]]
[[3365  138]
 [  34   55]]
[[3362  141]
 [  34   55]]


Unnamed: 0,model,acc_train,auc_train,acc_test,precision,recall,f1_score,AUC_test
0,LogisticRegression(),0.91,0.9,0.91,0.18,0.79,0.29,0.85
1,LinearDiscriminantAnalysis(),0.9,0.88,0.89,0.16,0.8,0.26,0.85
2,GaussianNB(),0.9,0.9,0.86,0.14,0.85,0.23,0.86
3,"(DecisionTreeClassifier(max_features='sqrt', r...",1.0,1.0,0.95,0.28,0.62,0.39,0.79
4,"XGBClassifier(base_score=None, booster=None, c...",1.0,1.0,0.95,0.28,0.62,0.39,0.79


In [68]:
## train 데이터 오버 1:0.5, test 데이터 언더 1:0.5 

train_1 =over(train[최종_col_1],train["target_1"], method='Borderline-SMOTE', sampling_strategy=0.5)

test_1 =under(test[최종_col_1],test["target_1"], method='random', sampling_strategy=0.5)

model_basic(train_1[최종_col_1], train_1["target_1"], test_1[최종_col_1], test_1["target_1"])


[[164  14]
 [ 17  72]]
[[159  19]
 [ 18  71]]
[[154  24]
 [ 13  76]]
[[168  10]
 [ 33  56]]
[[170   8]
 [ 34  55]]


Unnamed: 0,model,acc_train,auc_train,acc_test,precision,recall,f1_score,AUC_test
0,LogisticRegression(),0.92,0.91,0.88,0.84,0.81,0.82,0.87
1,LinearDiscriminantAnalysis(),0.9,0.89,0.86,0.79,0.8,0.79,0.85
2,GaussianNB(),0.9,0.91,0.86,0.76,0.85,0.8,0.86
3,"(DecisionTreeClassifier(max_features='sqrt', r...",1.0,1.0,0.84,0.85,0.63,0.72,0.79
4,"XGBClassifier(base_score=None, booster=None, c...",1.0,1.0,0.84,0.87,0.62,0.72,0.79


In [69]:
evaluate_resampling_model(x_train, y_train,x_test,y_test,
                          method='Logistic',over_ratio=0.5,under_ratio=0.5,num_iterations=1000)

{'accuracy_scores_mean': [0.8681722846441947], 'f1_scores_mean': [0.80039758972014], 'recall_scores_mean': [0.7913033707865168], 'precision_scores_mean': [0.8105346517743638], 'auc_mean': [0.9069827042040147]}
{'accuracy_scores_std': [0.01415650870686526], 'f1_scores_std': [0.01750782422298729], 'recall_scores_std': [0.00986286477157297], 'precision_scores_std': [0.03470207237503974], 'auc_std': [0.008730371757033848]}


In [70]:
evaluate_resampling_model(x_train, y_train,x_test,y_test,
                          method='LDA',over_ratio=0.5,under_ratio=0.5,num_iterations=1000)

{'accuracy_scores_mean': [0.8574943820224719], 'f1_scores_mean': [0.7876907551154639], 'recall_scores_mean': [0.7911685393258426], 'precision_scores_mean': [0.7850723135430784], 'auc_mean': [0.9029624416109077]}
{'accuracy_scores_std': [0.015557548873033782], 'f1_scores_std': [0.018430308594209355], 'recall_scores_std': [0.006690857930865703], 'precision_scores_std': [0.035696892450745955], 'auc_std': [0.008751493815379086]}


--- 
# targer2

In [71]:
train = pd.read_csv("./datasets/통계검증완료/코스닥_power_train.csv", index_col=0)
test = pd.read_csv("./datasets/통계검증완료/코스닥_power_test.csv", index_col=0)


최종_col_2= ['EBIT/총자산', '자기자본순이익률', '누적수익성비율', '순운전자본비율', '당기전기영업손익', '이자부담률',
       '시총/총자산', '외국인지분율']


x_train = train[최종_col_2]
y_train = train["target_2"]

x_test = test[최종_col_2]
y_test = test["target_2"]



In [72]:
## 불균형 처리 안한 적합
model_basic(x_train, y_train, x_test, y_test)

[[3448   57]
 [  55   32]]
[[3455   50]
 [  56   31]]
[[3125  380]
 [  38   49]]
[[3448   57]
 [  48   39]]
[[3449   56]
 [  48   39]]


Unnamed: 0,model,acc_train,auc_train,acc_test,precision,recall,f1_score,AUC_test
0,LogisticRegression(),0.97,0.64,0.97,0.36,0.37,0.36,0.68
1,LinearDiscriminantAnalysis(),0.97,0.61,0.97,0.38,0.36,0.37,0.67
2,GaussianNB(),0.95,0.7,0.88,0.11,0.56,0.19,0.73
3,"(DecisionTreeClassifier(max_features='sqrt', r...",1.0,1.0,0.97,0.41,0.45,0.43,0.72
4,"XGBClassifier(base_score=None, booster=None, c...",1.0,1.0,0.97,0.41,0.45,0.43,0.72


In [73]:
## train 데이터만 1:2 오버샘플링후 적합

train_1 =over(train[최종_col_2],train["target_2"], method='Borderline-SMOTE', sampling_strategy=0.5)

model_basic(train_1[최종_col_2], train_1["target_2"], x_test, y_test)

[[3001  504]
 [  22   65]]
[[2936  569]
 [  21   66]]
[[2886  619]
 [  20   67]]
[[3352  153]
 [  42   45]]
[[3316  189]
 [  38   49]]


Unnamed: 0,model,acc_train,auc_train,acc_test,precision,recall,f1_score,AUC_test
0,LogisticRegression(),0.88,0.87,0.85,0.11,0.75,0.2,0.8
1,LinearDiscriminantAnalysis(),0.87,0.86,0.84,0.1,0.76,0.18,0.8
2,GaussianNB(),0.87,0.87,0.82,0.1,0.77,0.17,0.8
3,"(DecisionTreeClassifier(max_features='sqrt', r...",1.0,1.0,0.95,0.23,0.52,0.32,0.74
4,"XGBClassifier(base_score=None, booster=None, c...",1.0,1.0,0.94,0.21,0.56,0.3,0.75


In [74]:
## train 데이터 오버 1:0.5, test 데이터 언더 1:0.5 

train_1 =over(train[최종_col_2],train["target_2"], method='Borderline-SMOTE', sampling_strategy=0.5)

test_1 =under(test[최종_col_2],test["target_2"], method='random', sampling_strategy=0.5)

model_basic(train_1[최종_col_2], train_1["target_2"], test_1[최종_col_2], test_1["target_2"])

[[156  18]
 [ 21  66]]
[[151  23]
 [ 21  66]]
[[152  22]
 [ 20  67]]
[[168   6]
 [ 41  46]]
[[166   8]
 [ 36  51]]


Unnamed: 0,model,acc_train,auc_train,acc_test,precision,recall,f1_score,AUC_test
0,LogisticRegression(),0.88,0.87,0.85,0.79,0.76,0.77,0.83
1,LinearDiscriminantAnalysis(),0.88,0.86,0.83,0.74,0.76,0.75,0.81
2,GaussianNB(),0.87,0.86,0.84,0.75,0.77,0.76,0.82
3,"(DecisionTreeClassifier(max_features='sqrt', r...",1.0,1.0,0.82,0.88,0.53,0.66,0.75
4,"XGBClassifier(base_score=None, booster=None, c...",1.0,1.0,0.83,0.86,0.59,0.7,0.77


In [75]:
evaluate_resampling_model(x_train, y_train,x_test,y_test,
                          method='Logistic',over_ratio=0.5,under_ratio=0.5,num_iterations=1000)

{'accuracy_scores_mean': [0.8235517241379312], 'f1_scores_mean': [0.7388499561892334], 'recall_scores_mean': [0.7471954022988505], 'precision_scores_mean': [0.7316321648058731], 'auc_mean': [0.8297516184436518]}
{'accuracy_scores_std': [0.017542785232715262], 'f1_scores_std': [0.020369605741347892], 'recall_scores_std': [0.012706032243930519], 'precision_scores_std': [0.0367776985302303], 'auc_std': [0.011072161470921067]}


In [76]:
evaluate_resampling_model(x_train, y_train,x_test,y_test,
                          method='LDA',over_ratio=0.5,under_ratio=0.5,num_iterations=1000)

{'accuracy_scores_mean': [0.8157777777777778], 'f1_scores_mean': [0.7343041272509847], 'recall_scores_mean': [0.7617586206896553], 'precision_scores_mean': [0.709670863916243], 'auc_mean': [0.8296761791518034]}
{'accuracy_scores_std': [0.018368950936542987], 'f1_scores_std': [0.019655453726058772], 'recall_scores_std': [0.0068046656566970315], 'precision_scores_std': [0.036153426971558426], 'auc_std': [0.010493832862763657]}


----
# target3

In [77]:
train = pd.read_csv("./datasets/통계검증완료/코스닥_power_train.csv", index_col=0)
test = pd.read_csv("./datasets/통계검증완료/코스닥_power_test.csv", index_col=0)

id_col = ['회사명', '거래소코드', '회계년도', '산업군','target_3']

최종_col_3= ['EBIT/총자산', '자기자본순이익률', 'EBITDA마진율', '자본금회전률', '당기전기영업손익', '시총/총자산',
       '총자본증가율', '매출액증가율', 'ROA변화율']


x_train = train[최종_col_3]
y_train = train["target_3"]

x_test = test[최종_col_3]
y_test = test["target_3"]



In [78]:
## 불균형 처리 안한 적합
model_basic(x_train, y_train, x_test, y_test)

[[3423   97]
 [  38   34]]
[[3374  146]
 [  32   40]]
[[3016  504]
 [  17   55]]
[[3440   80]
 [  35   37]]
[[3441   79]
 [  33   39]]


Unnamed: 0,model,acc_train,auc_train,acc_test,precision,recall,f1_score,AUC_test
0,LogisticRegression(),0.98,0.66,0.96,0.26,0.47,0.33,0.72
1,LinearDiscriminantAnalysis(),0.97,0.69,0.95,0.22,0.56,0.31,0.76
2,GaussianNB(),0.94,0.76,0.85,0.1,0.76,0.17,0.81
3,"(DecisionTreeClassifier(max_features='sqrt', r...",1.0,1.0,0.97,0.32,0.51,0.39,0.75
4,"XGBClassifier(base_score=None, booster=None, c...",1.0,1.0,0.97,0.33,0.54,0.41,0.76


In [79]:
## train 데이터만 1:2 오버샘플링후 적합

train_1 =over(train[최종_col_3],train["target_3"], method='Borderline-SMOTE', sampling_strategy=0.5)

model_basic(train_1[최종_col_3], train_1["target_3"], x_test, y_test)

[[3104  416]
 [  16   56]]
[[3085  435]
 [  12   60]]
[[2920  600]
 [  10   62]]
[[3347  173]
 [  23   49]]
[[3340  180]
 [  22   50]]


Unnamed: 0,model,acc_train,auc_train,acc_test,precision,recall,f1_score,AUC_test
0,LogisticRegression(),0.92,0.91,0.88,0.12,0.78,0.21,0.83
1,LinearDiscriminantAnalysis(),0.91,0.9,0.88,0.12,0.83,0.21,0.85
2,GaussianNB(),0.9,0.9,0.83,0.09,0.86,0.17,0.85
3,"(DecisionTreeClassifier(max_features='sqrt', r...",1.0,1.0,0.95,0.22,0.68,0.33,0.82
4,"XGBClassifier(base_score=None, booster=None, c...",1.0,1.0,0.94,0.22,0.69,0.33,0.82


In [80]:
## train 데이터 오버 1:0.5, test 데이터 언더 1:0.5 

train_1 =over(train[최종_col_3],train["target_3"], method='Borderline-SMOTE', sampling_strategy=0.5)

test_1 =under(test[최종_col_3],test["target_3"], method='random', sampling_strategy=0.5)

model_basic(train_1[최종_col_3], train_1["target_3"], test_1[최종_col_3], test_1["target_3"])


[[129  15]
 [ 13  59]]
[[130  14]
 [ 11  61]]
[[116  28]
 [ 10  62]]
[[137   7]
 [ 23  49]]
[[136   8]
 [ 27  45]]


Unnamed: 0,model,acc_train,auc_train,acc_test,precision,recall,f1_score,AUC_test
0,LogisticRegression(),0.92,0.91,0.87,0.8,0.82,0.81,0.86
1,LinearDiscriminantAnalysis(),0.91,0.9,0.88,0.81,0.85,0.83,0.88
2,GaussianNB(),0.9,0.89,0.82,0.69,0.86,0.77,0.83
3,"(DecisionTreeClassifier(max_features='sqrt', r...",1.0,1.0,0.86,0.88,0.68,0.77,0.82
4,"XGBClassifier(base_score=None, booster=None, c...",1.0,1.0,0.84,0.85,0.62,0.72,0.78


In [81]:
evaluate_resampling_model(x_train, y_train,x_test,y_test,
                          method='Logistic',over_ratio=0.5,under_ratio=0.5,num_iterations=1000)

{'accuracy_scores_mean': [0.8536527777777776], 'f1_scores_mean': [0.7856773247864997], 'recall_scores_mean': [0.8023194444444444], 'precision_scores_mean': [0.7708324109898514], 'auc_mean': [0.8883007330246914]}
{'accuracy_scores_std': [0.019010636484720442], 'f1_scores_std': [0.02286635761879442], 'recall_scores_std': [0.01463753815189386], 'precision_scores_std': [0.04078891933054859], 'auc_std': [0.012835869920322956]}


In [82]:
evaluate_resampling_model(x_train, y_train,x_test,y_test,
                          method='LDA',over_ratio=0.5,under_ratio=0.5,num_iterations=1000)

{'accuracy_scores_mean': [0.8686018518518517], 'f1_scores_mean': [0.8132194452978331], 'recall_scores_mean': [0.8553333333333332], 'precision_scores_mean': [0.7759347443799438], 'auc_mean': [0.8925042438271605]}
{'accuracy_scores_std': [0.017832234784055812], 'f1_scores_std': [0.020774280369601793], 'recall_scores_std': [0.00810977919049088], 'precision_scores_std': [0.03710980463450249], 'auc_std': [0.012292417475160865]}


-----------------------------
#target3   train 정상 : 부실 = 1:0.5 오버샘플링에서 성능이 가장좋음
 
------------------------------

In [83]:
가져가는_col = id_col + 최종_col_3
train_1.to_csv('./datasets/불균형처리/boder_코스닥_power_train.csv',encoding='utf-8-sig')
test[가져가는_col].to_csv('./datasets/불균형처리/코스닥_power_test.csv',encoding='utf-8-sig')