In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score ## F1 Score 구하기
from sklearn.metrics import precision_recall_fscore_support as sk
from imblearn.over_sampling import SMOTE

In [2]:
#먼저 데이터 읽어오기
df=pd.read_excel('../optimal_data2/Continous_2weeks_2day_1term.xlsx')
df.head()
X=df.iloc[:,[1,3,4,5,6,7]]
y=df.iloc[:,-1]

In [3]:
#스케일링 먼저 하고 ndarray를 dataframe으로 변환
scaler = StandardScaler()
X=scaler.fit_transform(X)
X=pd.DataFrame(X)

#결과 넣을 배열
Result=[[0 for j in range(4)] for i in range(10)]

Count= int(322/10)*83

In [4]:
for i in range(10):
    #마지막 그룹은 34명
    if i==9:
        X_test=X.iloc[Count*i:-1]
        X_train=X.drop(X.index[Count*i:-1])
        y_test=y.iloc[Count*i:-1]
        y_train=y.drop(y.index[Count*i:-1])
    
    #모든 그룹은 32명씩 (마지막은 34)
    X_test=X.iloc[Count*i:Count*(i+1)]
    X_train=X.drop(X.index[Count*i:Count*(i+1)])
    y_test=y.iloc[Count*i:Count*(i+1)]
    y_train=y.drop(y.index[Count*i:Count*(i+1)])
    
    #SMOTE 적용
    smote = SMOTE(random_state=0)
    X_train,y_train = smote.fit_resample(X_train,y_train)
    X_test,y_test = smote.fit_resample(X_test,y_test)
    
    #모델 정의, 예측
    model= LogisticRegression(C=1, class_weight='balanced',
                          random_state=1, multi_class='ovr',
                         n_jobs=-1, solver='lbfgs').fit(X_train,y_train)
    
    predict=model.predict(X_test)
    
    #Accuracy
    print("[{}]Accuracy : {}".format(i,model.score(X_test,y_test)))
    #f1score
    f1 = f1_score(y_test,predict,pos_label='positive', average='weighted')
    print("[{}]F1score : {}".format(i,f1))
    #precision/recall
    list=sk(y_test,predict,average='weighted')
    print("[{}]Precision : {}".format(i,list[0]))
    print("[{}]Recall : {}".format(i,list[1]))
    print()
    
    #결과 배열에 넣기
    Result[i][0]=model.score(X_test,y_test)
    Result[i][1]=f1
    Result[i][2]=list[0]
    Result[i][3]=list[1]

[0]Accuracy : 0.5014409221902018
[0]F1score : 0.4913802233791242
[0]Precision : 0.5064445276433173
[0]Recall : 0.5014409221902018





[1]Accuracy : 0.385
[1]F1score : 0.3634643696446932
[1]Precision : 0.3695399684758943
[1]Recall : 0.385





[2]Accuracy : 0.48484848484848486
[2]F1score : 0.48361990763095425
[2]Precision : 0.507754107364808
[2]Recall : 0.48484848484848486





[3]Accuracy : 0.42684224527799947
[3]F1score : 0.42412097271357485
[3]Precision : 0.43797504002094867
[3]Recall : 0.42684224527799947





[4]Accuracy : 0.43491644678979774
[4]F1score : 0.43511419603805457
[4]Precision : 0.43535249103522106
[4]Recall : 0.43491644678979774





[5]Accuracy : 0.335498714305048
[5]F1score : 0.2968320149041707
[5]Precision : 0.3333056793412781
[5]Recall : 0.335498714305048





[6]Accuracy : 0.39929567926317217
[6]F1score : 0.39349625181464604
[6]Precision : 0.3951072906153123
[6]Recall : 0.39929567926317217





[7]Accuracy : 0.5185964912280702
[7]F1score : 0.48496575539450587
[7]Precision : 0.5365227256002298
[7]Recall : 0.5185964912280702





[8]Accuracy : 0.35084882780921584
[8]F1score : 0.3137983782882136
[8]Precision : 0.33040651529754406
[8]Recall : 0.35084882780921584





[9]Accuracy : 0.3811400052534804
[9]F1score : 0.3550673930086006
[9]Precision : 0.34697627407461895
[9]Recall : 0.3811400052534804





In [5]:
Result_df=pd.DataFrame(Result,columns=['Accuracy','F1-Score','Precision','Recall'])
Result_df

Unnamed: 0,Accuracy,F1-Score,Precision,Recall
0,0.501441,0.49138,0.506445,0.501441
1,0.385,0.363464,0.36954,0.385
2,0.484848,0.48362,0.507754,0.484848
3,0.426842,0.424121,0.437975,0.426842
4,0.434916,0.435114,0.435352,0.434916
5,0.335499,0.296832,0.333306,0.335499
6,0.399296,0.393496,0.395107,0.399296
7,0.518596,0.484966,0.536523,0.518596
8,0.350849,0.313798,0.330407,0.350849
9,0.38114,0.355067,0.346976,0.38114


In [6]:
print("Average of Accuracy {}".format(Result_df['Accuracy'].mean()))
print("Average of F1-Score {}".format(Result_df['F1-Score'].mean()))
print("Average of Precision {}".format(Result_df['Precision'].mean()))
print("Average of Recall {}".format(Result_df['Recall'].mean()))

Average of Accuracy 0.42184278169654704
Average of F1-Score 0.4041859462816538
Average of Precision 0.4199384619469172
Average of Recall 0.42184278169654704
