In [6]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import Pipeline
from sklearn.metrics import f1_score ## F1 Score 구하기
from sklearn.metrics import precision_recall_fscore_support as sk
from imblearn.over_sampling import SMOTE

In [7]:
#먼저 데이터 읽어오기
df=pd.read_excel('../optimal_data2/Continous_2weeks_28day_8term.xlsx')
df.head()
X=df.iloc[:,[1,3,4,5,6,7]]
y=df.iloc[:,-1]

#스케일링 먼저 하고 ndarray를 dataframe으로 변환
scaler = StandardScaler()
X=scaler.fit_transform(X)
X=pd.DataFrame(X)

#결과 넣을 배열
Result=[[0 for j in range(4)] for i in range(10)]

In [8]:
Count= int(322/10)*8
for i in range(10):
    #마지막 그룹은 34명
    if i==9:
        X_test=X.iloc[Count*i:-1]
        X_train=X.drop(X.index[Count*i:-1])
        y_test=y.iloc[Count*i:-1]
        y_train=y.drop(y.index[Count*i:-1])
    
    #모든 그룹은 32명씩 (마지막은 34)
    X_test=X.iloc[Count*i:Count*(i+1)]
    X_train=X.drop(X.index[Count*i:Count*(i+1)])
    y_test=y.iloc[Count*i:Count*(i+1)]
    y_train=y.drop(y.index[Count*i:Count*(i+1)])
    
    #SMOTE 적용
    smote = SMOTE(random_state=0)
    X_train,y_train = smote.fit_resample(X_train,y_train)
    X_test,y_test = smote.fit_resample(X_test,y_test)
    
    #모델 정의, 예측
    model = GaussianNB()
    model.fit(X_train, y_train)
    
    predict=model.predict(X_test)
    #Accuracy
    print("[{}]Accuracy : {}".format(i,model.score(X_test,y_test)))
    #f1score
    f1 = f1_score(y_test,predict, average='weighted')
    print("[{}]F1score : {}".format(i,f1))
    #precision/recall
    list=sk(y_test,predict,average='weighted')
    print("[{}]Precision : {}".format(i,list[0]))
    print("[{}]Recall : {}".format(i,list[1]))
    print()
    
    #결과 배열에 넣기
    Result[i][0]=model.score(X_test,y_test)
    Result[i][1]=f1
    Result[i][2]=list[0]
    Result[i][3]=list[1]

[0]Accuracy : 0.38650306748466257
[0]F1score : 0.3015379512509218
[0]Precision : 0.3746865480198813
[0]Recall : 0.38650306748466257

[1]Accuracy : 0.36075949367088606
[1]F1score : 0.24424988870752337
[1]Precision : 0.37401150251617543
[1]Recall : 0.36075949367088606

[2]Accuracy : 0.3973634651600753
[2]F1score : 0.31940556613045695
[2]Precision : 0.455906545561718
[2]Recall : 0.3973634651600753

[3]Accuracy : 0.35144927536231885
[3]F1score : 0.25900444090939223
[3]Precision : 0.32705425692839
[3]Recall : 0.35144927536231885

[4]Accuracy : 0.4166666666666667
[4]F1score : 0.3475596230904623
[4]Precision : 0.42965984632651305
[4]Recall : 0.4166666666666667

[5]Accuracy : 0.34767025089605735
[5]F1score : 0.25686570436607614
[5]Precision : 0.2665343733831809
[5]Recall : 0.34767025089605735

[6]Accuracy : 0.4026974951830443
[6]F1score : 0.3438456055548175
[6]Precision : 0.3693001060445387
[6]Recall : 0.4026974951830443

[7]Accuracy : 0.33564814814814814
[7]F1score : 0.22897837330665863
[7]Pr

In [9]:
Result_df=pd.DataFrame(Result,columns=['Accuracy','F1-Score','Precision','Recall'])
Result_df

Unnamed: 0,Accuracy,F1-Score,Precision,Recall
0,0.386503,0.301538,0.374687,0.386503
1,0.360759,0.24425,0.374012,0.360759
2,0.397363,0.319406,0.455907,0.397363
3,0.351449,0.259004,0.327054,0.351449
4,0.416667,0.34756,0.42966,0.416667
5,0.34767,0.256866,0.266534,0.34767
6,0.402697,0.343846,0.3693,0.402697
7,0.335648,0.228978,0.34728,0.335648
8,0.416162,0.349299,0.447885,0.416162
9,0.379487,0.289079,0.302127,0.379487


In [10]:
print("Average of Accuracy {}".format(Result_df['Accuracy'].mean()))
print("Average of F1-Score {}".format(Result_df['F1-Score'].mean()))
print("Average of Precision {}".format(Result_df['Precision'].mean()))
print("Average of Recall {}".format(Result_df['Recall'].mean()))

Average of Accuracy 0.3794406658220654
Average of F1-Score 0.2939825334441609
Average of Precision 0.36944453011890044
Average of Recall 0.3794406658220654
