In [1]:
import pandas as pd
import numpy as np
import os
import cv2
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

In [None]:
def create_data(root,sample_num=0):
    """Index a folder-per-class image tree as a table of paths and labels.

    Every sub-directory of ``root`` is treated as one class; its name becomes
    the label of every file it contains.

    Parameters
    ----------
    root : str
        Directory whose sub-directories hold the files of each class.
    sample_num : int, optional
        Maximum number of files to keep per class. ``0`` (the default)
        keeps every file.

    Returns
    -------
    pandas.DataFrame
        Columns ``pic_name`` (path built as ``root/class/file``) and
        ``label`` (class directory name), with a fresh 0..n-1 index.
        Empty (but with both columns) when no class directory is found.
    """
    frames = []
    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order and the `sample_num` subset identical from run to run.
    for face in sorted(os.listdir(root)):
        cur_path = "%s/%s"%(root,face)
        # Skip stray files (e.g. .DS_Store) that are not class folders;
        # listing them would raise NotADirectoryError.
        if not os.path.isdir(cur_path):
            continue
        f_name = sorted(os.listdir(cur_path))
        if sample_num != 0:
            f_name = f_name[:sample_num]
        f_name = ["%s/%s"%(cur_path,f) for f in f_name]
        frames.append(pd.DataFrame({
            'pic_name': f_name,
            'label': [face] * len(f_name),
        }))
    if not frames:
        # pd.concat refuses an empty list, so return an empty table instead.
        return pd.DataFrame(columns=['pic_name','label'])
    # ignore_index avoids repeating 0..k-1 for every class in the result.
    return pd.concat(frames, ignore_index=True)



def get_feature(im):
    """Return six texture statistics of ``im`` as a flat NumPy array.

    A matrix is built with ``gm`` for distance 5 and angles 0 and pi/2, then
    ``gp`` is queried for contrast, dissimilarity and homogeneity. For each
    property the entries ``[0, 0]`` and ``[0, 1]`` are kept, in that order.

    NOTE(review): ``gm`` and ``gp`` are not defined or imported anywhere in
    the visible notebook; presumably they are scikit-image's co-occurrence
    matrix helpers -- confirm and import them before calling this function.
    """
    glcm = gm(im, [5], [0, np.pi/2], levels=256, symmetric=True)

    values = []
    # The ASM property was disabled in the original and stays excluded here.
    for prop in ('contrast', 'dissimilarity', 'homogeneity'):
        stats = gp(glcm, prop)
        values.extend([stats[0, 0], stats[0, 1]])

    return np.array(values).flatten()

# def single_create_feature_label(path):
#     im = cv2.imread(path,cv2.COLOR_BGR2GRAY)
#     im = cv2.imread(path,0)
#     feature_list = get_feature(im)
#     return feature_list

def single_create_feature_label(path):
    """Load one image and return its 60x60 grayscale pixels as a flat vector.

    Parameters
    ----------
    path : str
        Path of the image file to read.

    Returns
    -------
    numpy.ndarray
        1-D array holding the 60 * 60 = 3600 grayscale pixel values.

    Raises
    ------
    OSError
        If OpenCV cannot read or decode the file at ``path``.
    """
    im = cv2.imread(path)
    if im is None:
        # cv2.imread reports a missing/unreadable file by returning None;
        # fail here with the offending path instead of inside cv2.resize.
        raise OSError("could not read image: %s" % path)
    im = cv2.resize(im, (60, 60), interpolation=cv2.INTER_CUBIC)
    # cv2.imread yields channels in BGR order, so the BGR conversion code is
    # required for the red and blue channels to get the correct weights.
    img_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    return img_gray.flatten()





# Index the three dataset splits (all files of every class), then preview
# the first rows of the training table.
train, val, test = (
    create_data(split_dir) for split_dir in ("./train/", "./valid/", "./test/")
)
train.head()

Unnamed: 0,pic_name,label
0,./train//angry/10012Exp0fighting_people_350.jpg,angry
1,./train//angry/10018Exp0fighting_people_429.jpg,angry
2,./train//angry/10025Exp0fighting_people_5.jpg,angry
3,./train//angry/10037Exp0fighting_people_573.jpg,angry
4,./train//angry/10039Exp0fighting_people_59.jpg,angry


In [None]:
# train = create_data("./train/",3000)
# val = create_data("./valid/",600)
# test = create_data("./test/",600)

In [None]:
def create_feature(df):
    """Turn a path/label table into a feature table.

    Each path in ``df['pic_name']`` is passed to
    ``single_create_feature_label``; the resulting vectors become the rows of
    a new DataFrame whose columns carry a ``_feat`` suffix, followed by a
    ``label`` column copied positionally from ``df['label']``. Progress is
    printed as ``index/total`` for every image.
    """
    paths = df['pic_name'].tolist()
    total = len(df)
    rows = []
    for position in range(len(paths)):
        print("%s/%s"%(position,total))
        rows.append(single_create_feature_label(paths[position]))
    features = pd.DataFrame(rows).add_suffix("_feat")
    # .tolist() drops the source index so labels are aligned by position.
    features['label'] = df['label'].tolist()
    return features

In [None]:
# Build the feature table of every split, in train / val / test order.
train_new, val_new, test_new = [
    create_feature(split) for split in (train, val, test)
]


In [None]:
# Folder name -> integer class. Seven folder names collapse into three
# classes (presumably negative / neutral / positive -- confirm intent).
lable_map = {
    'disgust': 0,
    'angry': 0,
    'neutral': 1,
    'surprise': 2,
    'fear': 0,
    'sad': 0,
    'happy': 2,
}

In [None]:
# Replace the string labels of every split by their integer class.
# Series.map turns any value missing from lable_map into NaN without warning
# (an unexpected folder name, or this cell being run a second time on labels
# that are already integers), so validate before overwriting the column.
for _split_name, _split_df in (('train', train_new), ('val', val_new), ('test', test_new)):
    _encoded = _split_df['label'].map(lable_map)
    _missing = _encoded.isna()
    if _missing.any():
        _unknown = sorted(set(_split_df.loc[_missing, 'label'].astype(str)))
        raise ValueError(
            "%s split has labels missing from lable_map: %s" % (_split_name, _unknown)
        )
    _split_df['label'] = _encoded

In [None]:
def split_feat_label(df):
    """Separate a feature table into ``(features, labels)``.

    ``features`` holds every column of ``df`` except ``label``;
    ``labels`` is the ``label`` column itself.
    """
    feature_names = [name for name in df.columns if name != 'label']
    return df[feature_names], df['label']

In [None]:
# Split every feature table into its feature matrix and its label column.
(train_x, train_y), (val_x, val_y), (test_x, test_y) = (
    split_feat_label(table) for table in (train_new, val_new, test_new)
)

In [None]:
from sklearn.decomposition import PCA
# Reduce the pixel features to 20 principal components. The projection is
# fitted on the training split only and then applied unchanged to the
# validation and test splits.
# random_state pins the result when PCA selects its randomized SVD solver;
# 1111 matches the seed used by the classifiers in this notebook.
pca = PCA(n_components=20, random_state=1111)
train_x_pca = pca.fit_transform(train_x)
val_x_pca = pca.transform(val_x)
test_x_pca = pca.transform(test_x)


In [None]:
from sklearn.ensemble import RandomForestClassifier,AdaBoostClassifier,GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import BaggingClassifier




# Fit every candidate classifier on the PCA-projected training features.
clf_rf = RandomForestClassifier(random_state=1111,max_depth=6)
clf_rf.fit(train_x_pca, train_y)


clf_lr = LogisticRegression()
clf_lr.fit(train_x_pca, train_y)


clf_ada = AdaBoostClassifier(random_state=1111)
clf_ada.fit(train_x_pca, train_y)


clf_svc = SVC(kernel='rbf', class_weight='balanced',)
clf_svc.fit(train_x_pca, train_y)



clf_gbdt = GradientBoostingClassifier(random_state=1111,max_depth=5)
clf_gbdt.fit(train_x_pca, train_y)


# The base learner is passed positionally: its keyword was `base_estimator`
# before scikit-learn 1.2 and is `estimator` since (the old name was removed
# in 1.4), while the first positional slot works on both.
# random_state seeds the bootstrap sampling, like the other ensembles here.
clf_gb_lr = BaggingClassifier(clf_lr, n_estimators=10, max_samples=0.8, random_state=1111)
clf_gb_lr.fit(train_x_pca, train_y)

# clf_nb = MultinomialNB()
# clf_nb.fit(train_x_pca, train_y)

In [None]:
from sklearn.metrics import roc_auc_score,accuracy_score
def get_metric_acc(model,x,y):
    """Return the accuracy of ``model.predict(x)`` measured against ``y``."""
    return accuracy_score(y, model.predict(x))




def model_metric(model,mark):
    """Print the train, validation and test accuracy of ``model``.

    The PCA-projected splits are read from the notebook's global variables.
    ``mark`` is the display name placed in front of every printed line.
    """
    splits = (
        ("%s train_acc", train_x_pca, train_y),
        ("%s val_acc", val_x_pca, val_y),
        ("%s test_acc", test_x_pca, test_y),
    )
    # Score all splits before printing anything, so nothing is printed if
    # one of the evaluations fails.
    scored = [
        (template, get_metric_acc(model, feats, labels))
        for template, feats, labels in splits
    ]
    for template, score in scored:
        print(template%(mark),score)
    
# Report the accuracy of every fitted model on the three splits.
for fitted_model, display_name in (
    (clf_rf, 'RandomForest'),
    (clf_lr, 'LogisticRegression'),
    (clf_ada, 'AdaBoost'),
    (clf_gbdt, 'GradientBoosting'),
    (clf_gb_lr, 'BaggingClassifier'),
):
    model_metric(fitted_model, display_name)

# model_metric(clf_svc,'SVM') 