In [1]:
%matplotlib inline

In [2]:
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

# Directory that is listed for the raw ".tiff" images.
original_data_dir_path = "../data/raw_data/jaffe"
# Text file holding the semantic rating table (read by read_semantic_rating_data).
semantic_rating_data_path = "../data/raw_data/jaffe/jaffe_semantic_rating_data"

# Root of the rebuilt dataset; one sub-directory per entry of class_list is created under it.
rebuild_dir_pre_path = "../data/jaffe"
# Class labels, also used as the sub-directory names.
class_list = ["NEU", "HAP", "SAD", "SUR", "ANG", "DIS", "FEA"]

In [3]:
# Read the list of images: keep only the entries whose last dot-separated
# component is "tiff", and start every image with an empty class label.
filenames_list = [
    entry
    for entry in os.listdir(original_data_dir_path)
    if entry.rsplit(".", 1)[-1] == "tiff"
]
filenames_map = dict.fromkeys(filenames_list, "")
print(len(filenames_list), len(filenames_map))

213 213


In [4]:
# Build the output directory tree: the root directory first, then one
# sub-directory per class.
dirs = [rebuild_dir_pre_path+"/"+c for c in class_list]
dirs.insert(0, rebuild_dir_pre_path)

# Names present in the raw-data directory; these are the only names removed
# from an already-existing output directory. Listed once because the result
# does not depend on the output directory being processed.
source_names = os.listdir(original_data_dir_path)

for target_dir in dirs:  # not named `dir`, which would shadow the builtin
    if not os.path.exists(target_dir):
        # makedirs also creates missing parents, unlike os.mkdir.
        os.makedirs(target_dir, exist_ok=True)
        continue
    for filename in source_names:
        stale_path = os.path.join(target_dir, filename)
        try:
            os.remove(stale_path)
        except FileNotFoundError:
            # Expected: this name was never written into this directory.
            pass
        except OSError as err:
            # Stay best-effort, but report the problem instead of hiding it.
            print("could not remove", stale_path, "-", err)

In [5]:
def read_semantic_rating_data(file_path, with_FER = True):
    '''
    Read the semantic rating data file and return one slice of its lines.

    file_path: path of the text file to read.
    with_FER:  when truthy, lines[1:220] are returned; otherwise
               lines[222:409] are returned. (Presumably these are the two
               rating tables stored in the file - verify against the data.)

    Returns a list of raw lines, each still carrying its line terminator.
    '''
    with open(file_path, "r") as handle:
        all_lines = list(handle)
    first, last = (1, 220) if with_FER else (222, 409)
    return all_lines[first:last]

In [6]:
def get_class_with_semantic_rating_data(lines, filenames_map, with_FER=None):
    '''
    Assign a class to each image from the given semantic rating lines.

    Each data line is whitespace-separated and laid out as
        IND HAP SAD SUR ANG DIS FEA PIC     (8 columns, with the FEA rating)
    or
        IND HAP SAD SUR ANG DIS PIC         (7 columns, without it).
    The image file name is rebuilt as PIC (with "-" replaced by ".") + "." +
    IND + ".tiff". When that name is a key of filenames_map, its value is set
    to the label with the highest rating.

    lines:         iterable of text lines; blank lines and lines starting
                   with "#" are skipped.
    filenames_map: dict mapping file name -> class label, updated in place.
    with_FER:      True  -> every line must have the 8-column layout;
                   False -> every line must have the 7-column layout;
                   None (default) -> the layout is detected per line from
                   its column count, so no global variable is needed.

    Returns None. Raises ValueError when a line has an unexpected number of
    columns or a rating of a known image is not a number.
    '''
    labels = ("HAP", "SAD", "SUR", "ANG", "DIS", "FEA")
    for line in lines:
        # split() without an argument tolerates repeated spaces, tabs and the
        # trailing newline.
        fields = line.split()
        if not fields or fields[0].startswith("#"):
            continue

        if with_FER is None:
            has_fear = len(fields) == 8
        else:
            has_fear = bool(with_FER)
        expected_columns = 8 if has_fear else 7
        if len(fields) != expected_columns:
            raise ValueError(
                "expected %d columns but got %d in line: %r"
                % (expected_columns, len(fields), line)
            )

        IND, PIC = fields[0], fields[-1]
        filename = PIC.replace("-", ".") + "." + IND + ".tiff"
        if filename not in filenames_map:
            print(filename, "Not Found")
            continue

        rates = [float(value) for value in fields[1:-1]]
        if not has_fear:
            # No FEA column: use a sentinel rating of -1.0 in its place.
            rates.append(-1.0)

        best_rate = max(rates)
        # On a tie the label that comes last in `labels` wins, because later
        # matches overwrite earlier ones.
        for label, rate in zip(labels, rates):
            if rate == best_rate:
                filenames_map[filename] = label

In [7]:
def get_class_with_PIC_name(filenames_map):
    '''
    Assign a class to each image from its file name (i.e. the class defined
    by the database itself).

    Characters 3-4 of every key (filename[3:5]) are looked up in a fixed
    two-letter-code table; on a match the key's value is replaced with the
    corresponding class label. Keys whose code is not in the table keep their
    current value. filenames_map is updated in place; nothing is returned.
    '''
    code_to_class = {
        "NE": "NEU",
        "HA": "HAP",
        "SA": "SAD",
        "SU": "SUR",
        "AN": "ANG",
        "DI": "DIS",
        "FE": "FEA",
    }
    for image_name in filenames_map:
        label = code_to_class.get(image_name[3:5])
        if label is not None:
            # Only existing keys are reassigned, so iterating stays safe.
            filenames_map[image_name] = label

In [8]:
# Choose exactly one of the two labelling approaches below.

# with_FER = True # whether to include the FER data (some researchers found that these subjects express the FER emotion somewhat inaccurately)
# lines = read_semantic_rating_data(semantic_rating_data_path, with_FER)
# get_class_with_semantic_rating_data(lines, filenames_map)

get_class_with_PIC_name(filenames_map)

In [9]:
# Write every labelled image into the sub-directory of its class.
for filename, c in filenames_map.items():
    if c == "":
        # No class was assigned to this image; report it and move on.
        print(filename, "not get semantic rating data")
        continue
    # The context manager closes the source file as soon as the image has
    # been saved, instead of leaving the handle open until garbage collection.
    with Image.open(os.path.join(original_data_dir_path, filename)) as img:
        img.save(os.path.join(rebuild_dir_pre_path, c, filename))