In [1]:
import pandas as pd
import numpy as np
import keras

from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Activation
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from IPython.display import display
from collections import OrderedDict

# Notebook-wide pandas display settings:
# - never truncate rows when a DataFrame is rendered;
# - render floats with a thousands separator and three decimals.
pd.set_option('display.max_rows', None)
pd.set_option('display.float_format', '{:,.3f}'.format)

Using TensorFlow backend.
  return f(*args, **kwds)


In [2]:
def load_utasp(path='./2014UTASP.csv', test_size=0.1, random_state=42):
    """Load the UTASP CSV and split it into training and test sets.

    The first column is read as the candidate name, the second column as a
    1-based integer class label, and every remaining column as a numeric
    feature. Features are cast to float32 and divided by 6.

    Parameters
    ----------
    path : str
        Location of the CSV file.
    test_size : float
        Fraction of rows held out for the test set (passed to
        ``train_test_split``).
    random_state : int
        Seed for the shuffle, so the split is repeatable.

    Returns
    -------
    tuple
        ``((train_X, train_y, train_name), (test_X, test_y, test_name))``
        where ``*_X`` are float32 feature arrays, ``*_y`` are one-hot label
        arrays and ``*_name`` are 1-D arrays of names.
    """
    df = pd.read_csv(path)
    # .values keeps the names as a plain ndarray so the split pieces carry no
    # pandas index (a Series here would misalign later DataFrame construction).
    names = df.iloc[:, 0].values
    # Labels in the file start at 1; to_categorical expects ids starting at 0.
    onehot = to_categorical(df.iloc[:, 1].values - 1)
    features = df.iloc[:, 2:].values.astype('float32') / 6

    # Split the three arrays in a single call. Rows stay aligned across all of
    # them, and the labels keep their numeric dtype instead of being packed
    # together with the name strings into one object-dtype array, which newer
    # Keras/TensorFlow versions refuse to convert to a tensor.
    (train_X, test_X,
     train_y, test_y,
     train_name, test_name) = train_test_split(
        features, onehot, names,
        test_size=test_size, random_state=random_state)

    return (train_X, train_y, train_name), (test_X, test_y, test_name)


def utasp_model(input_dim=33, n_classes=6):
    """Build and compile the feed-forward classifier.

    The network is two ReLU hidden layers (150 and 50 units) followed by a
    softmax output layer, compiled with categorical cross-entropy, the Adam
    optimizer and accuracy as the reported metric.

    Parameters
    ----------
    input_dim : int
        Number of input features per sample. Defaults to 33.
    n_classes : int
        Number of output classes (width of the softmax layer). Defaults to 6.

    Returns
    -------
    keras.models.Sequential
        The compiled, untrained model.
    """
    model = Sequential()
    model.add(Dense(units=150, activation='relu', input_shape=(input_dim,)))
    model.add(Dense(units=50, activation='relu'))
    # One output unit per class; softmax turns them into class probabilities.
    model.add(Dense(units=n_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model

In [3]:
# Load the data, train a fresh model and predict class probabilities for the
# held-out rows.
# NOTE(review): no Keras/TensorFlow seed is set in this notebook, so the fitted
# weights (and the numbers shown below) presumably vary between runs.
(train_X, train_y, train_name), (test_X, test_y, test_name) = load_utasp()
model = utasp_model()
model.fit(x=train_X, y=train_y, batch_size=30, epochs=20, verbose=0)
pred_y = model.predict(test_X)

# Display label for each class, in class-index order.
label = ['自', '民', '維', '公', '次', '共']

# Result table: name, true label, predicted label, then one probability
# column per class. Class indices are mapped to labels by integer indexing.
res_df = pd.DataFrame(OrderedDict(
    [
        ('氏名', test_name),
        ('所属', np.array(label)[np.argmax(test_y, axis=1)]),
        ('予測', np.array(label)[np.argmax(pred_y, axis=1)]),
    ]
    + [(party, pred_y[:, col]) for col, party in enumerate(label)]
))

In [4]:
# Render the per-candidate result table (true label, predicted label and the
# predicted probability for each class). Row truncation is disabled by the
# pandas display options set in the first cell, so every test row is shown.
display(res_df)

Unnamed: 0,氏名,所属,予測,自,民,維,公,次,共
0,牧野＝次郎,共,共,0.0,0.003,0.0,0.0,0.0,0.997
1,小浜＝一輝,共,共,0.0,0.002,0.0,0.0,0.0,0.998
2,岸＝信夫,自,自,0.992,0.001,0.0,0.003,0.003,0.0
3,畠中＝光成,維,維,0.0,0.009,0.991,0.0,0.0,0.0
4,佐々木＝紀,自,自,0.988,0.0,0.0,0.0,0.012,0.0
5,和泉＝信丈,共,共,0.0,0.001,0.0,0.0,0.0,0.999
6,西村＝真悟,次,次,0.044,0.001,0.001,0.0,0.954,0.0
7,江崎＝鉄磨,自,自,0.771,0.078,0.069,0.063,0.011,0.008
8,小里＝泰弘,自,自,0.979,0.002,0.0,0.015,0.004,0.0
9,簗＝和生,自,自,0.856,0.03,0.042,0.002,0.071,0.0


In [5]:
# Macro-averaged F1 over the test set: one-hot truth and predicted
# probabilities are both reduced to class indices with argmax first.
print('F値: %f' % f1_score(
    y_true=np.argmax(test_y, axis=1),
    y_pred=np.argmax(pred_y, axis=1),
    average='macro',
))

F値: 0.860267
