In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Print the version string of the imported TensorFlow package.
print(tf.__version__)

2.4.1


In [2]:
# Read the two CSV files into pandas DataFrames:
# 'train.csv' becomes `train`, 'test.csv' becomes `submit_data`.
train, submit_data = (
    pd.read_csv(csv_path, sep=',', low_memory=False)
    for csv_path in ('train.csv', 'test.csv')
)

In [3]:
# Display the first five rows of the training frame
train.head(n=5)

Unnamed: 0,id,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,...,feature_66,feature_67,feature_68,feature_69,feature_70,feature_71,feature_72,feature_73,feature_74,target
0,0,0,0,6,1,0,0,0,0,7,...,0,0,0,0,0,0,2,0,0,Class_6
1,1,0,0,0,0,0,0,0,0,0,...,2,0,0,0,0,0,0,1,0,Class_6
2,2,0,0,0,0,0,1,0,3,0,...,0,0,0,0,1,0,0,0,0,Class_2
3,3,0,0,7,0,1,5,2,2,0,...,0,4,0,2,2,0,4,3,0,Class_8
4,4,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Class_2


In [4]:
# Check the shape of the data: (number of rows, number of columns)
train.shape

(200000, 77)

In [5]:
# Check the dtype of every column in train
train.dtypes

id             int64
feature_0      int64
feature_1      int64
feature_2      int64
feature_3      int64
               ...  
feature_71     int64
feature_72     int64
feature_73     int64
feature_74     int64
target        object
Length: 77, dtype: object

In [6]:
# Summary statistics of train for all columns, transposed so that
# each original column is shown as one row.
train.describe(include='all').T

Unnamed: 0,count,unique,top,freq,mean,std,min,25%,50%,75%,max
id,200000,,,,99999.5,57735.2,0,49999.8,99999.5,149999,199999
feature_0,200000,,,,0.97271,3.94184,0,0,0,1,61
feature_1,200000,,,,1.16837,3.99341,0,0,0,1,51
feature_2,200000,,,,2.21932,6.47657,0,0,0,1,64
feature_3,200000,,,,2.29673,7.55186,0,0,0,1,70
...,...,...,...,...,...,...,...,...,...,...,...
feature_71,200000,,,,0.806895,2.45874,0,0,0,1,30
feature_72,200000,,,,1.28293,4.26142,0,0,0,1,61
feature_73,200000,,,,2.94021,10.7847,0,0,0,1,130
feature_74,200000,,,,0.632005,3.92531,0,0,0,0,52


In [7]:
# Cast the target column of train to the pandas 'category' dtype
train['target'] = train['target'].astype('category')

# Show the column dtypes to confirm the conversion
train.dtypes

id               int64
feature_0        int64
feature_1        int64
feature_2        int64
feature_3        int64
                ...   
feature_71       int64
feature_72       int64
feature_73       int64
feature_74       int64
target        category
Length: 77, dtype: object

In [8]:
# Label encoding: fit a LabelEncoder on the target values and replace
# the target column with the resulting integer codes.
le = LabelEncoder().fit(train.target.values)
encoded = le.transform(train.target.values)
decoded = le.inverse_transform(encoded)  # codes mapped back to the original labels
train['target'] = encoded

# Display the first rows of the encoded target to check the result
train['target'].head()

0    5
1    5
2    1
3    7
4    1
Name: target, dtype: int64

In [9]:
# Feature selection driven by the "gene locus" mask `dna`.
# dna[i] == 1 keeps feature_i and dna[i] == 0 removes it.
# NOTE(review): the mask looks like the output of a genetic-algorithm
# search; its provenance is not recorded in this notebook. Confirm.
dna = np.array([1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0])

# Number of retained features; used later as the model's input width.
input_num = np.sum(dna)

# Select the columns to remove by NAME instead of by shifting position.
# The previous loop dropped `train.columns[i + 1 - drop_count]` one at a
# time, so re-running the cell silently removed a different set of columns.
# Dropping by name with errors='ignore' makes the cell idempotent and does
# the work in a single pass.
dropped_features = [f'feature_{i}' for i in np.flatnonzero(dna == 0)]
train = train.drop(columns=dropped_features, errors='ignore')

# Kept for backward compatibility: number of masked-out features.
drop_count = len(dropped_features)

# Safety net for errors='ignore': after masking, train must hold exactly
# id + the retained features + target.
assert train.shape[1] == input_num + 2, (
    f'unexpected column count after masking: {train.shape[1]} '
    f'(expected {input_num + 2})'
)

# Display the first rows to check the result
train.head()

Unnamed: 0,id,feature_0,feature_2,feature_4,feature_5,feature_6,feature_7,feature_8,feature_10,feature_12,...,feature_62,feature_63,feature_64,feature_66,feature_67,feature_68,feature_69,feature_70,feature_72,target
0,0,0,6,0,0,0,0,7,0,3,...,1,0,0,0,0,0,0,0,2,5
1,1,0,0,0,0,0,0,0,0,1,...,0,0,0,2,0,0,0,0,0,5
2,2,0,0,0,1,0,3,0,1,0,...,2,0,0,0,0,0,0,1,0,1
3,3,0,7,1,5,2,2,0,2,5,...,7,0,0,0,4,0,2,2,4,7
4,4,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [10]:
# Separate the training frame into the feature matrix (everything except
# 'target' and 'id') and the label vector.
train_x = train.drop(columns=['target', 'id'])
train_y = train['target']

In [11]:
# Split train into a training part (80%) and a validation part (20%).
# random_state pins the shuffle so the hold-out set, and therefore the
# accuracy reported below, is the same on every run.
# stratify keeps the class proportions of the 9-class target identical
# in both parts.
x_train, x_test, y_train, y_test = train_test_split(
    train_x,
    train_y,
    test_size=0.2,
    random_state=42,
    stratify=train_y,
)

# Display the first rows to check the result
x_train.head()

Unnamed: 0,feature_0,feature_2,feature_4,feature_5,feature_6,feature_7,feature_8,feature_10,feature_12,feature_13,...,feature_58,feature_62,feature_63,feature_64,feature_66,feature_67,feature_68,feature_69,feature_70,feature_72
90017,0,0,0,0,0,0,1,1,2,0,...,0,0,0,0,0,0,0,0,1,0
81636,1,2,2,2,2,0,0,0,7,0,...,4,2,0,1,0,1,0,0,4,0
109466,0,0,0,1,0,0,5,0,8,0,...,0,0,0,0,0,0,0,0,0,0
189641,2,0,0,2,0,1,0,0,0,0,...,0,0,0,0,0,0,0,2,0,0
165137,0,0,0,3,0,0,0,0,1,0,...,0,3,0,0,0,2,4,2,0,0


In [12]:
# Build the model layer by layer: two ReLU hidden layers, each followed by
# dropout, and a 9-way softmax output. The input width is `input_num`.
model = keras.Sequential()
model.add(keras.layers.Flatten(input_shape=(input_num,)))
model.add(keras.layers.Dense(387, activation='relu'))
model.add(keras.layers.Dropout(0.32))
model.add(keras.layers.Dense(364, activation='relu'))
model.add(keras.layers.Dropout(0.18))
model.add(keras.layers.Dense(9, activation='softmax'))

In [13]:
# Compile the model: Adam optimizer, sparse categorical cross-entropy
# (the labels are integer codes), and accuracy as the tracked metric.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [14]:
# Train the model on the training part of the split
model.fit(x=x_train, y=y_train, epochs=283, batch_size=1460)

Epoch 1/283
Epoch 2/283
Epoch 3/283
Epoch 4/283
Epoch 5/283
Epoch 6/283
Epoch 7/283
Epoch 8/283
Epoch 9/283
Epoch 10/283
Epoch 11/283
Epoch 12/283
Epoch 13/283
Epoch 14/283
Epoch 15/283
Epoch 16/283
Epoch 17/283
Epoch 18/283
Epoch 19/283
Epoch 20/283
Epoch 21/283
Epoch 22/283
Epoch 23/283
Epoch 24/283
Epoch 25/283
Epoch 26/283
Epoch 27/283
Epoch 28/283
Epoch 29/283
Epoch 30/283
Epoch 31/283
Epoch 32/283
Epoch 33/283
Epoch 34/283
Epoch 35/283
Epoch 36/283
Epoch 37/283
Epoch 38/283
Epoch 39/283
Epoch 40/283
Epoch 41/283
Epoch 42/283
Epoch 43/283
Epoch 44/283
Epoch 45/283
Epoch 46/283
Epoch 47/283
Epoch 48/283
Epoch 49/283
Epoch 50/283
Epoch 51/283
Epoch 52/283
Epoch 53/283
Epoch 54/283
Epoch 55/283
Epoch 56/283
Epoch 57/283
Epoch 58/283
Epoch 59/283
Epoch 60/283
Epoch 61/283
Epoch 62/283
Epoch 63/283
Epoch 64/283
Epoch 65/283
Epoch 66/283
Epoch 67/283
Epoch 68/283
Epoch 69/283
Epoch 70/283
Epoch 71/283
Epoch 72/283
Epoch 73/283
Epoch 74/283
Epoch 75/283
Epoch 76/283
Epoch 77/283
Epoch 78

Epoch 81/283
Epoch 82/283
Epoch 83/283
Epoch 84/283
Epoch 85/283
Epoch 86/283
Epoch 87/283
Epoch 88/283
Epoch 89/283
Epoch 90/283
Epoch 91/283
Epoch 92/283
Epoch 93/283
Epoch 94/283
Epoch 95/283
Epoch 96/283
Epoch 97/283
Epoch 98/283
Epoch 99/283
Epoch 100/283
Epoch 101/283
Epoch 102/283
Epoch 103/283
Epoch 104/283
Epoch 105/283
Epoch 106/283
Epoch 107/283
Epoch 108/283
Epoch 109/283
Epoch 110/283
Epoch 111/283
Epoch 112/283
Epoch 113/283
Epoch 114/283
Epoch 115/283
Epoch 116/283
Epoch 117/283
Epoch 118/283
Epoch 119/283
Epoch 120/283
Epoch 121/283
Epoch 122/283
Epoch 123/283
Epoch 124/283
Epoch 125/283
Epoch 126/283
Epoch 127/283
Epoch 128/283
Epoch 129/283
Epoch 130/283
Epoch 131/283
Epoch 132/283
Epoch 133/283
Epoch 134/283
Epoch 135/283
Epoch 136/283
Epoch 137/283
Epoch 138/283
Epoch 139/283
Epoch 140/283
Epoch 141/283
Epoch 142/283
Epoch 143/283
Epoch 144/283
Epoch 145/283
Epoch 146/283
Epoch 147/283
Epoch 148/283
Epoch 149/283
Epoch 150/283
Epoch 151/283
Epoch 152/283
Epoch 153/2

Epoch 160/283
Epoch 161/283
Epoch 162/283
Epoch 163/283
Epoch 164/283
Epoch 165/283
Epoch 166/283
Epoch 167/283
Epoch 168/283
Epoch 169/283
Epoch 170/283
Epoch 171/283
Epoch 172/283
Epoch 173/283
Epoch 174/283
Epoch 175/283
Epoch 176/283
Epoch 177/283
Epoch 178/283
Epoch 179/283
Epoch 180/283
Epoch 181/283
Epoch 182/283
Epoch 183/283
Epoch 184/283
Epoch 185/283
Epoch 186/283
Epoch 187/283
Epoch 188/283
Epoch 189/283
Epoch 190/283
Epoch 191/283
Epoch 192/283
Epoch 193/283
Epoch 194/283
Epoch 195/283
Epoch 196/283
Epoch 197/283
Epoch 198/283
Epoch 199/283
Epoch 200/283
Epoch 201/283
Epoch 202/283
Epoch 203/283
Epoch 204/283
Epoch 205/283
Epoch 206/283
Epoch 207/283
Epoch 208/283
Epoch 209/283
Epoch 210/283
Epoch 211/283
Epoch 212/283
Epoch 213/283
Epoch 214/283
Epoch 215/283
Epoch 216/283
Epoch 217/283
Epoch 218/283
Epoch 219/283
Epoch 220/283
Epoch 221/283
Epoch 222/283
Epoch 223/283
Epoch 224/283
Epoch 225/283
Epoch 226/283
Epoch 227/283
Epoch 228/283
Epoch 229/283
Epoch 230/283
Epoch 

Epoch 238/283
Epoch 239/283
Epoch 240/283
Epoch 241/283
Epoch 242/283
Epoch 243/283
Epoch 244/283
Epoch 245/283
Epoch 246/283
Epoch 247/283
Epoch 248/283
Epoch 249/283
Epoch 250/283
Epoch 251/283
Epoch 252/283
Epoch 253/283
Epoch 254/283
Epoch 255/283
Epoch 256/283
Epoch 257/283
Epoch 258/283
Epoch 259/283
Epoch 260/283
Epoch 261/283
Epoch 262/283
Epoch 263/283
Epoch 264/283
Epoch 265/283
Epoch 266/283
Epoch 267/283
Epoch 268/283
Epoch 269/283
Epoch 270/283
Epoch 271/283
Epoch 272/283
Epoch 273/283
Epoch 274/283
Epoch 275/283
Epoch 276/283
Epoch 277/283
Epoch 278/283
Epoch 279/283
Epoch 280/283
Epoch 281/283
Epoch 282/283
Epoch 283/283


<tensorflow.python.keras.callbacks.History at 0x7fdcdad2b4c0>

In [15]:
# Loss and accuracy on the held-out validation data
evaluation = model.evaluate(x_test, y_test, verbose=2)
test_loss, test_acc = evaluation

print('\nTest accuracy:', test_acc)

1250/1250 - 1s - loss: 1.8566 - accuracy: 0.3282

Test accuracy: 0.3282249867916107


In [19]:
# Bring the submission data into the shape the model expects by applying
# the same "gene locus" mask `dna` that was applied to the training data:
# dna[i] == 0 means feature_i is removed.
#
# Columns are selected by NAME instead of by shifting position. The
# previous loop dropped `submit_data.columns[i + 1 - drop_count]` one at a
# time, so re-running the cell silently removed a different set of columns.
# Dropping by name with errors='ignore' makes the cell idempotent and does
# the work in a single pass.
dropped_features = [f'feature_{i}' for i in np.flatnonzero(dna == 0)]
submit_data = submit_data.drop(columns=dropped_features, errors='ignore')

# Kept for backward compatibility: number of masked-out features.
drop_count = len(dropped_features)

# Safety net for errors='ignore': after masking, submit_data must hold
# exactly id + the retained features.
expected_width = int(np.sum(dna)) + 1
assert submit_data.shape[1] == expected_width, (
    f'unexpected column count after masking: {submit_data.shape[1]} '
    f'(expected {expected_width})'
)

# Display the first rows to check the result
submit_data.head()

Unnamed: 0,id,feature_0,feature_2,feature_4,feature_5,feature_6,feature_7,feature_8,feature_10,feature_12,...,feature_58,feature_62,feature_63,feature_64,feature_66,feature_67,feature_68,feature_69,feature_70,feature_72
0,200000,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,200001,1,0,0,0,0,0,0,1,2,...,0,7,0,1,1,3,0,0,0,3
2,200002,0,7,0,0,0,0,6,1,0,...,2,0,0,0,0,0,0,0,3,2
3,200003,0,0,3,1,0,0,0,7,0,...,1,0,0,4,0,0,1,0,0,4
4,200004,0,5,0,0,0,0,0,1,2,...,0,0,0,0,0,0,0,0,0,0


In [20]:
# Model input for the submission data: every column except 'id'
x_submit = submit_data.drop(columns=['id'])

# Display the first rows to check the result
x_submit.head()

Unnamed: 0,feature_0,feature_2,feature_4,feature_5,feature_6,feature_7,feature_8,feature_10,feature_12,feature_13,...,feature_58,feature_62,feature_63,feature_64,feature_66,feature_67,feature_68,feature_69,feature_70,feature_72
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,1,2,2,...,0,7,0,1,1,3,0,0,0,3
2,0,7,0,0,0,0,6,1,0,0,...,2,0,0,0,0,0,0,0,3,2
3,0,0,3,1,0,0,0,7,0,0,...,1,0,0,4,0,0,1,0,0,4
4,0,5,0,0,0,0,0,1,2,0,...,0,0,0,0,0,0,0,0,0,0


In [21]:
# Run the trained model on the submission features
predictions = model.predict(x=x_submit)

# Check the shape of the prediction array
predictions.shape

(100000, 9)

In [22]:
# Reshape the predictions into the submission format: one named column per
# class, indexed by the 'id' column of the submission data.
df_predictions = pd.DataFrame(
    predictions,
    columns=['Class_1', 'Class_2', 'Class_3', 'Class_4', 'Class_5', 'Class_6', 'Class_7', 'Class_8', 'Class_9'],
)
submit_data = pd.concat([submit_data['id'], df_predictions], axis=1).set_index('id')
submit_data.head()

Unnamed: 0_level_0,Class_1,Class_2,Class_3,Class_4,Class_5,Class_6,Class_7,Class_8,Class_9
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
200000,0.086867,0.349243,0.144604,0.028992,0.011927,0.173033,0.025344,0.062861,0.11713
200001,0.029691,0.043544,0.034429,0.027634,0.010282,0.243266,0.082336,0.398771,0.130047
200002,0.01159,0.179997,0.040725,0.003332,0.000941,0.539751,0.063115,0.072061,0.088488
200003,0.011402,0.245297,0.029195,0.024251,0.059654,0.219073,0.044584,0.257629,0.108915
200004,0.031247,0.137974,0.03562,0.0096,0.016679,0.335124,0.067292,0.290975,0.075488


In [23]:
# Write the submission frame to a CSV file
submit_data.to_csv(path_or_buf="submission_NN_3.csv")