In [1]:
%tensorflow_version 2.x
import tensorflow as tf
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

Found GPU at: /device:GPU:0


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import os
dir_path = '/content/drive/Othercomputers/macbook-air/TPS-Feb2022/notebooks'
os.chdir(dir_path)

In [4]:
! pip install japanize-matplotlib
! pip install shap

Collecting japanize-matplotlib
  Downloading japanize-matplotlib-1.1.3.tar.gz (4.1 MB)
[K     |████████████████████████████████| 4.1 MB 5.2 MB/s 
Building wheels for collected packages: japanize-matplotlib
  Building wheel for japanize-matplotlib (setup.py) ... [?25l[?25hdone
  Created wheel for japanize-matplotlib: filename=japanize_matplotlib-1.1.3-py3-none-any.whl size=4120275 sha256=f4f782f9c621b8f0327e2a98290e919cade0458a72e219345c54200f3e34f795
  Stored in directory: /root/.cache/pip/wheels/83/97/6b/e9e0cde099cc40f972b8dd23367308f7705ae06cd6d4714658
Successfully built japanize-matplotlib
Installing collected packages: japanize-matplotlib
Successfully installed japanize-matplotlib-1.1.3
Collecting shap
  Downloading shap-0.40.0-cp37-cp37m-manylinux2010_x86_64.whl (564 kB)
[K     |████████████████████████████████| 564 kB 5.2 MB/s 
Collecting slicer==0.0.7
  Downloading slicer-0.0.7-py3-none-any.whl (14 kB)
Installing collected packages: slicer, shap
Successfully installed shap-

In [5]:
import glob
import sys,os
import json
import pprint
import time
import re
import datetime
import pickle
import string
import gc
import warnings
import yaml
import os
warnings.filterwarnings("ignore")
sys.path.append(os.pardir)
sys.path.append('../..')
sys.path.append('../../..')

import numpy as np
import pandas as pd
import pandas_profiling as pdp
import matplotlib.pyplot as plt
import japanize_matplotlib # 日本語対応
import seaborn as sns
# pandasのオプション
pd.set_option('display.max_columns', 300)
pd.set_option('display.max_rows', 300)
pd.set_option('display.max_colwidth', 5000)
pd.options.display.float_format = '{:.3f}'.format
%matplotlib inline
# sns.set_style('whitegrid')
plt.style.use('fivethirtyeight')

from joblib import Parallel, delayed # よりお手軽にサクっと並列処理を実行出来るモジュール
from tqdm import tqdm, tqdm_notebook # プログレスバーを表示できる
from PIL import Image
tqdm.pandas()

# 外部モジュールを自動的にリロードする
%load_ext autoreload
%autoreload 2


# パスの定義
CONFIG_FILE = '../configs/config.yaml'

with open(CONFIG_FILE) as file:
    yml = yaml.load(file)
MODEL_DIR_NAME = yml['SETTING']['MODEL_DIR_NAME']
FEATURE_DIR_NAME = yml['SETTING']['FEATURE_DIR_NAME']
RAW_DIR_NAME = yml['SETTING']['RAW_DIR_NAME']

## 特徴量作成

In [6]:
# train = pd.read_csv(RAW_DIR_NAME + 'train.csv')
# test =  pd.read_csv(RAW_DIR_NAME + 'test.csv')

In [7]:
# ! python ../code/generate_feature.py

## 予測

In [8]:
warnings.filterwarnings("ignore")
warnings.simplefilter('ignore')

In [9]:
! python ../code/run.py

[2022-02-15 10:18:36] - DEBUG MODE False
[2022-02-15 10:18:36] - keras_0215_1018 - train_x shape: (200000, 286)
[2022-02-15 10:18:36] - keras_0215_1018 - train_y shape: (200000, 10)
[2022-02-15 10:18:36] - keras_0215_1018 - start training cv
[2022-02-15 10:18:36] - keras_0215_1018 - cv method: KFold
[2022-02-15 10:18:36] - keras_0215_1018 fold 0 - start training
2022-02-15 10:18:37.942248: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:39] Overriding allow_growth setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/1

## データ確認

In [None]:
def load_datasets_train(feats):
    dfs = [pd.read_pickle(FEATURE_DIR_NAME + f'{f}_train.pkl') for f in feats]
    X_train = pd.concat(dfs, axis=1)
    return X_train

def load_datasets_both(feats):
    dfs = [pd.read_pickle(FEATURE_DIR_NAME + f'{f}_train.pkl') for f in feats]
    X_train = pd.concat(dfs, axis=1)
    dfs = [pd.read_pickle(FEATURE_DIR_NAME + f'{f}_test.pkl') for f in feats]
    X_test = pd.concat(dfs, axis=1)
    return X_train, X_test

# 欠損値の確認
def missing_values_table(data):
    total = data.isnull().sum()
    percent = (data.isnull().sum()/data.isnull().count()*100)
    tt = pd.concat([total, percent], axis=1, keys=['Total', 'Percent'])
    types = []
    for col in data.columns:
        dtype = str(data[col].dtype)
        types.append(dtype)
    tt['Types'] = types
    return(np.transpose(tt))

In [None]:
from keras.utils import np_utils

dfs = [pd.read_pickle(FEATURE_DIR_NAME + f'{f}_train.pkl') for f in ['target']]
train_y = pd.concat(dfs, axis=1)
train_y = np_utils.to_categorical(train_y)

In [None]:
train_x = pd.read_pickle(FEATURE_DIR_NAME + 'rawdata_train.pkl')

In [None]:
# train_y = pd.DataFrame(train_y)
np.argmax(np.array(train_y), axis=1)

array([0, 1, 1, ..., 5, 6, 9])

In [None]:
train_y.query()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
1,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
2,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
3,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
4,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
...,...,...,...,...,...,...,...,...,...,...
199995,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
199996,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
199997,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000
199998,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000


In [None]:
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.utils import np_utils
from sklearn import datasets
from sklearn.model_selection import train_test_split
import numpy as np
 
'''
データ準備
'''
np.random.seed(0) # 乱数を固定値で初期化し再現性を持たせる
 
iris = datasets.load_iris()
X = iris.data
T = iris.target
 
T = np_utils.to_categorical(T) # 数値を、位置に変換 [0,1,2] ==> [ [1,0,0],[0,1,0],[0,0,1] ]
tr_x, va_x, tr_y, va_y = train_test_split(train_x, train_y, train_size=0.8, test_size=0.2) # 訓練とテストで分割
 
'''
モデル作成
'''
model = Sequential()
model.add(Dense(input_dim=286, units=10))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer=SGD(lr=0.1))
 
'''
トレーニング
'''
model.fit(tr_x, tr_y, epochs=1, batch_size=10)
 
'''
学習済みモデルでテストデータで分類する
'''
Y = model.predict_classes(va_x, batch_size=10)
 
'''
結果検証
'''
_, T_index = np.where(va_y > 0) # to_categorical の逆変換
print()
print('RESULT')
print(Y == T_index)




AttributeError: ignored