In [15]:
# ライブラリのインポート
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F


# Select the compute device: the GPU when CUDA is available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")



In [41]:
# Load dataframe.csv into a pandas DataFrame
df = pd.read_csv('dataframe.csv')
# Show the (rows, columns) shape, then the first five rows
print(df.shape, df.head(), sep="\n")
# Confirm the size of the data
print(df.shape)
# Confirm the type (shown as the cell's result because it is the last expression)
type(df)

(25000, 101)
     0    1    2    3    4    5    6    7    8    9  ...   91   92   93   94  \
0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0  ... -1.0 -1.0 -1.0 -1.0   
1 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0  ... -1.0 -1.0 -1.0 -1.0   
2 -1.0 -1.0 -1.0  1.0  1.0  1.0 -1.0 -1.0 -1.0 -1.0  ... -1.0 -1.0  1.0  1.0   
3 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0  ...  1.0  1.0  1.0  1.0   
4 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0  ... -1.0 -1.0 -1.0 -1.0   

    95   96   97   98   99   100  
0 -1.0 -1.0 -1.0 -1.0 -1.0  0.01  
1 -1.0 -1.0 -1.0 -1.0 -1.0  0.01  
2  1.0 -1.0 -1.0 -1.0 -1.0  0.01  
3  1.0  1.0  1.0  1.0  1.0  0.01  
4 -1.0 -1.0 -1.0 -1.0 -1.0  0.01  

[5 rows x 101 columns]
(25000, 101)


pandas.core.frame.DataFrame

In [43]:
# Separate the teacher (label) column from the inputs:
# columns 0-99 become `data`, column 100 becomes `teacher_data`
data, teacher_data = df.iloc[:, :100], df.iloc[:, 100]
# Print both pieces, inputs first and labels second
print(data, teacher_data, sep="\n")

         0    1    2    3    4    5    6    7    8    9  ...   90   91   92  \
0     -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0  ... -1.0 -1.0 -1.0   
1     -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0  ... -1.0 -1.0 -1.0   
2     -1.0 -1.0 -1.0  1.0  1.0  1.0 -1.0 -1.0 -1.0 -1.0  ... -1.0 -1.0 -1.0   
3     -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0  ...  1.0  1.0  1.0   
4     -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0  ... -1.0 -1.0 -1.0   
...    ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...   
24995 -1.0 -1.0 -1.0  1.0 -1.0  1.0 -1.0  1.0 -1.0 -1.0  ...  1.0 -1.0 -1.0   
24996 -1.0 -1.0 -1.0 -1.0 -1.0  1.0  1.0  1.0  1.0  1.0  ...  1.0 -1.0 -1.0   
24997 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0  1.0  1.0 -1.0  1.0  ...  1.0 -1.0 -1.0   
24998 -1.0 -1.0 -1.0  1.0  1.0 -1.0 -1.0  1.0  1.0 -1.0  ... -1.0 -1.0 -1.0   
24999  1.0  1.0  1.0  1.0  1.0 -1.0 -1.0  1.0 -1.0  1.0  ... -1.0  1.0 -1.0   

        93   94   95   96   97   98   99  
0     -1

In [45]:
# Convert the data to a float32 torch.Tensor so it can be used for machine learning with PyTorch.
# NOTE(review): df.values covers every column of df, so the label column (index 100)
# is included in tensor_df, and tensor_df is not the object that is split below —
# confirm that this is intended.
tensor_df = torch.tensor(df.values, dtype=torch.float32)
# Confirm the type
print(type(tensor_df))
# Fixed seed so that the random split is identical on every run (reproducibility)
SPLIT_SEED = 42
# Randomly split into training and test data at a 5:5 ratio.
# NOTE(review): `data` / `teacher_data` are pandas objects, so the four results are
# presumably pandas objects as well, not tensors — convert before feeding them to PyTorch.
X_train, X_test, y_train, y_test = train_test_split(
    data, teacher_data, train_size=0.5, random_state=SPLIT_SEED
)

<class 'torch.Tensor'>


ここまでで機械学習用のデータを作成することができた．以降では，RBM（制限付きボルツマンマシン）の実装を行っていく．