NPYファイルを読み込みRVMで線形補間<br>
PTHファイルを読み込みNPYに

In [9]:
# 必要なライブラリをインポートする
import numpy as np
import os
import torch
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from tqdm import notebook
import joblib

#DINOの出力PATH
DATASET_PATH = "DINO_OUTPUT/NonCenter0326_5epo"
file_type = "test"

feature_data_pth = torch.load(os.path.join(DATASET_PATH, file_type+"feat.pth"))
label_data_pth = torch.load(os.path.join(DATASET_PATH, file_type+"labels.pth"))
np.save(os.path.join(DATASET_PATH, file_type+"feat.npy"), feature_data_pth)
np.save(os.path.join(DATASET_PATH, file_type+"labels.npy"), label_data_pth)

feature_data_npy = np.load(os.path.join(DATASET_PATH, file_type+"feat.npy"))
label_data_npy = np.load(os.path.join(DATASET_PATH, file_type+"labels.npy"))

# データを読み込む
feature_data_npy = np.load(os.path.join(DATASET_PATH, file_type+"feat.npy"))
label_data_npy = np.load(os.path.join(DATASET_PATH, file_type+"labels.npy"))

# データを正規化する（重要な前処理手順）
scaler = StandardScaler()
feature_data_npy = scaler.fit_transform(feature_data_npy)

# 訓練データとテストデータに分割する
X_train, X_test, y_train, y_test = train_test_split(feature_data_npy, label_data_npy, test_size=0.2, random_state=42)

print("feature shape", feature_data_npy.shape)
print("Train shape:", X_train.shape, "Test shape", X_test.shape)


feature shape (14320, 768)
Train shape: (11456, 768) Test shape (2864, 768)


SVR学習

In [4]:
# SVRモデルを作成する
svr_model = SVR(kernel='rbf', C=1.0, epsilon=0.1)

# プログレスバーを表示しながらモデルを訓練する
num_epochs = 100
for epoch in notebook.tqdm(range(num_epochs), desc=file_type+"ing"):
    svr_model.fit(X_train, y_train)

# テストデータでモデルを評価する
predictions = svr_model.predict(X_test)
mse = mean_squared_error(y_test, predictions)
print("Mean Squared Error:", mse)

#model 保存
save_path = os.path.join(DATASET_PATH, "SVR")
if os.path.exists(save_path)==False:
    os.mkdir(save_path)
joblib.dump(svr_model, os.path.join(save_path, file_type+'svr_model.pkl'))

testing:   0%|          | 0/100 [00:00<?, ?it/s]

学習後にモデル検証

In [17]:
# モデルの読み込み
save_path = os.path.join(DATASET_PATH, "SVR")
file_path = os.path.join(save_path, 'svr_model.pkl')
model = joblib.load(file_path)

# データを読み込む
file_type = "test"
feature_data_npy = np.load(os.path.join(DATASET_PATH, file_type+"feat.npy"))
label_data_npy = np.load(os.path.join(DATASET_PATH, file_type+"labels.npy"))

# データを正規化する（重要な前処理手順）
scaler = StandardScaler()
feature_data_npy = scaler.fit_transform(feature_data_npy)

#推論
from tqdm import tqdm
predictions = []
for i in tqdm(range(len(feature_data_npy)), desc="Predicting"):
    prediction = model.predict([feature_data_npy[i]])
    predictions.append(prediction)
    
print("TEST data shape", feature_data_npy.shape)
print("prediction shape", len(predictions))

mse = mean_squared_error(label_data_npy, predictions)

print(predictions)
print(y_test)
print("Mean Squared Error:", mse)


Predicting: 100%|██████████| 14320/14320 [00:39<00:00, 360.95it/s]


TEST data shape (14320, 768)
prediction shape 14320
[array([0.335295]), array([-0.0675097]), array([0.15200466]), array([0.46137914]), array([0.55829383]), array([0.21300977]), array([-0.24704032]), array([2.13082876]), array([2.46941918]), array([0.2218905]), array([0.10952322]), array([0.51964046]), array([0.83722606]), array([0.30060963]), array([0.06244054]), array([0.47411227]), array([1.32333201]), array([0.50957593]), array([1.75146556]), array([1.3918828]), array([-0.22102323]), array([-0.09250577]), array([1.18344325]), array([1.23075596]), array([0.39881739]), array([-0.0923407]), array([1.34227973]), array([1.8797779]), array([1.71417757]), array([1.29902477]), array([2.06367117]), array([0.33138563]), array([0.30181794]), array([2.6534091]), array([0.17022421]), array([0.59677949]), array([0.57012467]), array([0.52166432]), array([0.22795674]), array([-0.50592503]), array([1.53877678]), array([0.05998645]), array([0.92992176]), array([-0.2839662]), array([1.66660941]), arra