In [1]:
import torch
from torch.utils.data import DataLoader
from torchmetrics import MeanAbsoluteError, MeanSquaredError
import numpy as np
from data.perovskite_dataset import PerovskiteDataset1d
from data.augmentations import *

from sklearn.cluster import KMeans
from sklearn.neighbors import KNeighborsRegressor
import matplotlib.pyplot as plt
from matplotlib import cm

## KNN: k-nearest-neighbour regression baseline (5-fold validation)

In [10]:
# k-nearest-neighbour regression baseline.
# For each of the 5 folds: fit a KNeighborsRegressor on the flattened, normalised
# training time series and report MSE / MAE of its predictions on the validation part.
# NOTE(review): absolute, machine-specific path — consider moving it to a config cell.
data_dir = '/home/s522r/Desktop/perovskite/new_data/2021_KIT_PerovskiteDeposition/preprocessed'

# One entry per fold (0-d tensors returned by the torchmetrics calls below).
val_mse_folds = []
val_mae_folds = []

for fold_nb in range(5):
    print('Fold: ', fold_nb)

    # Normalisation statistics come from the un-transformed training data of this fold.
    train_mean, train_std = PerovskiteDataset1d(data_dir=data_dir, transform=None, fold=fold_nb,
                                                split='train', label='PCE_mean').get_stats()

    trainset = PerovskiteDataset1d(data_dir=data_dir,
                                   transform=normalize_1d(train_mean, train_std), fold=fold_nb,
                                   split='train', label='PCE_mean', val=False)
    trainloader = DataLoader(trainset, batch_size=len(trainset), shuffle=False)

    # NOTE(review): val=True together with split='train' presumably selects the held-out
    # part of this fold — confirm against PerovskiteDataset1d.
    valset = PerovskiteDataset1d(data_dir=data_dir,
                                 transform=normalize_1d(train_mean, train_std), fold=fold_nb,
                                 split='train', label='PCE_mean', val=True)
    valloader = DataLoader(valset, batch_size=len(valset), shuffle=False)

    # batch_size == len(dataset), so each loader yields exactly one batch holding
    # the whole split; fetch it directly instead of looping.
    train_series, train_labels = next(iter(trainloader))
    val_series, val_labels = next(iter(valloader))

    # Flatten every sample to one feature vector. The width is derived from the data
    # rather than hard-coded as 4*719, so a change in channels/length cannot silently
    # mis-align samples and labels.
    train_features = train_series.flatten(start_dim=1).numpy()
    val_features = val_series.flatten(start_dim=1).numpy()

    knn = KNeighborsRegressor(n_neighbors=20)
    knn.fit(train_features, train_labels.numpy())
    scores = knn.predict(val_features)

    # Fresh metric objects per fold so no state is accumulated across folds.
    mse = MeanSquaredError()
    mae = MeanAbsoluteError()

    val_mse = mse(torch.from_numpy(scores), val_labels)
    print('MSE: ', val_mse)
    val_mse_folds.append(val_mse)

    val_mae = mae(torch.from_numpy(scores), val_labels)
    print('MAE:', val_mae)
    val_mae_folds.append(val_mae)


# Average of the per-fold validation metrics.
print('Mean MSE: ', np.mean(val_mse_folds))
print('Mean MAE: ', np.mean(val_mae_folds))

Fold:  0
MSE:  tensor(0.0221)
MAE: tensor(0.1102)
Fold:  1
MSE:  tensor(0.0047)
MAE: tensor(0.0512)
Fold:  2
MSE:  tensor(0.0104)
MAE: tensor(0.0763)
Fold:  3
MSE:  tensor(0.0151)
MAE: tensor(0.0811)
Fold:  4
MSE:  tensor(0.0136)
MAE: tensor(0.0805)
Mean MSE:  0.013186842
Mean MAE:  0.07985188


## KMeans: cluster-mean regression baseline (5-fold validation)

In [78]:
# KMeans cluster-mean regression baseline.
# For each of the 5 folds: cluster the flattened, normalised training time series,
# use the mean training label of each cluster as that cluster's prediction, assign
# validation samples to their nearest cluster and report MSE / MAE.
# NOTE(review): absolute, machine-specific path — consider moving it to a config cell.
data_dir = '/home/s522r/Desktop/perovskite/new_data/2021_KIT_PerovskiteDeposition/preprocessed'

# Number of KMeans clusters (identical for every fold, so defined once).
nb_clusters = 10

# One entry per fold (0-d tensors returned by the torchmetrics calls below).
val_mse_folds = []
val_mae_folds = []

for fold_nb in range(5):
    print('Fold: ', fold_nb)

    # Normalisation statistics come from the un-transformed training data of this fold.
    train_mean, train_std = PerovskiteDataset1d(data_dir=data_dir, transform=None, fold=fold_nb,
                                                split='train', label='PCE_mean').get_stats()

    trainset = PerovskiteDataset1d(data_dir=data_dir,
                                   transform=normalize_1d(train_mean, train_std), fold=fold_nb,
                                   split='train', label='PCE_mean', val=False)
    trainloader = DataLoader(trainset, batch_size=len(trainset), shuffle=False)

    # NOTE(review): val=True together with split='train' presumably selects the held-out
    # part of this fold — confirm against PerovskiteDataset1d.
    valset = PerovskiteDataset1d(data_dir=data_dir,
                                 transform=normalize_1d(train_mean, train_std), fold=fold_nb,
                                 split='train', label='PCE_mean', val=True)
    valloader = DataLoader(valset, batch_size=len(valset), shuffle=False)

    # batch_size == len(dataset), so each loader yields exactly one batch holding
    # the whole split; fetch it directly instead of looping.
    train_series, train_labels = next(iter(trainloader))
    val_series, val_labels = next(iter(valloader))

    # Flatten every sample to one feature vector; the width is derived from the data
    # rather than hard-coded as 4*719.
    train_features = train_series.flatten(start_dim=1).numpy()
    val_features = val_series.flatten(start_dim=1).numpy()
    train_targets = train_labels.numpy()

    # KMeans initialisation is random: fix the seed so the reported metrics are
    # reproducible, and pin n_init because its default differs between
    # scikit-learn releases.
    kmeans = KMeans(n_clusters=nb_clusters, n_init=10, random_state=0)
    clusters = kmeans.fit_predict(train_features)

    # Mean training label per cluster. Should a cluster end up without training
    # samples, fall back to the overall training mean instead of producing NaN.
    overall_mean = train_targets.mean()
    cluster_means = np.array([
        train_targets[clusters == i].mean() if np.any(clusters == i) else overall_mean
        for i in range(nb_clusters)
    ])

    # Predict each validation sample with the mean label of its assigned cluster.
    val_clusters = kmeans.predict(val_features)
    scores = cluster_means[val_clusters]

    # Fresh metric objects per fold so no state is accumulated across folds.
    mse = MeanSquaredError()
    mae = MeanAbsoluteError()

    val_mse = mse(torch.from_numpy(scores), val_labels)
    print('MSE: ', val_mse)
    val_mse_folds.append(val_mse)

    val_mae = mae(torch.from_numpy(scores), val_labels)
    print('MAE:', val_mae)
    val_mae_folds.append(val_mae)


# Average of the per-fold validation metrics.
print('Mean MSE: ', np.mean(val_mse_folds))
print('Mean MAE: ', np.mean(val_mae_folds))

Fold:  0
MSE:  tensor(0.0211)
MAE: tensor(0.1085)
Fold:  1
MSE:  tensor(0.0068)
MAE: tensor(0.0619)
Fold:  2
MSE:  tensor(0.0146)
MAE: tensor(0.0954)
Fold:  3
MSE:  tensor(0.0182)
MAE: tensor(0.0920)
Fold:  4
MSE:  tensor(0.0168)
MAE: tensor(0.0918)
Mean MSE:  0.015525341
Mean MAE:  0.08992393
