In [None]:
import os
import sys

# Make project-level modules importable from this notebook: the project root is
# taken to be two directories above the current working directory.
# The path is normalized (no '..' components) so that the membership test below
# compares canonical strings and sys.path gets a clean absolute entry.
project_root_dir = os.path.abspath(os.path.join(os.getcwd(), '../..'))
if project_root_dir not in sys.path:
    sys.path.append(project_root_dir)

import config
import numpy as np

# Wavelength axis of the VNIR data, read from a MATLAB file.
# `wv` is used further down (flattened and rounded to integers) as the tick
# labels of every plot in this notebook.
from scipy.io import loadmat

# NOTE(review): machine-specific absolute path; consider moving it to `config`.
wavelength_mat_path = '/home/abian/Data/Dataset/IUMA/Experimento (Abian)/preProcessedWavelength.mat'
mat = loadmat(wavelength_mat_path)
wv = mat['preProcessedVnirWl']

from utils import get_data, plot_boxplot, outlier_removal

In [None]:
from matplotlib import pyplot as plt
import pandas as pd

# Input .mat files live under 'original'; everything produced here goes under 'preprocessed'.
dataset_dir = os.path.join(config.BRAIN_HSI_DIR, 'original')
save_dir =  os.path.join(config.BRAIN_HSI_DIR, 'preprocessed')

files = ['VNIRimagesOp8C1', 'VNIRimagesOp8C2', 'VNIRimagesOp12C1', 'VNIRimagesOp12C2', 'VNIRimagesOp15C1', 'VNIRimagesOp20C1']

# Tick labels for the box plots: wavelengths rounded to integers.
# Computed once because they do not depend on the file being processed.
wavelength_labels = np.round(wv.flatten(), 0).astype(int)

# savefig() does not create missing directories, so make sure both image
# folders exist before the loop starts writing into them.
outliers_img_dir = os.path.join(save_dir, 'imgs/outliers')
no_outliers_img_dir = os.path.join(save_dir, 'imgs/no_outliers')
for img_dir in (outliers_img_dir, no_outliers_img_dir):
    os.makedirs(img_dir, exist_ok=True)

for file in files:
    X, y = get_data(os.path.join(dataset_dir, '{}.mat'.format(file)))
    X_df, y_df = pd.DataFrame(X), pd.DataFrame(y)

    # Box plot of the raw data, before any outlier is removed.
    fig = plot_boxplot(X_df, labels=wavelength_labels, n_ticks=12, figsize=(16,8))
    fig.savefig(os.path.join(outliers_img_dir, '{}.pdf'.format(file)), bbox_inches='tight')

    # `idx` is inverted to select the rows of y_df that are kept, i.e. it is
    # presumably a boolean mask flagging the removed rows -- confirm in utils.outlier_removal.
    X_df, idx = outlier_removal(X_df)
    y_df = y_df[~idx]
    X_df = X_df.reset_index(drop=True)
    y_df = y_df.reset_index(drop=True)

    # Same box plot after outlier removal.
    fig = plot_boxplot(X_df, labels=wavelength_labels, n_ticks=12, figsize=(16,8))
    fig.savefig(os.path.join(no_outliers_img_dir, '{}.pdf'.format(file)), bbox_inches='tight')

    # Persist the cleaned data, one folder per input file.
    subject_save_dir = os.path.join(save_dir, 'data/no_outliers/{}'.format(file))
    os.makedirs(subject_save_dir, exist_ok=True)

    X_df.to_csv(os.path.join(subject_save_dir, 'X.csv'), index=False)
    y_df.to_csv(os.path.join(subject_save_dir, 'y.csv'), index=False)

In [None]:
def plot_mean_std(X_df, labels = None, n_ticks=12, figsize=(8,6)):
    """Plot the per-column mean of ``X_df`` with a shaded +/- one std band.

    Parameters
    ----------
    X_df : pandas.DataFrame
        Data to summarise; ``mean()`` and ``std()`` are taken over the rows,
        giving one value per column. Columns are drawn at x = 0, 1, 2, ...
    labels : sequence, optional
        Tick label for each column, indexed by column position. Any sequence
        (list, tuple, ndarray) is accepted. When ``None`` the ticks keep
        matplotlib's default labels.
    n_ticks : int
        Number of evenly spaced x ticks between the first and last column.
    figsize : tuple
        Figure size forwarded to matplotlib.

    Returns
    -------
    The newly created matplotlib figure.
    """
    mean = X_df.mean().values
    std = X_df.std().values

    # Explicit Figure/Axes objects instead of the implicit pyplot "current axes".
    fig, ax = plt.subplots(figsize=figsize)
    ax.plot(mean)
    ax.fill_between(np.arange(len(std)), mean-std, mean+std, alpha=0.2)

    ticks = np.linspace(0, len(X_df.columns)-1, n_ticks, dtype=int)
    ax.set_xticks(ticks)
    if labels is not None:
        # np.asarray allows fancy indexing with `ticks` even when a plain
        # list/tuple is passed (a list cannot be indexed with an index array).
        ax.set_xticklabels(np.asarray(labels)[ticks])

    ax.tick_params(axis='x', labelrotation=45, labelsize='large')
    ax.tick_params(axis='y', labelsize='large')

    ax.set_ylabel('Reflectance', fontsize='x-large')
    ax.set_xlabel('Wavelength (nm)', fontsize='x-large')

    ax.margins(0)
    ax.grid(True)

    return fig

# Tick labels (wavelengths rounded to integers) are the same for every file.
wavelength_labels = np.round(wv.flatten(), 0).astype(int)

# savefig() does not create missing directories; make sure the target exists.
mean_std_img_dir = os.path.join(save_dir, 'imgs/no_outliers/mean_std')
os.makedirs(mean_std_img_dir, exist_ok=True)

for file in files:
    # Folder holding the outlier-free X.csv for this file.
    subject_data_dir = os.path.join(save_dir, 'data/no_outliers/{}'.format(file))

    X_df = pd.read_csv(os.path.join(subject_data_dir, 'X.csv'))
    mean = X_df.mean().values
    std = X_df.std().values

    fig = plot_mean_std(X_df, labels=wavelength_labels, n_ticks=12, figsize=(8,5))
    fig.savefig(os.path.join(mean_std_img_dir, '{}.pdf'.format(file)), bbox_inches='tight')

    # save mean and std to csv (one row per column of X_df)
    mean_std_df = pd.DataFrame({'mean': mean, 'std': std})
    mean_std_df.to_csv(os.path.join(subject_data_dir, 'mean_std.csv'), index=False)


In [None]:
# NOTE(review): X_df is not defined in this cell; when the notebook is run top
# to bottom it is the frame left over from the last iteration of the loop above.
std = X_df.std(axis=0).to_numpy()
mean = X_df.mean(axis=0).to_numpy()

plt.figure(figsize=(12, 8))
plt.plot(mean)
plt.fill_between(np.arange(len(std)), mean-std, mean+std, alpha=0.2)
ticks = np.linspace(0, len(X_df.columns)-1, 12, dtype=int)
# Label the ticks with the rounded wavelengths so that they agree with the
# 'Wavelength (nm)' axis label; bare column indices would be misleading.
tick_labels = np.round(wv.flatten(), 0).astype(int)[ticks]
plt.xticks(ticks, tick_labels, rotation=45, horizontalalignment='right', fontsize='large')
plt.xlabel('Wavelength (nm)', fontsize='x-large')
plt.ylabel('Reflectance', fontsize='x-large')
plt.show()