# Imports

In [None]:
import os, glob
from time import time

import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from sklearn.preprocessing import (
    MaxAbsScaler,
    MinMaxScaler,
    QuantileTransformer,
    StandardScaler,
)

# Default figure size (width, height in inches) and resolution for every plot in the notebook.
plt.rcParams["figure.figsize"] = (22,16)
plt.rcParams['figure.dpi'] = 200

# Currently disabled: would switch on memory growth for the first GPU TensorFlow lists.
#gpus = tf.config.experimental.list_physical_devices('GPU')
#tf.config.experimental.set_memory_growth(gpus[0], True)

# Functions

In [2]:
def weighted_mean_absolute_percentage_error(true, pred, mean=False):
    """Weighted mean absolute percentage error (WMAPE), expressed in percent.

    For every row the value is ``100 * sum(|pred - true|) / sum(|true|)``,
    with both sums taken along axis 1.

    Parameters
    ----------
    true, pred : array-like
        Reference and predicted values with matching shapes. Inputs with
        fewer than two dimensions are treated as a single row.
    mean : bool, default False
        When True, return the average of the per-row values as a scalar
        instead of the per-row array.

    Returns
    -------
    numpy.ndarray or numpy scalar
        One value per row (shape ``(n_rows,)``), or their mean when
        ``mean`` is True.

    Notes
    -----
    A row whose ``|true|`` values sum to zero produces ``inf`` or ``nan``
    according to numpy's division rules; no exception is raised.
    """
    # Promote each input independently so a 1-D vector becomes one row.
    true = np.atleast_2d(np.asarray(true))
    pred = np.atleast_2d(np.asarray(pred))
    wmape = np.abs(pred - true).sum(axis=1) / np.abs(true).sum(axis=1) * 100
    # The flag used to be accepted but ignored; honour it.
    return wmape.mean() if mean else wmape

In [8]:
def load_train_test_set(data_path, test_path, train_rows=None, test_rows=None):
    """Read the training and test CSV files into two DataFrames.

    Parameters
    ----------
    data_path, test_path : str
        Paths of the training and test CSV files.
    train_rows, test_rows : int, optional
        Maximum number of rows to read from each file. A falsy value
        (``None`` or ``0``) reads the whole file.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(train_frame, test_frame)``.
    """
    # `nrows=None` is pandas' own "no limit", so a falsy limit maps onto it.
    train_frame = pd.read_csv(data_path, nrows=train_rows or None)
    test_frame = pd.read_csv(test_path, nrows=test_rows or None)
    return train_frame, test_frame

In [9]:
def load_resolution(df, test_df, all_stk_dim, resolution, xcols, ycols, shift=0):
    """Split both frames into input columns and a centred window of profile columns.

    The target column names are built as ``prefix + str(i)`` for every
    prefix in ``ycols`` and every index ``i`` in a window of ``resolution``
    consecutive integers centred on ``int(all_stk_dim / 2) + shift``.

    Parameters
    ----------
    df, test_df : pandas.DataFrame
        Training and test frames; both must contain ``xcols`` and the
        generated target columns.
    all_stk_dim : int
        Total number of indices available per prefix; its half is the
        window centre.
    resolution : int
        Number of indices in the window.
    xcols : list of str
        Input column names.
    ycols : list of str
        Target column-name prefixes.
    shift : int, default 0
        Offset added to the window centre.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(x, y, xt, yt)``: train inputs, train targets, test inputs,
        test targets.
    """
    mid_point = int(all_stk_dim / 2) + shift
    first = int(mid_point - np.floor(resolution / 2))
    last = int(mid_point + np.ceil(resolution / 2))
    # Iterate over the `ycols` argument; the previous code read the
    # notebook-level `y_cols` global and ignored this parameter.
    stks = [prefix + str(i) for prefix in ycols for i in range(first, last)]

    return df[xcols], df[stks], test_df[xcols], test_df[stks]

In [10]:
def scaler(train, kind='standard', transforms=None):
    """Fit a scikit-learn scaler on ``train`` and apply it to other arrays.

    Parameters
    ----------
    train : array-like
        Data the scaler is fitted on.
    kind : {'standard', 'minmax', 'maxabs', 'quantile'}, default 'standard'
        Selects ``StandardScaler``, ``MinMaxScaler``, ``MaxAbsScaler`` or
        ``QuantileTransformer`` respectively (all with default settings).
    transforms : iterable of array-like, optional
        Arrays to transform with the fitted scaler. ``None`` means no
        arrays.

    Returns
    -------
    tuple
        ``(fitted_scaler, results)`` where ``results`` is a list holding
        the transformed version of each entry of ``transforms``, in order.

    Raises
    ------
    ValueError
        If ``kind`` is not one of the supported names.
    """
    if kind == 'standard':
        fitted = StandardScaler()
    elif kind == 'minmax':
        fitted = MinMaxScaler()
    elif kind == 'maxabs':
        fitted = MaxAbsScaler()
    elif kind == 'quantile':
        fitted = QuantileTransformer()
    else:
        # Previously an unknown kind surfaced as an UnboundLocalError below.
        raise ValueError(
            "Unknown scaler kind %r; expected 'standard', 'minmax', "
            "'maxabs' or 'quantile'" % (kind,)
        )

    if transforms is None:
        transforms = []

    fitted.fit(train)
    results = [fitted.transform(array) for array in transforms]

    return fitted, results

In [11]:
def fully_connected_model(hidden_layers, activation, output, l2):
    """Assemble a dense feed-forward Keras model.

    Parameters
    ----------
    hidden_layers : iterable of int
        Number of units of each hidden layer, in order.
    activation : str or callable
        Activation applied by every hidden layer.
    output : int
        Number of units of the final layer, which has no activation.
    l2 : float
        L2 kernel-regularisation factor applied to each hidden layer.

    Returns
    -------
    tf.keras.Sequential
        The model, not yet compiled.
    """
    dense = tf.keras.layers.Dense
    stack = [
        dense(width, activation=activation,
              kernel_regularizer=tf.keras.regularizers.l2(l2))
        for width in hidden_layers
    ]
    # Output head: plain Dense layer without activation or regulariser.
    stack.append(dense(output))

    return tf.keras.Sequential(stack)

def model_train(x_train, y_train, model, loss, epochs, patience, batch_size, optimizer,
                verbose, scheduler=None):
    """Compile ``model`` and fit it with early stopping on the validation loss.

    Parameters
    ----------
    x_train, y_train : array-like
        Training inputs and targets; 15 % is held out for validation via
        ``validation_split=0.15``.
    model : tf.keras.Model
        Model to compile and train.
    loss : str or callable
        Loss passed to ``model.compile``.
    epochs : int
        Maximum number of epochs.
    patience : int
        Epochs without ``val_loss`` improvement before training stops.
    batch_size : int
        Mini-batch size.
    optimizer : str or tf.keras optimizer
        Optimizer passed to ``model.compile``.
    verbose : int
        Verbosity level passed to ``model.fit``.
    scheduler : callable, optional
        When given, wrapped in a ``LearningRateScheduler`` callback.

    Returns
    -------
    tuple
        ``(model, history)`` where ``history`` is the object returned by
        ``model.fit``.
    """
    fit_callbacks = [
        tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=patience)
    ]
    if scheduler:
        fit_callbacks.append(tf.keras.callbacks.LearningRateScheduler(scheduler))

    model.compile(loss=loss, optimizer=optimizer, metrics=['mae', 'mape', 'mse'])
    history = model.fit(
        x_train, y_train,
        batch_size=batch_size, epochs=epochs, verbose=verbose,
        validation_split=0.15, callbacks=fit_callbacks,
    )

    return model, history

In [12]:
def make_result_csv(y_test, y_pred):
    """Build a per-sample error table for the four Stokes blocks I, Q, U, V.

    The columns of ``y_test`` / ``y_pred`` are read as four equally wide
    consecutive blocks (I, Q, U, V). The block width is taken from the
    column count, so 128 columns give the width of 32 that used to be
    hard-coded, and other resolutions are sliced correctly as well.

    Parameters
    ----------
    y_test, y_pred : array-like, shape (n_samples, 4 * width)
        Reference and predicted profiles, in the same (unscaled) units.

    Returns
    -------
    pandas.DataFrame
        One row per sample with, for each block, the mean squared error
        (``'stokes X MSE'``), the WMAPE in percent (``'stokes X WMAPE'``)
        and an amplitude taken from ``|y_test|`` (``'Amplitude stokes X'``),
        plus ``'WMAPE'``, the average of the four per-block WMAPE columns.

    Raises
    ------
    ValueError
        If the inputs are not 2-D, differ in shape, or have a column count
        that is zero or not a multiple of four.

    Notes
    -----
    The MSE is computed with numpy in the promoted dtype of the inputs, so
    values can differ in the last digits from a float32 TensorFlow result.
    """
    y_test = np.asarray(y_test)
    y_pred = np.asarray(y_pred)
    if y_test.ndim != 2 or y_test.shape != y_pred.shape:
        raise ValueError(
            'y_test and y_pred must be 2-D arrays of identical shape, got %s and %s'
            % (y_test.shape, y_pred.shape)
        )
    n_cols = y_test.shape[1]
    if n_cols == 0 or n_cols % 4 != 0:
        raise ValueError(
            'expected a positive multiple of 4 columns (I, Q, U, V blocks), got %d' % n_cols
        )
    width = n_cols // 4

    names = ['I', 'Q', 'U', 'V']
    mse, wmape, amplitude = {}, {}, {}
    for i, name in enumerate(names):
        block = slice(i * width, (i + 1) * width)
        true_block = y_test[:, block]
        pred_block = y_pred[:, block]
        mse[name] = np.mean(np.square(pred_block - true_block), axis=1)
        wmape[name] = weighted_mean_absolute_percentage_error(true_block, pred_block)
        # Stokes I uses the smallest |value| of the block, Q/U/V the largest.
        # NOTE(review): presumably because I is an absorption profile - confirm.
        reducer = np.min if i == 0 else np.max
        amplitude[name] = reducer(np.abs(true_block), axis=1)

    # Keep the established column order: all MSE, all WMAPE, all amplitudes.
    data = {}
    for name in names:
        data['stokes ' + name + ' MSE'] = mse[name]
    for name in names:
        data['stokes ' + name + ' WMAPE'] = wmape[name]
    for name in names:
        data['Amplitude stokes ' + name] = amplitude[name]

    data = pd.DataFrame(data)
    data['WMAPE'] = (data['stokes I WMAPE'] + data['stokes Q WMAPE'] + data['stokes U WMAPE'] + data['stokes V WMAPE'])/4

    return data

# Code

In [None]:
# Folder that holds the input CSV files; the trailing comment keeps the alternative location.
data_path = '../fe6311/'#'../data/'
# NOTE(review): not referenced by any cell visible in this notebook - the training
# cell writes its outputs under 'new results/' instead.
results_path = '../results/'
# Row limits handed to load_train_test_set for the train and test CSV files.
# `size_dataset` is rebound in the training cell to the number of rows actually loaded.
size_dataset = int(2e6)
test_size_dataset = int(2e5)

In [None]:
# Read at most `size_dataset` / `test_size_dataset` rows of the "high" train and test CSVs.
df_high, test_df_high = load_train_test_set(
    data_path + 'cossam_train_data_high.csv',
    data_path + 'cossam_test_data_high.csv',
    train_rows=size_dataset,
    test_rows=test_size_dataset,
)
print(f'Train shape {df_high.shape}')
print(f'Test shape {test_df_high.shape}')

# Preprocessing

In [None]:
# Input feature columns; 'vrot' is noted in the trailing comment but not included.
cols = ['fmag', 'incl', 'alpha', 'beta', 'gamma', 'y2', 'y3', 'phase'] #vrot
# Column-name prefixes of the four target groups; load_resolution appends an integer index to each.
y_cols = ['stki_' , 'stkq_', 'stku_', 'stkv_']

In [None]:
# Value passed as `all_stk_dim`, derived from the column count of the training frame.
# NOTE(review): the constants 11 and 5 presumably encode the CSV layout - confirm against the data files.
all_stk_dim_high = int((df_high.shape[1] - 11)/5)
x_train_high, y_train_high, x_test_high, y_test_high = load_resolution(
    df_high,
    test_df_high,
    all_stk_dim=all_stk_dim_high,
    resolution=32,
    xcols=cols,
    ycols=y_cols,
)

In [None]:
# Replace each DataFrame by its numpy array. Like the one-per-line form, this cell
# cannot be re-run without re-running the cell above (arrays have no `to_numpy`).
x_train_high, x_test_high, y_train_high, y_test_high = (
    frame.to_numpy()
    for frame in (x_train_high, x_test_high, y_train_high, y_test_high)
)

In [None]:
# Inputs: 'maxabs' scaler fitted on the training inputs, applied to train and test.
high_scalerX, (x_train_high_s, x_test_high_s) = scaler(
    x_train_high,
    kind='maxabs',
    transforms=[x_train_high, x_test_high],
)
# Targets: 'standard' scaler fitted on the training targets, applied to train and test.
high_scalerY, (y_train_high_s, y_test_high_s) = scaler(
    y_train_high,
    kind='standard',
    transforms=[y_train_high, y_test_high],
)

# Training

In [None]:
size_dataset = x_train_high.shape[0]

# Create the output folder before training so a missing directory cannot
# make model.save / to_csv fail after the (long) fit has finished.
os.makedirs('new results', exist_ok=True)

# Inverse-time learning-rate decay: lr = 1e-1 / (1 + step / (2 * size_dataset)).
# This is the schedule the optimizer's `decay=` keyword used to produce; current
# Keras optimizers reject that keyword, while a schedule object is accepted.
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=1e-1, decay_steps=1, decay_rate=1/(2 * size_dataset))
opt = tf.keras.optimizers.SGD(learning_rate=lr_schedule, momentum=0.95)

# Seven hidden layers of 4096 ReLU units, no L2 penalty; one output per scaled target column.
model = fully_connected_model([4096]*7, 'relu', y_train_high_s.shape[1], 0)
# MSE loss, up to 1000 epochs, early-stopping patience 25, batch size 1024.
model, history = model_train(x_train_high_s, y_train_high_s, model, 'mse', 1000, 25, 1024, opt, 1)
model.save('new results/1.3M_maxabs_standard_high.h5')

# Predict in scaled space, then undo the target scaling before scoring.
pred = high_scalerY.inverse_transform(model.predict(x_test_high_s, batch_size=1024))
res = make_result_csv(y_test_high, pred)
res.to_csv('new results/1.3M_maxabs_standard_high.csv', index=False)
# Not the last expression of the cell, so it must be displayed explicitly.
display(res.describe())

model.summary()

In [None]:
# NOTE(review): `get_history` is not defined in any cell visible in this notebook; on a
# fresh kernel this line raises NameError unless it is defined or imported elsewhere.
# The meaning of the 'loss' and 1e-2 arguments should be confirmed against its definition.
get_history(history, 'loss', 1e-2)