In [67]:
import warnings
warnings.filterwarnings('ignore')
from tqdm import tqdm
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.models import Sequential
from keras.regularizers import l1, l2
from tensorflow.keras import initializers
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import KFold
import keras_tuner
import keras
from keras import layers
import itertools
from sklearn.model_selection import train_test_split
from keras.optimizers import SGD
from keras.callbacks import EarlyStopping
from keras.callbacks import LearningRateScheduler
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.preprocessing import OneHotEncoder
import keras.backend as K
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from math import ceil
import pickle

In [68]:
def mee(y_true, y_pred):
    """Mean Euclidean error between two batches of target vectors.

    The squared differences are summed along axis 1, the square root of each
    row total is taken, and the resulting per-row distances are averaged.

    The computation chains ``.square()``, ``.sum(axis=1)``, ``.sqrt()`` and
    ``.mean()`` on ``y_true - y_pred``, so the difference must be a tensor-like
    object that exposes those methods.
    """
    diff = y_true - y_pred
    row_distances = diff.square().sum(axis=1).sqrt()
    return row_distances.mean()

def mse(y_true, y_pred):
    """Average, over rows, of the summed squared error of each row.

    Squared differences are added up along axis 1 and the row totals are then
    averaged; the row totals are not divided by the number of columns.

    Like ``mee``, this relies on ``y_true - y_pred`` exposing ``.square()``,
    ``.sum(axis=1)`` and ``.mean()``.
    """
    squared_diff = (y_true - y_pred).square()
    per_row_total = squared_diff.sum(axis=1)
    return per_row_total.mean()

In [69]:
# Module-level scaler handles: load_dataset() assigns them, and get_mlp_pred()
# reads y_scaler to map predictions back to the original target scale.
x_scaler = y_scaler = None

def _read_cup_csv(path):
    """Read a header-less, comma-separated file and drop the column labelled 0."""
    # With header=None pandas labels columns 0..N-1, so label 0 is the first
    # column (presumably a row id -- confirm against the data files).
    # drop() returns a new frame, so nothing is mutated in place.
    return pd.read_csv(path, header=None, sep=",").drop(columns=0)


def load_dataset(scale=True, data_dir="../data"):
    """Load the internal train/test splits and the blind test inputs.

    After the first column is dropped, the first 9 remaining columns of each
    file are used as inputs and, for the two internal splits, every remaining
    column is used as targets. All arrays are returned as ``float64``.

    Parameters
    ----------
    scale : bool, default True
        When true, a ``StandardScaler`` is fitted on the training inputs and
        another on the training targets; every split is transformed with the
        matching scaler and both scalers are published through the
        module-level ``x_scaler`` / ``y_scaler``.
        When false, the arrays are returned untouched and the module-level
        scalers are left exactly as they were. Previously they were replaced
        by fresh *unfitted* scalers on every call, so an unscaled load issued
        after a scaled one broke ``get_mlp_pred``.
    data_dir : str, default "../data"
        Directory that contains the three ML-CUP22 CSV files.

    Returns
    -------
    tuple
        ``(x_train, y_train, x_test, y_test, x_blind_test)``.
    """
    global x_scaler
    global y_scaler

    train = _read_cup_csv(f"{data_dir}/ML-CUP22-INTERNAL-TR.csv")
    test = _read_cup_csv(f"{data_dir}/ML-CUP22-INTERNAL-TS.csv")
    blind_test = _read_cup_csv(f"{data_dir}/ML-CUP22-TS.csv")

    x_train = train.iloc[:, :9].to_numpy().astype(np.float64)
    y_train = train.iloc[:, 9:].to_numpy().astype(np.float64)
    x_test = test.iloc[:, :9].to_numpy().astype(np.float64)
    y_test = test.iloc[:, 9:].to_numpy().astype(np.float64)
    x_blind_test = blind_test.iloc[:, :9].to_numpy().astype(np.float64)

    if scale:
        # Fit on the training split only, then apply the same statistics to
        # the other splits.
        fitted_x_scaler = StandardScaler()
        fitted_x_scaler.fit(x_train)
        x_train = fitted_x_scaler.transform(x_train)
        x_test = fitted_x_scaler.transform(x_test)
        x_blind_test = fitted_x_scaler.transform(x_blind_test)

        fitted_y_scaler = StandardScaler()
        fitted_y_scaler.fit(y_train)
        y_train = fitted_y_scaler.transform(y_train)
        y_test = fitted_y_scaler.transform(y_test)

        # Publish only after both scalers are fitted, so the globals never
        # point at a half-initialised pair.
        x_scaler = fitted_x_scaler
        y_scaler = fitted_y_scaler

    return x_train, y_train, x_test, y_test, x_blind_test

def get_mee(y_true, y_pred):
    """Mean Euclidean error computed with NumPy.

    For every row, the Euclidean norm of ``y_true - y_pred`` is taken along
    axis 1; the function returns the mean of those per-row norms.
    """
    residuals = y_true - y_pred
    per_row_norm = np.sqrt(np.sum(np.square(residuals), axis=1))
    return np.mean(per_row_norm)


def get_mlp_pred(model, x):
    """Predict with ``model`` and undo the target scaling.

    Calls ``model.predict(x)`` and passes the result through the module-level
    ``y_scaler.inverse_transform``. ``y_scaler`` is only read here, so no
    ``global`` declaration is required; it must have been set to a fitted
    scaler beforehand (``load_dataset`` with scaling enabled does that).
    """
    scaled_output = model.predict(x)
    return y_scaler.inverse_transform(scaled_output)

# Loading the data

In [70]:
# Unscaled splits: scale=False skips the StandardScaler branch of load_dataset.
# The unscaled targets are the ground truth used for the MEE figures below.
x_train, y_train, x_test, y_test, x_blind_test = load_dataset(scale=False)

print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)
print(x_blind_test.shape)

(1194, 9) (1194, 2)
(298, 9) (298, 2)
(529, 9)


In [71]:
# Standardised splits. This call also leaves fitted scalers in the module-level
# x_scaler / y_scaler, which get_mlp_pred needs to invert the MLP output.
(
    x_train_scaled,
    y_train_scaled,
    x_test_scaled,
    y_test_scaled,
    x_blind_test_scaled,
) = load_dataset(scale=True)

print(x_train_scaled.shape, y_train_scaled.shape)
print(x_test_scaled.shape, y_test_scaled.shape)
print(x_blind_test_scaled.shape)

(1194, 9) (1194, 2)
(298, 9) (298, 2)
(529, 9)


# Loading the models

In [72]:
# The Keras model was saved with the custom `mee` metric, so the function has
# to be supplied again at load time.
mlp = keras.models.load_model(
    "./../models/keras_model.hdf5",
    custom_objects={"mee": mee},
)

# NOTE(review): `load` is not imported by the import cell of this notebook, so
# these two lines raise NameError on a fresh kernel. The `.z` files look like
# joblib dumps -- presumably `from joblib import load` is intended; confirm and
# add it to the import cell. Such files execute pickled code on load, so only
# load artefacts from a trusted source.
svm = load("./../models/svm.z")
knn = load("./../models/knr_mlcup.z")

# Getting the average prediction on training and internal test set

In [73]:
# Every model is fed the standardised inputs. get_mlp_pred() maps the network
# output back through y_scaler; the SVM and KNN predictions are used as
# returned -- assumes those models already predict in original target units,
# TODO confirm.

# Internal test split.
mlp_pred_test = get_mlp_pred(mlp, x_test_scaled)
svm_pred_test = svm.predict(x_test_scaled)
knn_pred_test = knn.predict(x_test_scaled)

# Training split. This previously called get_keras_pred(keras_model, ...),
# two names that are not defined anywhere in the notebook (NameError on a
# fresh kernel); it now uses the same helper and model as the test split.
mlp_pred_train = get_mlp_pred(mlp, x_train_scaled)
svm_pred_train = svm.predict(x_train_scaled)
knn_pred_train = knn.predict(x_train_scaled)

# Ensemble = element-wise mean of the three models' predictions.
avg_pred_train = np.mean([mlp_pred_train, svm_pred_train, knn_pred_train], axis=0)
avg_pred_test = np.mean([mlp_pred_test, svm_pred_test, knn_pred_test], axis=0)



In [74]:
# Score the averaged predictions against the unscaled targets returned by
# load_dataset(False).
train_mee, test_mee = (
    get_mee(y_train, avg_pred_train),
    get_mee(y_test, avg_pred_test),
)

print(f"Train MEE: {train_mee}")
print(f"Test MEE: {test_mee}")

Train MEE: 1.3173527940228287
Test MEE: 1.426876231849489


# Blind test set prediction

In [75]:
# Per-model predictions on the standardised blind-test inputs.
y_mlp_blind = get_mlp_pred(mlp, x_blind_test_scaled)
y_svm_blind = svm.predict(x_blind_test_scaled)
y_knn_blind = knn.predict(x_blind_test_scaled)

# Element-wise mean across the three models.
avg_blind_pred = np.mean(
    (y_mlp_blind, y_svm_blind, y_knn_blind),
    axis=0,
)
print(avg_blind_pred.shape)

(529, 2)


In [76]:
# One "<1-based row id>,<output 0>,<output 1>" line per blind-test sample.
# Each line is built with its newline and the last character is trimmed, so
# the file does not end with a trailing newline.
csv_file = "".join(
    f"{i+1},{avg_blind_pred[i, 0]},{avg_blind_pred[i, 1]}\n"
    for i in range(avg_blind_pred.shape[0])
)[:-1]

with open("blind_result.csv", "w") as out_file:
    out_file.write(csv_file)