# My package

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf

## Preprocessing functions

In [None]:
def preprocessing(data):
    """Apply the cleaning steps shared by every preprocessing pipeline.

    The frame is modified in place and is also returned:

    * ``dynamic_payment_segment`` labels are replaced by the ordinal codes
      0-4 (labels that are not in the mapping become NaN);
    * missing ``global_competition_level`` values are filled with 0;
    * the ``season`` column is removed.

    Args:
        data: pandas DataFrame containing the three columns named above.

    Returns:
        The same DataFrame object, after modification.

    Raises:
        KeyError: if one of the expected columns is absent.
    """
    # Dynamic payment segment: ordered label -> ordinal code.
    segment_codes = {
        '0) NonPayer': 0,
        '1) ExPayer': 1,
        '2) Minnow': 2,
        '3) Dolphin': 3,
        '4) Whale': 4,
    }
    data['dynamic_payment_segment'] = data['dynamic_payment_segment'].map(segment_codes)

    # Global competition level: assign the filled column back instead of
    # calling fillna(inplace=True) on the selected column. The chained
    # in-place form is deprecated in pandas 2.x and leaves the frame
    # untouched under Copy-on-Write.
    data['global_competition_level'] = data['global_competition_level'].fillna(0)

    # Season: dropped in place so the caller's frame reflects the change.
    data.drop('season', axis=1, inplace=True)
    return data

In [None]:
def basic_preprocessing(data):
    """Run the shared preprocessing, then discard the registration columns.

    ``preprocessing`` modifies ``data`` in place; the two registration
    columns are then removed on a new frame rather than being encoded.

    Args:
        data: pandas DataFrame accepted by ``preprocessing`` that also has
            ``registration_country`` and ``registration_platform_specific``.

    Returns:
        A DataFrame without the two registration columns.
    """
    cleaned = preprocessing(data)
    registration_columns = ['registration_country', 'registration_platform_specific']
    return cleaned.drop(columns=registration_columns)

In [None]:
def heavy_preprocessing(data):
    """Run the shared preprocessing and one-hot encode the registration columns.

    ``preprocessing`` modifies ``data`` in place, and the 0/1 indicator
    columns are added to that same frame before the source columns are
    dropped. Each indicator column is named after the label it flags.

    Args:
        data: pandas DataFrame accepted by ``preprocessing`` that also has
            ``registration_country`` and ``registration_platform_specific``.

    Returns:
        A DataFrame with up to 20 country indicators and up to 10 platform
        indicators in place of the two registration columns.
    """
    data = preprocessing(data)

    # Registration country: one indicator per each of the 20 most frequent values.
    country_counts = data.registration_country.value_counts()
    for country in list(country_counts.head(20).index):
        is_country = data['registration_country'] == country
        data[country] = np.where(is_country, 1, 0)
    data = data.drop(columns='registration_country')

    # Registration platform: one indicator per each of the first 10 distinct values.
    # NOTE(review): these are the first 10 values in order of appearance, not
    # the 10 most frequent ones (unlike the countries) - confirm this is intended.
    distinct_platforms = list(data['registration_platform_specific'].unique())
    for platform in distinct_platforms[:10]:
        is_platform = data['registration_platform_specific'] == platform
        data[platform] = np.where(is_platform, 1, 0)
    data = data.drop(columns='registration_platform_specific')
    return data

## Feature Selection

In [None]:
def Feature_Selection(data):
    """Return a copy of ``data`` without the baseline discarded features.

    Args:
        data: pandas DataFrame containing ``avg_stars_top_14_players`` and
            ``cohort_season``.

    Returns:
        A new DataFrame without those two columns; ``data`` is left untouched.

    Raises:
        KeyError: if either column is absent.
    """
    discarded = ['avg_stars_top_14_players', 'cohort_season']
    return data.drop(columns=discarded)
def Radical_Feature_Selection(data):
    """Apply ``Feature_Selection`` and additionally drop the stash features.

    Args:
        data: pandas DataFrame accepted by ``Feature_Selection`` that also
            has ``tokens_stash`` and ``rests_stash``.

    Returns:
        A new DataFrame without the baseline discarded columns and without
        the two stash columns.
    """
    reduced = Feature_Selection(data)
    return reduced.drop(columns=['tokens_stash', 'rests_stash'])
def Super_Radical_Feature_Selection(data):
    """Apply ``Feature_Selection`` and drop two further features.

    Note that this variant builds on ``Feature_Selection`` directly, so the
    stash columns removed by ``Radical_Feature_Selection`` are kept here.

    Args:
        data: pandas DataFrame accepted by ``Feature_Selection`` that also
            has ``dynamic_payment_segment`` and
            ``league_match_watched_count_last_28_days``.

    Returns:
        A new DataFrame without the baseline discarded columns and without
        the two columns named above.
    """
    reduced = Feature_Selection(data)
    extra_discarded = [
        'dynamic_payment_segment',
        'league_match_watched_count_last_28_days',
    ]
    return reduced.drop(columns=extra_discarded)

## Feature engineering

In [None]:
def Averaging_by_leagues(data):
    """Express every feature relative to the mean of its league.

    Each column other than ``league_rank`` and ``league_id`` is replaced by
    ``averaged_<column>``: the value divided by the mean of that column over
    all rows sharing the same ``league_id``. Ratios that come out as NaN
    (for example 0 / 0, or a missing value) are set to 0.

    The input frame is not modified.

    Args:
        data: pandas DataFrame with ``league_rank``, ``league_id`` and
            numeric feature columns.

    Returns:
        A new DataFrame holding the untouched ``league_rank`` and
        ``league_id`` columns followed by the ``averaged_*`` columns, in the
        original feature order.

    Raises:
        ValueError: if ``league_rank`` or ``league_id`` is not a column.
    """
    features = data.columns.tolist()
    features.remove('league_rank')
    features.remove('league_id')

    # Start from a fresh frame so the caller's DataFrame never receives
    # any of the derived columns.
    result = data.drop(columns=features)

    # league_id never changes, so a single grouping serves every feature.
    by_league = data.groupby('league_id')
    for feature in features:
        league_mean = by_league[feature].transform('mean')
        result['averaged_' + feature] = (data[feature] / league_mean).fillna(0)
    return result

## Train test val split

In [None]:
def train_val_test_split_adapted_shuffled(data, group_size=14,
                                          proportions=(0.7, 0.15, 0.15),
                                          random_state=None):
    """Split ``data`` into train/validation/test arrays, keeping row groups whole.

    Rows are read as consecutive groups of ``group_size`` rows (rows
    ``0 .. group_size - 1`` are the first group, and so on). Every complete
    group is randomly assigned to exactly one split, so rows of one group
    never end up in different splits. Inside each split the groups appear in
    shuffled order. Trailing rows that do not fill a complete group are
    left out.

    A split that receives no group is returned as an empty array with the
    right number of columns instead of raising.

    Args:
        data: pandas DataFrame with a ``league_rank`` column (the target).
        group_size: number of consecutive rows forming one group.
        proportions: probabilities of a group landing in the train,
            validation and test split, in that order; must sum to 1.
        random_state: optional seed. When ``None`` the global NumPy random
            state is used, so ``np.random.seed`` keeps controlling the result.

    Returns:
        Tuple ``(X_train, y_train, X_val, y_val, X_test, y_test)`` of NumPy
        arrays, where each ``X_*`` holds every column except ``league_rank``
        and each ``y_*`` holds the ``league_rank`` column.

    Raises:
        ValueError: if ``group_size`` is smaller than 1.
        KeyError: if ``data`` has no ``league_rank`` column.
    """
    if group_size < 1:
        raise ValueError('group_size must be a positive integer')

    # The np.random module and RandomState expose the same choice/shuffle API.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    n_groups = data.shape[0] // group_size
    split_of_group = rng.choice([0, 1, 2], size=n_groups, p=list(proportions))
    group_order = np.arange(n_groups)
    rng.shuffle(group_order)

    target_col = data.columns.get_loc('league_rank')
    values = data.values
    offsets = np.arange(group_size)

    def take(split_code):
        # Groups of this split, in shuffled order, expanded to row indices.
        groups = group_order[split_of_group[group_order] == split_code]
        rows = (groups[:, None] * group_size + offsets).ravel()
        subset = values[rows]
        return np.delete(subset, target_col, axis=1), subset[:, target_col]

    X_train, y_train = take(0)
    X_val, y_val = take(1)
    X_test, y_test = take(2)
    return X_train, y_train, X_val, y_val, X_test, y_test
# Example usage:
# X_train, y_train, X_val, y_val, X_test, y_test = train_val_test_split_adapted_shuffled(dataset)

## Postprocessing functions

In [None]:
import copy
def post_processing(y_pred):
    """Round predictions to whole numbers and clamp them to [1, 14], in place.

    Values below 1 become 1, values above 14 become 14, and everything in
    between is rounded with ``np.round``. NaN entries stay NaN.

    Args:
        y_pred: mutable sequence of numeric predictions (NumPy array or
            list); it is overwritten with the processed values.

    Returns:
        None. The result is written back into ``y_pred``.
    """
    # Rounding first and clamping second matches clamping the raw values:
    # anything below 1 rounds to at most 1 and anything above 14 rounds to
    # at least 14, so the clamp lands on the same bound either way.
    y_pred[:] = np.clip(np.round(y_pred), 1, 14)

In [None]:
def post_processing_1(y):
    """Return a copy of ``y`` rounded to whole numbers and clamped to [1, 14].

    Values below 1 become 1, values above 14 become 14, and everything in
    between is rounded with ``np.round``. NaN entries stay NaN. Unlike
    ``post_processing``, the input is left untouched.

    Args:
        y: mutable sequence of numeric predictions (NumPy array or list).

    Returns:
        A deep copy of ``y``, of the same container type, holding the
        processed values.
    """
    y_pred = copy.deepcopy(y)
    # Rounding first and clamping second matches clamping the raw values:
    # anything below 1 rounds to at most 1 and anything above 14 rounds to
    # at least 14. Slice assignment keeps the container type of the copy.
    y_pred[:] = np.clip(np.round(y_pred), 1, 14)
    return y_pred

In [None]:
def one_post_process(y_p):
    """Replace each value by its 1-based rank within ``y_p``.

    The smallest value gets rank 1 and the largest gets ``len(y_p)``. The
    order among equal values is whatever ``np.argsort`` yields.

    Args:
        y_p: sequence of comparable numeric values (NumPy array or list).

    Returns:
        A new container of the same type as ``y_p`` holding the ranks;
        ``y_p`` itself is not modified.
    """
    # A plain slice of a NumPy array is a view, so writing ranks into it
    # would overwrite the caller's data; take a real copy when one exists.
    ranked = y_p.copy() if hasattr(y_p, 'copy') else y_p[:]
    ascending = np.argsort(np.array(y_p))
    for rank, position in enumerate(ascending, start=1):
        ranked[position] = rank
    return ranked

def post_sorting(y_pred, length):
    """Rank predictions inside each consecutive chunk of ``length`` items.

    A deep copy of ``y_pred`` is cut into consecutive chunks of ``length``
    elements and each chunk is replaced by the output of
    ``one_post_process``. Trailing elements that do not fill a complete
    chunk are left as they are.

    Args:
        y_pred: sequence of predictions (NumPy array or list).
        length: number of elements per chunk.

    Returns:
        The processed copy; ``y_pred`` itself is not modified.
    """
    ranked = copy.deepcopy(y_pred)
    full_chunks = int(len(ranked) / length)

    for start in range(0, full_chunks * length, length):
        stop = start + length
        ranked[start:stop] = one_post_process(ranked[start:stop])

    return ranked