# FeatureBook Alternative 1

In [1]:
import gc
import numpy as np
import pandas as pd
import xgboost as xgb

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Raise pandas' display limits to 500 rows and 500 columns so printed frames
# are not elided.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

## MultipleNN Angle Features

In [2]:
def generate_angles(df, df_structures):
    """Attach distance and cosine ("angle") features to every atom pair in ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per atom pair. Must contain ``molecule_name``,
        ``atom_index_0`` and ``atom_index_1``.
    df_structures : pandas.DataFrame
        One row per atom with ``molecule_name``, ``atom_index``, ``atom``,
        ``x``, ``y`` and ``z``. It is NOT modified: the per-molecule columns
        (``c_x``, ``c_y``, ``c_z``, ``atom_n``) are computed on a derived
        frame, so the same structures frame can be passed to several calls.
        If it already carries those four columns they are recomputed.

    Returns
    -------
    pandas.DataFrame
        A new frame with the original columns of ``df`` plus ``atom_0``,
        ``atom_1``, ``atom_n``, ``distance``, the closest/farthest partner
        columns (``atom_index_*``, ``distance_*``), ``distance_center*``,
        ``distance_c*``, ``distance_f*`` and the ``cos_*`` columns. Raw
        coordinates and helper vectors are dropped before returning.
        The closest/farthest lookup tables still carry ``max_distance`` /
        ``min_distance`` when they are merged twice, so the result also holds
        ``max_distance_x``/``max_distance_y`` and
        ``min_distance_x``/``min_distance_y`` (pandas' default merge suffixes).
    """
    # Map XYZ-coordinates to the dataframe
    def map_atom_info(df, df_merge, atom_idx):
        df = pd.merge(df, df_merge.drop_duplicates(subset=['molecule_name', 'atom_index']), how = 'left',
                      left_on  = ['molecule_name', f'atom_index_{atom_idx}'],
                      right_on = ['molecule_name',  'atom_index'])

        df = df.drop('atom_index', axis=1)

        return df

    # Work on derived frames so the caller's structures are left untouched.
    derived_cols = ['c_x', 'c_y', 'c_z', 'atom_n']
    structures = df_structures.drop(columns=derived_cols, errors='ignore')
    per_molecule = structures.groupby('molecule_name')
    structures_with_center = structures.assign(
        c_x=per_molecule['x'].transform('mean'),
        c_y=per_molecule['y'].transform('mean'),
        c_z=per_molecule['z'].transform('mean'),
        atom_n=per_molecule['atom_index'].transform('max'))

    # The per-molecule columns are merged only together with atom 1, so they
    # appear exactly once (unsuffixed) in the pair frame.
    for atom_idx, df_merge in [(0, structures), (1, structures_with_center)]:
        df = map_atom_info(df, df_merge, atom_idx)
        df = df.rename(columns={
            'atom': f'atom_{atom_idx}',
            'x': f'x_{atom_idx}',
            'y': f'y_{atom_idx}',
            'z': f'z_{atom_idx}'})

    # Calculate initial distances
    def calculate_init_distances(df):
        df['dx'] = df['x_1'] - df['x_0']
        df['dy'] = df['y_1'] - df['y_0']
        df['dz'] = df['z_1'] - df['z_0']
        df['distance'] = (df['dx']**2 + df['dy']**2 + df['dz']**2)**(1/2)
        return df

    df = calculate_init_distances(df)

    # Extend the distance calculations
    def extended_distance_calculations(df):
        df_temp = df.loc[:, ["molecule_name","atom_index_0","atom_index_1","distance","x_0","y_0","z_0","x_1","y_1","z_1"]].copy()
        # Mirror every pair so each atom shows up as "atom 0" of all its pairs.
        df_temp_ = df_temp.copy()
        df_temp_ = df_temp_.rename(columns={
            'atom_index_0': 'atom_index_1',
            'atom_index_1': 'atom_index_0',
            'x_0': 'x_1', 'y_0': 'y_1',
            'z_0': 'z_1', 'x_1': 'x_0',
            'y_1': 'y_0', 'z_1': 'z_0'})

        df_temp_all = pd.concat((df_temp, df_temp_), axis=0)

        df_temp_all["min_distance"] = df_temp_all.groupby(['molecule_name', 'atom_index_0'])['distance'].transform('min')
        df_temp_all["max_distance"] = df_temp_all.groupby(['molecule_name', 'atom_index_0'])['distance'].transform('max')

        # Closest partner of every atom (ties are resolved by the
        # drop_duplicates inside map_atom_info: the first row wins).
        df_temp = df_temp_all[df_temp_all["min_distance"] == df_temp_all["distance"]].copy()
        df_temp = df_temp.drop(['x_0', 'y_0', 'z_0', 'min_distance'], axis=1)
        df_temp = df_temp.rename(columns={
            'atom_index_0': 'atom_index',
            'atom_index_1': 'atom_index_closest',
            'distance': 'distance_closest',
            'x_1': 'x_closest',
            'y_1': 'y_closest',
            'z_1': 'z_closest'})

        for atom_idx in [0, 1]:
            df = map_atom_info(df, df_temp, atom_idx)
            df = df.rename(columns={
                'atom_index_closest': f'atom_index_closest_{atom_idx}',
                'distance_closest': f'distance_closest_{atom_idx}',
                'x_closest': f'x_closest_{atom_idx}',
                'y_closest': f'y_closest_{atom_idx}',
                'z_closest': f'z_closest_{atom_idx}'})

        # Farthest partner of every atom, same procedure.
        df_temp = df_temp_all[df_temp_all["max_distance"] == df_temp_all["distance"]].copy()
        df_temp = df_temp.drop(['x_0', 'y_0', 'z_0', 'max_distance'], axis=1)
        df_temp = df_temp.rename(columns={
            'atom_index_0': 'atom_index',
            'atom_index_1': 'atom_index_farthest',
            'distance': 'distance_farthest',
            'x_1': 'x_farthest',
            'y_1': 'y_farthest',
            'z_1': 'z_farthest'})

        for atom_idx in [0, 1]:
            df = map_atom_info(df, df_temp, atom_idx)
            df = df.rename(columns={
                'atom_index_farthest': f'atom_index_farthest_{atom_idx}',
                'distance_farthest': f'distance_farthest_{atom_idx}',
                'x_farthest': f'x_farthest_{atom_idx}',
                'y_farthest': f'y_farthest_{atom_idx}',
                'z_farthest': f'z_farthest_{atom_idx}'})

        return df

    df = extended_distance_calculations(df)

    # Add angle features
    def add_angles(df):
        # Lengths of the vectors from each pair atom to the molecule centre,
        # to its closest partner (c) and to its farthest partner (f).
        df["distance_center0"]=((df['x_0']-df['c_x'])**2+(df['y_0']-df['c_y'])**2+(df['z_0']-df['c_z'])**2)**(1/2)
        df["distance_center1"]=((df['x_1']-df['c_x'])**2+(df['y_1']-df['c_y'])**2+(df['z_1']-df['c_z'])**2)**(1/2)
        df["distance_c0"]=((df['x_0']-df['x_closest_0'])**2+(df['y_0']-df['y_closest_0'])**2+(df['z_0']-df['z_closest_0'])**2)**(1/2)
        df["distance_c1"]=((df['x_1']-df['x_closest_1'])**2+(df['y_1']-df['y_closest_1'])**2+(df['z_1']-df['z_closest_1'])**2)**(1/2)
        df["distance_f0"]=((df['x_0']-df['x_farthest_0'])**2+(df['y_0']-df['y_farthest_0'])**2+(df['z_0']-df['z_farthest_0'])**2)**(1/2)
        df["distance_f1"]=((df['x_1']-df['x_farthest_1'])**2+(df['y_1']-df['y_farthest_1'])**2+(df['z_1']-df['z_farthest_1'])**2)**(1/2)
        # Normalised direction vectors; the 1e-10 keeps the division defined
        # when a length is zero.
        df["vec_center0_x"]=(df['x_0']-df['c_x'])/(df["distance_center0"]+1e-10)
        df["vec_center0_y"]=(df['y_0']-df['c_y'])/(df["distance_center0"]+1e-10)
        df["vec_center0_z"]=(df['z_0']-df['c_z'])/(df["distance_center0"]+1e-10)
        df["vec_center1_x"]=(df['x_1']-df['c_x'])/(df["distance_center1"]+1e-10)
        df["vec_center1_y"]=(df['y_1']-df['c_y'])/(df["distance_center1"]+1e-10)
        df["vec_center1_z"]=(df['z_1']-df['c_z'])/(df["distance_center1"]+1e-10)
        df["vec_c0_x"]=(df['x_0']-df['x_closest_0'])/(df["distance_c0"]+1e-10)
        df["vec_c0_y"]=(df['y_0']-df['y_closest_0'])/(df["distance_c0"]+1e-10)
        df["vec_c0_z"]=(df['z_0']-df['z_closest_0'])/(df["distance_c0"]+1e-10)
        df["vec_c1_x"]=(df['x_1']-df['x_closest_1'])/(df["distance_c1"]+1e-10)
        df["vec_c1_y"]=(df['y_1']-df['y_closest_1'])/(df["distance_c1"]+1e-10)
        df["vec_c1_z"]=(df['z_1']-df['z_closest_1'])/(df["distance_c1"]+1e-10)
        df["vec_f0_x"]=(df['x_0']-df['x_farthest_0'])/(df["distance_f0"]+1e-10)
        df["vec_f0_y"]=(df['y_0']-df['y_farthest_0'])/(df["distance_f0"]+1e-10)
        df["vec_f0_z"]=(df['z_0']-df['z_farthest_0'])/(df["distance_f0"]+1e-10)
        df["vec_f1_x"]=(df['x_1']-df['x_farthest_1'])/(df["distance_f1"]+1e-10)
        df["vec_f1_y"]=(df['y_1']-df['y_farthest_1'])/(df["distance_f1"]+1e-10)
        df["vec_f1_z"]=(df['z_1']-df['z_farthest_1'])/(df["distance_f1"]+1e-10)
        # Unit vector along the pair itself (atom 0 -> atom 1).
        df["vec_x"]=(df['x_1']-df['x_0'])/df["distance"]
        df["vec_y"]=(df['y_1']-df['y_0'])/df["distance"]
        df["vec_z"]=(df['z_1']-df['z_0'])/df["distance"]
        # Dot products of the unit vectors, i.e. cosines of the angles.
        df["cos_c0_c1"]=df["vec_c0_x"]*df["vec_c1_x"]+df["vec_c0_y"]*df["vec_c1_y"]+df["vec_c0_z"]*df["vec_c1_z"]
        df["cos_f0_f1"]=df["vec_f0_x"]*df["vec_f1_x"]+df["vec_f0_y"]*df["vec_f1_y"]+df["vec_f0_z"]*df["vec_f1_z"]
        df["cos_center0_center1"]=df["vec_center0_x"]*df["vec_center1_x"]+df["vec_center0_y"]*df["vec_center1_y"]+df["vec_center0_z"]*df["vec_center1_z"]
        df["cos_c0"]=df["vec_c0_x"]*df["vec_x"]+df["vec_c0_y"]*df["vec_y"]+df["vec_c0_z"]*df["vec_z"]
        df["cos_c1"]=df["vec_c1_x"]*df["vec_x"]+df["vec_c1_y"]*df["vec_y"]+df["vec_c1_z"]*df["vec_z"]
        df["cos_f0"]=df["vec_f0_x"]*df["vec_x"]+df["vec_f0_y"]*df["vec_y"]+df["vec_f0_z"]*df["vec_z"]
        df["cos_f1"]=df["vec_f1_x"]*df["vec_x"]+df["vec_f1_y"]*df["vec_y"]+df["vec_f1_z"]*df["vec_z"]
        df["cos_center0"]=df["vec_center0_x"]*df["vec_x"]+df["vec_center0_y"]*df["vec_y"]+df["vec_center0_z"]*df["vec_z"]
        df["cos_center1"]=df["vec_center1_x"]*df["vec_x"]+df["vec_center1_y"]*df["vec_y"]+df["vec_center1_z"]*df["vec_z"]
        # original
        df=df.drop(['vec_c0_x','vec_c0_y','vec_c0_z','vec_c1_x','vec_c1_y','vec_c1_z',
                    'vec_f0_x','vec_f0_y','vec_f0_z','vec_f1_x','vec_f1_y','vec_f1_z',
                    'vec_center0_x','vec_center0_y','vec_center0_z','vec_center1_x','vec_center1_y','vec_center1_z',
                    'vec_x','vec_y','vec_z'], axis=1)
        # extra
        df=df.drop(['dy', 'dx', 'dz',
                    'x_0', 'x_1', 'c_x', 'x_closest_0', 'x_closest_1', 'x_farthest_0', 'x_farthest_1',
                    'y_0', 'y_1', 'c_y', 'y_closest_0', 'y_closest_1', 'y_farthest_0', 'y_farthest_1',
                    'z_0', 'z_1', 'c_z', 'z_closest_0', 'z_closest_1', 'z_farthest_0', 'z_farthest_1'], axis=1)
        return df

    df = add_angles(df)

    return df

## General Methods

In [3]:
# Coupling types iterated by the master loop, which builds features and
# trains one model per entry.
coupling_types = ["1JHC", "1JHN", "2JHH", "2JHC", "2JHN", "3JHH", "3JHC", "3JHN"]

def select_wanted(df, wanted_rows):
    """Return the rows of ``df`` whose ``type`` column equals ``wanted_rows``.

    The original index labels of the selected rows are kept.
    """
    type_matches = df["type"] == wanted_rows
    return df[type_matches]

def memory_optimization(dfs):
    """Iterate over ``dfs`` and then force a garbage-collection pass.

    Note that ``del`` below only unbinds the loop-local name; references held
    by the caller (or by ``dfs`` itself) are unaffected, so an object is only
    reclaimed once the caller drops its own references too.
    """
    for frame in dfs:
        del frame
    gc.collect()

def merge_multiple(df, df_merge, atom_idx):
    """Left-join per-atom data onto the pair frame for one atom of the pair.

    Rows of ``df`` are matched on ``molecule_name`` and
    ``atom_index_{atom_idx}`` against ``molecule_name`` / ``atom_index`` of
    ``df_merge``; the helper ``atom_index`` column is removed from the result.
    """
    left_keys = ['molecule_name', f'atom_index_{atom_idx}']
    right_keys = ['molecule_name', 'atom_index']
    merged = pd.merge(df, df_merge, how='left', left_on=left_keys, right_on=right_keys)
    return merged.drop('atom_index', axis=1)

def merge_single(df, df_merge):
    """Left-join per-molecule data from ``df_merge`` onto ``df`` by ``molecule_name``."""
    molecule_key = ['molecule_name']
    return pd.merge(df, df_merge, how='left',
                    left_on=molecule_key, right_on=molecule_key)

def merge_custom(df, df_merge):
    """Left-join ``df_merge`` onto ``df`` using the ``id`` / ``molecule_name`` pair as key."""
    row_key = ['id', 'molecule_name']
    return pd.merge(df, df_merge, how='left',
                    left_on=row_key, right_on=row_key)

def drop_col(df, cols):
    """Return ``df`` without the columns named in ``cols``.

    Names that are not columns of ``df`` are ignored, and a name listed more
    than once is handled like a single occurrence. All columns are removed in
    one ``drop`` call, so the frame is copied once rather than once per
    column. The input frame is not modified; a new frame is always returned.
    """
    return df.drop(columns=list(cols), errors='ignore')

## Append Custom Features to DataFrame

In [4]:
def loader(df, train_or_test):
    """Join the pre-generated feature files under ``./input/generated`` onto ``df``.

    ``train_or_test`` is interpolated into the dipole and Mulliken file names.
    The joins run in a fixed order (sorted distances, dipole moments, Mulliken
    charges, atom counts / max distances, coupling distances); the order
    matters because tables joined twice get pandas' ``_x`` / ``_y`` suffixes.
    Index/id helper columns and the raw dipole components are removed at the
    end. Returns the enriched frame.
    """
    # Sorted distances, joined once per atom of the pair
    sorted_distances = pd.read_csv('./input/generated/sorted_distances.csv')
    for atom_idx in (0, 1):
        df = merge_multiple(df, sorted_distances, atom_idx)
    memory_optimization([sorted_distances])

    # Dipole moments, plus the length of the dipole vector as 'size'
    dipoles = pd.read_csv(f'./input/generated/{train_or_test}_ob_dipoles_mmff44.csv')
    dipoles = dipoles.loc[:, ~dipoles.columns.str.contains('^Unnamed')]
    squared_norm = dipoles['X']**2 + dipoles['Y']**2 + dipoles['Z']**2
    dipoles['size'] = squared_norm**(1/2)
    df = merge_single(df, dipoles)
    memory_optimization([dipoles])

    # Mulliken charges, joined once per atom of the pair
    charges = pd.read_csv(f'./input/generated/best_ob_mulliken_{train_or_test}.csv')
    charges = charges.loc[:, ~charges.columns.str.contains('^Unnamed')]
    for atom_idx in (0, 1):
        df = merge_multiple(df, charges, atom_idx)
    memory_optimization([charges])

    # Number of atoms and max distances (atom 0 only); its 'atom' column is discarded
    natoms_maxdist = pd.read_csv(f'./input/generated/natoms_maxdist.csv')
    df = merge_multiple(df, natoms_maxdist, 0).drop(['atom'], axis = 1)
    memory_optimization([natoms_maxdist])

    # Coupling distances, keyed by row id and molecule
    coupling_distances = pd.read_csv(f'./input/generated/coupling_distances.csv')
    df = merge_custom(df, coupling_distances)
    memory_optimization([coupling_distances])

    helper_columns = ['atom_index_0', 'atom_index_1', 'atom_index_closest_0', 'atom_index_closest_1',
                      'atom_index_farthest_0', 'atom_index_farthest_1', 'id', 'X', 'Y', 'Z']
    return drop_col(df, helper_columns)

## Convert Data Types

In [5]:
def convert_df(df):
    """Replace the string-valued columns of ``df`` with integer codes.

    ``type`` is mapped through a fixed coupling-type table, the four atom
    columns (``atom_x``, ``atom_y``, ``atom_0``, ``atom_1``) through a fixed
    element table, and all five are cast to ``int64``. ``molecule_name`` is
    converted to a categorical, and every categorical column is then replaced
    by its category codes. The input frame is left untouched; the converted
    copy is returned.
    """
    atom_codes = {
        'H': 1, 'C': 6, 'N': 7, 'O': 8, 'F': 9,
    }
    coupling_codes = {
        '1JHC': 1, '1JHN': 2, '2JHH': 3, '2JHC': 4, '2JHN': 5, '3JHH': 6, '3JHC': 7, '3JHN': 8,
    }
    atom_columns = ['atom_x', 'atom_y', 'atom_0', 'atom_1']

    replacements = {'type': coupling_codes}
    for column in atom_columns:
        replacements[column] = atom_codes
    df = df.replace(replacements)

    for column in ['type'] + atom_columns:
        df[column] = df[column].astype('int64')

    df['molecule_name'] = df['molecule_name'].astype('category')
    categorical_columns = df.select_dtypes(['category']).columns
    df[categorical_columns] = df[categorical_columns].apply(lambda column: column.cat.codes)

    return df

## Column Drop Test

In [6]:
def col_dropping(df):
    """Strip the columns that are not used as model features.

    ``molecule_name`` and ``atom_n`` must be present and are always removed.
    The sorted-distance columns ``d10``..``d28`` and type columns
    ``t10``..``t28`` (each with ``_x`` and ``_y`` suffix), as well as ``type``,
    ``atom_0``, ``atom_1``, ``eem_x``, ``eem_y``, ``X``, ``Y`` and ``Z``, are
    removed when present.
    """
    df = df.drop(['molecule_name', 'atom_n'], axis=1)

    ranks = range(10, 29)
    sides = ('x', 'y')

    # Drop distances
    distance_columns = [f'd{rank}_{side}' for side in sides for rank in ranks]
    df = drop_col(df, distance_columns)

    # Drop types
    type_columns = [f't{rank}_{side}' for side in sides for rank in ranks]
    df = drop_col(df, type_columns)

    return drop_col(df, ['type', 'atom_0', 'atom_1', 'eem_x', 'eem_y', 'X', 'Y', 'Z'])

## Features Master Organizer

In [7]:
def feature_master(coupling_type):
    """Build the train and test feature frames for one coupling type.

    For each of ``./input/train.csv`` and ``./input/test.csv`` the rows of
    ``coupling_type`` are selected and passed through ``generate_angles``,
    ``loader``, ``convert_df`` and ``col_dropping``; the first 25 rows of each
    result are printed. Returns ``(df_train, df_test)``.
    """
    def build_features(pairs_csv, train_or_test):
        # The structures file is read anew for every split, so both runs
        # start from the file's contents.
        df_structures = pd.read_csv('./input/structures.csv')

        frame = pd.read_csv(pairs_csv)
        frame = select_wanted(frame, coupling_type)
        frame = generate_angles(frame, df_structures)
        frame = loader(frame, train_or_test)
        frame = convert_df(frame)
        frame = col_dropping(frame)
        print(frame.head(25))
        return frame

    # Generate train, then test, for the current coupling type
    df_train = build_features('./input/train.csv', 'train')
    df_test = build_features('./input/test.csv', 'test')

    return df_train, df_test

## Split Training and Validation

In [8]:
def split(df_train):
    """Split ``df_train`` 90/10 into training and validation parts.

    The split uses ``train_test_split`` with ``random_state=0``. For each part
    the ``scalar_coupling_constant`` column is separated as a one-column
    target frame (infinite values replaced by NaN); features and targets both
    get a fresh 0..n-1 index. The heads of all four frames are printed.

    Returns ``(train_X, train_y, validation_X, validation_y)``.
    """
    def separate(part):
        # reset_index() stores the old row labels in an 'index' column, which
        # is removed again from both the features and the target.
        part = part.reset_index()
        target = part['scalar_coupling_constant'].replace([np.inf, -np.inf], np.nan)
        target = target.reset_index().drop(['index'], axis = 1)
        features = part.drop('scalar_coupling_constant', axis = 1)
        features = features.drop(['index'], axis = 1)
        return features, target

    train_part, validation_part = train_test_split(df_train, test_size = 0.1, random_state = 0)

    train_X, train_y = separate(train_part)
    validation_X, validation_y = separate(validation_part)

    for frame in (train_X, train_y, validation_X, validation_y):
        print(frame.head())

    return train_X, train_y, validation_X, validation_y

## XGB Training

In [None]:
def xgb_train(coupling_type, train_X, train_y, validation_X, validation_y):
    """Fit an XGBoost regressor for one coupling type and score it.

    The model is trained with MAE on the validation set as the evaluation
    metric and stops after 32 rounds without improvement. It is saved under a
    path derived from ``coupling_type``. The score is the natural log of the
    validation mean absolute error, so lower is better.

    Returns ``(xgb_model, model_name_wrt, cv_score)``.
    """
    model_name_wrt = './featurebook/xgb_30.07/featurebook_%s.hdf5' % coupling_type

    hyper_params = dict(
        base_score=0.5, booster='gbtree', colsample_bylevel=1,
        colsample_bytree=0.5, gamma=0, importance_type='gain',
        learning_rate=0.02, max_delta_step=0, max_depth=9,
        min_child_weight=5, missing=None, n_estimators=10000, n_jobs=-1,
        nthread=None, objective='reg:squarederror', random_state=101, reg_alpha=0.5,
        reg_lambda=0.2, scale_pos_weight=1, seed=None, silent=False,
        subsample=1,
    )
    xgb_model = xgb.XGBRegressor(**hyper_params)

    xgb_model.fit(
        train_X, train_y,
        eval_set=[(validation_X, validation_y)],
        eval_metric='mae',
        early_stopping_rounds=32,
        verbose=True,
    )
    xgb_model.save_model(model_name_wrt)

    # validation_y is a one-column frame; compare its only column with the predictions
    cv_predict = xgb_model.predict(validation_X)
    validation_target = validation_y.iloc[:, 0]
    mean_abs_error = np.mean(np.abs(validation_target - cv_predict))
    cv_score = np.log(mean_abs_error)

    print(validation_target.shape, cv_predict.shape)
    print(f"cv score for {coupling_type} is {cv_score}\n")
    print(train_X.columns)
    print(xgb_model.feature_importances_)

    return xgb_model, model_name_wrt, cv_score

In [None]:
def importance(xgb_model, train_X):
    """Print one ``(importance, feature_name)`` tuple per feature, highest first.

    Every column of ``train_X`` is listed exactly once. Features sharing the
    same importance value (e.g. several at 0.0) are all printed, in their
    column order, and the lowest-ranked feature is included.

    Parameters
    ----------
    xgb_model : object exposing ``feature_importances_``
    train_X : frame whose ``columns`` name the features, in model order

    Raises
    ------
    ValueError
        If the number of importances differs from the number of columns.
    """
    import operator

    input_features = train_X.columns.values
    feat_imp = xgb_model.feature_importances_

    if len(feat_imp) != len(input_features):
        raise ValueError(
            f"got {len(feat_imp)} importances for {len(input_features)} features")

    # Sort (importance, name) pairs rather than keying a dict by importance,
    # which would collapse features with equal values into one entry.
    ranked = sorted(zip(feat_imp, input_features),
                    key=operator.itemgetter(0), reverse=True)

    for entry in ranked:
        print(entry)

In [None]:
def make_prediction(xgb_model, model_name, coupling_type, df_submission, df_test):
    """Predict for ``df_test`` and write the values into ``df_submission`` by id.

    Each prediction is written to the submission row with the same ``id``,
    regardless of row order; submission rows whose id is not in ``df_test``
    keep their current value. ``df_submission`` is updated in place and also
    returned. ``model_name`` and ``coupling_type`` are accepted for interface
    compatibility but are not used.

    ``df_test`` must contain an ``id`` column, and the whole frame (including
    ``id``) is passed to ``xgb_model.predict``.
    NOTE(review): confirm the frames produced upstream still carry ``id`` and
    that the model was trained with the same columns.
    """
    test_predict = list(xgb_model.predict(df_test))

    new_results = df_test[['id']].copy()
    new_results.insert(1, "scalar_coupling_constant", test_predict, True)
    print(new_results.head(15))

    # Look predictions up by id instead of relying on both frames listing the
    # ids in the same order (if an id repeats, its last prediction wins).
    prediction_by_id = dict(zip(new_results['id'], new_results['scalar_coupling_constant']))
    matched = df_submission['id'].isin(new_results['id'])
    df_submission.loc[matched, 'scalar_coupling_constant'] = (
        df_submission.loc[matched, 'id'].map(prediction_by_id).values)
    print(df_submission.head(25))

    return df_submission

## Master Cell

In [None]:
# Submission file that is read, updated for one coupling type, and written
# back in place whenever a model qualifies (see the loop below).
csv_file = './submissions/submission_31-07-2019.csv'

# Per-type score thresholds. A freshly trained model only overwrites the
# submission when its cv_score (log of validation MAE, lower is better) is
# below the value stored here.
# NOTE(review): presumably the best scores from earlier runs — confirm.
best_accuracy_scores = {
    "1JHC": -0.01916, "1JHN": -1.0809040175903528, "2JHH": -1.27420, "2JHC": -0.685838,
    "2JHN": -1.12772, "3JHH": -0.11169, "3JHC": 0.079127, "3JHN": -1.103490,
}

# One full feature-build / train / (optional) predict cycle per coupling type.
for coupling_type in coupling_types:
    # Get training and test dataframes
    df_train, df_test = feature_master(coupling_type)

    # Split into train and validation
    train_X, train_y, validation_X, validation_y = split(df_train)
    memory_optimization([df_train])

    # Train
    xgb_model, model_name, cv_score = xgb_train(coupling_type, train_X, train_y, validation_X, validation_y)

    # Feature importance
    importance(xgb_model, train_X)
    memory_optimization([train_X, train_y, validation_X, validation_y])

    # Make prediction (only when this run beats the stored threshold)
    if cv_score < best_accuracy_scores[coupling_type]:
        # Load submission file
        df_submission = pd.read_csv(csv_file)

        # Make submission
        df_submission_temp = make_prediction(xgb_model, model_name, coupling_type, df_submission, df_test)
        df_submission = df_submission_temp

        # Save submission file
        print(f'Saving new results to submission file: {csv_file}')
        df_submission.to_csv(csv_file, index=False)
        df_submission = None

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.




    scalar_coupling_constant  atom_n  distance  distance_closest_0  \
0                    32.6889       3  1.017190            1.017190   
1                    32.6891       3  1.017187            1.017187   
2                    32.6905       3  1.017208            1.017208   
3                    55.5252       5  1.007511            1.007511   
4                    54.7359       5  1.004933            1.004933   
5                    54.0640       8  1.004771            1.004771   
6                    56.1860       8  1.006952            1.006952   
7                    37.7190       7  1.014969            1.014969   
8                    38.3495       7  1.015277            1.015277   
9                    59.3010       9  1.004448            1.004448   
10                   57.3979       8  1.005982            1.005982   
11                   58.1002       7  1.006813            1.006813   
12                   57.2400       7  1.004746            1.004746   
13                  

    atom_n  distance  distance_closest_0  max_distance_x  distance_closest_1  \
0        7  1.009036            1.009036        1.009036            1.008709   
1        7  1.008709            1.008709        1.008709            1.008709   
2        7  1.009039            1.009039        1.009039            1.008711   
3        7  1.008711            1.008711        1.008711            1.008711   
4        8  1.006898            1.006898        1.006898            1.006898   
5        7  1.007077            1.007077        1.007077            1.005058   
6        7  1.005058            1.005058        1.005058            1.005058   
7       11  1.004926            1.004926        1.004926            1.004926   
8       11  1.006899            1.006899        1.006899            1.004926   
9       11  1.005559            1.005559        1.005559            1.005559   
10       9  1.005768            1.005768        1.005768            1.005248   
11       9  1.005248            1.005248

[0]	validation_0-mae:42.0936
Will train until validation_0-mae hasn't improved in 32 rounds.
[1]	validation_0-mae:37.8777
[2]	validation_0-mae:34.0882
[3]	validation_0-mae:30.6749
[4]	validation_0-mae:27.6041
[5]	validation_0-mae:24.8402
[6]	validation_0-mae:22.3534
[7]	validation_0-mae:20.1164
[8]	validation_0-mae:18.1012
[9]	validation_0-mae:16.289
[10]	validation_0-mae:14.66
[11]	validation_0-mae:13.1934
[12]	validation_0-mae:11.8747
[13]	validation_0-mae:10.6854
[14]	validation_0-mae:9.61572
[15]	validation_0-mae:8.65407
[16]	validation_0-mae:7.78869
[17]	validation_0-mae:7.00977
[18]	validation_0-mae:6.30799
[19]	validation_0-mae:5.67629
[20]	validation_0-mae:5.10708
[21]	validation_0-mae:4.59711
[22]	validation_0-mae:4.13832
[23]	validation_0-mae:3.72548
[24]	validation_0-mae:3.35533
[25]	validation_0-mae:3.02259
[26]	validation_0-mae:2.72363
[27]	validation_0-mae:2.4544
[28]	validation_0-mae:2.21413
[29]	validation_0-mae:2.00023
[30]	validation_0-mae:1.80931
[31]	validation_0-ma

[260]	validation_0-mae:0.372349
[261]	validation_0-mae:0.372324
[262]	validation_0-mae:0.37228
[263]	validation_0-mae:0.372192
[264]	validation_0-mae:0.372015
[265]	validation_0-mae:0.371791
[266]	validation_0-mae:0.371728
[267]	validation_0-mae:0.371653
[268]	validation_0-mae:0.371481
[269]	validation_0-mae:0.371349
[270]	validation_0-mae:0.371195
[271]	validation_0-mae:0.371169
[272]	validation_0-mae:0.371022
[273]	validation_0-mae:0.370926
[274]	validation_0-mae:0.370764
[275]	validation_0-mae:0.370654
[276]	validation_0-mae:0.37057
[277]	validation_0-mae:0.370513
[278]	validation_0-mae:0.370294
[279]	validation_0-mae:0.370157
[280]	validation_0-mae:0.370051
[281]	validation_0-mae:0.369896
[282]	validation_0-mae:0.369641
[283]	validation_0-mae:0.369235
[284]	validation_0-mae:0.369183
[285]	validation_0-mae:0.369111
[286]	validation_0-mae:0.368989
[287]	validation_0-mae:0.368817
[288]	validation_0-mae:0.368681
[289]	validation_0-mae:0.368508
[290]	validation_0-mae:0.368235
[291]	vali

[517]	validation_0-mae:0.351077
[518]	validation_0-mae:0.351013
[519]	validation_0-mae:0.350992
[520]	validation_0-mae:0.350965
[521]	validation_0-mae:0.350947
[522]	validation_0-mae:0.350917
[523]	validation_0-mae:0.35084
[524]	validation_0-mae:0.350814
[525]	validation_0-mae:0.350774
[526]	validation_0-mae:0.350741
[527]	validation_0-mae:0.35071
[528]	validation_0-mae:0.350678
[529]	validation_0-mae:0.350627
[530]	validation_0-mae:0.350563
[531]	validation_0-mae:0.350537
[532]	validation_0-mae:0.350498
[533]	validation_0-mae:0.350431
[534]	validation_0-mae:0.35038
[535]	validation_0-mae:0.350337
[536]	validation_0-mae:0.3503
[537]	validation_0-mae:0.35024
[538]	validation_0-mae:0.350203
[539]	validation_0-mae:0.350164
[540]	validation_0-mae:0.350098
[541]	validation_0-mae:0.350048
[542]	validation_0-mae:0.350018
[543]	validation_0-mae:0.349936
[544]	validation_0-mae:0.349892
[545]	validation_0-mae:0.349858
[546]	validation_0-mae:0.349806
[547]	validation_0-mae:0.349771
[548]	validati

[774]	validation_0-mae:0.344226
[775]	validation_0-mae:0.344221
[776]	validation_0-mae:0.344212
[777]	validation_0-mae:0.344204
[778]	validation_0-mae:0.344171
[779]	validation_0-mae:0.344161
[780]	validation_0-mae:0.344141
[781]	validation_0-mae:0.34413
[782]	validation_0-mae:0.344115
[783]	validation_0-mae:0.344083
[784]	validation_0-mae:0.344075
[785]	validation_0-mae:0.344073
[786]	validation_0-mae:0.344053
[787]	validation_0-mae:0.344042
[788]	validation_0-mae:0.344035
[789]	validation_0-mae:0.344012
[790]	validation_0-mae:0.344001
[791]	validation_0-mae:0.34398
[792]	validation_0-mae:0.343964
[793]	validation_0-mae:0.34395
[794]	validation_0-mae:0.34392
[795]	validation_0-mae:0.343905
[796]	validation_0-mae:0.343912
[797]	validation_0-mae:0.343883
[798]	validation_0-mae:0.343885
[799]	validation_0-mae:0.343863
[800]	validation_0-mae:0.343851
[801]	validation_0-mae:0.343846
[802]	validation_0-mae:0.343835
[803]	validation_0-mae:0.343822
[804]	validation_0-mae:0.343813
[805]	valida

[1030]	validation_0-mae:0.34137
[1031]	validation_0-mae:0.34136
[1032]	validation_0-mae:0.341345
[1033]	validation_0-mae:0.341352
[1034]	validation_0-mae:0.341353
[1035]	validation_0-mae:0.341348
[1036]	validation_0-mae:0.341329
[1037]	validation_0-mae:0.341321
[1038]	validation_0-mae:0.341323
[1039]	validation_0-mae:0.341306
[1040]	validation_0-mae:0.341296
[1041]	validation_0-mae:0.34128
[1042]	validation_0-mae:0.341263
[1043]	validation_0-mae:0.341257
[1044]	validation_0-mae:0.341244
[1045]	validation_0-mae:0.341239
[1046]	validation_0-mae:0.34123
[1047]	validation_0-mae:0.341218
[1048]	validation_0-mae:0.341213
[1049]	validation_0-mae:0.3412
[1050]	validation_0-mae:0.341195
[1051]	validation_0-mae:0.341188
[1052]	validation_0-mae:0.341188
[1053]	validation_0-mae:0.341178
[1054]	validation_0-mae:0.341176
[1055]	validation_0-mae:0.34117
[1056]	validation_0-mae:0.341158
[1057]	validation_0-mae:0.341151
[1058]	validation_0-mae:0.34115
[1059]	validation_0-mae:0.341139
[1060]	validation_

[1280]	validation_0-mae:0.339863
[1281]	validation_0-mae:0.339866
[1282]	validation_0-mae:0.339861
[1283]	validation_0-mae:0.339858
[1284]	validation_0-mae:0.339848
[1285]	validation_0-mae:0.339843
[1286]	validation_0-mae:0.339835
[1287]	validation_0-mae:0.339837
[1288]	validation_0-mae:0.339835
[1289]	validation_0-mae:0.339823
[1290]	validation_0-mae:0.339815
[1291]	validation_0-mae:0.339807
[1292]	validation_0-mae:0.339801
[1293]	validation_0-mae:0.339795
[1294]	validation_0-mae:0.339796
[1295]	validation_0-mae:0.339792
[1296]	validation_0-mae:0.339793
[1297]	validation_0-mae:0.33978
[1298]	validation_0-mae:0.339775
[1299]	validation_0-mae:0.339767
[1300]	validation_0-mae:0.339767
[1301]	validation_0-mae:0.339755
[1302]	validation_0-mae:0.33975
[1303]	validation_0-mae:0.339742
[1304]	validation_0-mae:0.339736
[1305]	validation_0-mae:0.339727
[1306]	validation_0-mae:0.339713
[1307]	validation_0-mae:0.339701
[1308]	validation_0-mae:0.339692
[1309]	validation_0-mae:0.339677
[1310]	valid

    scalar_coupling_constant  atom_n  distance  distance_closest_0  \
0                  -11.25700       4  1.783120            1.783120   
1                  -11.25480       4  1.783147            1.783120   
2                  -11.25430       4  1.783157            1.783120   
3                  -11.25410       4  1.783158            1.783120   
4                  -11.25480       4  1.783148            1.783120   
5                  -11.25430       4  1.783148            1.783147   
6                  -11.18660       3  1.618523            1.618523   
7                  -11.17570       3  1.618710            1.618523   
8                  -11.17580       3  1.618706            1.618523   
9                   -9.94641       2  1.513358            1.513358   
10                 -11.70050       7  1.765219            1.765219   
11                 -11.69790       7  1.765264            1.765219   
12                 -11.69960       7  1.765247            1.765219   
13                 -

    atom_n  distance  distance_closest_0  max_distance_x  distance_closest_1  \
0        8  1.782964            1.782964        1.785017            1.782964   
1        8  1.785017            1.782964        1.785017            1.785001   
2        8  1.785001            1.782964        1.785001            1.785001   
3        8  1.782964            1.782964        1.785018            1.782964   
4        8  1.785018            1.782964        1.785018            1.785001   
5        8  1.785001            1.782964        1.785001            1.785001   
6        8  1.818920            1.818920        1.818920            1.818920   
7        8  1.818967            1.818967        1.818967            1.818967   
8        8  1.818892            1.818892        1.818892            1.818892   
9        7  1.692657            1.692657        1.692657            1.692657   
10       7  1.692645            1.692645        1.692645            1.692645   
11      11  1.767873            1.767873

    index  atom_n  distance  distance_closest_0  max_distance_x  \
0  218064      19  1.761034            1.761034        1.785647   
1   47595      17  1.759979            1.759979        1.777035   
2  145368      16  1.773990            1.773990        1.773990   
3  239145      22  1.777398            1.777398        1.777398   
4   28068      16  1.634377            1.634377        1.634377   

   distance_closest_1  max_distance_y  distance_farthest_0  min_distance_x  \
0            1.761034        1.787341             1.785647        1.761034   
1            1.759979        1.771520             1.777035        1.759979   
2            1.773990        1.773990             1.773990        1.773990   
3            1.777398        1.777398             1.777398        1.777398   
4            1.634377        1.634377             1.634377        1.634377   

   distance_farthest_1  min_distance_y  distance_center0  distance_center1  \
0             1.787341        1.761034          3.

[0]	validation_0-mae:9.8914
Will train until validation_0-mae hasn't improved in 32 rounds.
[1]	validation_0-mae:8.90391
[2]	validation_0-mae:8.01449
[3]	validation_0-mae:7.21494
[4]	validation_0-mae:6.49448
[5]	validation_0-mae:5.84632
[6]	validation_0-mae:5.26302
[7]	validation_0-mae:4.73828
[8]	validation_0-mae:4.2663
[9]	validation_0-mae:3.84185
[10]	validation_0-mae:3.45991
[11]	validation_0-mae:3.11656
[12]	validation_0-mae:2.80797
[13]	validation_0-mae:2.53141
[14]	validation_0-mae:2.28253
[15]	validation_0-mae:2.06004
[16]	validation_0-mae:1.86067
[17]	validation_0-mae:1.68233
[18]	validation_0-mae:1.52337
[19]	validation_0-mae:1.38161
[20]	validation_0-mae:1.25568
[21]	validation_0-mae:1.14338
[22]	validation_0-mae:1.04348
[23]	validation_0-mae:0.95542
[24]	validation_0-mae:0.877431
[25]	validation_0-mae:0.808502
[26]	validation_0-mae:0.747848
[27]	validation_0-mae:0.694773
[28]	validation_0-mae:0.648536
[29]	validation_0-mae:0.608502
[30]	validation_0-mae:0.574212
[31]	valida

[259]	validation_0-mae:0.244588
[260]	validation_0-mae:0.244331
[261]	validation_0-mae:0.244166
[262]	validation_0-mae:0.244035
[263]	validation_0-mae:0.243949
[264]	validation_0-mae:0.243759
[265]	validation_0-mae:0.243664
[266]	validation_0-mae:0.243415
[267]	validation_0-mae:0.243193
[268]	validation_0-mae:0.242939
[269]	validation_0-mae:0.242881
[270]	validation_0-mae:0.242775
[271]	validation_0-mae:0.242719
[272]	validation_0-mae:0.242457
[273]	validation_0-mae:0.242308
[274]	validation_0-mae:0.242001
[275]	validation_0-mae:0.241857
[276]	validation_0-mae:0.241737
[277]	validation_0-mae:0.241446
[278]	validation_0-mae:0.241245
[279]	validation_0-mae:0.241104
[280]	validation_0-mae:0.241079
[281]	validation_0-mae:0.24101
[282]	validation_0-mae:0.240868
[283]	validation_0-mae:0.240742
[284]	validation_0-mae:0.240432
[285]	validation_0-mae:0.240012
[286]	validation_0-mae:0.239856
[287]	validation_0-mae:0.239581
[288]	validation_0-mae:0.239412
[289]	validation_0-mae:0.239248
[290]	val

[516]	validation_0-mae:0.215567
[517]	validation_0-mae:0.21549
[518]	validation_0-mae:0.215459
[519]	validation_0-mae:0.215412
[520]	validation_0-mae:0.215361
[521]	validation_0-mae:0.215288
[522]	validation_0-mae:0.21524
[523]	validation_0-mae:0.215206
[524]	validation_0-mae:0.215159
[525]	validation_0-mae:0.215098
[526]	validation_0-mae:0.215056
[527]	validation_0-mae:0.21496
[528]	validation_0-mae:0.214912
[529]	validation_0-mae:0.214835
[530]	validation_0-mae:0.214785
[531]	validation_0-mae:0.214739
[532]	validation_0-mae:0.214711
[533]	validation_0-mae:0.214675
[534]	validation_0-mae:0.214636
[535]	validation_0-mae:0.214579
[536]	validation_0-mae:0.214566
[537]	validation_0-mae:0.214485
[538]	validation_0-mae:0.214381
[539]	validation_0-mae:0.214358
[540]	validation_0-mae:0.214303
[541]	validation_0-mae:0.214167
[542]	validation_0-mae:0.2141
[543]	validation_0-mae:0.213986
[544]	validation_0-mae:0.213926
[545]	validation_0-mae:0.21386
[546]	validation_0-mae:0.213843
[547]	validati

[773]	validation_0-mae:0.203341
[774]	validation_0-mae:0.203324
[775]	validation_0-mae:0.203304
[776]	validation_0-mae:0.20329
[777]	validation_0-mae:0.203263
[778]	validation_0-mae:0.203235
[779]	validation_0-mae:0.203212
[780]	validation_0-mae:0.203128
[781]	validation_0-mae:0.203085
[782]	validation_0-mae:0.20308
[783]	validation_0-mae:0.203043
[784]	validation_0-mae:0.202994
[785]	validation_0-mae:0.202961
[786]	validation_0-mae:0.202894
[787]	validation_0-mae:0.202852
[788]	validation_0-mae:0.202819
[789]	validation_0-mae:0.202813
[790]	validation_0-mae:0.202809
[791]	validation_0-mae:0.202788
[792]	validation_0-mae:0.202774
[793]	validation_0-mae:0.202721
[794]	validation_0-mae:0.202682
[795]	validation_0-mae:0.202652
[796]	validation_0-mae:0.2026
[797]	validation_0-mae:0.202579
[798]	validation_0-mae:0.202544
[799]	validation_0-mae:0.202516
[800]	validation_0-mae:0.20246
[801]	validation_0-mae:0.202357
[802]	validation_0-mae:0.202305
[803]	validation_0-mae:0.202277
[804]	validat

[1030]	validation_0-mae:0.195743
[1031]	validation_0-mae:0.195738
[1032]	validation_0-mae:0.195729
[1033]	validation_0-mae:0.195721
[1034]	validation_0-mae:0.19571
[1035]	validation_0-mae:0.195684
[1036]	validation_0-mae:0.195671
[1037]	validation_0-mae:0.195635
[1038]	validation_0-mae:0.195619
[1039]	validation_0-mae:0.195615
[1040]	validation_0-mae:0.195605
[1041]	validation_0-mae:0.195577
[1042]	validation_0-mae:0.195535
[1043]	validation_0-mae:0.195519
[1044]	validation_0-mae:0.1955
[1045]	validation_0-mae:0.195484
[1046]	validation_0-mae:0.19547
[1047]	validation_0-mae:0.195424
[1048]	validation_0-mae:0.195397
[1049]	validation_0-mae:0.195376
[1050]	validation_0-mae:0.195364
[1051]	validation_0-mae:0.195355
[1052]	validation_0-mae:0.195339
[1053]	validation_0-mae:0.195321
[1054]	validation_0-mae:0.195308
[1055]	validation_0-mae:0.195296
[1056]	validation_0-mae:0.195258
[1057]	validation_0-mae:0.195208
[1058]	validation_0-mae:0.195199
[1059]	validation_0-mae:0.195187
[1060]	validat

[1279]	validation_0-mae:0.191099
[1280]	validation_0-mae:0.191096
[1281]	validation_0-mae:0.19107
[1282]	validation_0-mae:0.191061
[1283]	validation_0-mae:0.191052
[1284]	validation_0-mae:0.191001
[1285]	validation_0-mae:0.190974
[1286]	validation_0-mae:0.190963
[1287]	validation_0-mae:0.190955
[1288]	validation_0-mae:0.190945
[1289]	validation_0-mae:0.190937
[1290]	validation_0-mae:0.190934
[1291]	validation_0-mae:0.190923
[1292]	validation_0-mae:0.19091
[1293]	validation_0-mae:0.190906
[1294]	validation_0-mae:0.190899
[1295]	validation_0-mae:0.190884
[1296]	validation_0-mae:0.190867
[1297]	validation_0-mae:0.190862
[1298]	validation_0-mae:0.190859
[1299]	validation_0-mae:0.19083
[1300]	validation_0-mae:0.190813
[1301]	validation_0-mae:0.190808
[1302]	validation_0-mae:0.190795
[1303]	validation_0-mae:0.190791
[1304]	validation_0-mae:0.190785
[1305]	validation_0-mae:0.190768
[1306]	validation_0-mae:0.190767
[1307]	validation_0-mae:0.190764
[1308]	validation_0-mae:0.190739
[1309]	valida

[1529]	validation_0-mae:0.187646
[1530]	validation_0-mae:0.187631
[1531]	validation_0-mae:0.187629
[1532]	validation_0-mae:0.187622
[1533]	validation_0-mae:0.187606
[1534]	validation_0-mae:0.187594
[1535]	validation_0-mae:0.187585
[1536]	validation_0-mae:0.18757
[1537]	validation_0-mae:0.187536
[1538]	validation_0-mae:0.187527
[1539]	validation_0-mae:0.187513
[1540]	validation_0-mae:0.18751
[1541]	validation_0-mae:0.187481
[1542]	validation_0-mae:0.187463
[1543]	validation_0-mae:0.18744
[1544]	validation_0-mae:0.187435
[1545]	validation_0-mae:0.187414
[1546]	validation_0-mae:0.187402
[1547]	validation_0-mae:0.187397
[1548]	validation_0-mae:0.187386
[1549]	validation_0-mae:0.187379
[1550]	validation_0-mae:0.187377
[1551]	validation_0-mae:0.187372
[1552]	validation_0-mae:0.187365
[1553]	validation_0-mae:0.187351
[1554]	validation_0-mae:0.187339
[1555]	validation_0-mae:0.187328
[1556]	validation_0-mae:0.187324
[1557]	validation_0-mae:0.187311
[1558]	validation_0-mae:0.1873
[1559]	validati

[1779]	validation_0-mae:0.185075
[1780]	validation_0-mae:0.185069
[1781]	validation_0-mae:0.185065
[1782]	validation_0-mae:0.185061
[1783]	validation_0-mae:0.185045
[1784]	validation_0-mae:0.185039
[1785]	validation_0-mae:0.18503
[1786]	validation_0-mae:0.185026
[1787]	validation_0-mae:0.18502
[1788]	validation_0-mae:0.185011
[1789]	validation_0-mae:0.185004
[1790]	validation_0-mae:0.184999
[1791]	validation_0-mae:0.184985
[1792]	validation_0-mae:0.184983
[1793]	validation_0-mae:0.184978
[1794]	validation_0-mae:0.184974
[1795]	validation_0-mae:0.184971
[1796]	validation_0-mae:0.184966
[1797]	validation_0-mae:0.184958
[1798]	validation_0-mae:0.184953
[1799]	validation_0-mae:0.184944
[1800]	validation_0-mae:0.184939
[1801]	validation_0-mae:0.184927
[1802]	validation_0-mae:0.184911
[1803]	validation_0-mae:0.184905
[1804]	validation_0-mae:0.184894
[1805]	validation_0-mae:0.184875
[1806]	validation_0-mae:0.184862
[1807]	validation_0-mae:0.184864
[1808]	validation_0-mae:0.184856
[1809]	valid

[2029]	validation_0-mae:0.183129
[2030]	validation_0-mae:0.183124
[2031]	validation_0-mae:0.183118
[2032]	validation_0-mae:0.183105
[2033]	validation_0-mae:0.183098
[2034]	validation_0-mae:0.183092
[2035]	validation_0-mae:0.183084
[2036]	validation_0-mae:0.183076
[2037]	validation_0-mae:0.18307
[2038]	validation_0-mae:0.183067
[2039]	validation_0-mae:0.18306
[2040]	validation_0-mae:0.183053
[2041]	validation_0-mae:0.183048
[2042]	validation_0-mae:0.183037
[2043]	validation_0-mae:0.183032
[2044]	validation_0-mae:0.183013
[2045]	validation_0-mae:0.183007
[2046]	validation_0-mae:0.183
[2047]	validation_0-mae:0.182994
[2048]	validation_0-mae:0.182991
[2049]	validation_0-mae:0.182988
[2050]	validation_0-mae:0.182985
[2051]	validation_0-mae:0.182979
[2052]	validation_0-mae:0.182978
[2053]	validation_0-mae:0.182969
[2054]	validation_0-mae:0.182963
[2055]	validation_0-mae:0.182961
[2056]	validation_0-mae:0.182946
[2057]	validation_0-mae:0.182943
[2058]	validation_0-mae:0.182936
[2059]	validati

[2279]	validation_0-mae:0.181471
[2280]	validation_0-mae:0.181468
[2281]	validation_0-mae:0.181468
[2282]	validation_0-mae:0.181464
[2283]	validation_0-mae:0.181457
[2284]	validation_0-mae:0.181449
[2285]	validation_0-mae:0.181443
[2286]	validation_0-mae:0.181438
[2287]	validation_0-mae:0.181434
[2288]	validation_0-mae:0.181423
[2289]	validation_0-mae:0.181422
[2290]	validation_0-mae:0.18142
[2291]	validation_0-mae:0.181415
[2292]	validation_0-mae:0.181408
[2293]	validation_0-mae:0.181407
[2294]	validation_0-mae:0.1814
[2295]	validation_0-mae:0.1814
[2296]	validation_0-mae:0.181395
[2297]	validation_0-mae:0.181391
[2298]	validation_0-mae:0.181386
[2299]	validation_0-mae:0.181374
[2300]	validation_0-mae:0.18136
[2301]	validation_0-mae:0.181347
[2302]	validation_0-mae:0.181342
[2303]	validation_0-mae:0.181338
[2304]	validation_0-mae:0.181335
[2305]	validation_0-mae:0.181334
[2306]	validation_0-mae:0.181331
[2307]	validation_0-mae:0.181322
[2308]	validation_0-mae:0.181318
[2309]	validatio

[2529]	validation_0-mae:0.180302
[2530]	validation_0-mae:0.180301
[2531]	validation_0-mae:0.180297
[2532]	validation_0-mae:0.180293
[2533]	validation_0-mae:0.180292
[2534]	validation_0-mae:0.180293
[2535]	validation_0-mae:0.180292
[2536]	validation_0-mae:0.180286
[2537]	validation_0-mae:0.18028
[2538]	validation_0-mae:0.180278
[2539]	validation_0-mae:0.180276
[2540]	validation_0-mae:0.180274
[2541]	validation_0-mae:0.180271
[2542]	validation_0-mae:0.180267
[2543]	validation_0-mae:0.180259
[2544]	validation_0-mae:0.180258
[2545]	validation_0-mae:0.180253
[2546]	validation_0-mae:0.180245
[2547]	validation_0-mae:0.180241
[2548]	validation_0-mae:0.180239
[2549]	validation_0-mae:0.180237
[2550]	validation_0-mae:0.180236
[2551]	validation_0-mae:0.18023
[2552]	validation_0-mae:0.180227
[2553]	validation_0-mae:0.180224
[2554]	validation_0-mae:0.18022
[2555]	validation_0-mae:0.180215
[2556]	validation_0-mae:0.180216
[2557]	validation_0-mae:0.180212
[2558]	validation_0-mae:0.180205
[2559]	valida

[2779]	validation_0-mae:0.179381
[2780]	validation_0-mae:0.179379
[2781]	validation_0-mae:0.179376
[2782]	validation_0-mae:0.179376
[2783]	validation_0-mae:0.179373
[2784]	validation_0-mae:0.179365
[2785]	validation_0-mae:0.179365
[2786]	validation_0-mae:0.179363
[2787]	validation_0-mae:0.179362
[2788]	validation_0-mae:0.179359
[2789]	validation_0-mae:0.179357
[2790]	validation_0-mae:0.179356
[2791]	validation_0-mae:0.179354
[2792]	validation_0-mae:0.179351
[2793]	validation_0-mae:0.17935
[2794]	validation_0-mae:0.17935
[2795]	validation_0-mae:0.179349
[2796]	validation_0-mae:0.179345
[2797]	validation_0-mae:0.179338
[2798]	validation_0-mae:0.179329
[2799]	validation_0-mae:0.179326
[2800]	validation_0-mae:0.179319
[2801]	validation_0-mae:0.179314
[2802]	validation_0-mae:0.179311
[2803]	validation_0-mae:0.179309
[2804]	validation_0-mae:0.179308
[2805]	validation_0-mae:0.1793
[2806]	validation_0-mae:0.179295
[2807]	validation_0-mae:0.17929
[2808]	validation_0-mae:0.179288
[2809]	validati

[3029]	validation_0-mae:0.178486
[3030]	validation_0-mae:0.178482
[3031]	validation_0-mae:0.178479
[3032]	validation_0-mae:0.178476
[3033]	validation_0-mae:0.178475
[3034]	validation_0-mae:0.178475
[3035]	validation_0-mae:0.178469
[3036]	validation_0-mae:0.178467
[3037]	validation_0-mae:0.178463
[3038]	validation_0-mae:0.178456
[3039]	validation_0-mae:0.178456
[3040]	validation_0-mae:0.178453
[3041]	validation_0-mae:0.178448
[3042]	validation_0-mae:0.178445
[3043]	validation_0-mae:0.178443
[3044]	validation_0-mae:0.178443
[3045]	validation_0-mae:0.178438
[3046]	validation_0-mae:0.178435
[3047]	validation_0-mae:0.178432
[3048]	validation_0-mae:0.178427
[3049]	validation_0-mae:0.178424
[3050]	validation_0-mae:0.178416
[3051]	validation_0-mae:0.178408
[3052]	validation_0-mae:0.178405
[3053]	validation_0-mae:0.178403
[3054]	validation_0-mae:0.178399
[3055]	validation_0-mae:0.178394
[3056]	validation_0-mae:0.17839
[3057]	validation_0-mae:0.178389
[3058]	validation_0-mae:0.178387
[3059]	vali

[3278]	validation_0-mae:0.177738
[3279]	validation_0-mae:0.177735
[3280]	validation_0-mae:0.177728
[3281]	validation_0-mae:0.177724
[3282]	validation_0-mae:0.177722
[3283]	validation_0-mae:0.177721
[3284]	validation_0-mae:0.177719
[3285]	validation_0-mae:0.177714
[3286]	validation_0-mae:0.177713
[3287]	validation_0-mae:0.177711
[3288]	validation_0-mae:0.177708
[3289]	validation_0-mae:0.177707
[3290]	validation_0-mae:0.177703
[3291]	validation_0-mae:0.177698
[3292]	validation_0-mae:0.177692
[3293]	validation_0-mae:0.177688
[3294]	validation_0-mae:0.177686
[3295]	validation_0-mae:0.177685
[3296]	validation_0-mae:0.177685
[3297]	validation_0-mae:0.177676
[3298]	validation_0-mae:0.177671
[3299]	validation_0-mae:0.17767
[3300]	validation_0-mae:0.17767
[3301]	validation_0-mae:0.177666
[3302]	validation_0-mae:0.177663
[3303]	validation_0-mae:0.177659
[3304]	validation_0-mae:0.177656
[3305]	validation_0-mae:0.177651
[3306]	validation_0-mae:0.17765
[3307]	validation_0-mae:0.177648
[3308]	valida

[3528]	validation_0-mae:0.177075
[3529]	validation_0-mae:0.177072
[3530]	validation_0-mae:0.177071
[3531]	validation_0-mae:0.177066
[3532]	validation_0-mae:0.177064
[3533]	validation_0-mae:0.177062
[3534]	validation_0-mae:0.177057
[3535]	validation_0-mae:0.177055
[3536]	validation_0-mae:0.177053
[3537]	validation_0-mae:0.177052
[3538]	validation_0-mae:0.177051
[3539]	validation_0-mae:0.17705
[3540]	validation_0-mae:0.177044
[3541]	validation_0-mae:0.177044
[3542]	validation_0-mae:0.177044
[3543]	validation_0-mae:0.177041
[3544]	validation_0-mae:0.177039
[3545]	validation_0-mae:0.177035
[3546]	validation_0-mae:0.177032
[3547]	validation_0-mae:0.177029
[3548]	validation_0-mae:0.177028
[3549]	validation_0-mae:0.177022
[3550]	validation_0-mae:0.177019
[3551]	validation_0-mae:0.177017
[3552]	validation_0-mae:0.177014
[3553]	validation_0-mae:0.177011
[3554]	validation_0-mae:0.177011
[3555]	validation_0-mae:0.177007
[3556]	validation_0-mae:0.177006
[3557]	validation_0-mae:0.177004
[3558]	vali

[3778]	validation_0-mae:0.176504
[3779]	validation_0-mae:0.176502
[3780]	validation_0-mae:0.176496
[3781]	validation_0-mae:0.176493
[3782]	validation_0-mae:0.17649
[3783]	validation_0-mae:0.176485
[3784]	validation_0-mae:0.176482
[3785]	validation_0-mae:0.176479
[3786]	validation_0-mae:0.176479
[3787]	validation_0-mae:0.176477
[3788]	validation_0-mae:0.176474
[3789]	validation_0-mae:0.176472
[3790]	validation_0-mae:0.176469
[3791]	validation_0-mae:0.176465
[3792]	validation_0-mae:0.176464
[3793]	validation_0-mae:0.17646
[3794]	validation_0-mae:0.176459
[3795]	validation_0-mae:0.176456
[3796]	validation_0-mae:0.176453
[3797]	validation_0-mae:0.17645
[3798]	validation_0-mae:0.176448
[3799]	validation_0-mae:0.176448
[3800]	validation_0-mae:0.176445
[3801]	validation_0-mae:0.176444
[3802]	validation_0-mae:0.176444
[3803]	validation_0-mae:0.176442
[3804]	validation_0-mae:0.176438
[3805]	validation_0-mae:0.176436
[3806]	validation_0-mae:0.176435
[3807]	validation_0-mae:0.176434
[3808]	valida

In [None]:
# Load the existing submission CSV into df_submission.
df_submission = pd.read_csv(filepath_or_buffer='./submissions/submission_31-07-2019.csv')

In [None]:
#'''
# NOTE(review): the commented-out triple-quote lines at the top and bottom of
# this cell look like a manual switch for disabling the whole cell - confirm
# before removing them.

# Coupling types processed by this cell.
better_coupling_types = ["1JHN"]  # "2JHH" is currently left out

# Keyword arguments handed to every XGBRegressor built below; the values do
# not depend on the coupling type, so they are collected once up front.
xgb_params = dict(
    objective='reg:squarederror',
    booster='gbtree',
    base_score=0.5,
    learning_rate=0.1,
    n_estimators=10000,
    max_depth=9,  # was 9
    max_delta_step=0,
    min_child_weight=1,
    gamma=0,
    subsample=1,
    colsample_bytree=1,
    colsample_bylevel=1,
    reg_alpha=2,
    reg_lambda=0.2,
    scale_pos_weight=1,
    importance_type='gain',
    missing=None,
    n_jobs=-1,
    nthread=None,
    random_state=101,
    seed=None,
    silent=False,
)

for coupling_type in better_coupling_types:
    print(coupling_type)
    model_name = './featurebook/xgb_30.07/featurebook_%s.hdf5' % coupling_type

    # Build the estimator, then load the stored model file for this type.
    xgb_model = xgb.XGBRegressor(**xgb_params)
    xgb_model.load_model(model_name)

    # Build the train/test frames for this coupling type and split the
    # training frame into train and validation parts.
    df_train, df_test = feature_master(coupling_type)
    train_X, train_y, validation_X, validation_y = split(df_train)

    # Memory optimization
    memory_optimization([df_train, validation_X, validation_y])

    # The result of make_prediction replaces df_submission, so each iteration
    # receives the frame produced by the previous one.
    df_submission = df_submission_temp = make_prediction(
        xgb_model, model_name, coupling_type, df_submission, df_test
    )

# Write the final frame to the submission CSV without the index column.
df_submission.to_csv("./submissions/submission_31-07-2019.csv", index=False)
#'''