In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.metrics import log_loss
from sklearn.model_selection import StratifiedKFold
import gc
import os
import matplotlib.pyplot as plt
import seaborn as sns
import lightgbm as lgb
import itertools
import pickle, gzip
import glob
from sklearn.preprocessing import StandardScaler
from tsfresh.feature_extraction import extract_features
np.warnings.filterwarnings('ignore')
import dask.dataframe as dd
import missingno as msno
from pandasql import sqldf
from sklearn.metrics import mean_squared_error
from math import sqrt
from sklearn.model_selection import KFold
import matplotlib.gridspec as gridspec
from sklearn import preprocessing
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split

In [2]:
# Pin NumPy's global random state once, up front, so reruns draw the same numbers.
SEED = 51
np.random.seed(SEED)

In [3]:
# Lift pandas' display truncation: None removes the cap on shown columns and rows.
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)

In [4]:
%%time
train_metadata_kaggle = dd.read_csv('mydata_train_metadata.csv')
test_metadata_kaggle = dd.read_csv('mydata_test_metadata.csv')
train_metadata_kaggle = train_metadata_kaggle.compute()
test_metadata_kaggle = test_metadata_kaggle.compute()
print(train_metadata_kaggle.shape,test_metadata_kaggle.shape)

(7848, 139) (3492890, 138)
CPU times: user 3min 28s, sys: 13.5 s, total: 3min 41s
Wall time: 41 s


In [5]:
%%time
train_metadata = dd.read_csv('train_metadata_final.csv')
test_metadata = dd.read_csv('test_metadata_final.csv')
train_metadata = train_metadata.compute()
test_metadata = test_metadata.compute()
print(train_metadata.shape,test_metadata.shape)

(7848, 233) (3492890, 232)
CPU times: user 3min 51s, sys: 13 s, total: 4min 4s
Wall time: 46 s


In [6]:
%%time
# Give both test frames a fresh 0..n-1 index, discarding the old one (drop=True).
# NOTE(review): presumably needed because the dask-loaded frames carry a repeated,
# per-partition index that would break the object_id comparison below - confirm.
# The frames are rebound one at a time so only one extra copy exists at any moment.
test_metadata_kaggle = test_metadata_kaggle.reset_index(drop=True)
test_metadata = test_metadata.reset_index(drop=True)

CPU times: user 6.93 s, sys: 9.64 s, total: 16.6 s
Wall time: 16.6 s


In [7]:
# Check whether the two training frames hold identical object_id columns.
train_ids_match = train_metadata_kaggle['object_id'].equals(train_metadata['object_id'])
print(train_ids_match)

True


In [8]:
# Check whether the two test frames hold identical object_id columns.
test_ids_match = test_metadata_kaggle['object_id'].equals(test_metadata['object_id'])
print(test_ids_match)

True


In [9]:
# Columns to keep: the object id plus five flux statistics for each band A0..A5,
# listed band by band in the order max, min, mean, median, std.
temp_columns = ['object_id'] + [
    'A{}_{}_flux'.format(band, statistic)
    for band in range(6)
    for statistic in ('max', 'min', 'mean', 'median', 'std')
]

In [10]:
# Preview the first five rows of the training frame while it still has every column
# (the column pruning happens in the next cells).
train_metadata.head()

Unnamed: 0,object_id,ra,decl,gal_l,gal_b,ddf,hostgal_specz,hostgal_photoz,hostgal_photoz_err,distmod,mwebv,target,NG_min_flux_err,NG_max_flux_err,NG_std_flux_err,NG_sum_flux_err,NG_mean_flux_err,NG_median_flux_err,NG_min_flux,NG_max_flux,NG_std_flux,NG_sum_flux,NG_mean_flux,NG_median_flux,NG_count_detected,NG_std_detected,NG_sum_detected,NG_mean_detected,NG_median_detected,A0_min_flux_err,A0_max_flux_err,A0_std_flux_err,A0_sum_flux_err,A0_mean_flux_err,A0_median_flux_err,A0_min_flux,A0_max_flux,A0_std_flux,A0_sum_flux,A0_mean_flux,A0_median_flux,A0_count_detected,A0_std_detected,A0_sum_detected,A0_mean_detected,A0_median_detected,A1_min_flux_err,A1_max_flux_err,A1_std_flux_err,A1_sum_flux_err,A1_mean_flux_err,A1_median_flux_err,A1_min_flux,A1_max_flux,A1_std_flux,A1_sum_flux,A1_mean_flux,A1_median_flux,A1_count_detected,A1_std_detected,A1_sum_detected,A1_mean_detected,A1_median_detected,A2_min_flux_err,A2_max_flux_err,A2_std_flux_err,A2_sum_flux_err,A2_mean_flux_err,A2_median_flux_err,A2_min_flux,A2_max_flux,A2_std_flux,A2_sum_flux,A2_mean_flux,A2_median_flux,A2_count_detected,A2_std_detected,A2_sum_detected,A2_mean_detected,A2_median_detected,A3_min_flux_err,A3_max_flux_err,A3_std_flux_err,A3_sum_flux_err,A3_mean_flux_err,A3_median_flux_err,A3_min_flux,A3_max_flux,A3_std_flux,A3_sum_flux,A3_mean_flux,A3_median_flux,A3_count_detected,A3_std_detected,A3_sum_detected,A3_mean_detected,A3_median_detected,A4_min_flux_err,A4_max_flux_err,A4_std_flux_err,A4_sum_flux_err,A4_mean_flux_err,A4_median_flux_err,A4_min_flux,A4_max_flux,A4_std_flux,A4_sum_flux,A4_mean_flux,A4_median_flux,A4_count_detected,A4_std_detected,A4_sum_detected,A4_mean_detected,A4_median_detected,A5_min_flux_err,A5_max_flux_err,A5_std_flux_err,A5_sum_flux_err,A5_mean_flux_err,A5_median_flux_err,A5_min_flux,A5_max_flux,A5_std_flux,A5_sum_flux,A5_mean_flux,A5_median_flux,A5_count_detected,A5_std_detected,A5_sum_detected,A5_mean_detected,A5_median_detected,p0_region_minus_4,p0_region_minus_3,p0_region_minus_
2,p0_region_minus_1,p0_region_plus_1,p0_region_plus_2,p0_region_plus_3,p0_region_plus_4,p1_region_minus_4,p1_region_minus_3,p1_region_minus_2,p1_region_minus_1,p1_region_plus_1,p1_region_plus_2,p1_region_plus_3,p1_region_plus_4,p2_region_minus_4,p2_region_minus_3,p2_region_minus_2,p2_region_minus_1,p2_region_plus_1,p2_region_plus_2,p2_region_plus_3,p2_region_plus_4,p3_region_minus_4,p3_region_minus_3,p3_region_minus_2,p3_region_minus_1,p3_region_plus_1,p3_region_plus_2,p3_region_plus_3,p3_region_plus_4,p4_region_minus_4,p4_region_minus_3,p4_region_minus_2,p4_region_minus_1,p4_region_plus_1,p4_region_plus_2,p4_region_plus_3,p4_region_plus_4,p5_region_minus_4,p5_region_minus_3,p5_region_minus_2,p5_region_minus_1,p5_region_plus_1,p5_region_plus_2,p5_region_plus_3,p5_region_plus_4,number_of_0s,number_of_1s,number_of_2s,number_of_3s,number_of_4s,number_of_5s,percent_p0_region_minus_4,percent_p0_region_minus_3,percent_p0_region_minus_2,percent_p0_region_minus_1,percent_p0_region_plus_1,percent_p0_region_plus_2,percent_p0_region_plus_3,percent_p0_region_plus_4,percent_p1_region_minus_4,percent_p1_region_minus_3,percent_p1_region_minus_2,percent_p1_region_minus_1,percent_p1_region_plus_1,percent_p1_region_plus_2,percent_p1_region_plus_3,percent_p1_region_plus_4,percent_p2_region_minus_4,percent_p2_region_minus_3,percent_p2_region_minus_2,percent_p2_region_minus_1,percent_p2_region_plus_1,percent_p2_region_plus_2,percent_p2_region_plus_3,percent_p2_region_plus_4,percent_p3_region_minus_4,percent_p3_region_minus_3,percent_p3_region_minus_2,percent_p3_region_minus_1,percent_p3_region_plus_1,percent_p3_region_plus_2,percent_p3_region_plus_3,percent_p3_region_plus_4,percent_p4_region_minus_4,percent_p4_region_minus_3,percent_p4_region_minus_2,percent_p4_region_minus_1,percent_p4_region_plus_1,percent_p4_region_plus_2,percent_p4_region_plus_3,percent_p4_region_plus_4,percent_p5_region_minus_4,percent_p5_region_minus_3,percent_p5_region_minus_2,percent_p5_region_minus_1,percent_p5
_region_plus_1,percent_p5_region_plus_2,percent_p5_region_plus_3,percent_p5_region_plus_4
0,615,349.0,-61.94,320.8,-51.75,1,0.0,0.0,0.0,,0.017,92,2.13,12.845472,1.744747,1577.9254,4.482743,3.836,-1100.4401,660.62634,394.10986,-43330.145,-123.097,-89.5,352,0.2263,333.0,0.946,1.0,2.8442,4.737393,0.493621,240.87724,3.823448,3.86638,-116.91322,125.18281,83.94473,-205.03693,-3.254554,-10.015225,63,0.3528,54,0.857,1.0,3.035,6.953,1.168,291.6828,5.027,4.98,-1100.0,660.62634,601.7873,-22370.596,-385.8,-488.0,58,0.1841,56,0.9653,1.0,2.13,4.332,0.5796,196.0,3.38,3.389,-682.0,611.98456,455.12134,-7780.501,-134.14656,-265.8,58,0.1313,57,0.983,1.0,2.5,4.01,0.3474,193.72672,3.34,3.389,-530.5,445.73706,335.42505,-7024.003,-121.1035,-162.1,58,0.1313,57,0.983,1.0,2.9,11.4,1.089,220.32811,3.799,3.625,-422.1845,381.95374,291.80344,-3245.3665,-55.954594,-103.54137,58,0.1313,57,0.983,1.0,6.074,12.84,1.318,435.25317,7.637,7.31,-422.8151,378.18814,294.7795,-2704.6414,-47.44985,-85.52431,57,0.2854,52,0.912,1.0,,,16.0,17.0,13.0,17.0,,,,,11.0,22.0,13.0,12.0,,,,,12.0,21.0,10.0,15.0,,,,,14.0,17.0,13.0,14.0,,,,,14.0,18.0,12.0,14.0,,,,,17.0,13.0,12.0,15.0,,,63.0,58.0,58.0,58.0,58.0,57.0,,,0.254,0.2698,0.2063,0.2698,,,,,0.1897,0.3794,0.2241,0.2069,,,,,0.2069,0.362,0.1724,0.2585,,,,,0.2413,0.2932,0.2241,0.2413,,,,,0.2413,0.3103,0.2069,0.2413,,,,,0.2983,0.228,0.2106,0.2632,,
1,713,53.1,-27.78,223.5,-54.47,1,1.818,1.627,0.2551,45.4,0.007,88,0.6396,9.115748,1.509888,825.867,2.35962,1.998,-14.735178,14.770886,6.471144,-498.17276,-1.423351,-0.873,350,0.3774,60.0,0.1714,0.0,1.470152,3.348282,0.411563,163.19803,2.3314,2.273303,-14.735178,14.509829,7.113509,-190.42786,-2.720398,-3.096805,70,0.3525,10,0.1428,0.0,0.707,3.545,0.707,79.35102,1.417,1.115,-11.72,9.129021,5.712334,-57.109047,-1.02,-0.5615,56,0.4468,15,0.2678,0.0,0.6396,2.531,0.4397,66.8,1.193,1.0625,-10.07,10.529041,5.770738,-44.477325,-0.794238,-0.118,56,0.4468,15,0.2678,0.0,0.9683,2.97,0.427,91.848694,1.641,1.63,-12.4,11.330316,6.450413,-55.270115,-0.986966,-0.0739,56,0.4468,15,0.2678,0.0,1.43,3.555,0.5015,127.05581,2.27,2.217,-12.286801,9.827934,6.406989,-50.414646,-0.900261,-0.792176,56,0.2878,5,0.0893,0.0,3.523,9.12,1.1875,297.60904,5.312,5.082,-14.211164,14.770886,7.094073,-100.47377,-1.794175,-2.463012,56,0.0,0,0.0,0.0,,,13.0,22.0,23.0,11.0,1.0,,,,13.0,14.0,19.0,10.0,,,,,11.0,14.0,21.0,10.0,,,,,12.0,11.0,23.0,10.0,,,,,10.0,18.0,18.0,10.0,,,,,12.0,18.0,16.0,9.0,1.0,,70.0,56.0,56.0,56.0,56.0,56.0,,,0.1857,0.3142,0.3286,0.1571,0.01428,,,,0.2322,0.25,0.3394,0.1786,,,,,0.1964,0.25,0.375,0.1786,,,,,0.2142,0.1964,0.4106,0.1786,,,,,0.1786,0.3215,0.3215,0.1786,,,,,0.2142,0.3215,0.2856,0.1608,0.01785,
2,730,33.56,-6.58,170.5,-61.56,1,0.232,0.2262,0.0157,40.25,0.021,42,0.6953,11.281384,1.721134,815.45026,2.471062,1.991,-19.159811,47.31006,8.022239,748.25323,2.267434,0.4092,330,0.2551,23.0,0.0697,0.0,1.132809,3.110694,0.42808,148.17484,2.057984,2.020452,-3.45996,5.942166,1.828872,-3.46179,-0.04808,0.024093,72,0.0,0,0.0,0.0,0.6987,3.564,0.7837,75.164604,1.445,1.134,-3.393,5.693109,1.807229,7.334944,0.1411,0.1714,52,0.0,0,0.0,0.0,0.6953,2.484,0.4487,65.25,1.255,1.115,-2.85,20.99471,5.559483,124.84525,2.40087,0.4917,52,0.3447,7,0.1346,0.0,1.143,3.25,0.4758,92.66059,1.782,1.654,-5.438,33.5721,8.191987,168.28052,3.236164,0.6606,52,0.3447,7,0.1346,0.0,1.614,3.994,0.5444,127.618454,2.502,2.363,-5.83631,41.15998,10.710344,219.74513,4.308728,1.004354,51,0.3003,5,0.098,0.0,4.176,11.28,1.31,306.58023,6.01,5.71,-19.159811,47.31006,13.332758,231.50917,4.539396,2.542647,51,0.2715,4,0.0784,0.0,,,14.0,22.0,27.0,8.0,,1.0,,,6.0,19.0,19.0,7.0,,1.0,,,,40.0,6.0,2.0,1.0,3.0,,,1.0,38.0,9.0,1.0,,3.0,,,,39.0,7.0,1.0,1.0,3.0,,,4.0,28.0,14.0,1.0,2.0,2.0,72.0,52.0,52.0,52.0,51.0,51.0,,,0.1945,0.3057,0.375,0.1111,,0.013885,,,0.11536,0.3655,0.3655,0.1346,,0.01923,,,,0.769,0.11536,0.03845,0.01923,0.05768,,,0.01923,0.731,0.1731,0.01923,,0.05768,,,,0.7646,0.1372,0.0196,0.0196,0.05884,,,0.0784,0.549,0.2744,0.0196,0.0392,0.0392
3,745,0.1898,-45.6,328.2,-69.0,1,0.3037,0.2812,1.152,40.78,0.007,90,0.5674,55.892746,3.537324,897.007,2.555576,1.82,-15.494463,220.79521,27.558208,3127.1313,8.909205,1.036,351,0.3794,61.0,0.1738,0.0,0.957792,3.093587,0.520573,140.74007,1.954723,1.877306,-3.874349,18.014029,4.374445,129.42166,1.797523,1.056714,72,0.1655,2,0.02777,0.0,0.5674,3.586,0.7637,73.72757,1.316,1.024,-3.62,192.2443,25.964659,320.17404,5.72,0.888,56,0.3337,7,0.125,0.0,0.6045,2.334,0.4482,64.7,1.155,1.027,-2.16,220.79521,31.957998,543.84576,9.711532,0.4243,56,0.4558,16,0.2856,0.0,1.028,2.871,0.4255,93.37897,1.668,1.615,-4.945,203.2507,34.967697,807.1238,14.412925,1.361,56,0.4558,16,0.2856,0.0,1.588,24.75,3.03,156.04224,2.787,2.316,-15.494463,183.63312,33.069054,735.52844,13.134436,1.27015,56,0.426,13,0.2322,0.0,3.738,55.9,6.926,368.409,6.7,5.4,-10.249387,141.51329,26.06013,591.0376,10.746138,2.749555,55,0.3364,7,0.1273,0.0,,,6.0,38.0,21.0,2.0,3.0,2.0,,,,49.0,5.0,1.0,,1.0,,,,43.0,11.0,,1.0,1.0,,,,43.0,9.0,1.0,2.0,1.0,,,,43.0,8.0,1.0,3.0,1.0,,,,40.0,10.0,1.0,3.0,1.0,72.0,56.0,56.0,56.0,56.0,55.0,,,0.0833,0.528,0.2917,0.02777,0.04166,0.02777,,,,0.875,0.0893,0.01785,,0.01785,,,,0.768,0.1964,,0.01785,0.01785,,,,0.768,0.1608,0.01785,0.0357,0.01785,,,,0.768,0.1428,0.01785,0.05356,0.01785,,,,0.727,0.1818,0.01819,0.05453,0.01819
4,1124,352.8,-63.8,317.0,-51.06,1,0.1934,0.2415,0.0176,40.4,0.024,90,0.6953,11.38369,1.933838,969.0573,2.753004,2.215,-16.543753,143.60019,20.051722,2515.287,7.145702,1.142,352,0.3792,61.0,0.1733,0.0,1.208098,3.658313,0.566168,141.82178,2.251139,2.153805,-6.804703,5.330927,2.360085,41.63972,0.660948,0.581027,63,0.0,0,0.0,0.0,0.6953,3.73,0.88,98.694405,1.701,1.306,-2.623,37.170177,8.107525,268.80893,4.633,1.154,58,0.4207,13,0.2241,0.0,0.7373,2.63,0.509,82.6,1.424,1.259,-2.084,106.67169,21.319853,594.15015,10.243968,0.889,58,0.4668,18,0.3103,0.0,1.192,2.857,0.4622,111.78342,1.928,1.853,-2.8,139.8184,26.270649,643.0202,11.086555,1.014,58,0.451,16,0.276,0.0,1.848,10.42,1.133,162.31221,2.799,2.6,-16.543753,143.60019,26.865913,574.5539,9.906102,1.745012,58,0.381,10,0.1724,0.0,4.59,11.38,1.439,371.84573,6.523,6.03,-10.86054,109.157585,21.434628,393.11426,6.896741,1.973272,57,0.2578,4,0.0702,0.0,1.0,1.0,7.0,23.0,22.0,9.0,,,,,,40.0,12.0,3.0,1.0,2.0,,,,41.0,11.0,3.0,1.0,2.0,,,,44.0,9.0,2.0,1.0,2.0,,,,46.0,9.0,,1.0,2.0,,,,44.0,10.0,,1.0,2.0,63.0,58.0,58.0,58.0,58.0,57.0,0.01587,0.01587,0.1111,0.365,0.349,0.1428,,,,,,0.6895,0.2069,0.05173,0.01724,0.0345,,,,0.707,0.1897,0.05173,0.01724,0.0345,,,,0.759,0.1552,0.0345,0.01724,0.0345,,,,0.793,0.1552,,0.01724,0.0345,,,,0.772,0.1754,,0.01755,0.0351


In [11]:
# Prune the training frame in place: every column not named in temp_columns goes.
train_surplus_columns = train_metadata.columns[~train_metadata.columns.isin(temp_columns)]
train_metadata.drop(columns=train_surplus_columns, inplace=True)
# Reclaim the memory released by the dropped columns.
gc.collect()

511

In [12]:
# Prune the test frame in place: every column not named in temp_columns goes.
test_surplus_columns = test_metadata.columns[~test_metadata.columns.isin(temp_columns)]
test_metadata.drop(columns=test_surplus_columns, inplace=True)
# Reclaim the memory released by the dropped columns.
gc.collect()

7

In [13]:
# Report both frame shapes after pruning.
train_shape, test_shape = train_metadata.shape, test_metadata.shape
print(train_shape, test_shape)

(7848, 31) (3492890, 31)


In [14]:
# Derived per-band flux features, added in place to both the train and test frames.
#
# Column creation order is load-bearing: a later cell picks these columns up by
# position (the trailing 72 columns of train_metadata), so the helper appends them
# band by band, differences first and sigma bands second.

# (minuend, subtrahend) statistic pairs; each pair (a, b) produces the column
# 'A<band>_<a>_<b>_diff_flux' = 'A<band>_<a>_flux' - 'A<band>_<b>_flux'.
FLUX_DIFF_PAIRS = (
    ('max', 'min'),
    ('max', 'mean'),
    ('max', 'median'),
    ('median', 'mean'),
    ('median', 'min'),
    ('mean', 'min'),
)

# Multiples of the standard deviation used for the mean +/- k*sigma columns.
SIGMA_MULTIPLES = (1, 2, 3)


def add_band_flux_features(frame, bands=range(6)):
    """Append the derived flux columns for every band to ``frame`` in place.

    For each band ``b`` in ``bands`` the frame must already contain the columns
    ``A<b>_max_flux``, ``A<b>_min_flux``, ``A<b>_mean_flux``, ``A<b>_median_flux``
    and ``A<b>_std_flux``. Twelve columns are appended per band, in this order:

    * six pairwise differences named ``A<b>_<a>_<c>_diff_flux`` (see FLUX_DIFF_PAIRS);
    * ``A<b>_minus_3_sigma``, ``A<b>_minus_2_sigma``, ``A<b>_minus_1_sigma``
      (mean - k*std);
    * ``A<b>_plus_1_sigma``, ``A<b>_plus_2_sigma``, ``A<b>_plus_3_sigma``
      (mean + k*std).

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame to extend; it is modified in place.
    bands : iterable of int, optional
        Band numbers to process; defaults to the six bands 0..5.

    Returns
    -------
    None
        Nothing is returned, so calling this as the last statement of a cell does
        not display the (potentially very large) frame.

    Raises
    ------
    KeyError
        If one of the required source columns is missing from ``frame``.
    """
    for band in bands:
        prefix = 'A{}'.format(band)

        for upper, lower in FLUX_DIFF_PAIRS:
            target = '{}_{}_{}_diff_flux'.format(prefix, upper, lower)
            frame[target] = (
                frame['{}_{}_flux'.format(prefix, upper)]
                - frame['{}_{}_flux'.format(prefix, lower)]
            )

        mean_flux = frame[prefix + '_mean_flux']
        std_flux = frame[prefix + '_std_flux']
        # Lower bounds are appended widest-first (3, 2, 1) and upper bounds
        # narrowest-first (1, 2, 3) to keep the established column order.
        for multiple in reversed(SIGMA_MULTIPLES):
            frame['{}_minus_{}_sigma'.format(prefix, multiple)] = mean_flux - multiple * std_flux
        for multiple in SIGMA_MULTIPLES:
            frame['{}_plus_{}_sigma'.format(prefix, multiple)] = mean_flux + multiple * std_flux


# Train first, then test, each receiving the identical set of 72 new columns.
for metadata_frame in (train_metadata, test_metadata):
    add_band_flux_features(metadata_frame)

In [15]:
# Names of the 72 right-most columns of train_metadata, i.e. the 6 bands x 12
# derived features appended by the previous cell. The selection is positional, so
# it depends on no other column having been added in between.
unnecessary_columns = train_metadata.columns[-72:].tolist()

# DIV

In [16]:
#F1
train_metadata['dif_A5_A4_max_min_flux'] = train_metadata['A5_max_min_diff_flux'] - train_metadata['A4_max_min_diff_flux']
train_metadata['dif_A5_A3_max_min_flux'] = train_metadata['A5_max_min_diff_flux'] - train_metadata['A3_max_min_diff_flux']
train_metadata['dif_A5_A2_max_min_flux'] = train_metadata['A5_max_min_diff_flux'] - train_metadata['A2_max_min_diff_flux']
train_metadata['dif_A5_A1_max_min_flux'] = train_metadata['A5_max_min_diff_flux'] - train_metadata['A1_max_min_diff_flux']
train_metadata['dif_A5_A0_max_min_flux'] = train_metadata['A5_max_min_diff_flux'] - train_metadata['A0_max_min_diff_flux']
train_metadata['dif_A4_A3_max_min_flux'] = train_metadata['A4_max_min_diff_flux'] - train_metadata['A3_max_min_diff_flux']
train_metadata['dif_A4_A2_max_min_flux'] = train_metadata['A4_max_min_diff_flux'] - train_metadata['A2_max_min_diff_flux']
train_metadata['dif_A4_A1_max_min_flux'] = train_metadata['A4_max_min_diff_flux'] - train_metadata['A1_max_min_diff_flux']
train_metadata['dif_A4_A0_max_min_flux'] = train_metadata['A4_max_min_diff_flux'] - train_metadata['A0_max_min_diff_flux']
train_metadata['dif_A3_A2_max_min_flux'] = train_metadata['A3_max_min_diff_flux'] - train_metadata['A2_max_min_diff_flux']
train_metadata['dif_A3_A1_max_min_flux'] = train_metadata['A3_max_min_diff_flux'] - train_metadata['A1_max_min_diff_flux']
train_metadata['dif_A3_A0_max_min_flux'] = train_metadata['A3_max_min_diff_flux'] - train_metadata['A0_max_min_diff_flux']
train_metadata['dif_A2_A1_max_min_flux'] = train_metadata['A2_max_min_diff_flux'] - train_metadata['A1_max_min_diff_flux']
train_metadata['dif_A2_A0_max_min_flux'] = train_metadata['A2_max_min_diff_flux'] - train_metadata['A0_max_min_diff_flux']
train_metadata['dif_A1_A0_max_min_flux'] = train_metadata['A1_max_min_diff_flux'] - train_metadata['A0_max_min_diff_flux']
#F2-F12: pairwise band differences for every remaining per-band statistic
def _add_band_pair_diffs(frame, features):
    """Add ``dif_A<hi>_A<lo>_<out_suffix>`` columns to ``frame`` in place.

    For each ``(out_suffix, src_suffix)`` entry, and for each band pair
    ``hi > lo`` drawn from A5..A0, the new column is
    ``frame['A<hi>_<src_suffix>'] - frame['A<lo>_<src_suffix>']``.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must already contain the ``A0_``..``A5_<src_suffix>`` columns.
    features : iterable of (str, str)
        ``(out_suffix, src_suffix)`` pairs, processed in the given order.

    Returns
    -------
    None
        ``frame`` is modified in place; nothing is returned so that the
        notebook cell does not display anything.

    Raises
    ------
    KeyError
        If one of the required source columns is missing from ``frame``.
    """
    # combinations() over 5,4,3,2,1,0 yields (5,4),(5,3),...,(5,0),(4,3),...,(1,0):
    # the same 15 pairs, in the same order, as the hand-written statements.
    # The order matters: a later cell picks these columns by position.
    band_pairs = list(itertools.combinations(range(5, -1, -1), 2))
    for out_suffix, src_suffix in features:
        for hi, lo in band_pairs:
            target = 'dif_A{}_A{}_{}'.format(hi, lo, out_suffix)
            minuend = 'A{}_{}'.format(hi, src_suffix)
            subtrahend = 'A{}_{}'.format(lo, src_suffix)
            frame[target] = frame[minuend] - frame[subtrahend]


_add_band_pair_diffs(train_metadata, [
    # flux statistics: the source columns carry an extra "_diff" in their name
    ('max_mean_flux', 'max_mean_diff_flux'),        #F2
    ('max_median_flux', 'max_median_diff_flux'),    #F3
    ('median_mean_flux', 'median_mean_diff_flux'),  #F4
    ('median_min_flux', 'median_min_diff_flux'),    #F5
    ('mean_min_flux', 'mean_min_diff_flux'),        #F6
    # sigma statistics: output suffix and source suffix are identical
    ('plus_3_sigma', 'plus_3_sigma'),               #F7
    ('plus_2_sigma', 'plus_2_sigma'),               #F8
    ('plus_1_sigma', 'plus_1_sigma'),               #F9
    ('minus_3_sigma', 'minus_3_sigma'),             #F10
    ('minus_2_sigma', 'minus_2_sigma'),             #F11
    ('minus_1_sigma', 'minus_1_sigma'),             #F12
])

In [17]:
# Names of the trailing 180 columns of train_metadata, selected by position.
# NOTE(review): 180 presumably equals 12 statistics x 15 band pairs, i.e. the
# dif_* columns appended by the previous cell -- confirm. Because the slice is
# positional, re-running this cell after more columns have been appended will
# pick up different columns.
div_columns = train_metadata.columns[-180:].tolist()

# DIV_DIV: ratios between pairs of the dif_* features

In [18]:
%%time
def _add_dif_ratios(frame, stat_suffixes):
    """Add ``div_dif_<num>_<stat>_dif_<den>_<stat>`` ratio columns in place.

    For each statistic suffix, and for each (numerator, denominator) band-pair
    combination in the table below, the new column is
    ``frame['dif_<num>_<stat>'] / frame['dif_<den>_<stat>']``.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must already contain the referenced ``dif_*`` columns.
    stat_suffixes : iterable of str
        Statistic suffixes (e.g. ``'max_min_flux'``), processed in order.

    Returns
    -------
    None
        ``frame`` is modified in place.

    Raises
    ------
    KeyError
        If one of the referenced ``dif_*`` columns is missing from ``frame``.
    """
    # (numerator pair, denominator pair): the numerator is an adjacent band
    # pair and the denominator uses only bands strictly below it. The order
    # is the order in which the columns are appended.
    ratio_pairs = [
        ('A5_A4', 'A3_A2'), ('A5_A4', 'A3_A1'), ('A5_A4', 'A3_A0'),
        ('A5_A4', 'A2_A1'), ('A5_A4', 'A2_A0'), ('A5_A4', 'A1_A0'),
        ('A4_A3', 'A2_A1'), ('A4_A3', 'A2_A0'), ('A4_A3', 'A1_A0'),
        ('A3_A2', 'A1_A0'),
    ]
    for stat in stat_suffixes:
        for num_pair, den_pair in ratio_pairs:
            num_col = 'dif_{}_{}'.format(num_pair, stat)
            den_col = 'dif_{}_{}'.format(den_pair, stat)
            # NOTE(review): no guard against a zero denominator; with float
            # columns pandas would yield +/-inf or NaN rather than raise --
            # confirm the downstream model tolerates those values.
            frame['div_{}_{}'.format(num_col, den_col)] = frame[num_col] / frame[den_col]


_add_dif_ratios(train_metadata, [
    'max_min_flux',
    'max_mean_flux',
    'max_median_flux',
    'median_mean_flux',
    'median_min_flux',
    'mean_min_flux',
    'plus_3_sigma',
    'plus_2_sigma',
    'plus_1_sigma',
])

train_metadata['div_dif_A5_A4_minus_1_sigma_dif_A3_A2_minus_1_sigma'] = train_metadata['dif_A5_A4_minus_1_sigma'] / train_metadata['dif_A3_A2_minus_1_sigma']
train_metadata['div_dif_A5_A4_minus_1_sigma_dif_A3_A1_minus_1_sigma'] = train_metadata['dif_A5_A4_minus_1_sigma'] / train_metadata['dif_A3_A1_minus_1_sigma']
train_metadata['div_dif_A5_A4_minus_1_sigma_dif_A3_A0_minus_1_sigma'] = train_metadata['dif_A5_A4_minus_1_sigma'] / train_metadata['dif_A3_A0_minus_1_sigma']
train_metadata['div_dif_A5_A4_minus_1_sigma_dif_A2_A1_minus_1_sigma'] = train_metadata['dif_A5_A4_minus_1_sigma'] / train_metadata['dif_A2_A1_minus_1_sigma']
train_metadata['div_dif_A5_A4_minus_1_sigma_dif_A2_A0_minus_1_sigma'] = train_metadata['dif_A5_A4_minus_1_sigma'] / train_metadata['dif_A2_A0_minus_1_sigma']
train_metadata['div_dif_A5_A4_minus_1_sigma_dif_A1_A0_minus_1_sigma'] = train_metadata['dif_A5_A4_minus_1_sigma'] / train_metadata['dif_A1_A0_minus_1_sigma']
train_metadata['div_dif_A4_A3_minus_1_sigma_dif_A2_A1_minus_1_sigma'] = train_metadata['dif_A4_A3_minus_1_sigma'] / train_metadata['dif_A2_A1_minus_1_sigma']
train_metadata['div_dif_A4_A3_minus_1_sigma_dif_A2_A0_minus_1_sigma'] = train_metadata['dif_A4_A3_minus_1_sigma'] / train_metadata['dif_A2_A0_minus_1_sigma']
train_metadata['div_dif_A4_A3_minus_1_sigma_dif_A1_A0_minus_1_sigma'] = train_metadata['dif_A4_A3_minus_1_sigma'] / train_metadata['dif_A1_A0_minus_1_sigma']
train_metadata['div_dif_A3_A2_minus_1_sigma_dif_A1_A0_minus_1_sigma'] = train_metadata['dif_A3_A2_minus_1_sigma'] / train_metadata['dif_A1_A0_minus_1_sigma']

train_metadata['div_dif_A5_A4_minus_2_sigma_dif_A3_A2_minus_2_sigma'] = train_metadata['dif_A5_A4_minus_2_sigma'] / train_metadata['dif_A3_A2_minus_2_sigma']
train_metadata['div_dif_A5_A4_minus_2_sigma_dif_A3_A1_minus_2_sigma'] = train_metadata['dif_A5_A4_minus_2_sigma'] / train_metadata['dif_A3_A1_minus_2_sigma']
train_metadata['div_dif_A5_A4_minus_2_sigma_dif_A3_A0_minus_2_sigma'] = train_metadata['dif_A5_A4_minus_2_sigma'] / train_metadata['dif_A3_A0_minus_2_sigma']
train_metadata['div_dif_A5_A4_minus_2_sigma_dif_A2_A1_minus_2_sigma'] = train_metadata['dif_A5_A4_minus_2_sigma'] / train_metadata['dif_A2_A1_minus_2_sigma']
train_metadata['div_dif_A5_A4_minus_2_sigma_dif_A2_A0_minus_2_sigma'] = train_metadata['dif_A5_A4_minus_2_sigma'] / train_metadata['dif_A2_A0_minus_2_sigma']
train_metadata['div_dif_A5_A4_minus_2_sigma_dif_A1_A0_minus_2_sigma'] = train_metadata['dif_A5_A4_minus_2_sigma'] / train_metadata['dif_A1_A0_minus_2_sigma']
train_metadata['div_dif_A4_A3_minus_2_sigma_dif_A2_A1_minus_2_sigma'] = train_metadata['dif_A4_A3_minus_2_sigma'] / train_metadata['dif_A2_A1_minus_2_sigma']
train_metadata['div_dif_A4_A3_minus_2_sigma_dif_A2_A0_minus_2_sigma'] = train_metadata['dif_A4_A3_minus_2_sigma'] / train_metadata['dif_A2_A0_minus_2_sigma']
train_metadata['div_dif_A4_A3_minus_2_sigma_dif_A1_A0_minus_2_sigma'] = train_metadata['dif_A4_A3_minus_2_sigma'] / train_metadata['dif_A1_A0_minus_2_sigma']
train_metadata['div_dif_A3_A2_minus_2_sigma_dif_A1_A0_minus_2_sigma'] = train_metadata['dif_A3_A2_minus_2_sigma'] / train_metadata['dif_A1_A0_minus_2_sigma']

train_metadata['div_dif_A5_A4_minus_3_sigma_dif_A3_A2_minus_3_sigma'] = train_metadata['dif_A5_A4_minus_3_sigma'] / train_metadata['dif_A3_A2_minus_3_sigma']
train_metadata['div_dif_A5_A4_minus_3_sigma_dif_A3_A1_minus_3_sigma'] = train_metadata['dif_A5_A4_minus_3_sigma'] / train_metadata['dif_A3_A1_minus_3_sigma']
train_metadata['div_dif_A5_A4_minus_3_sigma_dif_A3_A0_minus_3_sigma'] = train_metadata['dif_A5_A4_minus_3_sigma'] / train_metadata['dif_A3_A0_minus_3_sigma']
train_metadata['div_dif_A5_A4_minus_3_sigma_dif_A2_A1_minus_3_sigma'] = train_metadata['dif_A5_A4_minus_3_sigma'] / train_metadata['dif_A2_A1_minus_3_sigma']
train_metadata['div_dif_A5_A4_minus_3_sigma_dif_A2_A0_minus_3_sigma'] = train_metadata['dif_A5_A4_minus_3_sigma'] / train_metadata['dif_A2_A0_minus_3_sigma']
train_metadata['div_dif_A5_A4_minus_3_sigma_dif_A1_A0_minus_3_sigma'] = train_metadata['dif_A5_A4_minus_3_sigma'] / train_metadata['dif_A1_A0_minus_3_sigma']
train_metadata['div_dif_A4_A3_minus_3_sigma_dif_A2_A1_minus_3_sigma'] = train_metadata['dif_A4_A3_minus_3_sigma'] / train_metadata['dif_A2_A1_minus_3_sigma']
train_metadata['div_dif_A4_A3_minus_3_sigma_dif_A2_A0_minus_3_sigma'] = train_metadata['dif_A4_A3_minus_3_sigma'] / train_metadata['dif_A2_A0_minus_3_sigma']
train_metadata['div_dif_A4_A3_minus_3_sigma_dif_A1_A0_minus_3_sigma'] = train_metadata['dif_A4_A3_minus_3_sigma'] / train_metadata['dif_A1_A0_minus_3_sigma']
train_metadata['div_dif_A3_A2_minus_3_sigma_dif_A1_A0_minus_3_sigma'] = train_metadata['dif_A3_A2_minus_3_sigma'] / train_metadata['dif_A1_A0_minus_3_sigma']

CPU times: user 92.4 ms, sys: 0 ns, total: 92.4 ms
Wall time: 90.9 ms


In [19]:
# Remove, in place, every column listed in `div_columns` from train_metadata.
train_metadata.drop(columns=div_columns, inplace=True)

In [20]:
# Remove, in place, every column listed in `unnecessary_columns` from train_metadata.
train_metadata.drop(columns=unnecessary_columns, inplace=True)

In [21]:
# Sanity check: (rows, columns) of train_metadata after the two drops above.
print(train_metadata.shape)

(7848, 151)


In [22]:
# Display the first five rows of the engineered training table.
train_metadata.head()

Unnamed: 0,object_id,A0_min_flux,A0_max_flux,A0_std_flux,A0_mean_flux,A0_median_flux,A1_min_flux,A1_max_flux,A1_std_flux,A1_mean_flux,A1_median_flux,A2_min_flux,A2_max_flux,A2_std_flux,A2_mean_flux,A2_median_flux,A3_min_flux,A3_max_flux,A3_std_flux,A3_mean_flux,A3_median_flux,A4_min_flux,A4_max_flux,A4_std_flux,A4_mean_flux,A4_median_flux,A5_min_flux,A5_max_flux,A5_std_flux,A5_mean_flux,A5_median_flux,div_dif_A5_A4_max_min_flux_dif_A3_A2_max_min_flux,div_dif_A5_A4_max_min_flux_dif_A3_A1_max_min_flux,div_dif_A5_A4_max_min_flux_dif_A3_A0_max_min_flux,div_dif_A5_A4_max_min_flux_dif_A2_A1_max_min_flux,div_dif_A5_A4_max_min_flux_dif_A2_A0_max_min_flux,div_dif_A5_A4_max_min_flux_dif_A1_A0_max_min_flux,div_dif_A4_A3_max_min_flux_dif_A2_A1_max_min_flux,div_dif_A4_A3_max_min_flux_dif_A2_A0_max_min_flux,div_dif_A4_A3_max_min_flux_dif_A1_A0_max_min_flux,div_dif_A3_A2_max_min_flux_dif_A1_A0_max_min_flux,div_dif_A5_A4_max_mean_flux_dif_A3_A2_max_mean_flux,div_dif_A5_A4_max_mean_flux_dif_A3_A1_max_mean_flux,div_dif_A5_A4_max_mean_flux_dif_A3_A0_max_mean_flux,div_dif_A5_A4_max_mean_flux_dif_A2_A1_max_mean_flux,div_dif_A5_A4_max_mean_flux_dif_A2_A0_max_mean_flux,div_dif_A5_A4_max_mean_flux_dif_A1_A0_max_mean_flux,div_dif_A4_A3_max_mean_flux_dif_A2_A1_max_mean_flux,div_dif_A4_A3_max_mean_flux_dif_A2_A0_max_mean_flux,div_dif_A4_A3_max_mean_flux_dif_A1_A0_max_mean_flux,div_dif_A3_A2_max_mean_flux_dif_A1_A0_max_mean_flux,div_dif_A5_A4_max_median_flux_dif_A3_A2_max_median_flux,div_dif_A5_A4_max_median_flux_dif_A3_A1_max_median_flux,div_dif_A5_A4_max_median_flux_dif_A3_A0_max_median_flux,div_dif_A5_A4_max_median_flux_dif_A2_A1_max_median_flux,div_dif_A5_A4_max_median_flux_dif_A2_A0_max_median_flux,div_dif_A5_A4_max_median_flux_dif_A1_A0_max_median_flux,div_dif_A4_A3_max_median_flux_dif_A2_A1_max_median_flux,div_dif_A4_A3_max_median_flux_dif_A2_A0_max_median_flux,div_dif_A4_A3_max_median_flux_dif_A1_A0_max_median_flux,div_dif_A3_A2_max_median_flux_dif_A1_A0_max_median_flux,div_dif_A5_A4_m
edian_mean_flux_dif_A3_A2_median_mean_flux,div_dif_A5_A4_median_mean_flux_dif_A3_A1_median_mean_flux,div_dif_A5_A4_median_mean_flux_dif_A3_A0_median_mean_flux,div_dif_A5_A4_median_mean_flux_dif_A2_A1_median_mean_flux,div_dif_A5_A4_median_mean_flux_dif_A2_A0_median_mean_flux,div_dif_A5_A4_median_mean_flux_dif_A1_A0_median_mean_flux,div_dif_A4_A3_median_mean_flux_dif_A2_A1_median_mean_flux,div_dif_A4_A3_median_mean_flux_dif_A2_A0_median_mean_flux,div_dif_A4_A3_median_mean_flux_dif_A1_A0_median_mean_flux,div_dif_A3_A2_median_mean_flux_dif_A1_A0_median_mean_flux,div_dif_A5_A4_median_min_flux_dif_A3_A2_median_min_flux,div_dif_A5_A4_median_min_flux_dif_A3_A1_median_min_flux,div_dif_A5_A4_median_min_flux_dif_A3_A0_median_min_flux,div_dif_A5_A4_median_min_flux_dif_A2_A1_median_min_flux,div_dif_A5_A4_median_min_flux_dif_A2_A0_median_min_flux,div_dif_A5_A4_median_min_flux_dif_A1_A0_median_min_flux,div_dif_A4_A3_median_min_flux_dif_A2_A1_median_min_flux,div_dif_A4_A3_median_min_flux_dif_A2_A0_median_min_flux,div_dif_A4_A3_median_min_flux_dif_A1_A0_median_min_flux,div_dif_A3_A2_median_min_flux_dif_A1_A0_median_min_flux,div_dif_A5_A4_mean_min_flux_dif_A3_A2_mean_min_flux,div_dif_A5_A4_mean_min_flux_dif_A3_A1_mean_min_flux,div_dif_A5_A4_mean_min_flux_dif_A3_A0_mean_min_flux,div_dif_A5_A4_mean_min_flux_dif_A2_A1_mean_min_flux,div_dif_A5_A4_mean_min_flux_dif_A2_A0_mean_min_flux,div_dif_A5_A4_mean_min_flux_dif_A1_A0_mean_min_flux,div_dif_A4_A3_mean_min_flux_dif_A2_A1_mean_min_flux,div_dif_A4_A3_mean_min_flux_dif_A2_A0_mean_min_flux,div_dif_A4_A3_mean_min_flux_dif_A1_A0_mean_min_flux,div_dif_A3_A2_mean_min_flux_dif_A1_A0_mean_min_flux,div_dif_A5_A4_plus_3_sigma_dif_A3_A2_plus_3_sigma,div_dif_A5_A4_plus_3_sigma_dif_A3_A1_plus_3_sigma,div_dif_A5_A4_plus_3_sigma_dif_A3_A0_plus_3_sigma,div_dif_A5_A4_plus_3_sigma_dif_A2_A1_plus_3_sigma,div_dif_A5_A4_plus_3_sigma_dif_A2_A0_plus_3_sigma,div_dif_A5_A4_plus_3_sigma_dif_A1_A0_plus_3_sigma,div_dif_A4_A3_plus_3_sigma_dif_A2_A1_plus_3_sigma,div_d
if_A4_A3_plus_3_sigma_dif_A2_A0_plus_3_sigma,div_dif_A4_A3_plus_3_sigma_dif_A1_A0_plus_3_sigma,div_dif_A3_A2_plus_3_sigma_dif_A1_A0_plus_3_sigma,div_dif_A5_A4_plus_2_sigma_dif_A3_A2_plus_2_sigma,div_dif_A5_A4_plus_2_sigma_dif_A3_A1_plus_2_sigma,div_dif_A5_A4_plus_2_sigma_dif_A3_A0_plus_2_sigma,div_dif_A5_A4_plus_2_sigma_dif_A2_A1_plus_2_sigma,div_dif_A5_A4_plus_2_sigma_dif_A2_A0_plus_2_sigma,div_dif_A5_A4_plus_2_sigma_dif_A1_A0_plus_2_sigma,div_dif_A4_A3_plus_2_sigma_dif_A2_A1_plus_2_sigma,div_dif_A4_A3_plus_2_sigma_dif_A2_A0_plus_2_sigma,div_dif_A4_A3_plus_2_sigma_dif_A1_A0_plus_2_sigma,div_dif_A3_A2_plus_2_sigma_dif_A1_A0_plus_2_sigma,div_dif_A5_A4_plus_1_sigma_dif_A3_A2_plus_1_sigma,div_dif_A5_A4_plus_1_sigma_dif_A3_A1_plus_1_sigma,div_dif_A5_A4_plus_1_sigma_dif_A3_A0_plus_1_sigma,div_dif_A5_A4_plus_1_sigma_dif_A2_A1_plus_1_sigma,div_dif_A5_A4_plus_1_sigma_dif_A2_A0_plus_1_sigma,div_dif_A5_A4_plus_1_sigma_dif_A1_A0_plus_1_sigma,div_dif_A4_A3_plus_1_sigma_dif_A2_A1_plus_1_sigma,div_dif_A4_A3_plus_1_sigma_dif_A2_A0_plus_1_sigma,div_dif_A4_A3_plus_1_sigma_dif_A1_A0_plus_1_sigma,div_dif_A3_A2_plus_1_sigma_dif_A1_A0_plus_1_sigma,div_dif_A5_A4_minus_1_sigma_dif_A3_A2_minus_1_sigma,div_dif_A5_A4_minus_1_sigma_dif_A3_A1_minus_1_sigma,div_dif_A5_A4_minus_1_sigma_dif_A3_A0_minus_1_sigma,div_dif_A5_A4_minus_1_sigma_dif_A2_A1_minus_1_sigma,div_dif_A5_A4_minus_1_sigma_dif_A2_A0_minus_1_sigma,div_dif_A5_A4_minus_1_sigma_dif_A1_A0_minus_1_sigma,div_dif_A4_A3_minus_1_sigma_dif_A2_A1_minus_1_sigma,div_dif_A4_A3_minus_1_sigma_dif_A2_A0_minus_1_sigma,div_dif_A4_A3_minus_1_sigma_dif_A1_A0_minus_1_sigma,div_dif_A3_A2_minus_1_sigma_dif_A1_A0_minus_1_sigma,div_dif_A5_A4_minus_2_sigma_dif_A3_A2_minus_2_sigma,div_dif_A5_A4_minus_2_sigma_dif_A3_A1_minus_2_sigma,div_dif_A5_A4_minus_2_sigma_dif_A3_A0_minus_2_sigma,div_dif_A5_A4_minus_2_sigma_dif_A2_A1_minus_2_sigma,div_dif_A5_A4_minus_2_sigma_dif_A2_A0_minus_2_sigma,div_dif_A5_A4_minus_2_sigma_dif_A1_A0_minus_2_sigma,div_dif_A4_A3_minus_2_s
igma_dif_A2_A1_minus_2_sigma,div_dif_A4_A3_minus_2_sigma_dif_A2_A0_minus_2_sigma,div_dif_A4_A3_minus_2_sigma_dif_A1_A0_minus_2_sigma,div_dif_A3_A2_minus_2_sigma_dif_A1_A0_minus_2_sigma,div_dif_A5_A4_minus_3_sigma_dif_A3_A2_minus_3_sigma,div_dif_A5_A4_minus_3_sigma_dif_A3_A1_minus_3_sigma,div_dif_A5_A4_minus_3_sigma_dif_A3_A0_minus_3_sigma,div_dif_A5_A4_minus_3_sigma_dif_A2_A1_minus_3_sigma,div_dif_A5_A4_minus_3_sigma_dif_A2_A0_minus_3_sigma,div_dif_A5_A4_minus_3_sigma_dif_A1_A0_minus_3_sigma,div_dif_A4_A3_minus_3_sigma_dif_A2_A1_minus_3_sigma,div_dif_A4_A3_minus_3_sigma_dif_A2_A0_minus_3_sigma,div_dif_A4_A3_minus_3_sigma_dif_A1_A0_minus_3_sigma,div_dif_A3_A2_minus_3_sigma_dif_A1_A0_minus_3_sigma
0,615,-116.91322,125.18281,83.94473,-3.254554,-10.015225,-1100.0,660.62634,601.7873,-385.8,-488.0,-682.0,611.98456,455.12134,-134.14656,-265.8,-530.5,445.73706,335.42505,-121.1035,-162.1,-422.1845,381.95374,291.80344,-55.954594,-103.54137,-422.8151,378.18814,294.7795,-47.44985,-85.52431,0.009866,0.003997,-0.00427,0.006718,-0.00298,-0.002064,0.368803,-0.163609,-0.113332,-0.209247,0.068438,0.025585,-0.027989,0.040861,-0.019865,-0.013367,0.429352,-0.208732,-0.140451,-0.195308,0.080692,0.040279,-0.046087,0.080426,-0.029333,-0.021494,0.45171,-0.164751,-0.120721,-0.266371,0.104927,0.155421,-0.277847,-0.322961,-0.076164,-0.099669,0.223752,0.052767,0.069052,-0.949891,-0.390118,-0.07655,0.07131,-0.095238,0.060289,0.036919,0.254121,-0.160868,-0.098509,-0.094634,-0.06598,-0.029971,0.03089,-0.054918,0.02104,0.015212,0.259498,-0.099418,-0.071879,-0.230554,-0.050378,-0.032622,0.027385,-0.092559,0.017741,0.014887,0.348914,-0.066877,-0.05612,-0.295518,-0.06387,-0.053938,0.037539,-0.346866,0.023643,0.022134,0.530113,-0.036134,-0.033828,-0.346556,-0.107646,-6.892273,0.085914,0.109354,0.04778,0.084856,0.205046,0.089591,0.159111,-0.788289,0.041651,0.010411,-0.01497,0.01388,-0.011012,-0.00614,0.273074,-0.216645,-0.120804,-0.147425,0.010112,0.003201,-0.004112,0.004684,-0.002923,-0.0018,0.279626,-0.174512,-0.107452,-0.177993,-0.001138,-0.000398,0.000485,-0.000612,0.00034,0.000219,0.2834,-0.157514,-0.101243,-0.19221
1,713,-14.735178,14.509829,7.113509,-2.720398,-3.096805,-11.72,9.129021,5.712334,-1.02,-0.5615,-10.07,10.529041,5.770738,-0.794238,-0.118,-12.4,11.330316,6.450413,-0.986966,-0.0739,-12.286801,9.827934,6.406989,-0.900261,-0.792176,-14.211164,14.770886,7.094073,-1.794175,-2.463012,2.193137,2.383413,-1.245276,-27.471458,-0.79428,-0.817928,6.462841,0.18686,0.192423,-0.372949,5.872078,2.691956,-1.188058,4.970684,-0.988136,-0.824276,-1.353269,0.26902,0.224409,-0.140372,8.734821,3.859373,-1.066324,6.914427,-0.950313,-0.835484,-0.819749,0.112666,0.099052,-0.09565,-3.280532,-1.709152,-0.602512,-3.568155,-0.738068,-0.930551,-3.697014,-0.764722,-0.964156,0.283658,0.106789,0.217135,0.368645,-0.210134,-0.150339,-0.528321,0.689162,0.493055,1.732696,-4.947347,0.482133,1.445163,-1.712432,-0.72351,-0.376211,-0.783743,0.018602,0.009673,0.020151,-1.625574,0.632259,0.519447,-4.562479,2.911267,-0.555306,-0.466352,-0.108654,0.020725,0.017405,-0.737597,0.411663,0.31822,1.179293,1.401921,-0.632428,-0.435822,-0.000418,0.000189,0.00013,-1.058687,-0.424747,-0.268222,-0.193238,-0.727847,-0.354531,-0.691221,0.152308,0.074189,0.144644,1.627372,1.812231,2.242405,-0.659703,-9.446775,-0.483643,-0.50974,0.777546,0.039808,0.041956,-0.281278,1.461318,1.571646,-0.741294,-20.816716,-0.49181,-0.50371,1.592888,0.037633,0.038544,-0.344696,1.324144,1.354832,-0.793819,-58.458852,-0.496293,-0.500543,4.292219,0.036439,0.036751,-0.378012
2,730,-3.45996,5.942166,1.828872,-0.04808,0.024093,-3.393,5.693109,1.807229,0.1411,0.1714,-2.85,20.99471,5.559483,2.40087,0.4917,-5.438,33.5721,8.191987,3.236164,0.6606,-5.83631,41.15998,10.710344,4.308728,1.004354,-19.159811,47.31006,13.332758,4.539396,2.542647,1.28408,0.650768,0.657714,1.319473,1.348345,-61.621941,0.541121,0.552961,-25.271394,-47.989159,0.504119,0.238841,0.24314,0.453879,0.469661,-13.507318,0.499571,0.516941,-14.867092,-26.793915,0.371664,0.168376,0.170849,0.307836,0.316202,-11.635232,0.483545,0.496685,-18.276448,-31.305795,-1.96224,-0.501801,-0.493865,-0.674218,-0.659969,-31.228048,0.375778,0.367836,17.405079,15.914487,5.390763,5.864491,5.684271,-66.734594,-104.400989,184.970117,-3.332124,-5.212844,9.23574,34.31242,3.959394,2.636965,2.57572,7.895157,7.37044,110.899386,0.856768,0.799827,12.034602,28.009182,0.927298,0.363962,0.361941,0.599111,0.593654,65.174371,0.638302,0.632488,69.437755,70.2842,0.897578,0.34514,0.341995,0.560768,0.552513,37.530796,0.625676,0.616465,41.874941,41.813415,0.822736,0.300964,0.295737,0.474563,0.461697,17.029572,0.59729,0.581096,21.433611,20.698709,1.33081,0.727042,0.776826,1.602527,1.866131,-11.344757,0.968716,1.128062,-6.857822,-8.524697,1.131938,0.518289,0.531049,0.956036,1.000377,-21.569303,0.755834,0.790889,-17.052497,-19.055204,1.081328,0.475526,0.483171,0.848792,0.873462,-30.052129,0.720519,0.741461,-25.510541,-27.791872
3,745,-3.874349,18.014029,4.374445,1.797523,1.056714,-3.62,192.2443,25.964659,5.72,0.888,-2.16,220.79521,31.957998,9.711532,0.4243,-4.945,203.2507,34.967697,14.412925,1.361,-15.494463,183.63312,33.069054,13.134436,1.27015,-10.249387,141.51329,26.06013,10.746138,2.749555,3.209111,-3.841,-0.25423,-1.748369,-0.235568,-0.27225,-0.334729,-0.0451,-0.052123,-0.084837,1.786016,-17.173962,-0.230166,-1.617774,-0.20389,-0.233293,-0.746725,-0.094111,-0.107682,-0.130622,2.359111,-4.139142,-0.235758,-1.502665,-0.214338,-0.249997,-0.672996,-0.095995,-0.111966,-0.105971,-1.027362,-0.470528,-0.314163,-0.868126,-0.452552,-0.945373,-0.266572,-0.138963,-0.290292,0.920195,-1.011815,-2.094367,-2.738796,1.957515,1.604624,8.900971,-5.436718,-4.456613,-24.721173,-8.797035,-1.019633,-0.761972,-0.557748,-3.015318,-1.231257,-2.081,3.662199,1.4954,2.52744,2.04093,-1.705334,-0.655847,-0.224293,-1.0657,-0.25826,-0.340865,-0.31743,-0.076925,-0.10153,0.199882,-1.530311,-0.614485,-0.2223,-1.026782,-0.26008,-0.348304,-0.317669,-0.080464,-0.107759,0.227604,-1.218663,-0.531038,-0.217485,-0.941146,-0.264729,-0.368335,-0.318195,-0.089503,-0.124531,0.302245,2.73136,-14.899814,-0.257018,-2.308228,-0.234913,-0.261529,-0.309797,-0.031529,-0.035101,-0.09575,-8.823601,-1.248723,-0.239434,-1.454576,-0.246112,-0.296234,-0.315041,-0.053304,-0.06416,0.033573,-4.306781,-1.017596,-0.23544,-1.332415,-0.249055,-0.306311,-0.315791,-0.059028,-0.072598,0.071123
4,1124,-6.804703,5.330927,2.360085,0.660948,0.581027,-2.623,37.170177,8.107525,4.633,1.154,-2.084,106.67169,21.319853,10.243968,0.889,-2.8,139.8184,26.270649,11.086555,1.014,-16.543753,143.60019,26.865913,9.906102,1.745012,-10.86054,109.157585,21.434628,6.896741,1.973272,-1.184956,-0.390233,-0.307518,-0.58185,-0.415295,-1.450809,0.254131,0.181386,0.633662,1.224357,-0.973041,-0.326767,-0.253367,-0.491986,-0.342568,-1.127966,0.077668,0.05408,0.178068,1.159217,-1.049942,-0.337304,-0.258633,-0.496956,-0.343164,-1.10889,0.043728,0.030196,0.097574,1.056145,-4.511816,-0.491028,-0.324001,-0.550994,-0.349068,-0.952499,-0.325302,-0.206087,-0.562348,0.211112,-6.48627,-147.431162,1.527258,6.784767,1.236186,1.511599,-18.003439,-3.280229,-4.011041,-0.233046,-5.577214,-1.310987,-1.353793,-1.713846,-1.787743,41.462133,2.477007,2.583809,-59.924856,-7.434201,-1.229898,-0.316743,-0.234954,-0.42661,-0.290438,-0.909912,0.013378,0.009108,0.028534,0.739827,-1.291111,-0.324264,-0.238158,-0.433016,-0.292025,-0.896877,0.000314,0.000212,0.000651,0.694655,-1.456946,-0.342883,-0.245824,-0.448415,-0.295719,-0.868425,-0.031089,-0.020502,-0.060208,0.596058,-0.589533,-0.206833,-0.179602,-0.318617,-0.25829,-1.364166,0.233605,0.189374,1.000185,2.313978,-0.866895,-0.262889,-0.210004,-0.37731,-0.277141,-1.043917,0.113915,0.083672,0.315171,1.204202,-0.948229,-0.276554,-0.216691,-0.390422,-0.280878,-1.001072,0.087176,0.062716,0.223526,1.055728


In [23]:
# (rows, columns) of the Kaggle-derived training feature table loaded at the top of the notebook.
print(train_metadata_kaggle.shape)

(7848, 139)


In [24]:
# Keep the test-set object ids as a separate Series.
# NOTE(review): presumably kept to label predictions later -- confirm against downstream cells.
test_id = test_metadata_kaggle['object_id']

In [25]:
def _plasticc_class_weights(y_true):
    """Return the per-class metric weights as an array ordered by ascending class label.

    Classes 15 and 64 weigh 2, every other class weighs 1. When ``y_true`` holds
    more than 14 distinct labels, a weight of 2 for class 99 is appended.
    """
    # class_weights taken from Giba's topic : https://www.kaggle.com/titericz
    # https://www.kaggle.com/c/PLAsTiCC-2018/discussion/67194
    # with Kyle Boone's post https://www.kaggle.com/kyleboone
    class_weight = {6: 1, 15: 2, 16: 1, 42: 1, 52: 1, 53: 1, 62: 1, 64: 2, 65: 1, 67: 1, 88: 1, 90: 1, 92: 1, 95: 1}
    # There is no class 99 in the training set; it only gets a weight when a
    # 15th distinct label shows up in y_true.
    if len(np.unique(y_true)) > 14:
        class_weight[99] = 2
    return np.array([class_weight[k] for k in sorted(class_weight.keys())])


def multi_weighted_logloss(y_true, y_preds):
    """
    @author olivier https://www.kaggle.com/ogrellier
    multi logloss for PLAsTiCC challenge

    Parameters
    ----------
    y_true : 1-D array-like of class labels, one per sample.
    y_preds : 2-D array-like of shape (n_samples, n_classes) with one
        probability column per class. Columns are matched *by position* to the
        sorted distinct values of ``y_true``, so raw labels (6, 15, ...) and
        label-encoded targets (0..13) both work.

    Returns
    -------
    float
        Class-weighted mean of the per-class average negative log-likelihood.

    Raises
    ------
    ValueError
        If the number of distinct labels in ``y_true`` differs from the number
        of class weights, or ``y_preds`` is not (n_samples, n_classes).
    """
    class_arr = _plasticc_class_weights(y_true)
    # Transform y_true in dummies (columns come out in sorted label order)
    y_ohe = pd.get_dummies(y_true)
    # Fail loudly on a shape mismatch: numpy would otherwise either raise an
    # opaque broadcasting error or, for a single distinct label, silently
    # broadcast and return a meaningless value.
    if y_ohe.shape[1] != len(class_arr) or tuple(np.shape(y_preds)) != tuple(y_ohe.shape):
        raise ValueError(
            'multi_weighted_logloss expects {} distinct classes in y_true and y_preds of shape '
            '(n_samples, n_classes); got {} distinct classes and y_preds of shape {}'.format(
                len(class_arr), y_ohe.shape[1], tuple(np.shape(y_preds))))
    # Limit y_preds to [1e-15, 1 - 1e-15] so the log stays finite
    # (rows are NOT re-normalised here)
    y_p = np.clip(a=y_preds, a_min=1e-15, a_max=1 - 1e-15)
    # Transform to log
    y_p_log = np.log(y_p)
    # Get the log for ones, .values is used to drop the index of DataFrames
    y_log_ones = np.sum(y_ohe.values * y_p_log, axis=0)
    # Get the number of positives for each class
    nb_pos = y_ohe.sum(axis=0).values.astype(float)
    # Weight average and divide by the number of positives
    y_w = y_log_ones * class_arr / nb_pos

    loss = - np.sum(y_w) / np.sum(class_arr)
    return loss


def lgb_multi_weighted_logloss(y_true, y_preds):
    """
    @author olivier https://www.kaggle.com/ogrellier
    multi logloss for PLAsTiCC challenge, wrapped as a LightGBM custom eval metric

    Parameters
    ----------
    y_true : 1-D array of class labels (must expose ``.shape``).
    y_preds : array of class probabilities holding n_samples * n_classes
        values; it is reshaped to (n_samples, n_classes) in column-major
        (``order='F'``) order before scoring.

    Returns
    -------
    tuple
        ``('wloss', loss, False)`` -- metric name, value, and the
        "is higher better" flag (False: lower is better).
    """
    n_classes = len(_plasticc_class_weights(y_true))
    y_p = y_preds.reshape(y_true.shape[0], n_classes, order='F')
    return 'wloss', multi_weighted_logloss(y_true, y_p), False

In [26]:
# Preview the first rows of the last 120 columns of train_metadata.
train_metadata[train_metadata.columns[-120:].tolist()].head()

Unnamed: 0,div_dif_A5_A4_max_min_flux_dif_A3_A2_max_min_flux,div_dif_A5_A4_max_min_flux_dif_A3_A1_max_min_flux,div_dif_A5_A4_max_min_flux_dif_A3_A0_max_min_flux,div_dif_A5_A4_max_min_flux_dif_A2_A1_max_min_flux,div_dif_A5_A4_max_min_flux_dif_A2_A0_max_min_flux,div_dif_A5_A4_max_min_flux_dif_A1_A0_max_min_flux,div_dif_A4_A3_max_min_flux_dif_A2_A1_max_min_flux,div_dif_A4_A3_max_min_flux_dif_A2_A0_max_min_flux,div_dif_A4_A3_max_min_flux_dif_A1_A0_max_min_flux,div_dif_A3_A2_max_min_flux_dif_A1_A0_max_min_flux,div_dif_A5_A4_max_mean_flux_dif_A3_A2_max_mean_flux,div_dif_A5_A4_max_mean_flux_dif_A3_A1_max_mean_flux,div_dif_A5_A4_max_mean_flux_dif_A3_A0_max_mean_flux,div_dif_A5_A4_max_mean_flux_dif_A2_A1_max_mean_flux,div_dif_A5_A4_max_mean_flux_dif_A2_A0_max_mean_flux,div_dif_A5_A4_max_mean_flux_dif_A1_A0_max_mean_flux,div_dif_A4_A3_max_mean_flux_dif_A2_A1_max_mean_flux,div_dif_A4_A3_max_mean_flux_dif_A2_A0_max_mean_flux,div_dif_A4_A3_max_mean_flux_dif_A1_A0_max_mean_flux,div_dif_A3_A2_max_mean_flux_dif_A1_A0_max_mean_flux,div_dif_A5_A4_max_median_flux_dif_A3_A2_max_median_flux,div_dif_A5_A4_max_median_flux_dif_A3_A1_max_median_flux,div_dif_A5_A4_max_median_flux_dif_A3_A0_max_median_flux,div_dif_A5_A4_max_median_flux_dif_A2_A1_max_median_flux,div_dif_A5_A4_max_median_flux_dif_A2_A0_max_median_flux,div_dif_A5_A4_max_median_flux_dif_A1_A0_max_median_flux,div_dif_A4_A3_max_median_flux_dif_A2_A1_max_median_flux,div_dif_A4_A3_max_median_flux_dif_A2_A0_max_median_flux,div_dif_A4_A3_max_median_flux_dif_A1_A0_max_median_flux,div_dif_A3_A2_max_median_flux_dif_A1_A0_max_median_flux,div_dif_A5_A4_median_mean_flux_dif_A3_A2_median_mean_flux,div_dif_A5_A4_median_mean_flux_dif_A3_A1_median_mean_flux,div_dif_A5_A4_median_mean_flux_dif_A3_A0_median_mean_flux,div_dif_A5_A4_median_mean_flux_dif_A2_A1_median_mean_flux,div_dif_A5_A4_median_mean_flux_dif_A2_A0_median_mean_flux,div_dif_A5_A4_median_mean_flux_dif_A1_A0_median_mean_flux,div_dif_A4_A3_median_mean_flux_dif_A2_A1_median_mean_flux,div
_dif_A4_A3_median_mean_flux_dif_A2_A0_median_mean_flux,div_dif_A4_A3_median_mean_flux_dif_A1_A0_median_mean_flux,div_dif_A3_A2_median_mean_flux_dif_A1_A0_median_mean_flux,div_dif_A5_A4_median_min_flux_dif_A3_A2_median_min_flux,div_dif_A5_A4_median_min_flux_dif_A3_A1_median_min_flux,div_dif_A5_A4_median_min_flux_dif_A3_A0_median_min_flux,div_dif_A5_A4_median_min_flux_dif_A2_A1_median_min_flux,div_dif_A5_A4_median_min_flux_dif_A2_A0_median_min_flux,div_dif_A5_A4_median_min_flux_dif_A1_A0_median_min_flux,div_dif_A4_A3_median_min_flux_dif_A2_A1_median_min_flux,div_dif_A4_A3_median_min_flux_dif_A2_A0_median_min_flux,div_dif_A4_A3_median_min_flux_dif_A1_A0_median_min_flux,div_dif_A3_A2_median_min_flux_dif_A1_A0_median_min_flux,div_dif_A5_A4_mean_min_flux_dif_A3_A2_mean_min_flux,div_dif_A5_A4_mean_min_flux_dif_A3_A1_mean_min_flux,div_dif_A5_A4_mean_min_flux_dif_A3_A0_mean_min_flux,div_dif_A5_A4_mean_min_flux_dif_A2_A1_mean_min_flux,div_dif_A5_A4_mean_min_flux_dif_A2_A0_mean_min_flux,div_dif_A5_A4_mean_min_flux_dif_A1_A0_mean_min_flux,div_dif_A4_A3_mean_min_flux_dif_A2_A1_mean_min_flux,div_dif_A4_A3_mean_min_flux_dif_A2_A0_mean_min_flux,div_dif_A4_A3_mean_min_flux_dif_A1_A0_mean_min_flux,div_dif_A3_A2_mean_min_flux_dif_A1_A0_mean_min_flux,div_dif_A5_A4_plus_3_sigma_dif_A3_A2_plus_3_sigma,div_dif_A5_A4_plus_3_sigma_dif_A3_A1_plus_3_sigma,div_dif_A5_A4_plus_3_sigma_dif_A3_A0_plus_3_sigma,div_dif_A5_A4_plus_3_sigma_dif_A2_A1_plus_3_sigma,div_dif_A5_A4_plus_3_sigma_dif_A2_A0_plus_3_sigma,div_dif_A5_A4_plus_3_sigma_dif_A1_A0_plus_3_sigma,div_dif_A4_A3_plus_3_sigma_dif_A2_A1_plus_3_sigma,div_dif_A4_A3_plus_3_sigma_dif_A2_A0_plus_3_sigma,div_dif_A4_A3_plus_3_sigma_dif_A1_A0_plus_3_sigma,div_dif_A3_A2_plus_3_sigma_dif_A1_A0_plus_3_sigma,div_dif_A5_A4_plus_2_sigma_dif_A3_A2_plus_2_sigma,div_dif_A5_A4_plus_2_sigma_dif_A3_A1_plus_2_sigma,div_dif_A5_A4_plus_2_sigma_dif_A3_A0_plus_2_sigma,div_dif_A5_A4_plus_2_sigma_dif_A2_A1_plus_2_sigma,div_dif_A5_A4_plus_2_sigma_dif_A2_A0_plus_2_sigma
,div_dif_A5_A4_plus_2_sigma_dif_A1_A0_plus_2_sigma,div_dif_A4_A3_plus_2_sigma_dif_A2_A1_plus_2_sigma,div_dif_A4_A3_plus_2_sigma_dif_A2_A0_plus_2_sigma,div_dif_A4_A3_plus_2_sigma_dif_A1_A0_plus_2_sigma,div_dif_A3_A2_plus_2_sigma_dif_A1_A0_plus_2_sigma,div_dif_A5_A4_plus_1_sigma_dif_A3_A2_plus_1_sigma,div_dif_A5_A4_plus_1_sigma_dif_A3_A1_plus_1_sigma,div_dif_A5_A4_plus_1_sigma_dif_A3_A0_plus_1_sigma,div_dif_A5_A4_plus_1_sigma_dif_A2_A1_plus_1_sigma,div_dif_A5_A4_plus_1_sigma_dif_A2_A0_plus_1_sigma,div_dif_A5_A4_plus_1_sigma_dif_A1_A0_plus_1_sigma,div_dif_A4_A3_plus_1_sigma_dif_A2_A1_plus_1_sigma,div_dif_A4_A3_plus_1_sigma_dif_A2_A0_plus_1_sigma,div_dif_A4_A3_plus_1_sigma_dif_A1_A0_plus_1_sigma,div_dif_A3_A2_plus_1_sigma_dif_A1_A0_plus_1_sigma,div_dif_A5_A4_minus_1_sigma_dif_A3_A2_minus_1_sigma,div_dif_A5_A4_minus_1_sigma_dif_A3_A1_minus_1_sigma,div_dif_A5_A4_minus_1_sigma_dif_A3_A0_minus_1_sigma,div_dif_A5_A4_minus_1_sigma_dif_A2_A1_minus_1_sigma,div_dif_A5_A4_minus_1_sigma_dif_A2_A0_minus_1_sigma,div_dif_A5_A4_minus_1_sigma_dif_A1_A0_minus_1_sigma,div_dif_A4_A3_minus_1_sigma_dif_A2_A1_minus_1_sigma,div_dif_A4_A3_minus_1_sigma_dif_A2_A0_minus_1_sigma,div_dif_A4_A3_minus_1_sigma_dif_A1_A0_minus_1_sigma,div_dif_A3_A2_minus_1_sigma_dif_A1_A0_minus_1_sigma,div_dif_A5_A4_minus_2_sigma_dif_A3_A2_minus_2_sigma,div_dif_A5_A4_minus_2_sigma_dif_A3_A1_minus_2_sigma,div_dif_A5_A4_minus_2_sigma_dif_A3_A0_minus_2_sigma,div_dif_A5_A4_minus_2_sigma_dif_A2_A1_minus_2_sigma,div_dif_A5_A4_minus_2_sigma_dif_A2_A0_minus_2_sigma,div_dif_A5_A4_minus_2_sigma_dif_A1_A0_minus_2_sigma,div_dif_A4_A3_minus_2_sigma_dif_A2_A1_minus_2_sigma,div_dif_A4_A3_minus_2_sigma_dif_A2_A0_minus_2_sigma,div_dif_A4_A3_minus_2_sigma_dif_A1_A0_minus_2_sigma,div_dif_A3_A2_minus_2_sigma_dif_A1_A0_minus_2_sigma,div_dif_A5_A4_minus_3_sigma_dif_A3_A2_minus_3_sigma,div_dif_A5_A4_minus_3_sigma_dif_A3_A1_minus_3_sigma,div_dif_A5_A4_minus_3_sigma_dif_A3_A0_minus_3_sigma,div_dif_A5_A4_minus_3_sigma_dif_A2_A1_minus_3_sigma,d
iv_dif_A5_A4_minus_3_sigma_dif_A2_A0_minus_3_sigma,div_dif_A5_A4_minus_3_sigma_dif_A1_A0_minus_3_sigma,div_dif_A4_A3_minus_3_sigma_dif_A2_A1_minus_3_sigma,div_dif_A4_A3_minus_3_sigma_dif_A2_A0_minus_3_sigma,div_dif_A4_A3_minus_3_sigma_dif_A1_A0_minus_3_sigma,div_dif_A3_A2_minus_3_sigma_dif_A1_A0_minus_3_sigma
0,0.009866,0.003997,-0.00427,0.006718,-0.00298,-0.002064,0.368803,-0.163609,-0.113332,-0.209247,0.068438,0.025585,-0.027989,0.040861,-0.019865,-0.013367,0.429352,-0.208732,-0.140451,-0.195308,0.080692,0.040279,-0.046087,0.080426,-0.029333,-0.021494,0.45171,-0.164751,-0.120721,-0.266371,0.104927,0.155421,-0.277847,-0.322961,-0.076164,-0.099669,0.223752,0.052767,0.069052,-0.949891,-0.390118,-0.07655,0.07131,-0.095238,0.060289,0.036919,0.254121,-0.160868,-0.098509,-0.094634,-0.06598,-0.029971,0.03089,-0.054918,0.02104,0.015212,0.259498,-0.099418,-0.071879,-0.230554,-0.050378,-0.032622,0.027385,-0.092559,0.017741,0.014887,0.348914,-0.066877,-0.05612,-0.295518,-0.06387,-0.053938,0.037539,-0.346866,0.023643,0.022134,0.530113,-0.036134,-0.033828,-0.346556,-0.107646,-6.892273,0.085914,0.109354,0.04778,0.084856,0.205046,0.089591,0.159111,-0.788289,0.041651,0.010411,-0.01497,0.01388,-0.011012,-0.00614,0.273074,-0.216645,-0.120804,-0.147425,0.010112,0.003201,-0.004112,0.004684,-0.002923,-0.0018,0.279626,-0.174512,-0.107452,-0.177993,-0.001138,-0.000398,0.000485,-0.000612,0.00034,0.000219,0.2834,-0.157514,-0.101243,-0.19221
1,2.193137,2.383413,-1.245276,-27.471458,-0.79428,-0.817928,6.462841,0.18686,0.192423,-0.372949,5.872078,2.691956,-1.188058,4.970684,-0.988136,-0.824276,-1.353269,0.26902,0.224409,-0.140372,8.734821,3.859373,-1.066324,6.914427,-0.950313,-0.835484,-0.819749,0.112666,0.099052,-0.09565,-3.280532,-1.709152,-0.602512,-3.568155,-0.738068,-0.930551,-3.697014,-0.764722,-0.964156,0.283658,0.106789,0.217135,0.368645,-0.210134,-0.150339,-0.528321,0.689162,0.493055,1.732696,-4.947347,0.482133,1.445163,-1.712432,-0.72351,-0.376211,-0.783743,0.018602,0.009673,0.020151,-1.625574,0.632259,0.519447,-4.562479,2.911267,-0.555306,-0.466352,-0.108654,0.020725,0.017405,-0.737597,0.411663,0.31822,1.179293,1.401921,-0.632428,-0.435822,-0.000418,0.000189,0.00013,-1.058687,-0.424747,-0.268222,-0.193238,-0.727847,-0.354531,-0.691221,0.152308,0.074189,0.144644,1.627372,1.812231,2.242405,-0.659703,-9.446775,-0.483643,-0.50974,0.777546,0.039808,0.041956,-0.281278,1.461318,1.571646,-0.741294,-20.816716,-0.49181,-0.50371,1.592888,0.037633,0.038544,-0.344696,1.324144,1.354832,-0.793819,-58.458852,-0.496293,-0.500543,4.292219,0.036439,0.036751,-0.378012
2,1.28408,0.650768,0.657714,1.319473,1.348345,-61.621941,0.541121,0.552961,-25.271394,-47.989159,0.504119,0.238841,0.24314,0.453879,0.469661,-13.507318,0.499571,0.516941,-14.867092,-26.793915,0.371664,0.168376,0.170849,0.307836,0.316202,-11.635232,0.483545,0.496685,-18.276448,-31.305795,-1.96224,-0.501801,-0.493865,-0.674218,-0.659969,-31.228048,0.375778,0.367836,17.405079,15.914487,5.390763,5.864491,5.684271,-66.734594,-104.400989,184.970117,-3.332124,-5.212844,9.23574,34.31242,3.959394,2.636965,2.57572,7.895157,7.37044,110.899386,0.856768,0.799827,12.034602,28.009182,0.927298,0.363962,0.361941,0.599111,0.593654,65.174371,0.638302,0.632488,69.437755,70.2842,0.897578,0.34514,0.341995,0.560768,0.552513,37.530796,0.625676,0.616465,41.874941,41.813415,0.822736,0.300964,0.295737,0.474563,0.461697,17.029572,0.59729,0.581096,21.433611,20.698709,1.33081,0.727042,0.776826,1.602527,1.866131,-11.344757,0.968716,1.128062,-6.857822,-8.524697,1.131938,0.518289,0.531049,0.956036,1.000377,-21.569303,0.755834,0.790889,-17.052497,-19.055204,1.081328,0.475526,0.483171,0.848792,0.873462,-30.052129,0.720519,0.741461,-25.510541,-27.791872
3,3.209111,-3.841,-0.25423,-1.748369,-0.235568,-0.27225,-0.334729,-0.0451,-0.052123,-0.084837,1.786016,-17.173962,-0.230166,-1.617774,-0.20389,-0.233293,-0.746725,-0.094111,-0.107682,-0.130622,2.359111,-4.139142,-0.235758,-1.502665,-0.214338,-0.249997,-0.672996,-0.095995,-0.111966,-0.105971,-1.027362,-0.470528,-0.314163,-0.868126,-0.452552,-0.945373,-0.266572,-0.138963,-0.290292,0.920195,-1.011815,-2.094367,-2.738796,1.957515,1.604624,8.900971,-5.436718,-4.456613,-24.721173,-8.797035,-1.019633,-0.761972,-0.557748,-3.015318,-1.231257,-2.081,3.662199,1.4954,2.52744,2.04093,-1.705334,-0.655847,-0.224293,-1.0657,-0.25826,-0.340865,-0.31743,-0.076925,-0.10153,0.199882,-1.530311,-0.614485,-0.2223,-1.026782,-0.26008,-0.348304,-0.317669,-0.080464,-0.107759,0.227604,-1.218663,-0.531038,-0.217485,-0.941146,-0.264729,-0.368335,-0.318195,-0.089503,-0.124531,0.302245,2.73136,-14.899814,-0.257018,-2.308228,-0.234913,-0.261529,-0.309797,-0.031529,-0.035101,-0.09575,-8.823601,-1.248723,-0.239434,-1.454576,-0.246112,-0.296234,-0.315041,-0.053304,-0.06416,0.033573,-4.306781,-1.017596,-0.23544,-1.332415,-0.249055,-0.306311,-0.315791,-0.059028,-0.072598,0.071123
4,-1.184956,-0.390233,-0.307518,-0.58185,-0.415295,-1.450809,0.254131,0.181386,0.633662,1.224357,-0.973041,-0.326767,-0.253367,-0.491986,-0.342568,-1.127966,0.077668,0.05408,0.178068,1.159217,-1.049942,-0.337304,-0.258633,-0.496956,-0.343164,-1.10889,0.043728,0.030196,0.097574,1.056145,-4.511816,-0.491028,-0.324001,-0.550994,-0.349068,-0.952499,-0.325302,-0.206087,-0.562348,0.211112,-6.48627,-147.431162,1.527258,6.784767,1.236186,1.511599,-18.003439,-3.280229,-4.011041,-0.233046,-5.577214,-1.310987,-1.353793,-1.713846,-1.787743,41.462133,2.477007,2.583809,-59.924856,-7.434201,-1.229898,-0.316743,-0.234954,-0.42661,-0.290438,-0.909912,0.013378,0.009108,0.028534,0.739827,-1.291111,-0.324264,-0.238158,-0.433016,-0.292025,-0.896877,0.000314,0.000212,0.000651,0.694655,-1.456946,-0.342883,-0.245824,-0.448415,-0.295719,-0.868425,-0.031089,-0.020502,-0.060208,0.596058,-0.589533,-0.206833,-0.179602,-0.318617,-0.25829,-1.364166,0.233605,0.189374,1.000185,2.313978,-0.866895,-0.262889,-0.210004,-0.37731,-0.277141,-1.043917,0.113915,0.083672,0.315171,1.204202,-0.948229,-0.276554,-0.216691,-0.390422,-0.280878,-1.001072,0.087176,0.062716,0.223526,1.055728


In [27]:
# Hand-kept record of per-fold losses; this cell computes nothing.
# NOTE(review): the leading "0.75" presumably names the hyper-parameter value
# (e.g. subsample) behind the bracketed fold scores - confirm.
#0.75 [0.5764, 0.54968, 0.59261, 0.56469, 0.5659]
# Bare string literal: "<fold index> <loss>" per line, then the overall score.
"""
0 0.5407134604014688
1 0.5505294469507865
2 0.5930505215690117
3 0.5412398994685668
4 0.5650806630208185
MULTI WEIGHTED LOG LOSS : 0.55802
"""

'\n0 0.5407134604014688\n1 0.5505294469507865\n2 0.5930505215690117\n3 0.5412398994685668\n4 0.5650806630208185\nMULTI WEIGHTED LOG LOSS : 0.55802\n'

In [37]:
# Engineered feature column(s) that the following cells pull from
# train_metadata / test_metadata and join onto the base feature tables.
used_columns1 = ['div_dif_A5_A4_plus_1_sigma_dif_A2_A0_plus_1_sigma']

In [38]:
%%time
# 5-fold stratified cross-validation of a LightGBM multiclass model on the base
# feature table (train_metadata_kaggle) extended with the columns in used_columns1.
# Leaves behind: clfs (one fitted model per fold), oof_preds (out-of-fold class
# probabilities), importances (all folds) and imp_df (last fold only).
final_dict = {}
loss_list = []

# Feature matrix = base features + candidate columns, joined on object_id.
feature_cols = ['object_id'] + used_columns1
temp = train_metadata_kaggle.copy()
temp = temp.merge(train_metadata[feature_cols], on='object_id', how='left')
print(temp.shape)
#temp = temp.merge(train_metadata[['object_id',column_]],on = 'object_id',how = 'left')

# Split the label off so that `temp` ends up holding model inputs only.
y = temp['target']
del temp['target']
classes = sorted(y.unique())

# Taken from Giba's topic : https://www.kaggle.com/titericz
# https://www.kaggle.com/c/PLAsTiCC-2018/discussion/67194
# with Kyle Boone's post https://www.kaggle.com/kyleboone
# NOTE(review): class_weight is not read anywhere in this cell; it is kept in case
# the metric helpers defined elsewhere in the notebook read it as a global - confirm.
class_weight = {c: 1 for c in classes}
for c in [64, 15]:
    class_weight[c] = 2

#print('Unique classes : ', classes)

train_id = temp['object_id']
del temp['object_id']

# Inverse-frequency sample weights: total number of rows / rows of that class.
w = y.value_counts()
weights = {i: np.sum(w) / w[i] for i in w.index}

folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=51)
clfs = []
fold_importances = []
lgb_params = {
    'random_state': 51,
    'device': 'cpu',
    'objective': 'multiclass',
    'num_class': 14,
    'boosting_type': 'gbdt',
    'n_jobs': -1,
    'n_estimators': 4000,
    'subsample_freq': 6,
    'subsample_for_bin': 5000,
    'min_data_per_group': 100,
    'max_cat_to_onehot': 4,
    'cat_l2': 1.0,
    'cat_smooth': 59.5,
    'max_cat_threshold': 32,
    'metric_freq': 10,
    'verbosity': -1,
    'metric': 'multi_logloss',
    'xgboost_dart_mode': False,
    'uniform_drop': False,
    'colsample_bytree': 0.5,
    'drop_rate': 0.173,
    'learning_rate': 0.0267,
    'max_drop': 5,
    'min_child_samples': 40,
    'min_child_weight': 150,
    'min_split_gain': 0.15,
    'num_leaves': 6,
    'reg_alpha': 0.1,
    'reg_lambda': 0.001,
    'skip_drop': 0.44,
    'subsample': 0.75,
}

# One row per training object, one column per class.
oof_preds = np.zeros((len(temp), len(classes)))
for fold_, (trn_, val_) in enumerate(folds.split(y, y)):
    trn_x, trn_y = temp.iloc[trn_], y.iloc[trn_]
    val_x, val_y = temp.iloc[val_], y.iloc[val_]

    clf = lgb.LGBMClassifier(**lgb_params)
    clf.fit(
        trn_x, trn_y,
        eval_set=[(trn_x, trn_y), (val_x, val_y)],
        eval_metric=lgb_multi_weighted_logloss,
        verbose=False,
        early_stopping_rounds=50,
        sample_weight=trn_y.map(weights)
    )
    oof_preds[val_, :] = clf.predict_proba(val_x, num_iteration=clf.best_iteration_)
    loss_oof = multi_weighted_logloss(val_y, oof_preds[val_, :])
    #loss_list.append(loss_oof)
    print(fold_, loss_oof)

    imp_df = pd.DataFrame()
    imp_df['feature'] = temp.columns
    # clf.feature_importances_ reports split counts under LightGBM's default
    # importance_type='split'; ask the booster for the total gain so that the
    # 'gain' column really holds gain.
    imp_df['gain'] = clf.booster_.feature_importance(importance_type='gain')
    imp_df['fold'] = fold_ + 1
    fold_importances.append(imp_df)

    clfs.append(clf)

# Concatenate once instead of re-copying a growing frame on every fold.
importances = pd.concat(fold_importances, axis=0, sort=False)
print('MULTI WEIGHTED LOG LOSS : %.5f ' % multi_weighted_logloss(y_true=y, y_preds=oof_preds))
#final_dict[column_] = loss_list

(7848, 140)
0 0.5786832237518116
1 0.5505031647693882
2 0.6027290480767754
3 0.5567627964106213
4 0.575456193510351
MULTI WEIGHTED LOG LOSS : 0.57283 
CPU times: user 12min 12s, sys: 2.13 s, total: 12min 14s
Wall time: 1min 41s


In [30]:
# Names of the 'div_dif' features of the last CV fold, highest 'gain' first.
ranked_features = imp_df.sort_values('gain', ascending=False)['feature'].tolist()
[name for name in ranked_features if 'div_dif' in name]

['div_dif_A5_A4_plus_1_sigma_dif_A2_A0_plus_1_sigma',
 'div_dif_A5_A4_median_mean_flux_dif_A2_A0_median_mean_flux',
 'div_dif_A5_A4_minus_1_sigma_dif_A2_A1_minus_1_sigma',
 'div_dif_A3_A2_mean_min_flux_dif_A1_A0_mean_min_flux',
 'div_dif_A5_A4_minus_2_sigma_dif_A3_A0_minus_2_sigma',
 'div_dif_A5_A4_max_min_flux_dif_A3_A2_max_min_flux',
 'div_dif_A3_A2_median_mean_flux_dif_A1_A0_median_mean_flux',
 'div_dif_A5_A4_max_min_flux_dif_A1_A0_max_min_flux',
 'div_dif_A4_A3_max_min_flux_dif_A2_A1_max_min_flux',
 'div_dif_A5_A4_median_mean_flux_dif_A3_A2_median_mean_flux',
 'div_dif_A4_A3_max_median_flux_dif_A2_A1_max_median_flux',
 'div_dif_A4_A3_median_mean_flux_dif_A2_A0_median_mean_flux',
 'div_dif_A5_A4_max_mean_flux_dif_A3_A2_max_mean_flux',
 'div_dif_A5_A4_max_median_flux_dif_A1_A0_max_median_flux',
 'div_dif_A5_A4_minus_3_sigma_dif_A2_A0_minus_3_sigma',
 'div_dif_A5_A4_mean_min_flux_dif_A2_A0_mean_min_flux',
 'div_dif_A5_A4_max_median_flux_dif_A3_A2_max_median_flux',
 'div_dif_A4_A3_minu

In [None]:
# Disabled experiment, kept as a bare string literal so that it never executes:
# for every train_metadata column from position 233 onward it would run the
# 5-fold LightGBM CV with that single column joined onto the base features and
# store the per-fold losses in final_dict[column_].
"""%%time
final_dict = {}

for column_ in list(train_metadata.columns)[233:]:
    loss_list = []
    temp = train_metadata_kaggle.copy()
    temp = temp.merge(train_metadata[['object_id',column_]],on = 'object_id',how = 'left')
    y = temp['target']
    del temp['target']
    classes = sorted(y.unique())

    class_weight = {
        c: 1 for c in classes
    }
    for c in [64, 15]:
        class_weight[c] = 2

    train_id = temp['object_id']
    del temp['object_id']
    # Compute weights
    w = y.value_counts()
    weights = {i : np.sum(w) / w[i] for i in w.index}
    folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=51)
    clfs = []
    importances = pd.DataFrame()
    lgb_params = {
    'random_state':51,
    'device': 'cpu', 
    'objective': 'multiclass', 
    'num_class': 14, 
    'boosting_type': 'gbdt', 
    'n_jobs': -1, 
    'max_depth': 7, 
    'n_estimators': 1000, 
    'subsample_freq': 2, 
    'subsample_for_bin': 5000, 
    'min_data_per_group': 100, 
    'max_cat_to_onehot': 4, 
    'cat_l2': 1.0, 
    'cat_smooth': 59.5, 
    'max_cat_threshold': 32, 
    'metric_freq': 10, 
    'verbosity': -1, 
    'metric': 'multi_logloss', 
    'xgboost_dart_mode': False, 
    'uniform_drop': False, 
    'colsample_bytree': 0.5, 
    'drop_rate': 0.173, 
    'learning_rate': 0.0267, 
    'max_drop': 5, 
    'min_child_samples': 10,
    'min_child_weight': 200.0, 
    #'min_child_weight': 100.0, 
    'min_split_gain': 0.1, 
    'num_leaves': 7, 
    #'reg_alpha': 0.1,
    'reg_alpha': 0.0, 
    'reg_lambda': 0.00023, 
    'skip_drop': 0.44, 
    'subsample': 0.75}
    oof_preds = np.zeros((len(temp), np.unique(y).shape[0]))
    for fold_, (trn_, val_) in enumerate(folds.split(y, y)):
        trn_x, trn_y = temp.iloc[trn_], y.iloc[trn_]
        val_x, val_y = temp.iloc[val_], y.iloc[val_]

        clf = lgb.LGBMClassifier(**lgb_params)
        clf.fit(
            trn_x, trn_y,
            eval_set=[(trn_x, trn_y), (val_x, val_y)],
            eval_metric=lgb_multi_weighted_logloss,
            verbose=False,
            early_stopping_rounds=50,
            sample_weight=trn_y.map(weights)
        )
        oof_preds[val_, :] = clf.predict_proba(val_x, num_iteration=clf.best_iteration_)
        loss_oof = multi_weighted_logloss(val_y, oof_preds[val_, :])
        loss_list.append(loss_oof)
        #print(fold_,loss_oof)

        imp_df = pd.DataFrame()
        imp_df['feature'] = temp.columns
        imp_df['gain'] = clf.feature_importances_
        imp_df['fold'] = fold_ + 1
        importances = pd.concat([importances, imp_df], axis=0, sort=False)

        clfs.append(clf)
    print(column_,'MULTI WEIGHTED LOG LOSS : %.5f ' % multi_weighted_logloss(y_true=y, y_preds=oof_preds))
    final_dict[column_] = loss_list"""

In [None]:
# Disabled post-processing, kept as a bare string literal so that it never executes:
# it would turn final_dict into a frame with one row per column name and the
# losses of folds 1-5, flag each fold whose loss exceeds a hard-coded per-fold
# threshold, and count the flags in fold_sum.
# NOTE(review): the thresholds are presumably the per-fold losses of a reference
# run - confirm before re-enabling.
"""
final_dict2 = pd.Series(final_dict)
final_dict2 = pd.DataFrame(final_dict2)
final_dict2['fold1'] = final_dict2[0].apply(lambda x: x[0])
final_dict2['fold2'] = final_dict2[0].apply(lambda x: x[1])
final_dict2['fold3'] = final_dict2[0].apply(lambda x: x[2])
final_dict2['fold4'] = final_dict2[0].apply(lambda x: x[3])
final_dict2['fold5'] = final_dict2[0].apply(lambda x: x[4])

del final_dict2[0]

final_dict2 = final_dict2.reset_index(drop=False)

final_dict2 = final_dict2.rename(columns={'index':'column_name'})

final_dict2['fold1_1'] = (final_dict2['fold1'] > 0.62756) * 1
final_dict2['fold2_1'] = (final_dict2['fold2'] > 0.59905) * 1
final_dict2['fold3_1'] = (final_dict2['fold3'] > 0.65624) * 1
final_dict2['fold4_1'] = (final_dict2['fold4'] > 0.60978) * 1
final_dict2['fold5_1'] = (final_dict2['fold5'] > 0.61012) * 1
final_dict2['fold_sum'] = final_dict2['fold1_1']+ final_dict2['fold2_1'] + final_dict2['fold3_1'] + final_dict2['fold4_1'] + final_dict2['fold5_1']
"""

In [None]:
#final_dict2[final_dict2['column_name'].isin(most_imp_ones)]
#final_dict2[final_dict2['fold_sum'] == 1]

In [None]:
#final_dict2.head()

In [None]:
#final_dict2.to_csv('final_features3.csv',index=False)

In [None]:
#modify to work with kfold
#def smoteAdataset(Xig, yig, test_size=0.2, random_state=0):
def smoteAdataset(Xig_train, yig_train, Xig_test, yig_test):
    """Oversample the training fold with SMOTE and pass the test fold through.

    Only the training data is resampled; the test features are returned
    untouched, so no synthetic rows end up in the validation fold.

    Parameters
    ----------
    Xig_train : array-like
        Training features.
    yig_train : array-like
        Training labels; flattened to 1-D before resampling.
    Xig_test : array-like
        Validation features, returned unchanged.
    yig_test : array-like
        Validation labels, returned wrapped in a pandas Series.

    Returns
    -------
    tuple
        (resampled training features, resampled training labels as pd.Series,
        Xig_test, pd.Series(yig_test)).
    """
    sm = SMOTE(random_state=51)
    # imbalanced-learn deprecated fit_sample in favour of fit_resample and later
    # removed it; prefer the current name and fall back for old installations.
    resample = getattr(sm, 'fit_resample', None) or sm.fit_sample
    # np.ravel also accepts lists and pandas Series, unlike the .ravel() method.
    Xig_train_res, yig_train_res = resample(Xig_train, np.ravel(yig_train))

    return Xig_train_res, pd.Series(yig_train_res), Xig_test, pd.Series(yig_test)

In [None]:
%%time
# Same 5-fold LightGBM cross-validation as the plain CV cell, but each training
# fold is oversampled with SMOTE (smoteAdataset) before fitting; validation folds
# are left as they are. Leaves behind: clfs, oof_preds, importances and imp_df.
final_dict = {}
loss_list = []

# Feature matrix = base features + candidate columns, joined on object_id.
#temp = temp.merge(train_metadata[['object_id'] + most_imp_ones2 ],on = 'object_id',how = 'left')
feature_cols = ['object_id'] + used_columns1
temp = train_metadata_kaggle.copy()
temp = temp.merge(train_metadata[feature_cols], on='object_id', how='left')

print(temp.shape)
# Missing values are replaced with 0 before the frame is handed to SMOTE.
temp = temp.fillna(0)

# Split the label off so that `temp` ends up holding model inputs only.
y = temp['target']
del temp['target']
classes = sorted(y.unique())

# Taken from Giba's topic : https://www.kaggle.com/titericz
# https://www.kaggle.com/c/PLAsTiCC-2018/discussion/67194
# with Kyle Boone's post https://www.kaggle.com/kyleboone
# NOTE(review): class_weight is not read anywhere in this cell; it is kept in case
# the metric helpers defined elsewhere in the notebook read it as a global - confirm.
class_weight = {c: 1 for c in classes}
for c in [64, 15]:
    class_weight[c] = 2

#print('Unique classes : ', classes)

train_id = temp['object_id']
del temp['object_id']

# Inverse-frequency sample weights: total number of rows / rows of that class.
w = y.value_counts()
weights = {i: np.sum(w) / w[i] for i in w.index}

folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=51)
clfs = []
fold_importances = []
lgb_params = {
    'random_state': 51,
    'device': 'cpu',
    'objective': 'multiclass',
    'num_class': 14,
    'boosting_type': 'gbdt',
    'n_jobs': -1,
    'n_estimators': 4000,
    'subsample_freq': 6,
    'subsample_for_bin': 5000,
    'min_data_per_group': 100,
    'max_cat_to_onehot': 4,
    'cat_l2': 1.0,
    'cat_smooth': 59.5,
    'max_cat_threshold': 32,
    'metric_freq': 10,
    'verbosity': -1,
    'metric': 'multi_logloss',
    'xgboost_dart_mode': False,
    'uniform_drop': False,
    'colsample_bytree': 0.5,
    'drop_rate': 0.173,
    'learning_rate': 0.0267,
    'max_drop': 5,
    'min_child_samples': 40,
    'min_child_weight': 150,
    'min_split_gain': 0.15,
    'num_leaves': 6,
    'reg_alpha': 0.1,
    'reg_lambda': 0.001,
    'skip_drop': 0.44,
    'subsample': 0.75,
}

# One row per training object, one column per class.
oof_preds = np.zeros((len(temp), len(classes)))
for fold_, (trn_, val_) in enumerate(folds.split(y, y)):
    trn_x, trn_y = temp.iloc[trn_], y.iloc[trn_]
    val_x, val_y = temp.iloc[val_], y.iloc[val_]

    # Oversample the training fold only; the helper works on plain arrays, so the
    # column names are put back afterwards.
    trn_xa, trn_y, val_xa, val_y = smoteAdataset(trn_x.values, trn_y.values, val_x.values, val_y.values)
    trn_x = pd.DataFrame(data=trn_xa, columns=trn_x.columns)
    val_x = pd.DataFrame(data=val_xa, columns=val_x.columns)

    print(trn_x.shape, trn_y.shape, val_x.shape, val_y.shape)

    clf = lgb.LGBMClassifier(**lgb_params)
    # NOTE(review): `weights` are inverse class frequencies of the original labels,
    # while trn_y has already been resampled by SMOTE, so rare classes may be
    # compensated twice - confirm this is intended.
    clf.fit(
        trn_x, trn_y,
        eval_set=[(trn_x, trn_y), (val_x, val_y)],
        eval_metric=lgb_multi_weighted_logloss,
        verbose=False,
        early_stopping_rounds=50,
        sample_weight=trn_y.map(weights)
    )
    oof_preds[val_, :] = clf.predict_proba(val_x, num_iteration=clf.best_iteration_)
    loss_oof = multi_weighted_logloss(val_y, oof_preds[val_, :])
    #loss_list.append(loss_oof)
    print(fold_, loss_oof)

    imp_df = pd.DataFrame()
    imp_df['feature'] = temp.columns
    # clf.feature_importances_ reports split counts under LightGBM's default
    # importance_type='split'; ask the booster for the total gain so that the
    # 'gain' column really holds gain.
    imp_df['gain'] = clf.booster_.feature_importance(importance_type='gain')
    imp_df['fold'] = fold_ + 1
    fold_importances.append(imp_df)

    clfs.append(clf)

# Concatenate once instead of re-copying a growing frame on every fold.
importances = pd.concat(fold_importances, axis=0, sort=False)
print('MULTI WEIGHTED LOG LOSS : %.5f ' % multi_weighted_logloss(y_true=y, y_preds=oof_preds))
#final_dict[column_] = loss_list

In [None]:
#used_columns = used_columns1 + used_columns2 + used_columns3 

In [None]:
# Display the engineered feature column(s) selected for the final model.
used_columns1

In [None]:
# Ratio features for the test table: each new column is numerator / denominator
# of two existing test_metadata columns. Listed as (new column, numerator, denominator).
ratio_specs = [
    ('div_A4_A2_median_min_flux', 'A4_median_min_diff_flux', 'A2_median_min_diff_flux'),
    ('div_A5_A2_median_min_flux', 'A5_median_min_diff_flux', 'A2_median_min_diff_flux'),
    ('div_A5_A2_minus_1_sigma', 'A5_minus_1_sigma', 'A2_minus_1_sigma'),
    ('div_A5_A4_median_mean_flux', 'A5_median_mean_diff_flux', 'A4_median_mean_diff_flux'),
    ('div_A3_A0_plus_1_sigma', 'A3_plus_1_sigma', 'A0_plus_1_sigma'),
    ('div_A4_A1_minus_1_sigma', 'A4_minus_1_sigma', 'A1_minus_1_sigma'),
]
for ratio_col, numerator_col, denominator_col in ratio_specs:
    test_metadata[ratio_col] = test_metadata[numerator_col] / test_metadata[denominator_col]

In [None]:
# Shape check of both metadata tables (the ratio cell adds columns to test_metadata only).
print(train_metadata.shape,test_metadata.shape)

In [None]:
#train_metadata.drop([x for x in train_metadata.columns if x not in ['object_id'] + used_columns ] ,axis = 1,inplace=True)

In [None]:
#test_metadata = test_metadata[[x for x in test_metadata.columns if x in ['object_id'] + used_columns ]]

In [None]:
# Append the selected engineered columns to the Kaggle feature tables (left join
# on object_id). Only columns that are not present yet are merged: the result is
# later written back to the CSV files this notebook loads at its start, so a
# re-run would otherwise merge an existing column again and produce duplicated
# '<name>_x' / '<name>_y' columns.
new_train_cols = [col for col in used_columns1 if col not in train_metadata_kaggle.columns]
new_test_cols = [col for col in used_columns1 if col not in test_metadata_kaggle.columns]
if new_train_cols:
    train_metadata_kaggle = train_metadata_kaggle.merge(
        train_metadata[['object_id'] + new_train_cols], on='object_id', how='left')
if new_test_cols:
    test_metadata_kaggle = test_metadata_kaggle.merge(
        test_metadata[['object_id'] + new_test_cols], on='object_id', how='left')

In [None]:
# Shape check of the two feature tables after the merge.
print(train_metadata_kaggle.shape,test_metadata_kaggle.shape)

In [None]:
# Preview the first rows of the training feature table.
train_metadata_kaggle.head()

In [None]:
# Preview the first rows of the test feature table.
test_metadata_kaggle.head()

In [None]:
%%time
# Persist both feature tables to CSV without the index.
# NOTE: these are the same file names the notebook reads when it starts, so the
# inputs of the next run are overwritten here.
csv_targets = (
    (train_metadata_kaggle, 'mydata_train_metadata.csv'),
    (test_metadata_kaggle, 'mydata_test_metadata.csv'),
)
for frame, csv_path in csv_targets:
    frame.to_csv(csv_path, index=False)

In [None]:
# Five highest-ranked features by the 'gain' column. imp_df holds the last CV fold
# only; the rows of all folds are collected in `importances`.
imp_df.sort_values('gain',ascending=False).head()

In [None]:
# Work on a copy so that test_metadata_kaggle itself keeps its object_id column.
temp_test = test_metadata_kaggle.copy()

In [None]:
# Drop the identifier, as was done for the training matrix `temp`.
del temp_test['object_id']

In [None]:
# Replace missing values with 0 in place, mirroring the fillna(0) applied to
# `temp` in the SMOTE training cell.
temp_test.fillna(0,inplace = True)

In [None]:
# The training and test matrices are expected to have the same number of columns.
print(temp.shape,temp_test.shape)

In [None]:
# Displays True only when train and test have identical column names in identical order.
list(temp.columns) == list(temp_test.columns)

In [None]:
%%time
# Predict class probabilities for the whole test matrix with every fold model.
# Rows are scored in chunks of CHUNK_ROWS to bound peak memory. The chunks of a
# model are concatenated once, rather than re-concatenating a growing frame after
# every chunk (which copies all accumulated rows each time), and get a fresh
# 0..n-1 index instead of repeated per-chunk labels.
# list_of_df ends up with one probability frame per model in `clfs`, so it no
# longer depends on there being exactly five models; the unused placeholder
# frames test_pred0..test_pred4 are gone.
CHUNK_ROWS = 500000

list_of_df = []
for num, c in enumerate(clfs):
    print(num)
    chunk_preds = [
        pd.DataFrame(c.predict_proba(temp_test[k:k + CHUNK_ROWS]))
        for k in range(0, len(temp_test), CHUNK_ROWS)
    ]
    # pd.concat raises on an empty list; keep an empty frame for an empty test set.
    if chunk_preds:
        list_of_df.append(pd.concat(chunk_preds, axis=0, ignore_index=True))
    else:
        list_of_df.append(pd.DataFrame())
    del chunk_preds

In [None]:
# Ensemble: element-wise mean of the per-model probability frames.
# Averaging over however many frames list_of_df holds (instead of five hard-coded
# indices and a divisor of 5) keeps the result a true mean if the number of fold
# models changes. The frames are added left to right, as before.
fold_sum = list_of_df[0]
for fold_pred in list_of_df[1:]:
    fold_sum = fold_sum + fold_pred
test_pred2 = fold_sum / len(list_of_df)

In [None]:
# Shape check of the averaged prediction frame.
print(test_pred2.shape)

In [None]:
#test_pred2 = pd.DataFrame(np.random.rand(10,14))

In [None]:
# Preview the averaged class probabilities.
test_pred2.head()

In [None]:
# Column order of the submission file: object_id first, then one 'class_<id>'
# column per class id listed below (class_99 last).
class_ids = (6, 15, 16, 42, 52, 53, 62, 64, 65, 67, 88, 90, 92, 95, 99)
temp_columns = ['object_id'] + ['class_%d' % class_id for class_id in class_ids]

In [None]:
# Name the 14 probability columns: the slice [1:15] skips 'object_id' at the front
# and 'class_99' at the end of temp_columns.
# NOTE(review): assumes the predict_proba column order matches this ascending
# class-id order - confirm against clf.classes_.
test_pred2.columns = temp_columns[1:15]

In [None]:
def getUnknown(data):
    """Combine row statistics into a single score.

    `data` must provide "mymedian", "mymean" and "mymax" (mapping or DataFrame).
    The result is the mean of two terms:
      * the mean of the median and half of the mean, and
      * half of (1 - max cubed).
    """
    half_mean = data["mymean"] / 2.0
    centre_term = (data["mymedian"] + half_mean) / 2.0
    # Cube spelled out as nested products, in the same order as before.
    max_cubed = data["mymax"] * (data["mymax"] * data["mymax"])
    headroom_term = (1.0 - max_cubed) / 2.0
    return (centre_term + headroom_term) / 2.0

# Probability columns whose row statistics feed getUnknown (class_99 is not among them).
feats = ['class_%d' % class_id
         for class_id in (6, 15, 16, 42, 52, 53, 62, 64, 65, 67, 88, 90, 92, 95)]

In [None]:
# Row-wise mean, median and max of the class probabilities in `feats`;
# these three columns are the inputs getUnknown reads.
class_probs = test_pred2[feats]
klm = pd.DataFrame()
for stat_col, reducer in (('mymean', 'mean'), ('mymedian', 'median'), ('mymax', 'max')):
    klm[stat_col] = getattr(class_probs, reducer)(axis=1)

In [None]:
# class_99 score derived by getUnknown from the row statistics in klm.
test_pred2['class_99'] = getUnknown(klm)

In [None]:
# Preview the last rows, now including class_99.
test_pred2.tail()

In [None]:
# Replace the index with a fresh 0..n-1 range so the rows can be lined up with test_id.
test_pred2 = test_pred2.reset_index(drop=True)

In [None]:
# Row counts of predictions and ids must match before they are combined.
# NOTE(review): test_id is not defined in this part of the notebook; presumably it
# holds the test object_id values - confirm it exists on a fresh Restart & Run All.
print(test_pred2.shape,test_id.shape)

In [None]:
# Preview the last ids.
test_id.tail()

In [None]:
# Give test_id the same fresh 0..n-1 index as test_pred2.
test_id = test_id.reset_index(drop=True)

In [None]:
# Element-wise index comparison (displays a boolean array); every entry should be
# True before the two objects are concatenated column-wise.
test_id.index == test_pred2.index

In [None]:
%%time
# Put the ids next to the probabilities; axis=1 concat aligns rows on the index,
# which was reset on both objects in the cells above.
test_pred = pd.concat([test_id,test_pred2],axis=1)

In [None]:
# Select the columns in submission order (temp_columns).
# NOTE(review): relies on test_id contributing a column named 'object_id' - confirm.
test_pred = test_pred[temp_columns]

In [None]:
# Preview the submission frame.
test_pred.head()

In [None]:
# Final shape check: one column per entry in temp_columns is expected.
print(test_pred.shape)

In [None]:
%%time
# Write the submission file without the index column.
test_pred.to_csv('test_pred_32.csv',index=False)

In [None]:
#!kaggle competitions submit -c PLAsTiCC-2018 -f test_pred_32.csv -m "Message"