In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.metrics import log_loss
from sklearn.model_selection import StratifiedKFold
import gc
import os
import matplotlib.pyplot as plt
import seaborn as sns
import lightgbm as lgb
import itertools
import pickle, gzip
import glob
from sklearn.preprocessing import StandardScaler
from tsfresh.feature_extraction import extract_features
np.warnings.filterwarnings('ignore')
import dask.dataframe as dd
import missingno as msno
from pandasql import sqldf
from sklearn.metrics import mean_squared_error
from math import sqrt
from sklearn.model_selection import KFold
import matplotlib.gridspec as gridspec
from sklearn import preprocessing
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split

In [2]:
#Always seed the randomness of this universe
np.random.seed(51)

In [3]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [4]:
%%time
train_metadata_kaggle = dd.read_csv('mydata_train_metadata.csv')
test_metadata_kaggle = dd.read_csv('mydata_test_metadata.csv')
train_metadata_kaggle = train_metadata_kaggle.compute()
test_metadata_kaggle = test_metadata_kaggle.compute()
print(train_metadata_kaggle.shape,test_metadata_kaggle.shape)

(7848, 133) (3492890, 132)
CPU times: user 3min 18s, sys: 11.7 s, total: 3min 30s
Wall time: 40.9 s


In [5]:
%%time
train_metadata = dd.read_csv('train_metadata_final.csv')
test_metadata = dd.read_csv('test_metadata_final.csv')
train_metadata = train_metadata.compute()
test_metadata = test_metadata.compute()
print(train_metadata.shape,test_metadata.shape)

(7848, 233) (3492890, 232)
CPU times: user 4min 2s, sys: 13.9 s, total: 4min 16s
Wall time: 51.8 s


In [6]:
%%time
test_metadata_kaggle = test_metadata_kaggle.reset_index(drop=True)
test_metadata = test_metadata.reset_index(drop=True)

CPU times: user 6.21 s, sys: 9.57 s, total: 15.8 s
Wall time: 15.9 s


In [7]:
print(train_metadata_kaggle['object_id'].equals(train_metadata['object_id']))

True


In [8]:
print(test_metadata_kaggle['object_id'].equals(test_metadata['object_id']))

True


In [9]:
temp_columns = ['object_id','A0_max_flux','A0_min_flux','A0_mean_flux','A0_median_flux','A0_std_flux',
'A1_max_flux','A1_min_flux','A1_mean_flux','A1_median_flux','A1_std_flux',
'A2_max_flux','A2_min_flux','A2_mean_flux','A2_median_flux','A2_std_flux',
'A3_max_flux','A3_min_flux','A3_mean_flux','A3_median_flux','A3_std_flux',
'A4_max_flux','A4_min_flux','A4_mean_flux','A4_median_flux','A4_std_flux',
'A5_max_flux','A5_min_flux','A5_mean_flux','A5_median_flux','A5_std_flux']

In [10]:
train_metadata.head()

Unnamed: 0,object_id,ra,decl,gal_l,gal_b,ddf,hostgal_specz,hostgal_photoz,hostgal_photoz_err,distmod,mwebv,target,NG_min_flux_err,NG_max_flux_err,NG_std_flux_err,NG_sum_flux_err,NG_mean_flux_err,NG_median_flux_err,NG_min_flux,NG_max_flux,NG_std_flux,NG_sum_flux,NG_mean_flux,NG_median_flux,NG_count_detected,NG_std_detected,NG_sum_detected,NG_mean_detected,NG_median_detected,A0_min_flux_err,A0_max_flux_err,A0_std_flux_err,A0_sum_flux_err,A0_mean_flux_err,A0_median_flux_err,A0_min_flux,A0_max_flux,A0_std_flux,A0_sum_flux,A0_mean_flux,A0_median_flux,A0_count_detected,A0_std_detected,A0_sum_detected,A0_mean_detected,A0_median_detected,A1_min_flux_err,A1_max_flux_err,A1_std_flux_err,A1_sum_flux_err,A1_mean_flux_err,A1_median_flux_err,A1_min_flux,A1_max_flux,A1_std_flux,A1_sum_flux,A1_mean_flux,A1_median_flux,A1_count_detected,A1_std_detected,A1_sum_detected,A1_mean_detected,A1_median_detected,A2_min_flux_err,A2_max_flux_err,A2_std_flux_err,A2_sum_flux_err,A2_mean_flux_err,A2_median_flux_err,A2_min_flux,A2_max_flux,A2_std_flux,A2_sum_flux,A2_mean_flux,A2_median_flux,A2_count_detected,A2_std_detected,A2_sum_detected,A2_mean_detected,A2_median_detected,A3_min_flux_err,A3_max_flux_err,A3_std_flux_err,A3_sum_flux_err,A3_mean_flux_err,A3_median_flux_err,A3_min_flux,A3_max_flux,A3_std_flux,A3_sum_flux,A3_mean_flux,A3_median_flux,A3_count_detected,A3_std_detected,A3_sum_detected,A3_mean_detected,A3_median_detected,A4_min_flux_err,A4_max_flux_err,A4_std_flux_err,A4_sum_flux_err,A4_mean_flux_err,A4_median_flux_err,A4_min_flux,A4_max_flux,A4_std_flux,A4_sum_flux,A4_mean_flux,A4_median_flux,A4_count_detected,A4_std_detected,A4_sum_detected,A4_mean_detected,A4_median_detected,A5_min_flux_err,A5_max_flux_err,A5_std_flux_err,A5_sum_flux_err,A5_mean_flux_err,A5_median_flux_err,A5_min_flux,A5_max_flux,A5_std_flux,A5_sum_flux,A5_mean_flux,A5_median_flux,A5_count_detected,A5_std_detected,A5_sum_detected,A5_mean_detected,A5_median_detected,p0_region_minus_4,p0_region_minus_3,p0_region_minus_2,p0_region_minus_1,p0_region_plus_1,p0_region_plus_2,p0_region_plus_3,p0_region_plus_4,p1_region_minus_4,p1_region_minus_3,p1_region_minus_2,p1_region_minus_1,p1_region_plus_1,p1_region_plus_2,p1_region_plus_3,p1_region_plus_4,p2_region_minus_4,p2_region_minus_3,p2_region_minus_2,p2_region_minus_1,p2_region_plus_1,p2_region_plus_2,p2_region_plus_3,p2_region_plus_4,p3_region_minus_4,p3_region_minus_3,p3_region_minus_2,p3_region_minus_1,p3_region_plus_1,p3_region_plus_2,p3_region_plus_3,p3_region_plus_4,p4_region_minus_4,p4_region_minus_3,p4_region_minus_2,p4_region_minus_1,p4_region_plus_1,p4_region_plus_2,p4_region_plus_3,p4_region_plus_4,p5_region_minus_4,p5_region_minus_3,p5_region_minus_2,p5_region_minus_1,p5_region_plus_1,p5_region_plus_2,p5_region_plus_3,p5_region_plus_4,number_of_0s,number_of_1s,number_of_2s,number_of_3s,number_of_4s,number_of_5s,percent_p0_region_minus_4,percent_p0_region_minus_3,percent_p0_region_minus_2,percent_p0_region_minus_1,percent_p0_region_plus_1,percent_p0_region_plus_2,percent_p0_region_plus_3,percent_p0_region_plus_4,percent_p1_region_minus_4,percent_p1_region_minus_3,percent_p1_region_minus_2,percent_p1_region_minus_1,percent_p1_region_plus_1,percent_p1_region_plus_2,percent_p1_region_plus_3,percent_p1_region_plus_4,percent_p2_region_minus_4,percent_p2_region_minus_3,percent_p2_region_minus_2,percent_p2_region_minus_1,percent_p2_region_plus_1,percent_p2_region_plus_2,percent_p2_region_plus_3,percent_p2_region_plus_4,percent_p3_region_minus_4,percent_p3_region_minus_3,percent_p3_region_minus_2,percent_p3_region_minus_1,percent_p3_region_plus_1,percent_p3_region_plus_2,percent_p3_region_plus_3,percent_p3_region_plus_4,percent_p4_region_minus_4,percent_p4_region_minus_3,percent_p4_region_minus_2,percent_p4_region_minus_1,percent_p4_region_plus_1,percent_p4_region_plus_2,percent_p4_region_plus_3,percent_p4_region_plus_4,percent_p5_region_minus_4,percent_p5_region_minus_3,percent_p5_region_minus_2,percent_p5_region_minus_1,percent_p5_region_plus_1,percent_p5_region_plus_2,percent_p5_region_plus_3,percent_p5_region_plus_4
0,615,349.0,-61.94,320.8,-51.75,1,0.0,0.0,0.0,,0.017,92,2.13,12.845472,1.744747,1577.9254,4.482743,3.836,-1100.4401,660.62634,394.10986,-43330.145,-123.097,-89.5,352,0.2263,333.0,0.946,1.0,2.8442,4.737393,0.493621,240.87724,3.823448,3.86638,-116.91322,125.18281,83.94473,-205.03693,-3.254554,-10.015225,63,0.3528,54,0.857,1.0,3.035,6.953,1.168,291.6828,5.027,4.98,-1100.0,660.62634,601.7873,-22370.596,-385.8,-488.0,58,0.1841,56,0.9653,1.0,2.13,4.332,0.5796,196.0,3.38,3.389,-682.0,611.98456,455.12134,-7780.501,-134.14656,-265.8,58,0.1313,57,0.983,1.0,2.5,4.01,0.3474,193.72672,3.34,3.389,-530.5,445.73706,335.42505,-7024.003,-121.1035,-162.1,58,0.1313,57,0.983,1.0,2.9,11.4,1.089,220.32811,3.799,3.625,-422.1845,381.95374,291.80344,-3245.3665,-55.954594,-103.54137,58,0.1313,57,0.983,1.0,6.074,12.84,1.318,435.25317,7.637,7.31,-422.8151,378.18814,294.7795,-2704.6414,-47.44985,-85.52431,57,0.2854,52,0.912,1.0,,,16.0,17.0,13.0,17.0,,,,,11.0,22.0,13.0,12.0,,,,,12.0,21.0,10.0,15.0,,,,,14.0,17.0,13.0,14.0,,,,,14.0,18.0,12.0,14.0,,,,,17.0,13.0,12.0,15.0,,,63.0,58.0,58.0,58.0,58.0,57.0,,,0.254,0.2698,0.2063,0.2698,,,,,0.1897,0.3794,0.2241,0.2069,,,,,0.2069,0.362,0.1724,0.2585,,,,,0.2413,0.2932,0.2241,0.2413,,,,,0.2413,0.3103,0.2069,0.2413,,,,,0.2983,0.228,0.2106,0.2632,,
1,713,53.1,-27.78,223.5,-54.47,1,1.818,1.627,0.2551,45.4,0.007,88,0.6396,9.115748,1.509888,825.867,2.35962,1.998,-14.735178,14.770886,6.471144,-498.17276,-1.423351,-0.873,350,0.3774,60.0,0.1714,0.0,1.470152,3.348282,0.411563,163.19803,2.3314,2.273303,-14.735178,14.509829,7.113509,-190.42786,-2.720398,-3.096805,70,0.3525,10,0.1428,0.0,0.707,3.545,0.707,79.35102,1.417,1.115,-11.72,9.129021,5.712334,-57.109047,-1.02,-0.5615,56,0.4468,15,0.2678,0.0,0.6396,2.531,0.4397,66.8,1.193,1.0625,-10.07,10.529041,5.770738,-44.477325,-0.794238,-0.118,56,0.4468,15,0.2678,0.0,0.9683,2.97,0.427,91.848694,1.641,1.63,-12.4,11.330316,6.450413,-55.270115,-0.986966,-0.0739,56,0.4468,15,0.2678,0.0,1.43,3.555,0.5015,127.05581,2.27,2.217,-12.286801,9.827934,6.406989,-50.414646,-0.900261,-0.792176,56,0.2878,5,0.0893,0.0,3.523,9.12,1.1875,297.60904,5.312,5.082,-14.211164,14.770886,7.094073,-100.47377,-1.794175,-2.463012,56,0.0,0,0.0,0.0,,,13.0,22.0,23.0,11.0,1.0,,,,13.0,14.0,19.0,10.0,,,,,11.0,14.0,21.0,10.0,,,,,12.0,11.0,23.0,10.0,,,,,10.0,18.0,18.0,10.0,,,,,12.0,18.0,16.0,9.0,1.0,,70.0,56.0,56.0,56.0,56.0,56.0,,,0.1857,0.3142,0.3286,0.1571,0.01428,,,,0.2322,0.25,0.3394,0.1786,,,,,0.1964,0.25,0.375,0.1786,,,,,0.2142,0.1964,0.4106,0.1786,,,,,0.1786,0.3215,0.3215,0.1786,,,,,0.2142,0.3215,0.2856,0.1608,0.01785,
2,730,33.56,-6.58,170.5,-61.56,1,0.232,0.2262,0.0157,40.25,0.021,42,0.6953,11.281384,1.721134,815.45026,2.471062,1.991,-19.159811,47.31006,8.022239,748.25323,2.267434,0.4092,330,0.2551,23.0,0.0697,0.0,1.132809,3.110694,0.42808,148.17484,2.057984,2.020452,-3.45996,5.942166,1.828872,-3.46179,-0.04808,0.024093,72,0.0,0,0.0,0.0,0.6987,3.564,0.7837,75.164604,1.445,1.134,-3.393,5.693109,1.807229,7.334944,0.1411,0.1714,52,0.0,0,0.0,0.0,0.6953,2.484,0.4487,65.25,1.255,1.115,-2.85,20.99471,5.559483,124.84525,2.40087,0.4917,52,0.3447,7,0.1346,0.0,1.143,3.25,0.4758,92.66059,1.782,1.654,-5.438,33.5721,8.191987,168.28052,3.236164,0.6606,52,0.3447,7,0.1346,0.0,1.614,3.994,0.5444,127.618454,2.502,2.363,-5.83631,41.15998,10.710344,219.74513,4.308728,1.004354,51,0.3003,5,0.098,0.0,4.176,11.28,1.31,306.58023,6.01,5.71,-19.159811,47.31006,13.332758,231.50917,4.539396,2.542647,51,0.2715,4,0.0784,0.0,,,14.0,22.0,27.0,8.0,,1.0,,,6.0,19.0,19.0,7.0,,1.0,,,,40.0,6.0,2.0,1.0,3.0,,,1.0,38.0,9.0,1.0,,3.0,,,,39.0,7.0,1.0,1.0,3.0,,,4.0,28.0,14.0,1.0,2.0,2.0,72.0,52.0,52.0,52.0,51.0,51.0,,,0.1945,0.3057,0.375,0.1111,,0.013885,,,0.11536,0.3655,0.3655,0.1346,,0.01923,,,,0.769,0.11536,0.03845,0.01923,0.05768,,,0.01923,0.731,0.1731,0.01923,,0.05768,,,,0.7646,0.1372,0.0196,0.0196,0.05884,,,0.0784,0.549,0.2744,0.0196,0.0392,0.0392
3,745,0.1898,-45.6,328.2,-69.0,1,0.3037,0.2812,1.152,40.78,0.007,90,0.5674,55.892746,3.537324,897.007,2.555576,1.82,-15.494463,220.79521,27.558208,3127.1313,8.909205,1.036,351,0.3794,61.0,0.1738,0.0,0.957792,3.093587,0.520573,140.74007,1.954723,1.877306,-3.874349,18.014029,4.374445,129.42166,1.797523,1.056714,72,0.1655,2,0.02777,0.0,0.5674,3.586,0.7637,73.72757,1.316,1.024,-3.62,192.2443,25.964659,320.17404,5.72,0.888,56,0.3337,7,0.125,0.0,0.6045,2.334,0.4482,64.7,1.155,1.027,-2.16,220.79521,31.957998,543.84576,9.711532,0.4243,56,0.4558,16,0.2856,0.0,1.028,2.871,0.4255,93.37897,1.668,1.615,-4.945,203.2507,34.967697,807.1238,14.412925,1.361,56,0.4558,16,0.2856,0.0,1.588,24.75,3.03,156.04224,2.787,2.316,-15.494463,183.63312,33.069054,735.52844,13.134436,1.27015,56,0.426,13,0.2322,0.0,3.738,55.9,6.926,368.409,6.7,5.4,-10.249387,141.51329,26.06013,591.0376,10.746138,2.749555,55,0.3364,7,0.1273,0.0,,,6.0,38.0,21.0,2.0,3.0,2.0,,,,49.0,5.0,1.0,,1.0,,,,43.0,11.0,,1.0,1.0,,,,43.0,9.0,1.0,2.0,1.0,,,,43.0,8.0,1.0,3.0,1.0,,,,40.0,10.0,1.0,3.0,1.0,72.0,56.0,56.0,56.0,56.0,55.0,,,0.0833,0.528,0.2917,0.02777,0.04166,0.02777,,,,0.875,0.0893,0.01785,,0.01785,,,,0.768,0.1964,,0.01785,0.01785,,,,0.768,0.1608,0.01785,0.0357,0.01785,,,,0.768,0.1428,0.01785,0.05356,0.01785,,,,0.727,0.1818,0.01819,0.05453,0.01819
4,1124,352.8,-63.8,317.0,-51.06,1,0.1934,0.2415,0.0176,40.4,0.024,90,0.6953,11.38369,1.933838,969.0573,2.753004,2.215,-16.543753,143.60019,20.051722,2515.287,7.145702,1.142,352,0.3792,61.0,0.1733,0.0,1.208098,3.658313,0.566168,141.82178,2.251139,2.153805,-6.804703,5.330927,2.360085,41.63972,0.660948,0.581027,63,0.0,0,0.0,0.0,0.6953,3.73,0.88,98.694405,1.701,1.306,-2.623,37.170177,8.107525,268.80893,4.633,1.154,58,0.4207,13,0.2241,0.0,0.7373,2.63,0.509,82.6,1.424,1.259,-2.084,106.67169,21.319853,594.15015,10.243968,0.889,58,0.4668,18,0.3103,0.0,1.192,2.857,0.4622,111.78342,1.928,1.853,-2.8,139.8184,26.270649,643.0202,11.086555,1.014,58,0.451,16,0.276,0.0,1.848,10.42,1.133,162.31221,2.799,2.6,-16.543753,143.60019,26.865913,574.5539,9.906102,1.745012,58,0.381,10,0.1724,0.0,4.59,11.38,1.439,371.84573,6.523,6.03,-10.86054,109.157585,21.434628,393.11426,6.896741,1.973272,57,0.2578,4,0.0702,0.0,1.0,1.0,7.0,23.0,22.0,9.0,,,,,,40.0,12.0,3.0,1.0,2.0,,,,41.0,11.0,3.0,1.0,2.0,,,,44.0,9.0,2.0,1.0,2.0,,,,46.0,9.0,,1.0,2.0,,,,44.0,10.0,,1.0,2.0,63.0,58.0,58.0,58.0,58.0,57.0,0.01587,0.01587,0.1111,0.365,0.349,0.1428,,,,,,0.6895,0.2069,0.05173,0.01724,0.0345,,,,0.707,0.1897,0.05173,0.01724,0.0345,,,,0.759,0.1552,0.0345,0.01724,0.0345,,,,0.793,0.1552,,0.01724,0.0345,,,,0.772,0.1754,,0.01755,0.0351


In [11]:
train_metadata.drop([x for x in train_metadata.columns if x not in temp_columns], axis=1,inplace=True)
gc.collect()

525

In [12]:
test_metadata.drop([x for x in test_metadata.columns if x not in temp_columns], axis=1,inplace=True)
gc.collect()

7

In [13]:
print(train_metadata.shape,test_metadata.shape)

(7848, 31) (3492890, 31)


In [14]:
test_metadata_kaggle.tail()

Unnamed: 0,object_id,flux_min,flux_max,flux_mean,flux_median,flux_std,flux_skew,flux_err_min,flux_err_max,flux_err_mean,flux_err_median,flux_err_std,flux_err_skew,detected_mean,flux_ratio_sq_sum,flux_ratio_sq_skew,flux_by_flux_ratio_sq_sum,flux_by_flux_ratio_sq_skew,flux_w_mean,flux_diff1,flux_diff2,flux_diff3,"0__fft_coefficient__coeff_0__attr_""abs""","0__fft_coefficient__coeff_1__attr_""abs""",0__kurtosis,0__skewness,"1__fft_coefficient__coeff_0__attr_""abs""","1__fft_coefficient__coeff_1__attr_""abs""",1__kurtosis,1__skewness,"2__fft_coefficient__coeff_0__attr_""abs""","2__fft_coefficient__coeff_1__attr_""abs""",2__kurtosis,2__skewness,"3__fft_coefficient__coeff_0__attr_""abs""","3__fft_coefficient__coeff_1__attr_""abs""",3__kurtosis,3__skewness,"4__fft_coefficient__coeff_0__attr_""abs""","4__fft_coefficient__coeff_1__attr_""abs""",4__kurtosis,4__skewness,"5__fft_coefficient__coeff_0__attr_""abs""","5__fft_coefficient__coeff_1__attr_""abs""",5__kurtosis,5__skewness,flux__length,flux__longest_strike_above_mean,flux__longest_strike_below_mean,flux__mean_abs_change,flux__mean_change,flux_by_flux_ratio_sq__longest_strike_above_mean,flux_by_flux_ratio_sq__longest_strike_below_mean,mjd__mean_abs_change,mjd__mean_change,mjd_diff_det,hostgal_photoz,hostgal_photoz_err,distmod,mwebv,haversine,latlon1,hostgal_photoz_certain,A0_sum_flux,A0_mean_flux,A0_std_detected,A1_mean_detected,A2_sum_detected,A4_mean_detected,A5_std_detected,A5_mean_detected,percent_p2_region_minus_1,A2_min_flux,A5_sum_detected,__flux_percentile_ratio_mid50___5_,__flux_percentile_ratio_mid65___2_,__median_absolute_deviation___2_,__qso_log_chi2_qsonu___0_,__stetson_k___1_,__freq1_signif___2_,__stetson_k___2_,__freq3_amplitude1___1_,__median_absolute_deviation___2_.1,__percent_close_to_median___2_,__freq_varrat___5_,__freq_varrat___4_,__qso_log_chi2_qsonu___3_,__qso_log_chi2_qsonu___1_,__qso_log_chi2_qsonu___5_,__std___4_,__freq_varrat___3_,__amplitude___2_,outlierScore,hipd,lipd,highEnergy_transitory_1.0_TF,highEnergy_transitory_1.5_TF,lowEnergy_transitory_1.0_TF,lowEnergy_transitory_1.5_TF,A1_minus_3_sigma,A5_max_median_diff_flux,A5_minus_3_sigma,A5_max_mean_diff_flux,diff_A5_A4_max_min_flux,diff_A2_A1_max_min_flux,diff_A3_A2_median_min_flux,diff_A5_A4_max_median_flux,diff_A4_A3_max_median_flux,diff_A2_A0_median_min_flux,diff_A4_A3_max_mean_flux,diff_A5_A2_max_mean_flux,diff_A5_A3_max_mean_flux,diff_A4_A0_median_mean_flux,diff_A5_A4_max_mean_flux,diff_A2_A1_max_median_flux,diff_A5_A2_max_median_flux,diff_A5_A4_median_min_flux,diff_A4_A0_median_min_flux,diff_A4_A1_max_median_flux,diff_A4_A2_max_median_flux,diff_A5_A4_minus_1_sigma,diff_A5_A3_median_min_flux,diff_A5_A3_max_median_flux,diff_A3_A1_minus_1_sigma,diff_A3_A0_median_min_flux,diff_A3_A0_plus_1_sigma,diff_A1_A0_median_min_flux,diff_A4_A2_mean_min_flux,diff_A5_A1_plus_1_sigma,diff_A4_A1_median_mean_flux,diff_A3_A2_max_median_flux,diff_A5_A1_median_mean_flux
3492885,130787966,-49.49044,75.235832,3.90543,1.703813,21.009541,0.625654,1.341227,70.066055,14.846084,10.951475,12.628468,1.30798,0.013793,374.420803,8.678639,5954.086,7.560569,15.902124,124.726272,31.936629,7.843372,35.655183,32.200453,0.154555,-0.174445,23.565507,13.59504,6.803259,2.380728,63.739853,39.279845,3.023876,1.647382,82.545904,81.181195,0.67089,0.882368,187.846479,74.176357,3.792912,1.576256,172.934431,184.07971,-0.297825,0.291885,145.0,8.0,10.0,20.061909,0.151706,4.0,43.0,89.7614,89.7614,89.7614,0.4493,0.9954,41.9836,0.036,1.937777,2.277911,1.215719,35.65518,2.377012,0.0,0.0909,0.0,0.0,0.0,0.0,0.5557,-2.62074,0.0,5.47497e-13,0.103244,2.16581,0.050748,0.683935,2.2649,0.829141,2.4392,2.16581,0.5,0.953504,0.808853,0.966782,2.32229,0.185745,19.4718,0.427032,10.7938,0.0,1.0,1.0,0,0,0,0,-20.260712,72.660479,-92.90648,71.556374,28.116876,-6.290332,4.964574,2.744162,46.834716,-14.615577,46.582108,56.130652,50.831205,-0.42683,4.249098,-5.116519,55.883266,25.372714,7.267148,48.022585,53.139104,-14.781784,42.290865,49.578878,0.823856,-9.651003,-0.71392,-13.441764,23.140275,26.263864,-1.345041,6.304388,0.159895
3492886,130787971,-58.301781,59.105938,2.910128,1.390304,18.432931,-0.033138,1.598772,70.469521,14.704315,10.958591,12.573206,1.570236,0.014286,772.852614,9.258595,27352.84,9.229165,35.392055,117.407719,40.344517,3.317347,47.270954,21.592998,-1.528033,0.106061,23.940401,25.706965,1.965597,0.964946,106.555644,97.317107,4.782199,2.138468,13.695812,30.341361,-0.615031,-0.353402,198.983121,128.149311,1.826418,0.8096,111.51393,121.261346,-0.765793,-0.401619,140.0,6.0,10.0,16.961143,-0.250912,5.0,95.0,0.9774,0.9774,0.9774,0.6729,0.0614,43.0419,0.083,2.063756,0.297612,0.715511,-47.270954,-3.376497,0.0,0.0,2.0,0.0,0.0,0.0,0.696,-13.268991,0.0,8.99796e-14,0.009136,3.61916,-0.781019,0.920752,2.63034,0.692992,0.862991,3.61916,0.652174,0.912471,0.714979,-0.248684,1.3337,0.073276,16.8101,0.711031,28.9745,0.0,1.0,1.0,0,0,0,0,-14.522348,36.030482,-83.425852,43.320908,18.349118,36.95174,-2.77849,-18.696773,46.039018,6.605267,43.880995,3.273846,34.514775,0.369478,-9.366219,29.715107,-7.396649,37.045891,23.489138,41.015231,11.300124,-15.233271,56.708253,27.342246,-2.097347,3.826777,3.229053,-0.631366,15.543929,23.878944,-1.041127,-34.738894,8.289426
3492887,130787974,-79.991745,341.318909,45.861197,3.651273,99.890188,1.724598,2.153563,62.515724,17.602066,12.978883,13.7979,1.212612,0.176056,19850.226004,4.094035,5000366.0,4.974861,251.90474,421.310654,9.186648,1.6725,145.685302,100.228336,9.587091,2.902355,98.83636,87.819879,2.510787,1.762819,1596.579768,1289.962654,-0.098882,1.161153,1243.369962,1048.754676,1.543198,1.622067,2253.494687,2069.880289,0.535269,1.533207,1174.323843,1632.869223,2.254685,1.695066,142.0,25.0,49.0,32.324748,-0.010421,6.0,68.0,5.534221,5.534221,132.8213,0.1211,0.0093,38.7604,0.136,2.040056,-2.014406,0.122231,145.6853,11.206562,0.0,0.2856,6.0,0.2,0.321,0.11365,0.65,-4.419514,5.0,1.44553e-23,0.193468,9.32174,1.03807,0.717207,2.56551,0.73406,4.24961,9.32174,0.65,0.866953,0.715438,4.95389,3.97704,2.22089,120.613,0.611322,167.787,0.578571,0.728336,0.784156,0,0,1,1,-63.124318,311.3023,-244.670302,280.345792,29.877981,266.755227,1.894723,-25.315437,32.299837,-15.242143,22.914133,29.021152,26.326585,-53.646005,3.412452,262.668835,-14.443295,55.193418,-4.538695,273.540977,10.872142,-5.775512,64.002143,6.9844,-27.112331,-13.34742,101.899421,-19.328535,-4.03311,77.274232,-48.284397,-21.427695,-19.556508
3492888,130788053,-31.093233,72.267593,2.674462,0.517638,14.199325,2.161186,1.472764,43.980267,12.126998,10.250809,9.682963,1.437416,0.019417,484.141947,9.877956,27170.69,9.854728,56.121331,103.360826,38.647332,1.841739,66.16929,83.692703,8.999515,2.93731,69.0261,61.217178,6.92728,2.627613,4.940699,7.940467,1.710969,-0.797514,16.365837,5.388825,0.142817,0.208712,26.674112,83.691903,0.191491,-0.381,102.174957,87.215959,0.963633,0.285633,103.0,5.0,7.0,11.934926,-0.04741,2.0,60.0,1.0,1.0,1.0,0.4287,0.2616,41.8625,0.028,1.804366,-5.960534,0.556884,66.16929,6.616929,0.3162,0.1428,0.0,0.0,0.0,0.0,0.4211,-10.273894,0.0,3.60196e-08,0.006265,1.53275,1.64709,0.522592,2.55292,0.878238,4.48906,1.53275,0.526316,0.867612,0.795611,0.465246,4.02397,-0.534743,11.1447,0.714563,7.93634,0.0,1.0,1.0,0,0,0,0,-65.079763,45.200562,-47.358673,42.478371,30.804272,-52.477498,3.320794,27.394924,1.959517,-0.229442,0.990932,36.61954,26.379604,5.170967,25.388672,-59.876169,39.176736,3.409348,19.748864,-48.094357,11.781812,-2.096472,20.06686,29.354441,8.462786,3.091352,-21.42777,-7.628113,20.52925,-12.566398,8.626461,9.822295,6.620209
3492889,130788054,-29.865211,92.339668,6.95722,2.271126,18.866321,2.12926,1.354718,45.210678,11.727779,8.838233,10.338816,1.330947,0.021429,810.266032,10.686268,28541.28,11.368798,35.224582,122.204879,17.565187,3.469307,12.420712,21.74714,1.421138,1.254474,53.074221,48.918371,12.748127,3.515225,64.690734,89.002508,3.776189,1.852142,114.528183,118.167243,-0.385016,0.687964,153.586699,40.424776,-0.387068,0.03937,575.710317,55.029144,-0.057886,0.760705,140.0,8.0,13.0,16.068763,0.228035,3.0,53.0,20.9559,20.9559,41.9118,0.3625,0.7335,41.4333,0.013,0.923229,8.216008,0.754854,12.420712,0.69004,0.0,0.0714,2.0,0.0,0.0,0.0,0.64,-5.267357,0.0,1.36987e-05,0.398007,3.86416,0.128046,0.463121,2.83368,0.821308,4.16606,3.86416,0.44,0.815151,0.56484,0.935915,3.44882,0.337461,9.26697,0.256333,16.5547,0.578571,2.0,1.0,0,0,1,0,-34.671397,85.388008,-80.884742,74.893904,85.28634,-17.977271,3.157845,65.293493,-3.331447,-5.207875,-2.633536,49.639423,53.792811,0.873345,56.426348,-20.326206,58.333139,19.992847,5.561553,-27.28656,-6.960354,-11.787757,27.60443,61.962046,4.701807,-2.05003,1.658487,-7.55681,10.595999,33.610803,2.025241,-3.628907,-6.841904


In [15]:
test_metadata.tail()

Unnamed: 0,object_id,A0_min_flux,A0_max_flux,A0_std_flux,A0_mean_flux,A0_median_flux,A1_min_flux,A1_max_flux,A1_std_flux,A1_mean_flux,A1_median_flux,A2_min_flux,A2_max_flux,A2_std_flux,A2_mean_flux,A2_median_flux,A3_min_flux,A3_max_flux,A3_std_flux,A3_mean_flux,A3_median_flux,A4_min_flux,A4_max_flux,A4_std_flux,A4_mean_flux,A4_median_flux,A5_min_flux,A5_max_flux,A5_std_flux,A5_mean_flux,A5_median_flux
3492885,130787966,-19.231127,18.327864,10.01586,2.377012,0.194802,-5.105165,22.772732,7.467904,2.143,0.879,-2.62074,18.966825,5.44,3.541103,2.189612,-8.542406,24.314121,8.09,3.588952,1.23252,-23.242554,73.36684,19.793636,6.059564,3.450523,-49.49044,75.23583,32.195312,3.679456,2.575351
3492886,130787971,-13.702611,5.737575,6.879981,-3.376497,-5.786102,-6.108143,14.889024,5.566116,2.176,1.177,-13.268991,44.679916,13.82,4.632854,1.252785,-11.002852,9.42867,6.11,0.622537,0.740433,-27.026964,59.105938,17.087936,6.41881,4.378683,-58.30178,46.18024,28.761728,2.859332,10.149758
3492887,130787974,-19.901522,117.10414,33.75358,11.206562,5.16817,-3.021157,65.79676,25.748106,14.12,2.72,-4.419514,331.15363,117.7,79.82899,5.408035,-7.961402,308.07877,92.8,54.059563,3.76087,-15.829834,341.3189,122.37403,64.38556,4.701163,-79.991745,307.03497,90.45316,26.689178,-4.26733
3492888,130788053,-9.348281,72.26759,23.513706,6.616929,0.730023,-1.932591,66.417595,24.979921,9.86,0.5176,-10.273894,5.598794,3.746,-0.260037,-0.425032,-11.894145,17.121632,7.68,1.022865,1.275511,-29.65397,17.978836,11.33524,0.889137,0.173198,-31.093233,47.343845,17.408049,4.865474,2.143283
3492889,130788054,-13.072737,30.335392,11.523481,0.69004,-1.810264,-3.566863,47.519875,12.820799,3.791,0.1388,-5.267357,27.84211,7.82,2.587629,0.787241,-6.765305,25.8731,9.1,4.772007,2.447138,-12.543804,24.374737,9.450494,5.907181,4.280222,-29.865211,92.33967,32.776836,17.445766,6.951662


In [16]:
train_metadata_kaggle.tail()

Unnamed: 0,object_id,flux_min,flux_max,flux_mean,flux_median,flux_std,flux_skew,flux_err_min,flux_err_max,flux_err_mean,flux_err_median,flux_err_std,flux_err_skew,detected_mean,flux_ratio_sq_sum,flux_ratio_sq_skew,flux_by_flux_ratio_sq_sum,flux_by_flux_ratio_sq_skew,flux_w_mean,flux_diff1,flux_diff2,flux_diff3,"0__fft_coefficient__coeff_0__attr_""abs""","0__fft_coefficient__coeff_1__attr_""abs""",0__kurtosis,0__skewness,"1__fft_coefficient__coeff_0__attr_""abs""","1__fft_coefficient__coeff_1__attr_""abs""",1__kurtosis,1__skewness,"2__fft_coefficient__coeff_0__attr_""abs""","2__fft_coefficient__coeff_1__attr_""abs""",2__kurtosis,2__skewness,"3__fft_coefficient__coeff_0__attr_""abs""","3__fft_coefficient__coeff_1__attr_""abs""",3__kurtosis,3__skewness,"4__fft_coefficient__coeff_0__attr_""abs""","4__fft_coefficient__coeff_1__attr_""abs""",4__kurtosis,4__skewness,"5__fft_coefficient__coeff_0__attr_""abs""","5__fft_coefficient__coeff_1__attr_""abs""",5__kurtosis,5__skewness,flux__length,flux__longest_strike_above_mean,flux__longest_strike_below_mean,flux__mean_abs_change,flux__mean_change,flux_by_flux_ratio_sq__longest_strike_above_mean,flux_by_flux_ratio_sq__longest_strike_below_mean,mjd__mean_abs_change,mjd__mean_change,mjd_diff_det,hostgal_photoz,hostgal_photoz_err,distmod,mwebv,target,haversine,latlon1,hostgal_photoz_certain,A0_sum_flux,A0_mean_flux,A0_std_detected,A1_mean_detected,A2_sum_detected,A4_mean_detected,A5_std_detected,A5_mean_detected,percent_p2_region_minus_1,A2_min_flux,A5_sum_detected,__flux_percentile_ratio_mid50___5_,__flux_percentile_ratio_mid65___2_,__median_absolute_deviation___2_,__qso_log_chi2_qsonu___0_,__stetson_k___1_,__freq1_signif___2_,__stetson_k___2_,__freq3_amplitude1___1_,__median_absolute_deviation___2_.1,__percent_close_to_median___2_,__freq_varrat___5_,__freq_varrat___4_,__qso_log_chi2_qsonu___3_,__qso_log_chi2_qsonu___1_,__qso_log_chi2_qsonu___5_,__std___4_,__freq_varrat___3_,__amplitude___2_,outlierScore,hipd,lipd,highEnergy_transitory_1.0_TF,highEnergy_transitory_1.5_TF,lowEnergy_transitory_1.0_TF,lowEnergy_transitory_1.5_TF,A1_minus_3_sigma,A5_max_median_diff_flux,A5_minus_3_sigma,A5_max_mean_diff_flux,diff_A5_A4_max_min_flux,diff_A2_A1_max_min_flux,diff_A3_A2_median_min_flux,diff_A5_A4_max_median_flux,diff_A4_A3_max_median_flux,diff_A2_A0_median_min_flux,diff_A4_A3_max_mean_flux,diff_A5_A2_max_mean_flux,diff_A5_A3_max_mean_flux,diff_A4_A0_median_mean_flux,diff_A5_A4_max_mean_flux,diff_A2_A1_max_median_flux,diff_A5_A2_max_median_flux,diff_A5_A4_median_min_flux,diff_A4_A0_median_min_flux,diff_A4_A1_max_median_flux,diff_A4_A2_max_median_flux,diff_A5_A4_minus_1_sigma,diff_A5_A3_median_min_flux,diff_A5_A3_max_median_flux,diff_A3_A1_minus_1_sigma,diff_A3_A0_median_min_flux,diff_A3_A0_plus_1_sigma,diff_A1_A0_median_min_flux,diff_A4_A2_mean_min_flux,diff_A5_A1_plus_1_sigma,diff_A4_A1_median_mean_flux,diff_A3_A2_max_median_flux,diff_A5_A1_median_mean_flux
7843,130739978,-105.375282,517.602478,6.786007,1.599034,50.600944,7.259594,1.700991,72.230759,16.391897,11.132936,14.645405,1.401451,0.034014,3178.336379,11.992239,488253.1,11.679274,153.619074,622.97776,91.803294,4.055341,21.873135,28.266883,5.705815,1.784807,162.456175,162.296936,13.667479,3.680811,35.689574,7.912341,8.977344,2.753125,117.413905,65.50816,-0.46037,0.410709,42.460987,115.925375,0.845991,0.431793,702.571174,302.81748,20.652438,3.998852,147.0,4.0,15.0,32.225018,0.053633,1.0,97.0,199.95325,199.95325,799.813,0.0,0.0,,0.013,65,1.568287,3.671722,0.0,21.873135,1.286655,0.2426,0.0714,2,0.0,0.1691,0.02856,0.625,-7.383,1,2.35133e-29,0.110073,3.12249,1.66984,0.413225,2.6355,0.698985,11.6993,3.12249,0.666667,0.961661,0.881326,0.257244,5.26442,1.57364,16.4719,0.527594,22.9597,0.0,1.0,1.0,0,0,0,0,-114.710914,506.473179,-272.734698,497.529038,541.19002,-115.000845,6.994,458.076275,26.931324,-17.91978,28.676722,460.481299,478.301014,0.22783,449.624292,-117.699845,466.670374,83.113745,9.356076,-109.105746,8.594099,-59.466922,103.395601,485.007599,26.06795,-10.92578,-3.456372,-20.61878,25.012949,63.972544,11.418842,-18.337225,2.966859
7844,130755807,-69.036392,363.402466,16.466672,1.489275,68.780504,3.818545,2.326328,71.215874,18.4772,15.121149,14.085974,1.175512,0.052632,3025.366219,5.549463,915226.6,5.798803,302.517634,432.438858,26.261461,1.429467,295.62292,333.272992,10.607947,3.192929,76.624077,61.855298,9.20888,2.970945,14.033524,21.132148,-0.523883,0.014138,380.065039,328.717896,13.153053,3.576847,1018.956828,973.270337,5.009929,2.493254,119.965253,184.278912,2.815352,1.133398,114.0,11.0,23.0,29.287346,0.036457,5.0,55.0,7.40326,7.40326,37.0163,2.5606,1.1146,46.6108,0.136,90,1.719466,-0.907001,7.805601,295.62292,24.635242,0.2886,0.0909,0,0.12,0.0,0.0,0.389,-9.39,0,1.67644e-09,0.0507914,3.74433,2.91397,0.563771,2.4958,1.01108,3.30676,3.74433,0.222222,0.979527,0.790144,4.06803,2.82326,-0.000683,106.131,0.885791,8.84614,0.0,1.0,1.0,0,0,0,0,-41.840749,136.011173,-113.020908,131.79902,-202.36121,-38.946152,26.8027,-222.500065,33.195538,-16.774367,20.095999,122.717466,-170.749177,-14.755121,-190.845176,-44.435452,127.236559,20.138855,22.522105,305.301172,349.736624,32.240075,32.632627,-189.304527,-51.726894,10.028333,10.976305,-22.263667,75.470454,19.144573,-30.156042,316.541086,1.498847
7845,130762946,-135.602631,169.916672,-15.308645,-14.33603,35.248387,0.713889,5.974091,79.26593,27.723353,22.661341,19.148498,1.065352,0.052239,706.392995,5.975417,-34603.82,-6.150094,-48.986636,305.519303,-19.957306,-6.236789,3.677885,193.654003,6.308066,-2.131699,341.603033,110.39626,4.246585,-1.7542,681.331113,83.2464,-0.7568,-0.07674,233.298629,223.406422,0.954472,0.313884,486.45902,41.878014,-0.137298,0.608803,304.988709,253.859058,5.543692,1.533518,134.0,7.0,5.0,36.687006,-0.689947,15.0,4.0,140.827117,140.827117,844.9627,0.0,0.0,,0.43,16,1.820915,-4.077753,0.0,-3.677886,-0.282914,0.0,0.05884,5,0.0,0.0,0.0,0.2917,-80.94,0,1.72353e-20,9.3564e-09,22.4461,-0.693662,0.815164,2.62788,1.04214,0.766434,22.4461,0.208333,0.875542,0.646057,1.63538,0.769186,-0.081677,22.0119,0.357794,54.2126,0.0,1.0,1.0,0,0,0,0,-90.817236,181.037243,-154.768226,179.755016,170.65913,8.984082,6.415,126.17157,-17.328967,-91.139332,-22.144945,123.906913,105.141927,-10.057145,127.286872,29.274082,124.477936,44.48756,-111.076745,27.580448,-1.693634,-20.582343,18.135147,108.842603,-1.542746,-84.724332,-28.554012,-70.849332,-18.251088,34.999202,-4.327529,15.635333,-3.212227
7846,130772921,-51.92783,322.255371,4.440758,-0.665556,36.233874,6.133599,1.286931,59.214134,13.657375,10.306778,11.557242,1.375974,0.020833,5481.396253,11.982382,1671626.0,11.998901,304.963565,374.183201,84.261113,1.226977,47.238909,45.693842,1.480686,1.409345,311.764748,328.145289,10.979698,3.31242,15.474861,28.290208,9.641729,2.564154,8.947991,39.411759,1.576205,0.537274,32.916957,61.277364,-0.529657,-0.352855,306.855562,369.830146,11.07675,2.614438,144.0,4.0,11.0,23.726622,-0.07429,1.0,122.0,491.04915,491.04915,982.0983,0.0,0.0,,0.034,65,1.88861,1.60912,0.0,47.238907,3.374208,0.0,0.0909,1,0.0,0.1581,0.025,0.44,-7.64,1,1.43809e-13,0.224307,2.3623,0.714831,0.399215,2.56568,0.78785,13.5231,2.3623,0.64,0.908711,0.860302,-0.412563,6.22745,0.849605,9.93629,0.782408,16.9685,0.0,1.0,1.0,0,0,0,0,-264.20113,209.053891,-124.837597,204.321191,223.10008,-293.610034,8.372,191.692173,-1.940965,-1.916301,0.289967,178.641849,186.403588,5.902454,186.113621,-297.694034,183.330555,31.407907,13.327311,-306.055652,-8.361618,-25.251037,38.279519,189.751208,61.148357,6.455699,-10.484895,-6.000301,14.353766,-74.012659,30.347852,-6.420653,24.7693
7847,130779836,-86.457382,54982.875,2346.644261,164.157776,7035.582316,5.209008,13.60417,424.749237,49.316721,34.537071,55.424877,4.74183,0.483051,230021.138576,2.774493,3191873000.0,5.898897,13876.432856,55069.332382,23.467269,3.968551,10565.112176,9202.809864,11.98421,3.421185,8702.038223,7363.322644,1.960815,1.798901,67259.010364,57779.947001,17.942448,4.057211,16631.865836,14080.951812,5.901481,2.554678,90718.818732,85994.76378,21.075553,4.484138,83027.177502,74748.33107,5.105194,2.463557,118.0,9.0,87.0,1143.138373,-5.232912,7.0,89.0,8.012234,8.012234,448.6851,0.0,0.0,,0.091,6,1.340388,-1.415652,0.0,10565.112,812.7009,0.4385,0.4167,14,0.4614,0.5024,0.6,0.8,-14.48,12,1.20685e-46,6.27614e-05,413.419,5.3546,0.676057,2.36751,0.487672,3.25443,413.419,0.8,0.747109,0.546403,6.94743,6.56419,8.04276,10793.0,0.526274,15725.8,0.375,0.0,0.0,0,0,1,0,-2867.6713,28526.44133,-21909.947,24906.854,-25910.83919,28096.3537,-202.24,-26371.83444,50072.53177,281.551626,47254.5396,-3839.8756,20667.7039,-2682.59928,-26586.8357,27826.8537,-2511.64867,460.99525,25.305426,51687.03947,23860.18577,2981.8387,406.98905,23700.69733,-215.317,79.311626,-733.6056,12.051626,856.97947,10915.9039,-2789.28607,-26212.346,-3004.28733


In [17]:
train_metadata.head()

Unnamed: 0,object_id,A0_min_flux,A0_max_flux,A0_std_flux,A0_mean_flux,A0_median_flux,A1_min_flux,A1_max_flux,A1_std_flux,A1_mean_flux,A1_median_flux,A2_min_flux,A2_max_flux,A2_std_flux,A2_mean_flux,A2_median_flux,A3_min_flux,A3_max_flux,A3_std_flux,A3_mean_flux,A3_median_flux,A4_min_flux,A4_max_flux,A4_std_flux,A4_mean_flux,A4_median_flux,A5_min_flux,A5_max_flux,A5_std_flux,A5_mean_flux,A5_median_flux
0,615,-116.91322,125.18281,83.94473,-3.254554,-10.015225,-1100.0,660.62634,601.7873,-385.8,-488.0,-682.0,611.98456,455.12134,-134.14656,-265.8,-530.5,445.73706,335.42505,-121.1035,-162.1,-422.1845,381.95374,291.80344,-55.954594,-103.54137,-422.8151,378.18814,294.7795,-47.44985,-85.52431
1,713,-14.735178,14.509829,7.113509,-2.720398,-3.096805,-11.72,9.129021,5.712334,-1.02,-0.5615,-10.07,10.529041,5.770738,-0.794238,-0.118,-12.4,11.330316,6.450413,-0.986966,-0.0739,-12.286801,9.827934,6.406989,-0.900261,-0.792176,-14.211164,14.770886,7.094073,-1.794175,-2.463012
2,730,-3.45996,5.942166,1.828872,-0.04808,0.024093,-3.393,5.693109,1.807229,0.1411,0.1714,-2.85,20.99471,5.559483,2.40087,0.4917,-5.438,33.5721,8.191987,3.236164,0.6606,-5.83631,41.15998,10.710344,4.308728,1.004354,-19.159811,47.31006,13.332758,4.539396,2.542647
3,745,-3.874349,18.014029,4.374445,1.797523,1.056714,-3.62,192.2443,25.964659,5.72,0.888,-2.16,220.79521,31.957998,9.711532,0.4243,-4.945,203.2507,34.967697,14.412925,1.361,-15.494463,183.63312,33.069054,13.134436,1.27015,-10.249387,141.51329,26.06013,10.746138,2.749555
4,1124,-6.804703,5.330927,2.360085,0.660948,0.581027,-2.623,37.170177,8.107525,4.633,1.154,-2.084,106.67169,21.319853,10.243968,0.889,-2.8,139.8184,26.270649,11.086555,1.014,-16.543753,143.60019,26.865913,9.906102,1.745012,-10.86054,109.157585,21.434628,6.896741,1.973272


In [18]:
######################################TRAIN####################################################3
#A0
train_metadata['A0_max_min_diff_flux'] = train_metadata['A0_max_flux'] - train_metadata['A0_min_flux']
train_metadata['A0_max_mean_diff_flux'] = train_metadata['A0_max_flux'] - train_metadata['A0_mean_flux']
train_metadata['A0_max_median_diff_flux'] = train_metadata['A0_max_flux'] - train_metadata['A0_median_flux']
train_metadata['A0_median_mean_diff_flux'] = train_metadata['A0_median_flux'] - train_metadata['A0_mean_flux']
train_metadata['A0_median_min_diff_flux'] = train_metadata['A0_median_flux'] - train_metadata['A0_min_flux']
train_metadata['A0_mean_min_diff_flux'] = train_metadata['A0_mean_flux'] - train_metadata['A0_min_flux']
train_metadata['A0_minus_3_sigma'] = train_metadata['A0_mean_flux'] - 3*train_metadata['A0_std_flux']
train_metadata['A0_minus_2_sigma'] = train_metadata['A0_mean_flux'] - 2*train_metadata['A0_std_flux']
train_metadata['A0_minus_1_sigma'] = train_metadata['A0_mean_flux'] - 1*train_metadata['A0_std_flux']
train_metadata['A0_plus_1_sigma'] = train_metadata['A0_mean_flux'] + 1*train_metadata['A0_std_flux']
train_metadata['A0_plus_2_sigma'] = train_metadata['A0_mean_flux'] + 2*train_metadata['A0_std_flux']
train_metadata['A0_plus_3_sigma'] = train_metadata['A0_mean_flux'] + 3*train_metadata['A0_std_flux']
#A1
train_metadata['A1_max_min_diff_flux'] = train_metadata['A1_max_flux'] - train_metadata['A1_min_flux']
train_metadata['A1_max_mean_diff_flux'] = train_metadata['A1_max_flux'] - train_metadata['A1_mean_flux']
train_metadata['A1_max_median_diff_flux'] = train_metadata['A1_max_flux'] - train_metadata['A1_median_flux']
train_metadata['A1_median_mean_diff_flux'] = train_metadata['A1_median_flux'] - train_metadata['A1_mean_flux']
train_metadata['A1_median_min_diff_flux'] = train_metadata['A1_median_flux'] - train_metadata['A1_min_flux']
train_metadata['A1_mean_min_diff_flux'] = train_metadata['A1_mean_flux'] - train_metadata['A1_min_flux']
train_metadata['A1_minus_3_sigma'] = train_metadata['A1_mean_flux'] - 3*train_metadata['A1_std_flux']
train_metadata['A1_minus_2_sigma'] = train_metadata['A1_mean_flux'] - 2*train_metadata['A1_std_flux']
train_metadata['A1_minus_1_sigma'] = train_metadata['A1_mean_flux'] - 1*train_metadata['A1_std_flux']
train_metadata['A1_plus_1_sigma'] = train_metadata['A1_mean_flux'] + 1*train_metadata['A1_std_flux']
train_metadata['A1_plus_2_sigma'] = train_metadata['A1_mean_flux'] + 2*train_metadata['A1_std_flux']
train_metadata['A1_plus_3_sigma'] = train_metadata['A1_mean_flux'] + 3*train_metadata['A1_std_flux']
#A2
train_metadata['A2_max_min_diff_flux'] = train_metadata['A2_max_flux'] - train_metadata['A2_min_flux']
train_metadata['A2_max_mean_diff_flux'] = train_metadata['A2_max_flux'] - train_metadata['A2_mean_flux']
train_metadata['A2_max_median_diff_flux'] = train_metadata['A2_max_flux'] - train_metadata['A2_median_flux']
train_metadata['A2_median_mean_diff_flux'] = train_metadata['A2_median_flux'] - train_metadata['A2_mean_flux']
train_metadata['A2_median_min_diff_flux'] = train_metadata['A2_median_flux'] - train_metadata['A2_min_flux']
train_metadata['A2_mean_min_diff_flux'] = train_metadata['A2_mean_flux'] - train_metadata['A2_min_flux']
train_metadata['A2_minus_3_sigma'] = train_metadata['A2_mean_flux'] - 3*train_metadata['A2_std_flux']
train_metadata['A2_minus_2_sigma'] = train_metadata['A2_mean_flux'] - 2*train_metadata['A2_std_flux']
train_metadata['A2_minus_1_sigma'] = train_metadata['A2_mean_flux'] - 1*train_metadata['A2_std_flux']
train_metadata['A2_plus_1_sigma'] = train_metadata['A2_mean_flux'] + 1*train_metadata['A2_std_flux']
train_metadata['A2_plus_2_sigma'] = train_metadata['A2_mean_flux'] + 2*train_metadata['A2_std_flux']
train_metadata['A2_plus_3_sigma'] = train_metadata['A2_mean_flux'] + 3*train_metadata['A2_std_flux']
#A3
train_metadata['A3_max_min_diff_flux'] = train_metadata['A3_max_flux'] - train_metadata['A3_min_flux']
train_metadata['A3_max_mean_diff_flux'] = train_metadata['A3_max_flux'] - train_metadata['A3_mean_flux']
train_metadata['A3_max_median_diff_flux'] = train_metadata['A3_max_flux'] - train_metadata['A3_median_flux']
train_metadata['A3_median_mean_diff_flux'] = train_metadata['A3_median_flux'] - train_metadata['A3_mean_flux']
train_metadata['A3_median_min_diff_flux'] = train_metadata['A3_median_flux'] - train_metadata['A3_min_flux']
train_metadata['A3_mean_min_diff_flux'] = train_metadata['A3_mean_flux'] - train_metadata['A3_min_flux']
train_metadata['A3_minus_3_sigma'] = train_metadata['A3_mean_flux'] - 3*train_metadata['A3_std_flux']
train_metadata['A3_minus_2_sigma'] = train_metadata['A3_mean_flux'] - 2*train_metadata['A3_std_flux']
train_metadata['A3_minus_1_sigma'] = train_metadata['A3_mean_flux'] - 1*train_metadata['A3_std_flux']
train_metadata['A3_plus_1_sigma'] = train_metadata['A3_mean_flux'] + 1*train_metadata['A3_std_flux']
train_metadata['A3_plus_2_sigma'] = train_metadata['A3_mean_flux'] + 2*train_metadata['A3_std_flux']
train_metadata['A3_plus_3_sigma'] = train_metadata['A3_mean_flux'] + 3*train_metadata['A3_std_flux']
#A4
train_metadata['A4_max_min_diff_flux'] = train_metadata['A4_max_flux'] - train_metadata['A4_min_flux']
train_metadata['A4_max_mean_diff_flux'] = train_metadata['A4_max_flux'] - train_metadata['A4_mean_flux']
train_metadata['A4_max_median_diff_flux'] = train_metadata['A4_max_flux'] - train_metadata['A4_median_flux']
train_metadata['A4_median_mean_diff_flux'] = train_metadata['A4_median_flux'] - train_metadata['A4_mean_flux']
train_metadata['A4_median_min_diff_flux'] = train_metadata['A4_median_flux'] - train_metadata['A4_min_flux']
train_metadata['A4_mean_min_diff_flux'] = train_metadata['A4_mean_flux'] - train_metadata['A4_min_flux']
train_metadata['A4_minus_3_sigma'] = train_metadata['A4_mean_flux'] - 3*train_metadata['A4_std_flux']
train_metadata['A4_minus_2_sigma'] = train_metadata['A4_mean_flux'] - 2*train_metadata['A4_std_flux']
train_metadata['A4_minus_1_sigma'] = train_metadata['A4_mean_flux'] - 1*train_metadata['A4_std_flux']
train_metadata['A4_plus_1_sigma'] = train_metadata['A4_mean_flux'] + 1*train_metadata['A4_std_flux']
train_metadata['A4_plus_2_sigma'] = train_metadata['A4_mean_flux'] + 2*train_metadata['A4_std_flux']
train_metadata['A4_plus_3_sigma'] = train_metadata['A4_mean_flux'] + 3*train_metadata['A4_std_flux']
#A5
train_metadata['A5_max_min_diff_flux'] = train_metadata['A5_max_flux'] - train_metadata['A5_min_flux']
train_metadata['A5_max_mean_diff_flux'] = train_metadata['A5_max_flux'] - train_metadata['A5_mean_flux']
train_metadata['A5_max_median_diff_flux'] = train_metadata['A5_max_flux'] - train_metadata['A5_median_flux']
train_metadata['A5_median_mean_diff_flux'] = train_metadata['A5_median_flux'] - train_metadata['A5_mean_flux']
train_metadata['A5_median_min_diff_flux'] = train_metadata['A5_median_flux'] - train_metadata['A5_min_flux']
train_metadata['A5_mean_min_diff_flux'] = train_metadata['A5_mean_flux'] - train_metadata['A5_min_flux']
train_metadata['A5_minus_3_sigma'] = train_metadata['A5_mean_flux'] - 3*train_metadata['A5_std_flux']
train_metadata['A5_minus_2_sigma'] = train_metadata['A5_mean_flux'] - 2*train_metadata['A5_std_flux']
train_metadata['A5_minus_1_sigma'] = train_metadata['A5_mean_flux'] - 1*train_metadata['A5_std_flux']
train_metadata['A5_plus_1_sigma'] = train_metadata['A5_mean_flux'] + 1*train_metadata['A5_std_flux']
train_metadata['A5_plus_2_sigma'] = train_metadata['A5_mean_flux'] + 2*train_metadata['A5_std_flux']
train_metadata['A5_plus_3_sigma'] = train_metadata['A5_mean_flux'] + 3*train_metadata['A5_std_flux']
#######################################TEST#########################################################
#A0
test_metadata['A0_max_min_diff_flux'] = test_metadata['A0_max_flux'] - test_metadata['A0_min_flux']
test_metadata['A0_max_mean_diff_flux'] = test_metadata['A0_max_flux'] - test_metadata['A0_mean_flux']
test_metadata['A0_max_median_diff_flux'] = test_metadata['A0_max_flux'] - test_metadata['A0_median_flux']
test_metadata['A0_median_mean_diff_flux'] = test_metadata['A0_median_flux'] - test_metadata['A0_mean_flux']
test_metadata['A0_median_min_diff_flux'] = test_metadata['A0_median_flux'] - test_metadata['A0_min_flux']
test_metadata['A0_mean_min_diff_flux'] = test_metadata['A0_mean_flux'] - test_metadata['A0_min_flux']
test_metadata['A0_minus_3_sigma'] = test_metadata['A0_mean_flux'] - 3*test_metadata['A0_std_flux']
test_metadata['A0_minus_2_sigma'] = test_metadata['A0_mean_flux'] - 2*test_metadata['A0_std_flux']
test_metadata['A0_minus_1_sigma'] = test_metadata['A0_mean_flux'] - 1*test_metadata['A0_std_flux']
test_metadata['A0_plus_1_sigma'] = test_metadata['A0_mean_flux'] + 1*test_metadata['A0_std_flux']
test_metadata['A0_plus_2_sigma'] = test_metadata['A0_mean_flux'] + 2*test_metadata['A0_std_flux']
test_metadata['A0_plus_3_sigma'] = test_metadata['A0_mean_flux'] + 3*test_metadata['A0_std_flux']
#A1
test_metadata['A1_max_min_diff_flux'] = test_metadata['A1_max_flux'] - test_metadata['A1_min_flux']
test_metadata['A1_max_mean_diff_flux'] = test_metadata['A1_max_flux'] - test_metadata['A1_mean_flux']
test_metadata['A1_max_median_diff_flux'] = test_metadata['A1_max_flux'] - test_metadata['A1_median_flux']
test_metadata['A1_median_mean_diff_flux'] = test_metadata['A1_median_flux'] - test_metadata['A1_mean_flux']
test_metadata['A1_median_min_diff_flux'] = test_metadata['A1_median_flux'] - test_metadata['A1_min_flux']
test_metadata['A1_mean_min_diff_flux'] = test_metadata['A1_mean_flux'] - test_metadata['A1_min_flux']
test_metadata['A1_minus_3_sigma'] = test_metadata['A1_mean_flux'] - 3*test_metadata['A1_std_flux']
test_metadata['A1_minus_2_sigma'] = test_metadata['A1_mean_flux'] - 2*test_metadata['A1_std_flux']
test_metadata['A1_minus_1_sigma'] = test_metadata['A1_mean_flux'] - 1*test_metadata['A1_std_flux']
test_metadata['A1_plus_1_sigma'] = test_metadata['A1_mean_flux'] + 1*test_metadata['A1_std_flux']
test_metadata['A1_plus_2_sigma'] = test_metadata['A1_mean_flux'] + 2*test_metadata['A1_std_flux']
test_metadata['A1_plus_3_sigma'] = test_metadata['A1_mean_flux'] + 3*test_metadata['A1_std_flux']
#A2
test_metadata['A2_max_min_diff_flux'] = test_metadata['A2_max_flux'] - test_metadata['A2_min_flux']
test_metadata['A2_max_mean_diff_flux'] = test_metadata['A2_max_flux'] - test_metadata['A2_mean_flux']
test_metadata['A2_max_median_diff_flux'] = test_metadata['A2_max_flux'] - test_metadata['A2_median_flux']
test_metadata['A2_median_mean_diff_flux'] = test_metadata['A2_median_flux'] - test_metadata['A2_mean_flux']
test_metadata['A2_median_min_diff_flux'] = test_metadata['A2_median_flux'] - test_metadata['A2_min_flux']
test_metadata['A2_mean_min_diff_flux'] = test_metadata['A2_mean_flux'] - test_metadata['A2_min_flux']
test_metadata['A2_minus_3_sigma'] = test_metadata['A2_mean_flux'] - 3*test_metadata['A2_std_flux']
test_metadata['A2_minus_2_sigma'] = test_metadata['A2_mean_flux'] - 2*test_metadata['A2_std_flux']
test_metadata['A2_minus_1_sigma'] = test_metadata['A2_mean_flux'] - 1*test_metadata['A2_std_flux']
test_metadata['A2_plus_1_sigma'] = test_metadata['A2_mean_flux'] + 1*test_metadata['A2_std_flux']
test_metadata['A2_plus_2_sigma'] = test_metadata['A2_mean_flux'] + 2*test_metadata['A2_std_flux']
test_metadata['A2_plus_3_sigma'] = test_metadata['A2_mean_flux'] + 3*test_metadata['A2_std_flux']
#A3
test_metadata['A3_max_min_diff_flux'] = test_metadata['A3_max_flux'] - test_metadata['A3_min_flux']
test_metadata['A3_max_mean_diff_flux'] = test_metadata['A3_max_flux'] - test_metadata['A3_mean_flux']
test_metadata['A3_max_median_diff_flux'] = test_metadata['A3_max_flux'] - test_metadata['A3_median_flux']
test_metadata['A3_median_mean_diff_flux'] = test_metadata['A3_median_flux'] - test_metadata['A3_mean_flux']
test_metadata['A3_median_min_diff_flux'] = test_metadata['A3_median_flux'] - test_metadata['A3_min_flux']
test_metadata['A3_mean_min_diff_flux'] = test_metadata['A3_mean_flux'] - test_metadata['A3_min_flux']
test_metadata['A3_minus_3_sigma'] = test_metadata['A3_mean_flux'] - 3*test_metadata['A3_std_flux']
test_metadata['A3_minus_2_sigma'] = test_metadata['A3_mean_flux'] - 2*test_metadata['A3_std_flux']
test_metadata['A3_minus_1_sigma'] = test_metadata['A3_mean_flux'] - 1*test_metadata['A3_std_flux']
test_metadata['A3_plus_1_sigma'] = test_metadata['A3_mean_flux'] + 1*test_metadata['A3_std_flux']
test_metadata['A3_plus_2_sigma'] = test_metadata['A3_mean_flux'] + 2*test_metadata['A3_std_flux']
test_metadata['A3_plus_3_sigma'] = test_metadata['A3_mean_flux'] + 3*test_metadata['A3_std_flux']
#A4
test_metadata['A4_max_min_diff_flux'] = test_metadata['A4_max_flux'] - test_metadata['A4_min_flux']
test_metadata['A4_max_mean_diff_flux'] = test_metadata['A4_max_flux'] - test_metadata['A4_mean_flux']
test_metadata['A4_max_median_diff_flux'] = test_metadata['A4_max_flux'] - test_metadata['A4_median_flux']
test_metadata['A4_median_mean_diff_flux'] = test_metadata['A4_median_flux'] - test_metadata['A4_mean_flux']
test_metadata['A4_median_min_diff_flux'] = test_metadata['A4_median_flux'] - test_metadata['A4_min_flux']
test_metadata['A4_mean_min_diff_flux'] = test_metadata['A4_mean_flux'] - test_metadata['A4_min_flux']
test_metadata['A4_minus_3_sigma'] = test_metadata['A4_mean_flux'] - 3*test_metadata['A4_std_flux']
test_metadata['A4_minus_2_sigma'] = test_metadata['A4_mean_flux'] - 2*test_metadata['A4_std_flux']
test_metadata['A4_minus_1_sigma'] = test_metadata['A4_mean_flux'] - 1*test_metadata['A4_std_flux']
test_metadata['A4_plus_1_sigma'] = test_metadata['A4_mean_flux'] + 1*test_metadata['A4_std_flux']
test_metadata['A4_plus_2_sigma'] = test_metadata['A4_mean_flux'] + 2*test_metadata['A4_std_flux']
test_metadata['A4_plus_3_sigma'] = test_metadata['A4_mean_flux'] + 3*test_metadata['A4_std_flux']
#A5
test_metadata['A5_max_min_diff_flux'] = test_metadata['A5_max_flux'] - test_metadata['A5_min_flux']
test_metadata['A5_max_mean_diff_flux'] = test_metadata['A5_max_flux'] - test_metadata['A5_mean_flux']
test_metadata['A5_max_median_diff_flux'] = test_metadata['A5_max_flux'] - test_metadata['A5_median_flux']
test_metadata['A5_median_mean_diff_flux'] = test_metadata['A5_median_flux'] - test_metadata['A5_mean_flux']
test_metadata['A5_median_min_diff_flux'] = test_metadata['A5_median_flux'] - test_metadata['A5_min_flux']
test_metadata['A5_mean_min_diff_flux'] = test_metadata['A5_mean_flux'] - test_metadata['A5_min_flux']
test_metadata['A5_minus_3_sigma'] = test_metadata['A5_mean_flux'] - 3*test_metadata['A5_std_flux']
test_metadata['A5_minus_2_sigma'] = test_metadata['A5_mean_flux'] - 2*test_metadata['A5_std_flux']
test_metadata['A5_minus_1_sigma'] = test_metadata['A5_mean_flux'] - 1*test_metadata['A5_std_flux']
test_metadata['A5_plus_1_sigma'] = test_metadata['A5_mean_flux'] + 1*test_metadata['A5_std_flux']
test_metadata['A5_plus_2_sigma'] = test_metadata['A5_mean_flux'] + 2*test_metadata['A5_std_flux']
test_metadata['A5_plus_3_sigma'] = test_metadata['A5_mean_flux'] + 3*test_metadata['A5_std_flux']

In [19]:
#F1
train_metadata['div_A5_A4_max_min_flux'] = train_metadata['A5_max_min_diff_flux'] / train_metadata['A4_max_min_diff_flux']
train_metadata['div_A5_A3_max_min_flux'] = train_metadata['A5_max_min_diff_flux'] / train_metadata['A3_max_min_diff_flux']
train_metadata['div_A5_A2_max_min_flux'] = train_metadata['A5_max_min_diff_flux'] / train_metadata['A2_max_min_diff_flux']
train_metadata['div_A5_A1_max_min_flux'] = train_metadata['A5_max_min_diff_flux'] / train_metadata['A1_max_min_diff_flux']
train_metadata['div_A5_A0_max_min_flux'] = train_metadata['A5_max_min_diff_flux'] / train_metadata['A0_max_min_diff_flux']
train_metadata['div_A4_A3_max_min_flux'] = train_metadata['A4_max_min_diff_flux'] / train_metadata['A3_max_min_diff_flux']
train_metadata['div_A4_A2_max_min_flux'] = train_metadata['A4_max_min_diff_flux'] / train_metadata['A2_max_min_diff_flux']
train_metadata['div_A4_A1_max_min_flux'] = train_metadata['A4_max_min_diff_flux'] / train_metadata['A1_max_min_diff_flux']
train_metadata['div_A4_A0_max_min_flux'] = train_metadata['A4_max_min_diff_flux'] / train_metadata['A0_max_min_diff_flux']
train_metadata['div_A3_A2_max_min_flux'] = train_metadata['A3_max_min_diff_flux'] / train_metadata['A2_max_min_diff_flux']
train_metadata['div_A3_A1_max_min_flux'] = train_metadata['A3_max_min_diff_flux'] / train_metadata['A1_max_min_diff_flux']
train_metadata['div_A3_A0_max_min_flux'] = train_metadata['A3_max_min_diff_flux'] / train_metadata['A0_max_min_diff_flux']
train_metadata['div_A2_A1_max_min_flux'] = train_metadata['A2_max_min_diff_flux'] / train_metadata['A1_max_min_diff_flux']
train_metadata['div_A2_A0_max_min_flux'] = train_metadata['A2_max_min_diff_flux'] / train_metadata['A0_max_min_diff_flux']
train_metadata['div_A1_A0_max_min_flux'] = train_metadata['A1_max_min_diff_flux'] / train_metadata['A0_max_min_diff_flux']
#F2
train_metadata['div_A5_A4_max_mean_flux'] = train_metadata['A5_max_mean_diff_flux'] / train_metadata['A4_max_mean_diff_flux']
train_metadata['div_A5_A3_max_mean_flux'] = train_metadata['A5_max_mean_diff_flux'] / train_metadata['A3_max_mean_diff_flux']
train_metadata['div_A5_A2_max_mean_flux'] = train_metadata['A5_max_mean_diff_flux'] / train_metadata['A2_max_mean_diff_flux']
train_metadata['div_A5_A1_max_mean_flux'] = train_metadata['A5_max_mean_diff_flux'] / train_metadata['A1_max_mean_diff_flux']
train_metadata['div_A5_A0_max_mean_flux'] = train_metadata['A5_max_mean_diff_flux'] / train_metadata['A0_max_mean_diff_flux']
train_metadata['div_A4_A3_max_mean_flux'] = train_metadata['A4_max_mean_diff_flux'] / train_metadata['A3_max_mean_diff_flux']
train_metadata['div_A4_A2_max_mean_flux'] = train_metadata['A4_max_mean_diff_flux'] / train_metadata['A2_max_mean_diff_flux']
train_metadata['div_A4_A1_max_mean_flux'] = train_metadata['A4_max_mean_diff_flux'] / train_metadata['A1_max_mean_diff_flux']
train_metadata['div_A4_A0_max_mean_flux'] = train_metadata['A4_max_mean_diff_flux'] / train_metadata['A0_max_mean_diff_flux']
train_metadata['div_A3_A2_max_mean_flux'] = train_metadata['A3_max_mean_diff_flux'] / train_metadata['A2_max_mean_diff_flux']
train_metadata['div_A3_A1_max_mean_flux'] = train_metadata['A3_max_mean_diff_flux'] / train_metadata['A1_max_mean_diff_flux']
train_metadata['div_A3_A0_max_mean_flux'] = train_metadata['A3_max_mean_diff_flux'] / train_metadata['A0_max_mean_diff_flux']
train_metadata['div_A2_A1_max_mean_flux'] = train_metadata['A2_max_mean_diff_flux'] / train_metadata['A1_max_mean_diff_flux']
train_metadata['div_A2_A0_max_mean_flux'] = train_metadata['A2_max_mean_diff_flux'] / train_metadata['A0_max_mean_diff_flux']
train_metadata['div_A1_A0_max_mean_flux'] = train_metadata['A1_max_mean_diff_flux'] / train_metadata['A0_max_mean_diff_flux']
#F3
train_metadata['div_A5_A4_max_median_flux'] = train_metadata['A5_max_median_diff_flux'] / train_metadata['A4_max_median_diff_flux']
train_metadata['div_A5_A3_max_median_flux'] = train_metadata['A5_max_median_diff_flux'] / train_metadata['A3_max_median_diff_flux']
train_metadata['div_A5_A2_max_median_flux'] = train_metadata['A5_max_median_diff_flux'] / train_metadata['A2_max_median_diff_flux']
train_metadata['div_A5_A1_max_median_flux'] = train_metadata['A5_max_median_diff_flux'] / train_metadata['A1_max_median_diff_flux']
train_metadata['div_A5_A0_max_median_flux'] = train_metadata['A5_max_median_diff_flux'] / train_metadata['A0_max_median_diff_flux']
train_metadata['div_A4_A3_max_median_flux'] = train_metadata['A4_max_median_diff_flux'] / train_metadata['A3_max_median_diff_flux']
train_metadata['div_A4_A2_max_median_flux'] = train_metadata['A4_max_median_diff_flux'] / train_metadata['A2_max_median_diff_flux']
train_metadata['div_A4_A1_max_median_flux'] = train_metadata['A4_max_median_diff_flux'] / train_metadata['A1_max_median_diff_flux']
train_metadata['div_A4_A0_max_median_flux'] = train_metadata['A4_max_median_diff_flux'] / train_metadata['A0_max_median_diff_flux']
train_metadata['div_A3_A2_max_median_flux'] = train_metadata['A3_max_median_diff_flux'] / train_metadata['A2_max_median_diff_flux']
train_metadata['div_A3_A1_max_median_flux'] = train_metadata['A3_max_median_diff_flux'] / train_metadata['A1_max_median_diff_flux']
train_metadata['div_A3_A0_max_median_flux'] = train_metadata['A3_max_median_diff_flux'] / train_metadata['A0_max_median_diff_flux']
train_metadata['div_A2_A1_max_median_flux'] = train_metadata['A2_max_median_diff_flux'] / train_metadata['A1_max_median_diff_flux']
train_metadata['div_A2_A0_max_median_flux'] = train_metadata['A2_max_median_diff_flux'] / train_metadata['A0_max_median_diff_flux']
train_metadata['div_A1_A0_max_median_flux'] = train_metadata['A1_max_median_diff_flux'] / train_metadata['A0_max_median_diff_flux']
#F4
train_metadata['div_A5_A4_median_mean_flux'] = train_metadata['A5_median_mean_diff_flux'] / train_metadata['A4_median_mean_diff_flux']
train_metadata['div_A5_A3_median_mean_flux'] = train_metadata['A5_median_mean_diff_flux'] / train_metadata['A3_median_mean_diff_flux']
train_metadata['div_A5_A2_median_mean_flux'] = train_metadata['A5_median_mean_diff_flux'] / train_metadata['A2_median_mean_diff_flux']
train_metadata['div_A5_A1_median_mean_flux'] = train_metadata['A5_median_mean_diff_flux'] / train_metadata['A1_median_mean_diff_flux']
train_metadata['div_A5_A0_median_mean_flux'] = train_metadata['A5_median_mean_diff_flux'] / train_metadata['A0_median_mean_diff_flux']
train_metadata['div_A4_A3_median_mean_flux'] = train_metadata['A4_median_mean_diff_flux'] / train_metadata['A3_median_mean_diff_flux']
train_metadata['div_A4_A2_median_mean_flux'] = train_metadata['A4_median_mean_diff_flux'] / train_metadata['A2_median_mean_diff_flux']
train_metadata['div_A4_A1_median_mean_flux'] = train_metadata['A4_median_mean_diff_flux'] / train_metadata['A1_median_mean_diff_flux']
train_metadata['div_A4_A0_median_mean_flux'] = train_metadata['A4_median_mean_diff_flux'] / train_metadata['A0_median_mean_diff_flux']
train_metadata['div_A3_A2_median_mean_flux'] = train_metadata['A3_median_mean_diff_flux'] / train_metadata['A2_median_mean_diff_flux']
train_metadata['div_A3_A1_median_mean_flux'] = train_metadata['A3_median_mean_diff_flux'] / train_metadata['A1_median_mean_diff_flux']
train_metadata['div_A3_A0_median_mean_flux'] = train_metadata['A3_median_mean_diff_flux'] / train_metadata['A0_median_mean_diff_flux']
train_metadata['div_A2_A1_median_mean_flux'] = train_metadata['A2_median_mean_diff_flux'] / train_metadata['A1_median_mean_diff_flux']
train_metadata['div_A2_A0_median_mean_flux'] = train_metadata['A2_median_mean_diff_flux'] / train_metadata['A0_median_mean_diff_flux']
train_metadata['div_A1_A0_median_mean_flux'] = train_metadata['A1_median_mean_diff_flux'] / train_metadata['A0_median_mean_diff_flux']
#F5
train_metadata['div_A5_A4_median_min_flux'] = train_metadata['A5_median_min_diff_flux'] / train_metadata['A4_median_min_diff_flux']
train_metadata['div_A5_A3_median_min_flux'] = train_metadata['A5_median_min_diff_flux'] / train_metadata['A3_median_min_diff_flux']
train_metadata['div_A5_A2_median_min_flux'] = train_metadata['A5_median_min_diff_flux'] / train_metadata['A2_median_min_diff_flux']
train_metadata['div_A5_A1_median_min_flux'] = train_metadata['A5_median_min_diff_flux'] / train_metadata['A1_median_min_diff_flux']
train_metadata['div_A5_A0_median_min_flux'] = train_metadata['A5_median_min_diff_flux'] / train_metadata['A0_median_min_diff_flux']
train_metadata['div_A4_A3_median_min_flux'] = train_metadata['A4_median_min_diff_flux'] / train_metadata['A3_median_min_diff_flux']
train_metadata['div_A4_A2_median_min_flux'] = train_metadata['A4_median_min_diff_flux'] / train_metadata['A2_median_min_diff_flux']
train_metadata['div_A4_A1_median_min_flux'] = train_metadata['A4_median_min_diff_flux'] / train_metadata['A1_median_min_diff_flux']
train_metadata['div_A4_A0_median_min_flux'] = train_metadata['A4_median_min_diff_flux'] / train_metadata['A0_median_min_diff_flux']
train_metadata['div_A3_A2_median_min_flux'] = train_metadata['A3_median_min_diff_flux'] / train_metadata['A2_median_min_diff_flux']
train_metadata['div_A3_A1_median_min_flux'] = train_metadata['A3_median_min_diff_flux'] / train_metadata['A1_median_min_diff_flux']
train_metadata['div_A3_A0_median_min_flux'] = train_metadata['A3_median_min_diff_flux'] / train_metadata['A0_median_min_diff_flux']
train_metadata['div_A2_A1_median_min_flux'] = train_metadata['A2_median_min_diff_flux'] / train_metadata['A1_median_min_diff_flux']
train_metadata['div_A2_A0_median_min_flux'] = train_metadata['A2_median_min_diff_flux'] / train_metadata['A0_median_min_diff_flux']
train_metadata['div_A1_A0_median_min_flux'] = train_metadata['A1_median_min_diff_flux'] / train_metadata['A0_median_min_diff_flux']
#F6
train_metadata['div_A5_A4_mean_min_flux'] = train_metadata['A5_mean_min_diff_flux'] / train_metadata['A4_mean_min_diff_flux']
train_metadata['div_A5_A3_mean_min_flux'] = train_metadata['A5_mean_min_diff_flux'] / train_metadata['A3_mean_min_diff_flux']
train_metadata['div_A5_A2_mean_min_flux'] = train_metadata['A5_mean_min_diff_flux'] / train_metadata['A2_mean_min_diff_flux']
train_metadata['div_A5_A1_mean_min_flux'] = train_metadata['A5_mean_min_diff_flux'] / train_metadata['A1_mean_min_diff_flux']
train_metadata['div_A5_A0_mean_min_flux'] = train_metadata['A5_mean_min_diff_flux'] / train_metadata['A0_mean_min_diff_flux']
train_metadata['div_A4_A3_mean_min_flux'] = train_metadata['A4_mean_min_diff_flux'] / train_metadata['A3_mean_min_diff_flux']
train_metadata['div_A4_A2_mean_min_flux'] = train_metadata['A4_mean_min_diff_flux'] / train_metadata['A2_mean_min_diff_flux']
train_metadata['div_A4_A1_mean_min_flux'] = train_metadata['A4_mean_min_diff_flux'] / train_metadata['A1_mean_min_diff_flux']
train_metadata['div_A4_A0_mean_min_flux'] = train_metadata['A4_mean_min_diff_flux'] / train_metadata['A0_mean_min_diff_flux']
train_metadata['div_A3_A2_mean_min_flux'] = train_metadata['A3_mean_min_diff_flux'] / train_metadata['A2_mean_min_diff_flux']
train_metadata['div_A3_A1_mean_min_flux'] = train_metadata['A3_mean_min_diff_flux'] / train_metadata['A1_mean_min_diff_flux']
train_metadata['div_A3_A0_mean_min_flux'] = train_metadata['A3_mean_min_diff_flux'] / train_metadata['A0_mean_min_diff_flux']
train_metadata['div_A2_A1_mean_min_flux'] = train_metadata['A2_mean_min_diff_flux'] / train_metadata['A1_mean_min_diff_flux']
train_metadata['div_A2_A0_mean_min_flux'] = train_metadata['A2_mean_min_diff_flux'] / train_metadata['A0_mean_min_diff_flux']
train_metadata['div_A1_A0_mean_min_flux'] = train_metadata['A1_mean_min_diff_flux'] / train_metadata['A0_mean_min_diff_flux']
#F7
train_metadata['div_A5_A4_plus_3_sigma'] = train_metadata['A5_plus_3_sigma'] / train_metadata['A4_plus_3_sigma']
train_metadata['div_A5_A3_plus_3_sigma'] = train_metadata['A5_plus_3_sigma'] / train_metadata['A3_plus_3_sigma']
train_metadata['div_A5_A2_plus_3_sigma'] = train_metadata['A5_plus_3_sigma'] / train_metadata['A2_plus_3_sigma']
train_metadata['div_A5_A1_plus_3_sigma'] = train_metadata['A5_plus_3_sigma'] / train_metadata['A1_plus_3_sigma']
train_metadata['div_A5_A0_plus_3_sigma'] = train_metadata['A5_plus_3_sigma'] / train_metadata['A0_plus_3_sigma']
train_metadata['div_A4_A3_plus_3_sigma'] = train_metadata['A4_plus_3_sigma'] / train_metadata['A3_plus_3_sigma']
train_metadata['div_A4_A2_plus_3_sigma'] = train_metadata['A4_plus_3_sigma'] / train_metadata['A2_plus_3_sigma']
train_metadata['div_A4_A1_plus_3_sigma'] = train_metadata['A4_plus_3_sigma'] / train_metadata['A1_plus_3_sigma']
train_metadata['div_A4_A0_plus_3_sigma'] = train_metadata['A4_plus_3_sigma'] / train_metadata['A0_plus_3_sigma']
train_metadata['div_A3_A2_plus_3_sigma'] = train_metadata['A3_plus_3_sigma'] / train_metadata['A2_plus_3_sigma']
train_metadata['div_A3_A1_plus_3_sigma'] = train_metadata['A3_plus_3_sigma'] / train_metadata['A1_plus_3_sigma']
train_metadata['div_A3_A0_plus_3_sigma'] = train_metadata['A3_plus_3_sigma'] / train_metadata['A0_plus_3_sigma']
train_metadata['div_A2_A1_plus_3_sigma'] = train_metadata['A2_plus_3_sigma'] / train_metadata['A1_plus_3_sigma']
train_metadata['div_A2_A0_plus_3_sigma'] = train_metadata['A2_plus_3_sigma'] / train_metadata['A0_plus_3_sigma']
train_metadata['div_A1_A0_plus_3_sigma'] = train_metadata['A1_plus_3_sigma'] / train_metadata['A0_plus_3_sigma']
#F8
train_metadata['div_A5_A4_plus_2_sigma'] = train_metadata['A5_plus_2_sigma'] / train_metadata['A4_plus_2_sigma']
train_metadata['div_A5_A3_plus_2_sigma'] = train_metadata['A5_plus_2_sigma'] / train_metadata['A3_plus_2_sigma']
train_metadata['div_A5_A2_plus_2_sigma'] = train_metadata['A5_plus_2_sigma'] / train_metadata['A2_plus_2_sigma']
train_metadata['div_A5_A1_plus_2_sigma'] = train_metadata['A5_plus_2_sigma'] / train_metadata['A1_plus_2_sigma']
train_metadata['div_A5_A0_plus_2_sigma'] = train_metadata['A5_plus_2_sigma'] / train_metadata['A0_plus_2_sigma']
train_metadata['div_A4_A3_plus_2_sigma'] = train_metadata['A4_plus_2_sigma'] / train_metadata['A3_plus_2_sigma']
train_metadata['div_A4_A2_plus_2_sigma'] = train_metadata['A4_plus_2_sigma'] / train_metadata['A2_plus_2_sigma']
train_metadata['div_A4_A1_plus_2_sigma'] = train_metadata['A4_plus_2_sigma'] / train_metadata['A1_plus_2_sigma']
train_metadata['div_A4_A0_plus_2_sigma'] = train_metadata['A4_plus_2_sigma'] / train_metadata['A0_plus_2_sigma']
train_metadata['div_A3_A2_plus_2_sigma'] = train_metadata['A3_plus_2_sigma'] / train_metadata['A2_plus_2_sigma']
train_metadata['div_A3_A1_plus_2_sigma'] = train_metadata['A3_plus_2_sigma'] / train_metadata['A1_plus_2_sigma']
train_metadata['div_A3_A0_plus_2_sigma'] = train_metadata['A3_plus_2_sigma'] / train_metadata['A0_plus_2_sigma']
train_metadata['div_A2_A1_plus_2_sigma'] = train_metadata['A2_plus_2_sigma'] / train_metadata['A1_plus_2_sigma']
train_metadata['div_A2_A0_plus_2_sigma'] = train_metadata['A2_plus_2_sigma'] / train_metadata['A0_plus_2_sigma']
train_metadata['div_A1_A0_plus_2_sigma'] = train_metadata['A1_plus_2_sigma'] / train_metadata['A0_plus_2_sigma']
#F9
train_metadata['div_A5_A4_plus_1_sigma'] = train_metadata['A5_plus_1_sigma'] / train_metadata['A4_plus_1_sigma']
train_metadata['div_A5_A3_plus_1_sigma'] = train_metadata['A5_plus_1_sigma'] / train_metadata['A3_plus_1_sigma']
train_metadata['div_A5_A2_plus_1_sigma'] = train_metadata['A5_plus_1_sigma'] / train_metadata['A2_plus_1_sigma']
train_metadata['div_A5_A1_plus_1_sigma'] = train_metadata['A5_plus_1_sigma'] / train_metadata['A1_plus_1_sigma']
train_metadata['div_A5_A0_plus_1_sigma'] = train_metadata['A5_plus_1_sigma'] / train_metadata['A0_plus_1_sigma']
train_metadata['div_A4_A3_plus_1_sigma'] = train_metadata['A4_plus_1_sigma'] / train_metadata['A3_plus_1_sigma']
train_metadata['div_A4_A2_plus_1_sigma'] = train_metadata['A4_plus_1_sigma'] / train_metadata['A2_plus_1_sigma']
train_metadata['div_A4_A1_plus_1_sigma'] = train_metadata['A4_plus_1_sigma'] / train_metadata['A1_plus_1_sigma']
train_metadata['div_A4_A0_plus_1_sigma'] = train_metadata['A4_plus_1_sigma'] / train_metadata['A0_plus_1_sigma']
train_metadata['div_A3_A2_plus_1_sigma'] = train_metadata['A3_plus_1_sigma'] / train_metadata['A2_plus_1_sigma']
train_metadata['div_A3_A1_plus_1_sigma'] = train_metadata['A3_plus_1_sigma'] / train_metadata['A1_plus_1_sigma']
train_metadata['div_A3_A0_plus_1_sigma'] = train_metadata['A3_plus_1_sigma'] / train_metadata['A0_plus_1_sigma']
train_metadata['div_A2_A1_plus_1_sigma'] = train_metadata['A2_plus_1_sigma'] / train_metadata['A1_plus_1_sigma']
train_metadata['div_A2_A0_plus_1_sigma'] = train_metadata['A2_plus_1_sigma'] / train_metadata['A0_plus_1_sigma']
train_metadata['div_A1_A0_plus_1_sigma'] = train_metadata['A1_plus_1_sigma'] / train_metadata['A0_plus_1_sigma']
#F10
train_metadata['div_A5_A4_minus_3_sigma'] = train_metadata['A5_minus_3_sigma'] / train_metadata['A4_minus_3_sigma']
train_metadata['div_A5_A3_minus_3_sigma'] = train_metadata['A5_minus_3_sigma'] / train_metadata['A3_minus_3_sigma']
train_metadata['div_A5_A2_minus_3_sigma'] = train_metadata['A5_minus_3_sigma'] / train_metadata['A2_minus_3_sigma']
train_metadata['div_A5_A1_minus_3_sigma'] = train_metadata['A5_minus_3_sigma'] / train_metadata['A1_minus_3_sigma']
train_metadata['div_A5_A0_minus_3_sigma'] = train_metadata['A5_minus_3_sigma'] / train_metadata['A0_minus_3_sigma']
train_metadata['div_A4_A3_minus_3_sigma'] = train_metadata['A4_minus_3_sigma'] / train_metadata['A3_minus_3_sigma']
train_metadata['div_A4_A2_minus_3_sigma'] = train_metadata['A4_minus_3_sigma'] / train_metadata['A2_minus_3_sigma']
train_metadata['div_A4_A1_minus_3_sigma'] = train_metadata['A4_minus_3_sigma'] / train_metadata['A1_minus_3_sigma']
train_metadata['div_A4_A0_minus_3_sigma'] = train_metadata['A4_minus_3_sigma'] / train_metadata['A0_minus_3_sigma']
train_metadata['div_A3_A2_minus_3_sigma'] = train_metadata['A3_minus_3_sigma'] / train_metadata['A2_minus_3_sigma']
train_metadata['div_A3_A1_minus_3_sigma'] = train_metadata['A3_minus_3_sigma'] / train_metadata['A1_minus_3_sigma']
train_metadata['div_A3_A0_minus_3_sigma'] = train_metadata['A3_minus_3_sigma'] / train_metadata['A0_minus_3_sigma']
train_metadata['div_A2_A1_minus_3_sigma'] = train_metadata['A2_minus_3_sigma'] / train_metadata['A1_minus_3_sigma']
train_metadata['div_A2_A0_minus_3_sigma'] = train_metadata['A2_minus_3_sigma'] / train_metadata['A0_minus_3_sigma']
train_metadata['div_A1_A0_minus_3_sigma'] = train_metadata['A1_minus_3_sigma'] / train_metadata['A0_minus_3_sigma']
#F11
train_metadata['div_A5_A4_minus_2_sigma'] = train_metadata['A5_minus_2_sigma'] / train_metadata['A4_minus_2_sigma']
train_metadata['div_A5_A3_minus_2_sigma'] = train_metadata['A5_minus_2_sigma'] / train_metadata['A3_minus_2_sigma']
train_metadata['div_A5_A2_minus_2_sigma'] = train_metadata['A5_minus_2_sigma'] / train_metadata['A2_minus_2_sigma']
train_metadata['div_A5_A1_minus_2_sigma'] = train_metadata['A5_minus_2_sigma'] / train_metadata['A1_minus_2_sigma']
train_metadata['div_A5_A0_minus_2_sigma'] = train_metadata['A5_minus_2_sigma'] / train_metadata['A0_minus_2_sigma']
train_metadata['div_A4_A3_minus_2_sigma'] = train_metadata['A4_minus_2_sigma'] / train_metadata['A3_minus_2_sigma']
train_metadata['div_A4_A2_minus_2_sigma'] = train_metadata['A4_minus_2_sigma'] / train_metadata['A2_minus_2_sigma']
train_metadata['div_A4_A1_minus_2_sigma'] = train_metadata['A4_minus_2_sigma'] / train_metadata['A1_minus_2_sigma']
train_metadata['div_A4_A0_minus_2_sigma'] = train_metadata['A4_minus_2_sigma'] / train_metadata['A0_minus_2_sigma']
train_metadata['div_A3_A2_minus_2_sigma'] = train_metadata['A3_minus_2_sigma'] / train_metadata['A2_minus_2_sigma']
train_metadata['div_A3_A1_minus_2_sigma'] = train_metadata['A3_minus_2_sigma'] / train_metadata['A1_minus_2_sigma']
train_metadata['div_A3_A0_minus_2_sigma'] = train_metadata['A3_minus_2_sigma'] / train_metadata['A0_minus_2_sigma']
train_metadata['div_A2_A1_minus_2_sigma'] = train_metadata['A2_minus_2_sigma'] / train_metadata['A1_minus_2_sigma']
train_metadata['div_A2_A0_minus_2_sigma'] = train_metadata['A2_minus_2_sigma'] / train_metadata['A0_minus_2_sigma']
train_metadata['div_A1_A0_minus_2_sigma'] = train_metadata['A1_minus_2_sigma'] / train_metadata['A0_minus_2_sigma']
#F12
train_metadata['div_A5_A4_minus_1_sigma'] = train_metadata['A5_minus_1_sigma'] / train_metadata['A4_minus_1_sigma']
train_metadata['div_A5_A3_minus_1_sigma'] = train_metadata['A5_minus_1_sigma'] / train_metadata['A3_minus_1_sigma']
train_metadata['div_A5_A2_minus_1_sigma'] = train_metadata['A5_minus_1_sigma'] / train_metadata['A2_minus_1_sigma']
train_metadata['div_A5_A1_minus_1_sigma'] = train_metadata['A5_minus_1_sigma'] / train_metadata['A1_minus_1_sigma']
train_metadata['div_A5_A0_minus_1_sigma'] = train_metadata['A5_minus_1_sigma'] / train_metadata['A0_minus_1_sigma']
train_metadata['div_A4_A3_minus_1_sigma'] = train_metadata['A4_minus_1_sigma'] / train_metadata['A3_minus_1_sigma']
train_metadata['div_A4_A2_minus_1_sigma'] = train_metadata['A4_minus_1_sigma'] / train_metadata['A2_minus_1_sigma']
train_metadata['div_A4_A1_minus_1_sigma'] = train_metadata['A4_minus_1_sigma'] / train_metadata['A1_minus_1_sigma']
train_metadata['div_A4_A0_minus_1_sigma'] = train_metadata['A4_minus_1_sigma'] / train_metadata['A0_minus_1_sigma']
train_metadata['div_A3_A2_minus_1_sigma'] = train_metadata['A3_minus_1_sigma'] / train_metadata['A2_minus_1_sigma']
train_metadata['div_A3_A1_minus_1_sigma'] = train_metadata['A3_minus_1_sigma'] / train_metadata['A1_minus_1_sigma']
train_metadata['div_A3_A0_minus_1_sigma'] = train_metadata['A3_minus_1_sigma'] / train_metadata['A0_minus_1_sigma']
train_metadata['div_A2_A1_minus_1_sigma'] = train_metadata['A2_minus_1_sigma'] / train_metadata['A1_minus_1_sigma']
train_metadata['div_A2_A0_minus_1_sigma'] = train_metadata['A2_minus_1_sigma'] / train_metadata['A0_minus_1_sigma']
train_metadata['div_A1_A0_minus_1_sigma'] = train_metadata['A1_minus_1_sigma'] / train_metadata['A0_minus_1_sigma']

In [20]:
print(train_metadata.shape)

(7848, 283)


In [41]:
train_metadata.iloc[:,103:].head()

Unnamed: 0,div_A5_A4_max_min_flux,div_A5_A3_max_min_flux,div_A5_A2_max_min_flux,div_A5_A1_max_min_flux,div_A5_A0_max_min_flux,div_A4_A3_max_min_flux,div_A4_A2_max_min_flux,div_A4_A1_max_min_flux,div_A4_A0_max_min_flux,div_A3_A2_max_min_flux,div_A3_A1_max_min_flux,div_A3_A0_max_min_flux,div_A2_A1_max_min_flux,div_A2_A0_max_min_flux,div_A1_A0_max_min_flux,div_A5_A4_max_mean_flux,div_A5_A3_max_mean_flux,div_A5_A2_max_mean_flux,div_A5_A1_max_mean_flux,div_A5_A0_max_mean_flux,div_A4_A3_max_mean_flux,div_A4_A2_max_mean_flux,div_A4_A1_max_mean_flux,div_A4_A0_max_mean_flux,div_A3_A2_max_mean_flux,div_A3_A1_max_mean_flux,div_A3_A0_max_mean_flux,div_A2_A1_max_mean_flux,div_A2_A0_max_mean_flux,div_A1_A0_max_mean_flux,div_A5_A4_max_median_flux,div_A5_A3_max_median_flux,div_A5_A2_max_median_flux,div_A5_A1_max_median_flux,div_A5_A0_max_median_flux,div_A4_A3_max_median_flux,div_A4_A2_max_median_flux,div_A4_A1_max_median_flux,div_A4_A0_max_median_flux,div_A3_A2_max_median_flux,div_A3_A1_max_median_flux,div_A3_A0_max_median_flux,div_A2_A1_max_median_flux,div_A2_A0_max_median_flux,div_A1_A0_max_median_flux,div_A5_A4_median_mean_flux,div_A5_A3_median_mean_flux,div_A5_A2_median_mean_flux,div_A5_A1_median_mean_flux,div_A5_A0_median_mean_flux,div_A4_A3_median_mean_flux,div_A4_A2_median_mean_flux,div_A4_A1_median_mean_flux,div_A4_A0_median_mean_flux,div_A3_A2_median_mean_flux,div_A3_A1_median_mean_flux,div_A3_A0_median_mean_flux,div_A2_A1_median_mean_flux,div_A2_A0_median_mean_flux,div_A1_A0_median_mean_flux,div_A5_A4_median_min_flux,div_A5_A3_median_min_flux,div_A5_A2_median_min_flux,div_A5_A1_median_min_flux,div_A5_A0_median_min_flux,div_A4_A3_median_min_flux,div_A4_A2_median_min_flux,div_A4_A1_median_min_flux,div_A4_A0_median_min_flux,div_A3_A2_median_min_flux,div_A3_A1_median_min_flux,div_A3_A0_median_min_flux,div_A2_A1_median_min_flux,div_A2_A0_median_min_flux,div_A1_A0_median_min_flux,div_A5_A4_mean_min_flux,div_A5_A3_mean_min_flux,div_A5_A2_mean_min_flux,div_A5_A1_mean_min_flux,div_A5_A0_mean_min_flux,div_A4_A3_mean_min_flux,div_A4_A2_mean_min_flux,div_A4_A1_mean_min_flux,div_A4_A0_mean_min_flux,div_A3_A2_mean_min_flux,div_A3_A1_mean_min_flux,div_A3_A0_mean_min_flux,div_A2_A1_mean_min_flux,div_A2_A0_mean_min_flux,div_A1_A0_mean_min_flux,div_A5_A4_plus_3_sigma,div_A5_A3_plus_3_sigma,div_A5_A2_plus_3_sigma,div_A5_A1_plus_3_sigma,div_A5_A0_plus_3_sigma,div_A4_A3_plus_3_sigma,div_A4_A2_plus_3_sigma,div_A4_A1_plus_3_sigma,div_A4_A0_plus_3_sigma,div_A3_A2_plus_3_sigma,div_A3_A1_plus_3_sigma,div_A3_A0_plus_3_sigma,div_A2_A1_plus_3_sigma,div_A2_A0_plus_3_sigma,div_A1_A0_plus_3_sigma,div_A5_A4_plus_2_sigma,div_A5_A3_plus_2_sigma,div_A5_A2_plus_2_sigma,div_A5_A1_plus_2_sigma,div_A5_A0_plus_2_sigma,div_A4_A3_plus_2_sigma,div_A4_A2_plus_2_sigma,div_A4_A1_plus_2_sigma,div_A4_A0_plus_2_sigma,div_A3_A2_plus_2_sigma,div_A3_A1_plus_2_sigma,div_A3_A0_plus_2_sigma,div_A2_A1_plus_2_sigma,div_A2_A0_plus_2_sigma,div_A1_A0_plus_2_sigma,div_A5_A4_plus_1_sigma,div_A5_A3_plus_1_sigma,div_A5_A2_plus_1_sigma,div_A5_A1_plus_1_sigma,div_A5_A0_plus_1_sigma,div_A4_A3_plus_1_sigma,div_A4_A2_plus_1_sigma,div_A4_A1_plus_1_sigma,div_A4_A0_plus_1_sigma,div_A3_A2_plus_1_sigma,div_A3_A1_plus_1_sigma,div_A3_A0_plus_1_sigma,div_A2_A1_plus_1_sigma,div_A2_A0_plus_1_sigma,div_A1_A0_plus_1_sigma,div_A5_A4_minus_3_sigma,div_A5_A3_minus_3_sigma,div_A5_A2_minus_3_sigma,div_A5_A1_minus_3_sigma,div_A5_A0_minus_3_sigma,div_A4_A3_minus_3_sigma,div_A4_A2_minus_3_sigma,div_A4_A1_minus_3_sigma,div_A4_A0_minus_3_sigma,div_A3_A2_minus_3_sigma,div_A3_A1_minus_3_sigma,div_A3_A0_minus_3_sigma,div_A2_A1_minus_3_sigma,div_A2_A0_minus_3_sigma,div_A1_A0_minus_3_sigma,div_A5_A4_minus_2_sigma,div_A5_A3_minus_2_sigma,div_A5_A2_minus_2_sigma,div_A5_A1_minus_2_sigma,div_A5_A0_minus_2_sigma,div_A4_A3_minus_2_sigma,div_A4_A2_minus_2_sigma,div_A4_A1_minus_2_sigma,div_A4_A0_minus_2_sigma,div_A3_A2_minus_2_sigma,div_A3_A1_minus_2_sigma,div_A3_A0_minus_2_sigma,div_A2_A1_minus_2_sigma,div_A2_A0_minus_2_sigma,div_A1_A0_minus_2_sigma,div_A5_A4_minus_1_sigma,div_A5_A3_minus_1_sigma,div_A5_A2_minus_1_sigma,div_A5_A1_minus_1_sigma,div_A5_A0_minus_1_sigma,div_A4_A3_minus_1_sigma,div_A4_A2_minus_1_sigma,div_A4_A1_minus_1_sigma,div_A4_A0_minus_1_sigma,div_A3_A2_minus_1_sigma,div_A3_A1_minus_1_sigma,div_A3_A0_minus_1_sigma,div_A2_A1_minus_1_sigma,div_A2_A0_minus_1_sigma,div_A1_A0_minus_1_sigma
0,0.996101,0.820501,0.619021,0.454954,3.308618,0.823712,0.621443,0.456734,3.321567,0.754443,0.554483,4.032437,0.734957,5.344923,7.27243,0.97198,0.750895,0.57046,0.406754,3.313973,0.772542,0.586905,0.41848,3.409509,0.759706,0.541692,4.413362,0.713028,5.8093,8.147367,0.955133,0.762889,0.528276,0.40371,3.429876,0.798726,0.553091,0.422675,3.590992,0.692467,0.529186,4.495902,0.764204,6.492584,8.49588,0.800106,0.928725,0.289202,0.372549,5.631758,1.160752,0.361455,0.465624,7.038766,0.311397,0.40114,6.06397,1.288194,19.473429,15.116844,1.058522,0.915556,0.810406,0.551129,3.155258,0.864938,0.765601,0.520659,2.980815,0.885151,0.601961,3.446276,0.680065,3.893431,5.725084,1.024944,0.916875,0.685156,0.525574,3.302566,0.89456,0.668482,0.512783,3.222191,0.747274,0.573224,3.601982,0.767087,4.820164,6.283727,1.021274,0.945454,0.679724,0.58954,3.366682,0.925759,0.665565,0.57726,3.296552,0.71894,0.623553,3.560918,0.867322,4.95301,5.710693,1.027398,0.986107,0.698508,0.662908,3.292796,0.95981,0.67988,0.645229,3.204984,0.708349,0.672247,3.339186,0.949034,4.714044,4.967201,1.048679,1.154012,0.770558,1.145112,3.065177,1.100444,0.734789,1.091957,2.922894,0.667721,0.992288,2.656105,1.486082,3.977867,2.676748,1.000455,0.826509,0.621395,0.425249,3.652801,0.826133,0.621113,0.425055,3.651141,0.751831,0.514512,4.419555,0.684345,5.878388,8.589802,0.996009,0.804351,0.609934,0.400792,3.722063,0.807574,0.612378,0.402398,3.736978,0.758294,0.49828,4.62741,0.657107,6.1024,9.286767,0.984102,0.749634,0.58077,0.346531,3.924681,0.761744,0.590153,0.352129,3.988084,0.774739,0.462267,5.235462,0.596674,6.757715,11.325635
1,1.310531,1.221309,1.406961,1.390092,0.991008,0.931919,1.073581,1.060709,0.756188,1.152011,1.138198,0.811431,0.98801,0.704361,0.712909,1.544068,1.344863,1.462921,1.632183,0.961395,0.870987,0.947446,1.057067,0.622638,1.087784,1.213642,0.714865,1.115702,0.657175,0.589024,1.622761,1.511187,1.618656,1.778428,0.97883,0.931244,0.997471,1.095928,0.603188,1.071116,1.176842,0.647723,1.098707,0.604718,0.55039,-6.188008,-0.732517,-0.989055,-1.45875,1.7769,0.118377,0.159834,0.235738,-0.287152,1.350214,1.991421,-2.425745,1.474892,-1.796563,-1.218098,1.022056,0.953112,1.180482,1.052843,1.009433,0.932544,1.155007,1.030123,0.987649,1.238555,1.104638,1.059091,0.891876,0.855102,0.958768,1.090497,1.087966,1.338649,1.160466,1.033476,0.997679,1.227558,1.064163,0.947711,1.230415,1.066639,0.949916,0.866894,0.772029,0.89057,1.063717,1.061193,1.179808,1.209161,1.046612,0.997628,1.109138,1.136732,0.983919,1.111775,1.139435,0.986259,1.024879,0.887103,0.865569,1.040311,1.040299,1.153224,1.191193,1.077117,0.999988,1.108538,1.145036,1.035379,1.108551,1.145049,1.035392,1.032925,0.934005,0.904233,0.962441,0.970065,1.064985,1.12948,1.206411,1.007922,1.106546,1.173558,1.253492,1.097849,1.164335,1.24364,1.06056,1.132796,1.068112,1.146868,1.134633,1.274485,1.270936,0.959082,0.989332,1.111274,1.10818,0.836262,1.123257,1.12013,0.845279,0.997216,0.752525,0.754626,1.165381,1.150818,1.295614,1.284271,0.943054,0.987503,1.111751,1.102017,0.809223,1.12582,1.115963,0.819464,0.991245,0.727882,0.734311,1.21636,1.195078,1.353889,1.320233,0.903837,0.982503,1.113066,1.085396,0.743067,1.132888,1.104725,0.7563,0.975141,0.667586,0.684604
2,1.414364,1.703914,2.787615,7.315548,7.069664,1.204721,1.970931,5.172323,4.998475,1.636006,4.293378,4.149072,2.624304,2.536098,0.966389,1.16063,1.409901,2.30026,7.703637,7.140051,1.214772,1.981906,6.637463,6.151876,1.631505,5.463957,5.064222,3.349029,3.104019,0.926842,1.114848,1.360236,2.183456,8.107528,7.564525,1.220109,1.958523,7.272318,6.785254,1.605203,5.960383,5.561185,3.713164,3.464474,0.933025,0.604275,0.775267,1.045873,-65.899307,-27.66599,1.282971,1.730791,-109.055248,-45.783811,1.349049,-85.002109,-35.68577,-63.008911,-26.452538,0.419822,3.172566,3.558597,6.494436,6.088671,6.229084,1.121678,2.047061,1.919163,1.963421,1.824999,1.710975,1.750433,0.937521,0.959142,1.023061,2.336039,2.73216,4.513387,6.705868,6.946085,1.16957,1.932068,2.870614,2.973446,1.651948,2.454419,2.542342,1.485773,1.538996,1.035822,1.222227,1.601376,2.334343,8.006361,8.189276,1.310211,1.909909,6.550632,6.700289,1.457711,4.999676,5.113899,3.429814,3.508172,1.022846,1.212811,1.590453,2.308084,8.308997,8.644825,1.311378,1.903086,6.851025,7.127926,1.451211,5.224295,5.435447,3.599955,3.745456,1.040417,1.189964,1.563871,2.245146,9.17307,10.036072,1.314217,1.886734,7.708696,8.43393,1.435634,5.865618,6.417455,4.085734,4.47012,1.09408,1.274477,1.661631,2.483536,6.714952,6.406652,1.303775,1.948671,5.268791,5.026889,1.494637,4.04118,3.85564,2.703787,2.57965,0.954088,1.293021,1.682875,2.537953,6.37024,5.970633,1.301506,1.962809,4.926634,4.617585,1.508106,3.785332,3.547877,2.509991,2.352539,0.93727,1.373616,1.774349,2.783931,5.277721,4.684914,1.291736,2.026717,3.84221,3.410644,1.568987,2.974454,2.640356,1.89578,1.682841,0.887678
3,0.762138,0.728942,0.680687,0.774836,6.933482,0.956444,0.893128,1.016661,9.097412,0.933801,1.062959,9.511701,1.138315,10.186009,8.948324,0.766969,0.692484,0.619504,0.701073,8.06383,0.902884,0.80773,0.914083,10.513898,0.894611,1.012403,11.644788,1.131669,13.016594,11.502126,0.760921,0.687324,0.629683,0.725159,8.183119,0.90328,0.827527,0.953002,10.754236,0.916136,1.055046,11.905759,1.151626,12.995625,11.284587,0.674005,0.612675,0.86103,1.654922,10.794392,0.909007,1.277483,2.455357,16.01531,1.405362,2.701143,17.618475,1.922026,12.536608,6.522599,0.77538,2.061361,5.029966,2.883528,2.636134,2.658518,6.4871,3.718858,3.399797,2.440119,1.398846,1.278832,0.57327,0.524086,0.914205,0.733368,1.084596,1.768561,2.247915,3.701692,1.478924,2.411559,3.065193,5.047522,1.630617,2.072583,3.412969,1.271042,2.093054,1.646723,0.791573,0.745303,0.842223,1.063537,5.95988,0.941547,1.063987,1.343574,7.529165,1.130041,1.426986,7.996592,1.262774,7.076371,5.603832,0.793041,0.745319,0.853844,1.090497,5.960927,0.939824,1.07667,1.375082,7.516541,1.145608,1.463128,7.997821,1.277162,6.981286,5.466249,0.796612,0.745359,0.88329,1.161643,5.963457,0.93566,1.108808,1.458229,7.486022,1.185053,1.558503,8.00079,1.315133,6.751417,5.13364,0.783457,0.745211,0.782641,0.934329,5.954032,0.951183,0.998959,1.192573,7.599696,1.050227,1.253778,7.989729,1.193816,7.607619,6.372521,0.78059,0.745178,0.763297,0.895363,5.95194,0.954635,0.977847,1.147034,7.624928,1.024315,1.201543,7.987273,1.17302,7.79767,6.647515,0.768211,0.745033,0.688379,0.756446,5.942746,0.969829,0.89608,0.984685,7.735825,0.923957,1.015318,7.976482,1.098881,8.632961,7.85614
4,0.749439,0.841533,1.103557,3.016048,9.889732,1.122884,1.472511,4.024407,13.196179,1.311365,3.583991,11.752039,2.733024,8.961685,3.279037,0.764887,0.794371,1.060492,3.142892,21.897495,1.038547,1.386469,4.108964,28.628412,1.335009,3.956454,27.565829,2.963617,20.648427,6.967307,0.75559,0.772197,1.01325,2.976005,22.565594,1.021979,1.341006,3.938652,29.864877,1.312166,3.853946,29.222594,2.937088,22.270509,7.582513,0.603286,0.4888,0.526295,1.415197,61.604279,0.81023,0.87238,2.345815,102.114591,1.076707,2.895244,126.031551,2.688982,117.052836,43.530541,0.701732,3.364922,4.316788,3.397885,1.73765,4.795166,6.15162,4.842141,2.47623,1.282879,1.009796,0.516401,0.787133,0.402533,0.511392,0.671356,1.278739,1.440406,2.447255,2.378531,1.90471,2.145516,3.645239,3.542873,1.126427,1.913803,1.86006,1.699003,1.651292,0.971918,0.786714,0.792011,0.959532,2.458961,9.19762,1.006734,1.21967,3.12561,11.691188,1.211513,3.104704,11.612991,2.562668,9.585531,3.74045,0.782018,0.782142,0.941047,2.387082,9.248265,1.000158,1.203357,3.052464,11.826156,1.203166,3.051981,11.824284,2.536624,9.827639,3.874298,0.77046,0.758391,0.89759,2.223721,9.378042,0.984335,1.165005,2.886224,12.172003,1.183545,2.932156,12.365708,2.477435,10.448025,4.217275,0.812078,0.847646,1.068724,2.915611,8.94289,1.043798,1.316036,3.590308,11.01235,1.260814,3.439657,10.550268,2.728123,8.36782,3.067244,0.820808,0.867754,1.110409,3.105885,8.861925,1.057194,1.352824,3.783935,10.796585,1.279636,3.579223,10.212487,2.797064,7.980777,2.853269,0.857196,0.957442,1.312571,4.184136,8.556043,1.116946,1.531238,4.881188,9.981429,1.370915,4.370121,8.936359,3.187741,6.518537,2.044877


In [42]:
print(train_metadata_kaggle.shape)

(7848, 133)


In [26]:
test_id = test_metadata_kaggle['object_id']

In [27]:
def multi_weighted_logloss(y_true, y_preds):
    """
    @author olivier https://www.kaggle.com/ogrellier
    multi logloss for PLAsTiCC challenge
    """
    # class_weights taken from Giba's topic : https://www.kaggle.com/titericz
    # https://www.kaggle.com/c/PLAsTiCC-2018/discussion/67194
    # with Kyle Boone's post https://www.kaggle.com/kyleboone
    classes = [6, 15, 16, 42, 52, 53, 62, 64, 65, 67, 88, 90, 92, 95]
    class_weight = {6: 1, 15: 2, 16: 1, 42: 1, 52: 1, 53: 1, 62: 1, 64: 2, 65: 1, 67: 1, 88: 1, 90: 1, 92: 1, 95: 1}
    if len(np.unique(y_true)) > 14:
        classes.append(99)
        class_weight[99] = 2
    y_p = y_preds
    # Trasform y_true in dummies
    y_ohe = pd.get_dummies(y_true)
    # Normalize rows and limit y_preds to 1e-15, 1-1e-15
    y_p = np.clip(a=y_p, a_min=1e-15, a_max=1 - 1e-15)
    # Transform to log
    y_p_log = np.log(y_p)
    # Get the log for ones, .values is used to drop the index of DataFrames
    # Exclude class 99 for now, since there is no class99 in the training set
    # we gave a special process for that class
    y_log_ones = np.sum(y_ohe.values * y_p_log, axis=0)
    # Get the number of positives for each class
    nb_pos = y_ohe.sum(axis=0).values.astype(float)
    # Weight average and divide by the number of positives
    class_arr = np.array([class_weight[k] for k in sorted(class_weight.keys())])
    y_w = y_log_ones * class_arr / nb_pos

    loss = - np.sum(y_w) / np.sum(class_arr)
    return loss


def lgb_multi_weighted_logloss(y_true, y_preds):
    """
    @author olivier https://www.kaggle.com/ogrellier
    multi logloss for PLAsTiCC challenge
    """
    # class_weights taken from Giba's topic : https://www.kaggle.com/titericz
    # https://www.kaggle.com/c/PLAsTiCC-2018/discussion/67194
    # with Kyle Boone's post https://www.kaggle.com/kyleboone
    classes = [6, 15, 16, 42, 52, 53, 62, 64, 65, 67, 88, 90, 92, 95]
    class_weight = {6: 1, 15: 2, 16: 1, 42: 1, 52: 1, 53: 1, 62: 1, 64: 2, 65: 1, 67: 1, 88: 1, 90: 1, 92: 1, 95: 1}
    if len(np.unique(y_true)) > 14:
        classes.append(99)
        class_weight[99] = 2
    y_p = y_preds.reshape(y_true.shape[0], len(classes), order='F')

    # Trasform y_true in dummies
    y_ohe = pd.get_dummies(y_true)
    # Normalize rows and limit y_preds to 1e-15, 1-1e-15
    y_p = np.clip(a=y_p, a_min=1e-15, a_max=1 - 1e-15)
    # Transform to log
    y_p_log = np.log(y_p)
    # Get the log for ones, .values is used to drop the index of DataFrames
    # Exclude class 99 for now, since there is no class99 in the training set
    # we gave a special process for that class
    y_log_ones = np.sum(y_ohe.values * y_p_log, axis=0)
    # Get the number of positives for each class
    nb_pos = y_ohe.sum(axis=0).values.astype(float)
    # Weight average and divide by the number of positives
    class_arr = np.array([class_weight[k] for k in sorted(class_weight.keys())])
    y_w = y_log_ones * class_arr / nb_pos

    loss = - np.sum(y_w) / np.sum(class_arr)
    return 'wloss', loss, False

In [89]:
"""used_columns1 = ['div_A4_A2_median_min_flux', 'div_A5_A2_median_min_flux','div_A5_A2_minus_1_sigma',
                'div_A5_A4_median_mean_flux','div_A3_A0_plus_1_sigma','div_A4_A1_median_min_flux',
                'div_A4_A1_minus_1_sigma','div_A1_A0_median_min_flux','div_A5_A0_max_median_flux',
                'div_A4_A0_minus_1_sigma']"""

In [137]:
used_columns1 = ['div_A4_A2_median_min_flux', 'div_A5_A2_median_min_flux','div_A5_A2_minus_1_sigma',
                'div_A5_A4_median_mean_flux','div_A3_A0_plus_1_sigma','div_A4_A1_minus_1_sigma']

In [138]:
"""used_columns2 = ['div_A5_A1_median_min_flux','div_A4_A3_max_mean_flux',
                'div_A5_A0_minus_1_sigma','div_A3_A1_max_median_flux',
                'div_A5_A1_median_mean_flux','div_A2_A1_minus_1_sigma',
                'div_A4_A2_max_mean_flux','div_A3_A2_median_min_flux',
                'div_A5_A0_plus_1_sigma']"""

"used_columns2 = ['div_A5_A1_median_min_flux','div_A4_A3_max_mean_flux',\n                'div_A5_A0_minus_1_sigma','div_A3_A1_max_median_flux',\n                'div_A5_A1_median_mean_flux','div_A2_A1_minus_1_sigma',\n                'div_A4_A2_max_mean_flux','div_A3_A2_median_min_flux',\n                'div_A5_A0_plus_1_sigma']"

In [139]:
train_metadata[list(train_metadata.columns)[-180:]].head()

Unnamed: 0,div_A5_A4_max_min_flux,div_A5_A3_max_min_flux,div_A5_A2_max_min_flux,div_A5_A1_max_min_flux,div_A5_A0_max_min_flux,div_A4_A3_max_min_flux,div_A4_A2_max_min_flux,div_A4_A1_max_min_flux,div_A4_A0_max_min_flux,div_A3_A2_max_min_flux,div_A3_A1_max_min_flux,div_A3_A0_max_min_flux,div_A2_A1_max_min_flux,div_A2_A0_max_min_flux,div_A1_A0_max_min_flux,div_A5_A4_max_mean_flux,div_A5_A3_max_mean_flux,div_A5_A2_max_mean_flux,div_A5_A1_max_mean_flux,div_A5_A0_max_mean_flux,div_A4_A3_max_mean_flux,div_A4_A2_max_mean_flux,div_A4_A1_max_mean_flux,div_A4_A0_max_mean_flux,div_A3_A2_max_mean_flux,div_A3_A1_max_mean_flux,div_A3_A0_max_mean_flux,div_A2_A1_max_mean_flux,div_A2_A0_max_mean_flux,div_A1_A0_max_mean_flux,div_A5_A4_max_median_flux,div_A5_A3_max_median_flux,div_A5_A2_max_median_flux,div_A5_A1_max_median_flux,div_A5_A0_max_median_flux,div_A4_A3_max_median_flux,div_A4_A2_max_median_flux,div_A4_A1_max_median_flux,div_A4_A0_max_median_flux,div_A3_A2_max_median_flux,div_A3_A1_max_median_flux,div_A3_A0_max_median_flux,div_A2_A1_max_median_flux,div_A2_A0_max_median_flux,div_A1_A0_max_median_flux,div_A5_A4_median_mean_flux,div_A5_A3_median_mean_flux,div_A5_A2_median_mean_flux,div_A5_A1_median_mean_flux,div_A5_A0_median_mean_flux,div_A4_A3_median_mean_flux,div_A4_A2_median_mean_flux,div_A4_A1_median_mean_flux,div_A4_A0_median_mean_flux,div_A3_A2_median_mean_flux,div_A3_A1_median_mean_flux,div_A3_A0_median_mean_flux,div_A2_A1_median_mean_flux,div_A2_A0_median_mean_flux,div_A1_A0_median_mean_flux,div_A5_A4_median_min_flux,div_A5_A3_median_min_flux,div_A5_A2_median_min_flux,div_A5_A1_median_min_flux,div_A5_A0_median_min_flux,div_A4_A3_median_min_flux,div_A4_A2_median_min_flux,div_A4_A1_median_min_flux,div_A4_A0_median_min_flux,div_A3_A2_median_min_flux,div_A3_A1_median_min_flux,div_A3_A0_median_min_flux,div_A2_A1_median_min_flux,div_A2_A0_median_min_flux,div_A1_A0_median_min_flux,div_A5_A4_mean_min_flux,div_A5_A3_mean_min_flux,div_A5_A2_mean_min_flux,div_A5_A1_mean_min_flux,div_A5_A0_mean_min_flux,div_A4_A3_mean_min_flux,div_A4_A2_mean_min_flux,div_A4_A1_mean_min_flux,div_A4_A0_mean_min_flux,div_A3_A2_mean_min_flux,div_A3_A1_mean_min_flux,div_A3_A0_mean_min_flux,div_A2_A1_mean_min_flux,div_A2_A0_mean_min_flux,div_A1_A0_mean_min_flux,div_A5_A4_plus_3_sigma,div_A5_A3_plus_3_sigma,div_A5_A2_plus_3_sigma,div_A5_A1_plus_3_sigma,div_A5_A0_plus_3_sigma,div_A4_A3_plus_3_sigma,div_A4_A2_plus_3_sigma,div_A4_A1_plus_3_sigma,div_A4_A0_plus_3_sigma,div_A3_A2_plus_3_sigma,div_A3_A1_plus_3_sigma,div_A3_A0_plus_3_sigma,div_A2_A1_plus_3_sigma,div_A2_A0_plus_3_sigma,div_A1_A0_plus_3_sigma,div_A5_A4_plus_2_sigma,div_A5_A3_plus_2_sigma,div_A5_A2_plus_2_sigma,div_A5_A1_plus_2_sigma,div_A5_A0_plus_2_sigma,div_A4_A3_plus_2_sigma,div_A4_A2_plus_2_sigma,div_A4_A1_plus_2_sigma,div_A4_A0_plus_2_sigma,div_A3_A2_plus_2_sigma,div_A3_A1_plus_2_sigma,div_A3_A0_plus_2_sigma,div_A2_A1_plus_2_sigma,div_A2_A0_plus_2_sigma,div_A1_A0_plus_2_sigma,div_A5_A4_plus_1_sigma,div_A5_A3_plus_1_sigma,div_A5_A2_plus_1_sigma,div_A5_A1_plus_1_sigma,div_A5_A0_plus_1_sigma,div_A4_A3_plus_1_sigma,div_A4_A2_plus_1_sigma,div_A4_A1_plus_1_sigma,div_A4_A0_plus_1_sigma,div_A3_A2_plus_1_sigma,div_A3_A1_plus_1_sigma,div_A3_A0_plus_1_sigma,div_A2_A1_plus_1_sigma,div_A2_A0_plus_1_sigma,div_A1_A0_plus_1_sigma,div_A5_A4_minus_3_sigma,div_A5_A3_minus_3_sigma,div_A5_A2_minus_3_sigma,div_A5_A1_minus_3_sigma,div_A5_A0_minus_3_sigma,div_A4_A3_minus_3_sigma,div_A4_A2_minus_3_sigma,div_A4_A1_minus_3_sigma,div_A4_A0_minus_3_sigma,div_A3_A2_minus_3_sigma,div_A3_A1_minus_3_sigma,div_A3_A0_minus_3_sigma,div_A2_A1_minus_3_sigma,div_A2_A0_minus_3_sigma,div_A1_A0_minus_3_sigma,div_A5_A4_minus_2_sigma,div_A5_A3_minus_2_sigma,div_A5_A2_minus_2_sigma,div_A5_A1_minus_2_sigma,div_A5_A0_minus_2_sigma,div_A4_A3_minus_2_sigma,div_A4_A2_minus_2_sigma,div_A4_A1_minus_2_sigma,div_A4_A0_minus_2_sigma,div_A3_A2_minus_2_sigma,div_A3_A1_minus_2_sigma,div_A3_A0_minus_2_sigma,div_A2_A1_minus_2_sigma,div_A2_A0_minus_2_sigma,div_A1_A0_minus_2_sigma,div_A5_A4_minus_1_sigma,div_A5_A3_minus_1_sigma,div_A5_A2_minus_1_sigma,div_A5_A1_minus_1_sigma,div_A5_A0_minus_1_sigma,div_A4_A3_minus_1_sigma,div_A4_A2_minus_1_sigma,div_A4_A1_minus_1_sigma,div_A4_A0_minus_1_sigma,div_A3_A2_minus_1_sigma,div_A3_A1_minus_1_sigma,div_A3_A0_minus_1_sigma,div_A2_A1_minus_1_sigma,div_A2_A0_minus_1_sigma,div_A1_A0_minus_1_sigma
0,0.996101,0.820501,0.619021,0.454954,3.308618,0.823712,0.621443,0.456734,3.321567,0.754443,0.554483,4.032437,0.734957,5.344923,7.27243,0.97198,0.750895,0.57046,0.406754,3.313973,0.772542,0.586905,0.41848,3.409509,0.759706,0.541692,4.413362,0.713028,5.8093,8.147367,0.955133,0.762889,0.528276,0.40371,3.429876,0.798726,0.553091,0.422675,3.590992,0.692467,0.529186,4.495902,0.764204,6.492584,8.49588,0.800106,0.928725,0.289202,0.372549,5.631758,1.160752,0.361455,0.465624,7.038766,0.311397,0.40114,6.06397,1.288194,19.473429,15.116844,1.058522,0.915556,0.810406,0.551129,3.155258,0.864938,0.765601,0.520659,2.980815,0.885151,0.601961,3.446276,0.680065,3.893431,5.725084,1.024944,0.916875,0.685156,0.525574,3.302566,0.89456,0.668482,0.512783,3.222191,0.747274,0.573224,3.601982,0.767087,4.820164,6.283727,1.021274,0.945454,0.679724,0.58954,3.366682,0.925759,0.665565,0.57726,3.296552,0.71894,0.623553,3.560918,0.867322,4.95301,5.710693,1.027398,0.986107,0.698508,0.662908,3.292796,0.95981,0.67988,0.645229,3.204984,0.708349,0.672247,3.339186,0.949034,4.714044,4.967201,1.048679,1.154012,0.770558,1.145112,3.065177,1.100444,0.734789,1.091957,2.922894,0.667721,0.992288,2.656105,1.486082,3.977867,2.676748,1.000455,0.826509,0.621395,0.425249,3.652801,0.826133,0.621113,0.425055,3.651141,0.751831,0.514512,4.419555,0.684345,5.878388,8.589802,0.996009,0.804351,0.609934,0.400792,3.722063,0.807574,0.612378,0.402398,3.736978,0.758294,0.49828,4.62741,0.657107,6.1024,9.286767,0.984102,0.749634,0.58077,0.346531,3.924681,0.761744,0.590153,0.352129,3.988084,0.774739,0.462267,5.235462,0.596674,6.757715,11.325635
1,1.310531,1.221309,1.406961,1.390092,0.991008,0.931919,1.073581,1.060709,0.756188,1.152011,1.138198,0.811431,0.98801,0.704361,0.712909,1.544068,1.344863,1.462921,1.632183,0.961395,0.870987,0.947446,1.057067,0.622638,1.087784,1.213642,0.714865,1.115702,0.657175,0.589024,1.622761,1.511187,1.618656,1.778428,0.97883,0.931244,0.997471,1.095928,0.603188,1.071116,1.176842,0.647723,1.098707,0.604718,0.55039,-6.188008,-0.732517,-0.989055,-1.45875,1.7769,0.118377,0.159834,0.235738,-0.287152,1.350214,1.991421,-2.425745,1.474892,-1.796563,-1.218098,1.022056,0.953112,1.180482,1.052843,1.009433,0.932544,1.155007,1.030123,0.987649,1.238555,1.104638,1.059091,0.891876,0.855102,0.958768,1.090497,1.087966,1.338649,1.160466,1.033476,0.997679,1.227558,1.064163,0.947711,1.230415,1.066639,0.949916,0.866894,0.772029,0.89057,1.063717,1.061193,1.179808,1.209161,1.046612,0.997628,1.109138,1.136732,0.983919,1.111775,1.139435,0.986259,1.024879,0.887103,0.865569,1.040311,1.040299,1.153224,1.191193,1.077117,0.999988,1.108538,1.145036,1.035379,1.108551,1.145049,1.035392,1.032925,0.934005,0.904233,0.962441,0.970065,1.064985,1.12948,1.206411,1.007922,1.106546,1.173558,1.253492,1.097849,1.164335,1.24364,1.06056,1.132796,1.068112,1.146868,1.134633,1.274485,1.270936,0.959082,0.989332,1.111274,1.10818,0.836262,1.123257,1.12013,0.845279,0.997216,0.752525,0.754626,1.165381,1.150818,1.295614,1.284271,0.943054,0.987503,1.111751,1.102017,0.809223,1.12582,1.115963,0.819464,0.991245,0.727882,0.734311,1.21636,1.195078,1.353889,1.320233,0.903837,0.982503,1.113066,1.085396,0.743067,1.132888,1.104725,0.7563,0.975141,0.667586,0.684604
2,1.414364,1.703914,2.787615,7.315548,7.069664,1.204721,1.970931,5.172323,4.998475,1.636006,4.293378,4.149072,2.624304,2.536098,0.966389,1.16063,1.409901,2.30026,7.703637,7.140051,1.214772,1.981906,6.637463,6.151876,1.631505,5.463957,5.064222,3.349029,3.104019,0.926842,1.114848,1.360236,2.183456,8.107528,7.564525,1.220109,1.958523,7.272318,6.785254,1.605203,5.960383,5.561185,3.713164,3.464474,0.933025,0.604275,0.775267,1.045873,-65.899307,-27.66599,1.282971,1.730791,-109.055248,-45.783811,1.349049,-85.002109,-35.68577,-63.008911,-26.452538,0.419822,3.172566,3.558597,6.494436,6.088671,6.229084,1.121678,2.047061,1.919163,1.963421,1.824999,1.710975,1.750433,0.937521,0.959142,1.023061,2.336039,2.73216,4.513387,6.705868,6.946085,1.16957,1.932068,2.870614,2.973446,1.651948,2.454419,2.542342,1.485773,1.538996,1.035822,1.222227,1.601376,2.334343,8.006361,8.189276,1.310211,1.909909,6.550632,6.700289,1.457711,4.999676,5.113899,3.429814,3.508172,1.022846,1.212811,1.590453,2.308084,8.308997,8.644825,1.311378,1.903086,6.851025,7.127926,1.451211,5.224295,5.435447,3.599955,3.745456,1.040417,1.189964,1.563871,2.245146,9.17307,10.036072,1.314217,1.886734,7.708696,8.43393,1.435634,5.865618,6.417455,4.085734,4.47012,1.09408,1.274477,1.661631,2.483536,6.714952,6.406652,1.303775,1.948671,5.268791,5.026889,1.494637,4.04118,3.85564,2.703787,2.57965,0.954088,1.293021,1.682875,2.537953,6.37024,5.970633,1.301506,1.962809,4.926634,4.617585,1.508106,3.785332,3.547877,2.509991,2.352539,0.93727,1.373616,1.774349,2.783931,5.277721,4.684914,1.291736,2.026717,3.84221,3.410644,1.568987,2.974454,2.640356,1.89578,1.682841,0.887678
3,0.762138,0.728942,0.680687,0.774836,6.933482,0.956444,0.893128,1.016661,9.097412,0.933801,1.062959,9.511701,1.138315,10.186009,8.948324,0.766969,0.692484,0.619504,0.701073,8.06383,0.902884,0.80773,0.914083,10.513898,0.894611,1.012403,11.644788,1.131669,13.016594,11.502126,0.760921,0.687324,0.629683,0.725159,8.183119,0.90328,0.827527,0.953002,10.754236,0.916136,1.055046,11.905759,1.151626,12.995625,11.284587,0.674005,0.612675,0.86103,1.654922,10.794392,0.909007,1.277483,2.455357,16.01531,1.405362,2.701143,17.618475,1.922026,12.536608,6.522599,0.77538,2.061361,5.029966,2.883528,2.636134,2.658518,6.4871,3.718858,3.399797,2.440119,1.398846,1.278832,0.57327,0.524086,0.914205,0.733368,1.084596,1.768561,2.247915,3.701692,1.478924,2.411559,3.065193,5.047522,1.630617,2.072583,3.412969,1.271042,2.093054,1.646723,0.791573,0.745303,0.842223,1.063537,5.95988,0.941547,1.063987,1.343574,7.529165,1.130041,1.426986,7.996592,1.262774,7.076371,5.603832,0.793041,0.745319,0.853844,1.090497,5.960927,0.939824,1.07667,1.375082,7.516541,1.145608,1.463128,7.997821,1.277162,6.981286,5.466249,0.796612,0.745359,0.88329,1.161643,5.963457,0.93566,1.108808,1.458229,7.486022,1.185053,1.558503,8.00079,1.315133,6.751417,5.13364,0.783457,0.745211,0.782641,0.934329,5.954032,0.951183,0.998959,1.192573,7.599696,1.050227,1.253778,7.989729,1.193816,7.607619,6.372521,0.78059,0.745178,0.763297,0.895363,5.95194,0.954635,0.977847,1.147034,7.624928,1.024315,1.201543,7.987273,1.17302,7.79767,6.647515,0.768211,0.745033,0.688379,0.756446,5.942746,0.969829,0.89608,0.984685,7.735825,0.923957,1.015318,7.976482,1.098881,8.632961,7.85614
4,0.749439,0.841533,1.103557,3.016048,9.889732,1.122884,1.472511,4.024407,13.196179,1.311365,3.583991,11.752039,2.733024,8.961685,3.279037,0.764887,0.794371,1.060492,3.142892,21.897495,1.038547,1.386469,4.108964,28.628412,1.335009,3.956454,27.565829,2.963617,20.648427,6.967307,0.75559,0.772197,1.01325,2.976005,22.565594,1.021979,1.341006,3.938652,29.864877,1.312166,3.853946,29.222594,2.937088,22.270509,7.582513,0.603286,0.4888,0.526295,1.415197,61.604279,0.81023,0.87238,2.345815,102.114591,1.076707,2.895244,126.031551,2.688982,117.052836,43.530541,0.701732,3.364922,4.316788,3.397885,1.73765,4.795166,6.15162,4.842141,2.47623,1.282879,1.009796,0.516401,0.787133,0.402533,0.511392,0.671356,1.278739,1.440406,2.447255,2.378531,1.90471,2.145516,3.645239,3.542873,1.126427,1.913803,1.86006,1.699003,1.651292,0.971918,0.786714,0.792011,0.959532,2.458961,9.19762,1.006734,1.21967,3.12561,11.691188,1.211513,3.104704,11.612991,2.562668,9.585531,3.74045,0.782018,0.782142,0.941047,2.387082,9.248265,1.000158,1.203357,3.052464,11.826156,1.203166,3.051981,11.824284,2.536624,9.827639,3.874298,0.77046,0.758391,0.89759,2.223721,9.378042,0.984335,1.165005,2.886224,12.172003,1.183545,2.932156,12.365708,2.477435,10.448025,4.217275,0.812078,0.847646,1.068724,2.915611,8.94289,1.043798,1.316036,3.590308,11.01235,1.260814,3.439657,10.550268,2.728123,8.36782,3.067244,0.820808,0.867754,1.110409,3.105885,8.861925,1.057194,1.352824,3.783935,10.796585,1.279636,3.579223,10.212487,2.797064,7.980777,2.853269,0.857196,0.957442,1.312571,4.184136,8.556043,1.116946,1.531238,4.881188,9.981429,1.370915,4.370121,8.936359,3.187741,6.518537,2.044877


In [140]:
#used_column4 = ['diff_A5_A2_median_min_flux','diff_A1_A0_minus_1_sigma','diff_A5_A1_max_min_flux']

In [141]:
%%time
final_dict = {}

loss_list = []
temp = train_metadata_kaggle.copy()
temp = temp.merge(train_metadata[['object_id'] + used_columns1],on = 'object_id',how = 'left')
print(temp.shape)
#temp = temp.merge(train_metadata[['object_id',column_]],on = 'object_id',how = 'left')
y = temp['target']
del temp['target']
classes = sorted(y.unique())

# Taken from Giba's topic : https://www.kaggle.com/titericz
# https://www.kaggle.com/c/PLAsTiCC-2018/discussion/67194
# with Kyle Boone's post https://www.kaggle.com/kyleboone
class_weight = {
    c: 1 for c in classes
}
for c in [64, 15]:
    class_weight[c] = 2

#print('Unique classes : ', classes)

train_id = temp['object_id']
del temp['object_id']
# Compute weights
w = y.value_counts()
weights = {i : np.sum(w) / w[i] for i in w.index}
folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=51)
clfs = []
importances = pd.DataFrame()
lgb_params = {
'random_state':51,
'device': 'cpu', 
'objective': 'multiclass', 
'num_class': 14, 
'boosting_type': 'gbdt', 
'n_jobs': -1, 
'max_depth': 7, 
'n_estimators': 1000, 
'subsample_freq': 2, 
'subsample_for_bin': 5000, 
'min_data_per_group': 100, 
'max_cat_to_onehot': 4, 
'cat_l2': 1.0, 
'cat_smooth': 59.5, 
'max_cat_threshold': 32, 
'metric_freq': 10, 
'verbosity': -1, 
'metric': 'multi_logloss', 
'xgboost_dart_mode': False, 
'uniform_drop': False, 
'colsample_bytree': 0.5, 
'drop_rate': 0.173, 
'learning_rate': 0.0267, 
'max_drop': 5, 
'min_child_samples': 10,
'min_child_weight': 200.0, 
#'min_child_weight': 100.0, 
'min_split_gain': 0.1, 
'num_leaves': 7, 
#'reg_alpha': 0.1,
'reg_alpha': 0.0, 
'reg_lambda': 0.00023, 
'skip_drop': 0.44, 
'subsample': 0.75}
oof_preds = np.zeros((len(temp), np.unique(y).shape[0]))
for fold_, (trn_, val_) in enumerate(folds.split(y, y)):
    trn_x, trn_y = temp.iloc[trn_], y.iloc[trn_]
    val_x, val_y = temp.iloc[val_], y.iloc[val_]

    clf = lgb.LGBMClassifier(**lgb_params)
    clf.fit(
        trn_x, trn_y,
        eval_set=[(trn_x, trn_y), (val_x, val_y)],
        eval_metric=lgb_multi_weighted_logloss,
        verbose=False,
        early_stopping_rounds=50,
        sample_weight=trn_y.map(weights)
    )
    oof_preds[val_, :] = clf.predict_proba(val_x, num_iteration=clf.best_iteration_)
    loss_oof = multi_weighted_logloss(val_y, oof_preds[val_, :])
    #loss_list.append(loss_oof)
    print(fold_,loss_oof)

    imp_df = pd.DataFrame()
    imp_df['feature'] = temp.columns
    imp_df['gain'] = clf.feature_importances_
    imp_df['fold'] = fold_ + 1
    importances = pd.concat([importances, imp_df], axis=0, sort=False)

    clfs.append(clf)
print('MULTI WEIGHTED LOG LOSS : %.5f ' % multi_weighted_logloss(y_true=y, y_preds=oof_preds))
#final_dict[column_] = loss_list

(7848, 139)
0 0.5846232690590423
1 0.5598478406280183
2 0.6060594241195905
3 0.5551329424743721
4 0.5738283897556737
MULTI WEIGHTED LOG LOSS : 0.57588 
CPU times: user 11min 16s, sys: 1.19 s, total: 11min 17s
Wall time: 1min 30s


In [33]:
imp_df.sort_values(by = 'gain',ascending=False)

Unnamed: 0,feature,gain,fold
57,distmod,1048,5
55,hostgal_photoz,1005,5
54,mjd_diff_det,921,5
61,hostgal_photoz_certain,565,5
56,hostgal_photoz_err,534,5
52,mjd__mean_abs_change,423,5
3,flux_median,389,5
46,flux__longest_strike_above_mean,370,5
75,__median_absolute_deviation___2_,358,5
19,flux_diff2,357,5


In [None]:
"""%%time
final_dict = {}

for column_ in list(train_metadata.columns)[233:]:
    loss_list = []
    temp = train_metadata_kaggle.copy()
    temp = temp.merge(train_metadata[['object_id',column_]],on = 'object_id',how = 'left')
    y = temp['target']
    del temp['target']
    classes = sorted(y.unique())

    class_weight = {
        c: 1 for c in classes
    }
    for c in [64, 15]:
        class_weight[c] = 2

    train_id = temp['object_id']
    del temp['object_id']
    # Compute weights
    w = y.value_counts()
    weights = {i : np.sum(w) / w[i] for i in w.index}
    folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=51)
    clfs = []
    importances = pd.DataFrame()
    lgb_params = {
    'random_state':51,
    'device': 'cpu', 
    'objective': 'multiclass', 
    'num_class': 14, 
    'boosting_type': 'gbdt', 
    'n_jobs': -1, 
    'max_depth': 7, 
    'n_estimators': 1000, 
    'subsample_freq': 2, 
    'subsample_for_bin': 5000, 
    'min_data_per_group': 100, 
    'max_cat_to_onehot': 4, 
    'cat_l2': 1.0, 
    'cat_smooth': 59.5, 
    'max_cat_threshold': 32, 
    'metric_freq': 10, 
    'verbosity': -1, 
    'metric': 'multi_logloss', 
    'xgboost_dart_mode': False, 
    'uniform_drop': False, 
    'colsample_bytree': 0.5, 
    'drop_rate': 0.173, 
    'learning_rate': 0.0267, 
    'max_drop': 5, 
    'min_child_samples': 10,
    'min_child_weight': 200.0, 
    #'min_child_weight': 100.0, 
    'min_split_gain': 0.1, 
    'num_leaves': 7, 
    #'reg_alpha': 0.1,
    'reg_alpha': 0.0, 
    'reg_lambda': 0.00023, 
    'skip_drop': 0.44, 
    'subsample': 0.75}
    oof_preds = np.zeros((len(temp), np.unique(y).shape[0]))
    for fold_, (trn_, val_) in enumerate(folds.split(y, y)):
        trn_x, trn_y = temp.iloc[trn_], y.iloc[trn_]
        val_x, val_y = temp.iloc[val_], y.iloc[val_]

        clf = lgb.LGBMClassifier(**lgb_params)
        clf.fit(
            trn_x, trn_y,
            eval_set=[(trn_x, trn_y), (val_x, val_y)],
            eval_metric=lgb_multi_weighted_logloss,
            verbose=False,
            early_stopping_rounds=50,
            sample_weight=trn_y.map(weights)
        )
        oof_preds[val_, :] = clf.predict_proba(val_x, num_iteration=clf.best_iteration_)
        loss_oof = multi_weighted_logloss(val_y, oof_preds[val_, :])
        loss_list.append(loss_oof)
        #print(fold_,loss_oof)

        imp_df = pd.DataFrame()
        imp_df['feature'] = temp.columns
        imp_df['gain'] = clf.feature_importances_
        imp_df['fold'] = fold_ + 1
        importances = pd.concat([importances, imp_df], axis=0, sort=False)

        clfs.append(clf)
    print(column_,'MULTI WEIGHTED LOG LOSS : %.5f ' % multi_weighted_logloss(y_true=y, y_preds=oof_preds))
    final_dict[column_] = loss_list"""

In [None]:
"""
final_dict2 = pd.Series(final_dict)
final_dict2 = pd.DataFrame(final_dict2)
final_dict2['fold1'] = final_dict2[0].apply(lambda x: x[0])
final_dict2['fold2'] = final_dict2[0].apply(lambda x: x[1])
final_dict2['fold3'] = final_dict2[0].apply(lambda x: x[2])
final_dict2['fold4'] = final_dict2[0].apply(lambda x: x[3])
final_dict2['fold5'] = final_dict2[0].apply(lambda x: x[4])

del final_dict2[0]

final_dict2 = final_dict2.reset_index(drop=False)

final_dict2 = final_dict2.rename(columns={'index':'column_name'})

final_dict2['fold1_1'] = (final_dict2['fold1'] > 0.62756) * 1
final_dict2['fold2_1'] = (final_dict2['fold2'] > 0.59905) * 1
final_dict2['fold3_1'] = (final_dict2['fold3'] > 0.65624) * 1
final_dict2['fold4_1'] = (final_dict2['fold4'] > 0.60978) * 1
final_dict2['fold5_1'] = (final_dict2['fold5'] > 0.61012) * 1
final_dict2['fold_sum'] = final_dict2['fold1_1']+ final_dict2['fold2_1'] + final_dict2['fold3_1'] + final_dict2['fold4_1'] + final_dict2['fold5_1']
"""

In [None]:
#final_dict2[final_dict2['column_name'].isin(most_imp_ones)]
#final_dict2[final_dict2['fold_sum'] == 1]

In [None]:
#final_dict2.head()

In [None]:
#final_dict2.to_csv('final_features3.csv',index=False)

In [143]:
#modify to work with kfold
#def smoteAdataset(Xig, yig, test_size=0.2, random_state=0):
def smoteAdataset(Xig_train, yig_train, Xig_test, yig_test):
    
        
    sm=SMOTE(random_state=51)
    Xig_train_res, yig_train_res = sm.fit_sample(Xig_train, yig_train.ravel())

        
    return Xig_train_res, pd.Series(yig_train_res), Xig_test, pd.Series(yig_test)

In [145]:
%%time
final_dict = {}

loss_list = []
temp = train_metadata_kaggle.copy()

#temp = temp.merge(train_metadata[['object_id'] + most_imp_ones2 ],on = 'object_id',how = 'left')

temp = temp.merge(train_metadata[['object_id'] + used_columns1  ],on = 'object_id',how = 'left')


print(temp.shape)
temp.fillna(0, inplace=True)

y = temp['target']
del temp['target']
classes = sorted(y.unique())

# Taken from Giba's topic : https://www.kaggle.com/titericz
# https://www.kaggle.com/c/PLAsTiCC-2018/discussion/67194
# with Kyle Boone's post https://www.kaggle.com/kyleboone
class_weight = {
    c: 1 for c in classes
}
for c in [64, 15]:
    class_weight[c] = 2

#print('Unique classes : ', classes)

train_id = temp['object_id']
del temp['object_id']
# Compute weights
w = y.value_counts()
weights = {i : np.sum(w) / w[i] for i in w.index}
folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=51)
clfs = []
importances = pd.DataFrame()
lgb_params = {
'random_state':51,
'device': 'cpu', 
'objective': 'multiclass', 
'num_class': 14, 
'boosting_type': 'gbdt', 
'n_jobs': -1, 
'max_depth': 7, 
'n_estimators': 1000, 
'subsample_freq': 2, 
'subsample_for_bin': 5000, 
'min_data_per_group': 100, 
'max_cat_to_onehot': 4, 
'cat_l2': 1.0, 
'cat_smooth': 59.5, 
'max_cat_threshold': 32, 
'metric_freq': 10, 
'verbosity': -1, 
'metric': 'multi_logloss', 
'xgboost_dart_mode': False, 
'uniform_drop': False, 
'colsample_bytree': 0.5, 
'drop_rate': 0.173, 
'learning_rate': 0.0267, 
'max_drop': 5, 
'min_child_samples': 10, 
'min_child_weight': 200.0, 
'min_split_gain': 0.1, 
'num_leaves': 7, 
'reg_alpha': 0.0, 
'reg_lambda': 0.00023, 
'skip_drop': 0.44, 
'subsample': 0.75}
oof_preds = np.zeros((len(temp), np.unique(y).shape[0]))
for fold_, (trn_, val_) in enumerate(folds.split(y, y)):
    trn_x, trn_y = temp.iloc[trn_], y.iloc[trn_]
    val_x, val_y = temp.iloc[val_], y.iloc[val_]

    trn_xa, trn_y, val_xa, val_y=smoteAdataset(trn_x.values, trn_y.values, val_x.values, val_y.values)
    trn_x=pd.DataFrame(data=trn_xa, columns=trn_x.columns)
    val_x=pd.DataFrame(data=val_xa, columns=val_x.columns)
    
    print(trn_x.shape,trn_y.shape,val_x.shape,val_y.shape)
    
    clf = lgb.LGBMClassifier(**lgb_params)
    clf.fit(
        trn_x, trn_y,
        eval_set=[(trn_x, trn_y), (val_x, val_y)],
        eval_metric=lgb_multi_weighted_logloss,
        verbose=False,
        early_stopping_rounds=50,
        sample_weight=trn_y.map(weights)
    )
    oof_preds[val_, :] = clf.predict_proba(val_x, num_iteration=clf.best_iteration_)
    loss_oof = multi_weighted_logloss(val_y, oof_preds[val_, :])
    #loss_list.append(loss_oof)
    print(fold_,loss_oof)

    imp_df = pd.DataFrame()
    imp_df['feature'] = temp.columns
    imp_df['gain'] = clf.feature_importances_
    imp_df['fold'] = fold_ + 1
    importances = pd.concat([importances, imp_df], axis=0, sort=False)

    clfs.append(clf)
print('MULTI WEIGHTED LOG LOSS : %.5f ' % multi_weighted_logloss(y_true=y, y_preds=oof_preds))
#final_dict[column_] = loss_list

(7848, 139)
(25900, 137) (25900,) (1574, 137) (1574,)
0 0.5307443319232668
(25900, 137) (25900,) (1572, 137) (1572,)
1 0.5564314031865383
(25900, 137) (25900,) (1571, 137) (1571,)
2 0.5893145525114981
(25914, 137) (25914,) (1567, 137) (1567,)
3 0.5409958222769201
(25914, 137) (25914,) (1564, 137) (1564,)
4 0.5588997848985131
MULTI WEIGHTED LOG LOSS : 0.55516 
CPU times: user 28min 25s, sys: 2.4 s, total: 28min 27s
Wall time: 4min 3s


In [None]:
#used_columns = used_columns1 + used_columns2 + used_columns3 

In [147]:
used_columns1

['div_A4_A2_median_min_flux',
 'div_A5_A2_median_min_flux',
 'div_A5_A2_minus_1_sigma',
 'div_A5_A4_median_mean_flux',
 'div_A3_A0_plus_1_sigma',
 'div_A4_A1_minus_1_sigma']

In [148]:
test_metadata['div_A4_A2_median_min_flux'] = test_metadata['A4_median_min_diff_flux'] / test_metadata['A2_median_min_diff_flux']
test_metadata['div_A5_A2_median_min_flux'] = test_metadata['A5_median_min_diff_flux'] / test_metadata['A2_median_min_diff_flux']
test_metadata['div_A5_A2_minus_1_sigma'] = test_metadata['A5_minus_1_sigma'] / test_metadata['A2_minus_1_sigma']
test_metadata['div_A5_A4_median_mean_flux'] = test_metadata['A5_median_mean_diff_flux'] / test_metadata['A4_median_mean_diff_flux']
test_metadata['div_A3_A0_plus_1_sigma'] = test_metadata['A3_plus_1_sigma'] / test_metadata['A0_plus_1_sigma']
test_metadata['div_A4_A1_minus_1_sigma'] = test_metadata['A4_minus_1_sigma'] / test_metadata['A1_minus_1_sigma']

In [149]:
print(train_metadata.shape,test_metadata.shape)

(7848, 283) (3492890, 109)


In [150]:
#train_metadata.drop([x for x in train_metadata.columns if x not in ['object_id'] + used_columns ] ,axis = 1,inplace=True)

In [151]:
#test_metadata = test_metadata[[x for x in test_metadata.columns if x in ['object_id'] + used_columns ]]

In [161]:
train_metadata_kaggle = train_metadata_kaggle.merge(train_metadata[['object_id'] + used_columns1 ],on = 'object_id',how = 'left')
test_metadata_kaggle = test_metadata_kaggle.merge(test_metadata[['object_id'] + used_columns1 ],on = 'object_id',how = 'left')

In [162]:
print(train_metadata_kaggle.shape,test_metadata_kaggle.shape)

(7848, 139) (3492890, 138)


In [163]:
train_metadata_kaggle.head()

Unnamed: 0,object_id,flux_min,flux_max,flux_mean,flux_median,flux_std,flux_skew,flux_err_min,flux_err_max,flux_err_mean,flux_err_median,flux_err_std,flux_err_skew,detected_mean,flux_ratio_sq_sum,flux_ratio_sq_skew,flux_by_flux_ratio_sq_sum,flux_by_flux_ratio_sq_skew,flux_w_mean,flux_diff1,flux_diff2,flux_diff3,"0__fft_coefficient__coeff_0__attr_""abs""","0__fft_coefficient__coeff_1__attr_""abs""",0__kurtosis,0__skewness,"1__fft_coefficient__coeff_0__attr_""abs""","1__fft_coefficient__coeff_1__attr_""abs""",1__kurtosis,1__skewness,"2__fft_coefficient__coeff_0__attr_""abs""","2__fft_coefficient__coeff_1__attr_""abs""",2__kurtosis,2__skewness,"3__fft_coefficient__coeff_0__attr_""abs""","3__fft_coefficient__coeff_1__attr_""abs""",3__kurtosis,3__skewness,"4__fft_coefficient__coeff_0__attr_""abs""","4__fft_coefficient__coeff_1__attr_""abs""",4__kurtosis,4__skewness,"5__fft_coefficient__coeff_0__attr_""abs""","5__fft_coefficient__coeff_1__attr_""abs""",5__kurtosis,5__skewness,flux__length,flux__longest_strike_above_mean,flux__longest_strike_below_mean,flux__mean_abs_change,flux__mean_change,flux_by_flux_ratio_sq__longest_strike_above_mean,flux_by_flux_ratio_sq__longest_strike_below_mean,mjd__mean_abs_change,mjd__mean_change,mjd_diff_det,hostgal_photoz,hostgal_photoz_err,distmod,mwebv,target,haversine,latlon1,hostgal_photoz_certain,A0_sum_flux,A0_mean_flux,A0_std_detected,A1_mean_detected,A2_sum_detected,A4_mean_detected,A5_std_detected,A5_mean_detected,percent_p2_region_minus_1,A2_min_flux,A5_sum_detected,__flux_percentile_ratio_mid50___5_,__flux_percentile_ratio_mid65___2_,__median_absolute_deviation___2_,__qso_log_chi2_qsonu___0_,__stetson_k___1_,__freq1_signif___2_,__stetson_k___2_,__freq3_amplitude1___1_,__median_absolute_deviation___2_.1,__percent_close_to_median___2_,__freq_varrat___5_,__freq_varrat___4_,__qso_log_chi2_qsonu___3_,__qso_log_chi2_qsonu___1_,__qso_log_chi2_qsonu___5_,__std___4_,__freq_varrat___3_,__amplitude___2_,outlierScore,hipd,lipd,highEnergy_transitory_1.0_TF,highEnergy_transitory_1.5_TF,lowEnergy_transitory_1.0_TF,lowEnergy_transitory_1.5_TF,A1_minus_3_sigma,A5_max_median_diff_flux,A5_minus_3_sigma,A5_max_mean_diff_flux,diff_A5_A4_max_min_flux,diff_A2_A1_max_min_flux,diff_A3_A2_median_min_flux,diff_A5_A4_max_median_flux,diff_A4_A3_max_median_flux,diff_A2_A0_median_min_flux,diff_A4_A3_max_mean_flux,diff_A5_A2_max_mean_flux,diff_A5_A3_max_mean_flux,diff_A4_A0_median_mean_flux,diff_A5_A4_max_mean_flux,diff_A2_A1_max_median_flux,diff_A5_A2_max_median_flux,diff_A5_A4_median_min_flux,diff_A4_A0_median_min_flux,diff_A4_A1_max_median_flux,diff_A4_A2_max_median_flux,diff_A5_A4_minus_1_sigma,diff_A5_A3_median_min_flux,diff_A5_A3_max_median_flux,diff_A3_A1_minus_1_sigma,diff_A3_A0_median_min_flux,diff_A3_A0_plus_1_sigma,diff_A1_A0_median_min_flux,diff_A4_A2_mean_min_flux,diff_A5_A1_plus_1_sigma,diff_A4_A1_median_mean_flux,diff_A3_A2_max_median_flux,diff_A5_A1_median_mean_flux,div_A4_A2_median_min_flux,div_A5_A2_median_min_flux,div_A5_A2_minus_1_sigma,div_A5_A4_median_mean_flux,div_A3_A0_plus_1_sigma,div_A4_A1_minus_1_sigma
0,615,-1100.440063,660.626343,-123.096998,-89.477524,394.109851,-0.34954,2.13051,12.845472,4.482743,3.835268,1.744747,1.62374,0.946023,2929669.0,0.812722,-960176600.0,-1.414322,-327.742307,1761.066406,-14.306331,-5.373326,205.036926,1628.427737,-1.475181,0.128917,22370.594834,2806.374162,-1.255123,0.41558,7780.500807,2805.598113,-1.409885,0.339918,7024.003068,2536.068846,-1.449858,0.293128,3245.366349,2741.539785,-1.548319,0.200096,2704.641265,2893.344217,-1.59282,0.125268,352.0,19.0,29.0,202.114067,1.999688,35.0,4.0,2.631898,2.631898,873.7903,0.0,0.0,,0.017,92,0.319006,-1.528827,0.0,-205.03693,-3.254554,0.3528,0.9653,57,0.983,0.2854,0.912,0.362,-682.0,52,5.56223e-26,6.71941e-20,368.129,6.21789,1.09173,5.49891,1.05349,114.465,368.129,0.172414,0.401664,0.129578,9.16612,9.50875,7.34498,289.277,0.110785,646.922,0.0,1.0,1.0,0,0,0,0,-2191.1619,463.71245,-931.78835,425.63799,-3.135,-466.64178,-47.8,-21.78266,-122.34195,309.302005,-128.932226,-320.49313,-141.20257,-40.826106,-12.270344,-270.84178,-414.07211,18.64766,211.745135,-663.13123,-392.28945,5.528684,-31.10921,-144.12461,531.05875,261.502005,133.631375,505.102005,-181.623534,31.34235,54.613224,-269.9475,64.12554,0.765601,0.810406,0.58077,0.800106,2.656105,0.352129
1,713,-14.735178,14.770886,-1.423351,-0.873033,6.471144,0.014989,0.639458,9.115748,2.35962,1.998217,1.509888,1.633246,0.171429,5886.068,3.439423,-28750.87,-3.454554,-4.884564,29.506064,-20.730002,-6.040676,190.427851,299.586559,-1.014003,0.260052,57.109047,192.539229,-1.09717,-0.087865,44.477327,191.057528,-1.188472,-0.022678,55.270113,212.522263,-1.142896,-0.167176,50.414646,203.892482,-1.190245,-0.064134,100.473776,143.963093,-0.797047,0.218182,350.0,50.0,73.0,2.935177,-0.050944,199.0,8.0,14.352571,14.352571,846.8017,1.6267,0.2552,45.4063,0.007,88,1.698939,3.258921,2.099614,-190.42786,-2.720398,0.3525,0.2678,15,0.0893,0.0,0.0,0.25,-10.07,0,0.0211907,0.0824318,5.10035,2.18719,1.0661,3.95669,1.08818,0.851103,5.10035,0.178571,0.369518,0.166179,2.79753,3.12481,0.659762,6.34953,0.111883,10.2985,0.875,1.909016,2.0,1,0,1,1,-18.157002,17.233897,-23.076394,16.565061,6.867315,-0.24998,2.3741,6.613788,-0.784106,-1.686373,-1.589087,5.241782,4.247778,0.484493,5.836865,0.95652,6.586856,0.253527,-0.143748,0.929589,-0.026931,-1.580997,-0.577947,5.829681,-0.705045,0.687727,1.070336,-0.479873,2.110777,0.607564,-0.350414,0.757175,-1.127337,1.155007,1.180482,1.353889,-6.188008,1.24364,1.085396
2,730,-19.159811,47.310059,2.267434,0.409172,8.022239,3.177854,0.695106,11.281384,2.471061,1.990851,1.721134,1.823726,0.069697,4124.452,5.480405,104650.2,5.989138,25.37311,66.46987,29.315018,2.619697,3.46179,4.729538,0.474215,0.35691,7.334944,13.515895,0.976374,0.471342,124.84525,119.500254,5.13129,2.385066,168.280524,162.799417,7.125665,2.662075,219.745132,202.532898,6.081065,2.537802,231.509177,199.28637,3.58313,1.680352,330.0,13.0,32.0,4.227614,-0.008131,4.0,222.0,3.580623,3.580623,78.7737,0.2262,0.0157,40.2561,0.021,42,1.81803,3.128522,0.229779,-3.46179,-0.04808,0.0,0.0,7,0.098,0.2715,0.0784,0.769,-2.85,4,0.000194228,0.55118,1.04253,-0.307228,0.933091,4.61663,0.634723,0.454918,1.04253,0.769231,0.500549,0.318256,3.04833,0.127758,1.66943,10.6048,0.292954,11.9218,0.0,1.0,1.0,0,0,0,0,-5.280586,44.767413,-35.458878,42.770664,19.473581,14.758601,2.7569,4.611787,7.244126,-0.142353,6.515316,24.176824,12.434728,-3.376547,5.919412,14.981301,24.264403,14.861794,3.356611,34.633917,19.652616,-2.391746,15.603858,11.855913,-3.289694,2.614547,9.647359,0.080347,4.894168,15.923825,-3.334674,12.40849,-2.027049,2.047061,6.494436,2.783931,0.604275,6.417455,3.84221
3,745,-15.494463,220.795212,8.909206,1.035895,27.558208,4.979826,0.56717,55.892746,2.555576,1.819875,3.537324,10.741655,0.173789,94161.65,9.611274,14391250.0,11.141069,152.835617,236.289675,26.521968,1.546038,129.421659,123.298327,4.629801,2.023211,320.174052,280.440312,50.86888,7.007099,543.845781,491.54827,36.088137,5.688194,807.123762,710.721942,16.392533,3.751603,735.528417,680.05528,13.747434,3.47642,591.037583,523.503586,12.134629,3.170857,351.0,19.0,115.0,7.065548,0.008044,4.0,201.0,2.061453,2.061453,123.6872,0.2813,1.1523,40.7951,0.007,90,0.495223,6.893743,0.890445,129.42166,1.797523,0.1655,0.125,16,0.2322,0.3364,0.1273,0.768,-2.16,7,0.0084016,0.546369,1.41645,1.4322,0.295163,3.96789,0.394683,3.59567,1.41645,0.892857,0.489589,0.360868,6.06886,5.84082,2.82044,32.7725,0.290652,111.477,0.0,1.0,1.0,0,0,0,0,-72.173977,138.763735,-67.434252,130.767152,-47.364906,27.09091,3.7217,-43.599235,-19.526731,-2.346763,-18.339091,-80.316526,-58.070623,-11.123477,-39.731532,29.01461,-81.607175,-3.765671,11.833551,-8.993331,-38.007941,4.620626,6.692942,-63.125965,-0.310113,1.374937,43.208654,-0.423063,16.757367,5.121609,-7.032286,-18.48121,-3.164583,6.4871,5.029966,0.688379,0.674005,8.00079,0.984685
4,1124,-16.543753,143.600189,7.145702,1.141288,20.051722,4.406298,0.695277,11.38369,2.753004,2.214854,1.933837,1.794938,0.173295,34324.18,7.868462,3015599.0,7.908174,87.85639,160.143942,22.411225,1.822792,41.639721,32.987125,0.822496,-0.332169,268.808929,207.812015,6.112295,2.377222,594.150153,498.50982,10.343254,3.075437,643.020183,555.512641,14.095862,3.603208,574.553907,524.107264,16.377058,3.904008,393.114268,357.907185,14.43447,3.657305,352.0,19.0,158.0,6.727352,0.012543,10.0,231.0,2.231855,2.231855,133.9113,0.2415,0.0176,40.4166,0.024,90,0.395162,-1.928064,0.245788,41.63972,0.660948,0.0,0.2241,18,0.1724,0.2578,0.0702,0.707,-2.084,4,0.0270226,0.601811,1.33779,-0.064359,0.674119,5.24444,0.560453,1.20558,1.33779,0.741379,0.673592,0.382847,5.07231,3.4079,2.31292,26.6333,0.250639,54.3781,0.375,1.0,1.0,0,0,1,0,-19.689575,107.184313,-57.407143,102.260844,-40.125818,68.962513,0.841,-34.670865,3.050778,-4.41273,4.962243,5.833122,-26.471001,-8.081169,-31.433244,69.766513,1.401623,-5.454953,10.903035,105.839001,36.072488,2.421924,9.019812,-31.620087,-11.709569,-3.57173,34.336172,-3.60873,14.121887,15.590844,-4.68209,33.02171,-1.444469,6.15162,4.316788,1.312571,0.603286,12.365708,4.881188


In [164]:
test_metadata_kaggle.head()

Unnamed: 0,object_id,flux_min,flux_max,flux_mean,flux_median,flux_std,flux_skew,flux_err_min,flux_err_max,flux_err_mean,flux_err_median,flux_err_std,flux_err_skew,detected_mean,flux_ratio_sq_sum,flux_ratio_sq_skew,flux_by_flux_ratio_sq_sum,flux_by_flux_ratio_sq_skew,flux_w_mean,flux_diff1,flux_diff2,flux_diff3,"0__fft_coefficient__coeff_0__attr_""abs""","0__fft_coefficient__coeff_1__attr_""abs""",0__kurtosis,0__skewness,"1__fft_coefficient__coeff_0__attr_""abs""","1__fft_coefficient__coeff_1__attr_""abs""",1__kurtosis,1__skewness,"2__fft_coefficient__coeff_0__attr_""abs""","2__fft_coefficient__coeff_1__attr_""abs""",2__kurtosis,2__skewness,"3__fft_coefficient__coeff_0__attr_""abs""","3__fft_coefficient__coeff_1__attr_""abs""",3__kurtosis,3__skewness,"4__fft_coefficient__coeff_0__attr_""abs""","4__fft_coefficient__coeff_1__attr_""abs""",4__kurtosis,4__skewness,"5__fft_coefficient__coeff_0__attr_""abs""","5__fft_coefficient__coeff_1__attr_""abs""",5__kurtosis,5__skewness,flux__length,flux__longest_strike_above_mean,flux__longest_strike_below_mean,flux__mean_abs_change,flux__mean_change,flux_by_flux_ratio_sq__longest_strike_above_mean,flux_by_flux_ratio_sq__longest_strike_below_mean,mjd__mean_abs_change,mjd__mean_change,mjd_diff_det,hostgal_photoz,hostgal_photoz_err,distmod,mwebv,haversine,latlon1,hostgal_photoz_certain,A0_sum_flux,A0_mean_flux,A0_std_detected,A1_mean_detected,A2_sum_detected,A4_mean_detected,A5_std_detected,A5_mean_detected,percent_p2_region_minus_1,A2_min_flux,A5_sum_detected,__flux_percentile_ratio_mid50___5_,__flux_percentile_ratio_mid65___2_,__median_absolute_deviation___2_,__qso_log_chi2_qsonu___0_,__stetson_k___1_,__freq1_signif___2_,__stetson_k___2_,__freq3_amplitude1___1_,__median_absolute_deviation___2_.1,__percent_close_to_median___2_,__freq_varrat___5_,__freq_varrat___4_,__qso_log_chi2_qsonu___3_,__qso_log_chi2_qsonu___1_,__qso_log_chi2_qsonu___5_,__std___4_,__freq_varrat___3_,__amplitude___2_,outlierScore,hipd,lipd,highEnergy_transitory_1.0_TF,highEnergy_transitory_1.5_TF,lowEnergy_transitory_1.0_TF,lowEnergy_transitory_1.5_TF,A1_minus_3_sigma,A5_max_median_diff_flux,A5_minus_3_sigma,A5_max_mean_diff_flux,diff_A5_A4_max_min_flux,diff_A2_A1_max_min_flux,diff_A3_A2_median_min_flux,diff_A5_A4_max_median_flux,diff_A4_A3_max_median_flux,diff_A2_A0_median_min_flux,diff_A4_A3_max_mean_flux,diff_A5_A2_max_mean_flux,diff_A5_A3_max_mean_flux,diff_A4_A0_median_mean_flux,diff_A5_A4_max_mean_flux,diff_A2_A1_max_median_flux,diff_A5_A2_max_median_flux,diff_A5_A4_median_min_flux,diff_A4_A0_median_min_flux,diff_A4_A1_max_median_flux,diff_A4_A2_max_median_flux,diff_A5_A4_minus_1_sigma,diff_A5_A3_median_min_flux,diff_A5_A3_max_median_flux,diff_A3_A1_minus_1_sigma,diff_A3_A0_median_min_flux,diff_A3_A0_plus_1_sigma,diff_A1_A0_median_min_flux,diff_A4_A2_mean_min_flux,diff_A5_A1_plus_1_sigma,diff_A4_A1_median_mean_flux,diff_A3_A2_max_median_flux,diff_A5_A1_median_mean_flux,div_A4_A2_median_min_flux,div_A5_A2_median_min_flux,div_A5_A2_minus_1_sigma,div_A5_A4_median_mean_flux,div_A3_A0_plus_1_sigma,div_A4_A1_minus_1_sigma
0,13,-12.680235,42.765503,3.997127,0.616561,9.149645,2.037355,0.691634,11.257108,2.46181,1.972973,1.718101,1.826388,0.157576,7806.412424,4.771625,189634.6,5.396523,24.292155,55.445738,13.871398,2.282455,29.002872,37.684425,-0.24716,0.147622,92.03969,79.990817,10.567412,3.117684,164.640622,139.733762,4.512783,2.171455,300.547278,246.788411,2.951479,1.806797,408.305525,349.008202,0.094963,1.153076,324.51588,251.116361,0.072799,0.804929,330.0,15.0,32.0,4.426774,0.017885,9.0,222.0,2.350061,2.350061,119.8531,0.3193,0.0542,41.1123,0.019,1.851382,3.049709,0.337084,29.002872,0.402818,0.0,0.0769,11.0,0.3137,0.3254,0.1177,0.7114,-1.778855,6.0,0.005627,0.528087,1.38469,-0.007597,0.61019,4.81984,0.681284,1.78339,1.38469,0.711538,0.25503,0.09055,3.49944,2.78816,1.51079,13.2494,0.124063,13.1542,1.0,1.0,1.0,1,1,1,0,-13.032169,33.023636,-29.949324,30.439033,1.203725,-1.244573,3.14635,-9.275401,3.413229,-1.906972,0.572977,9.075555,-3.747502,-7.224033,-4.320479,1.245629,8.963043,10.479126,1.824685,19.484073,18.238444,-0.365809,11.064433,-5.862173,-1.251189,1.239378,13.511033,0.58323,8.574067,11.763127,-6.359525,14.825216,-1.404603,2.660059,7.321785,1.927288,0.342807,6.483993,1.698851
1,14,-11.142164,14.839427,0.884047,0.072856,3.399946,0.970525,0.690589,11.249375,2.45758,1.973559,1.717591,1.826703,0.012121,806.406927,11.486148,5525.817,12.348124,6.852393,25.981591,29.389389,3.791608,22.708482,26.159787,0.099267,0.502325,13.685195,27.630359,13.429229,3.109318,31.012899,33.427074,13.769006,3.247873,56.042403,59.784625,1.75084,1.082798,83.561278,77.494564,-0.322108,-0.099957,84.725142,27.013154,-0.348744,0.132025,330.0,10.0,15.0,3.055953,-0.025935,4.0,85.0,9.3351,9.3351,28.0053,0.6323,0.0179,42.8774,0.018,1.855173,3.009107,0.64372,22.70848,0.315396,0.0,0.01923,1.0,0.0,0.0,0.0,0.615,-2.655194,0.0,0.014907,0.278418,0.8139,0.266805,0.640702,3.31422,0.665239,1.07237,0.8139,0.711538,0.623417,0.258451,0.848093,1.33958,-0.158567,3.44694,0.306123,8.56024,0.0,1.0,1.0,0,0,0,0,-8.039201,13.70517,-14.805055,13.17815,10.362185,-1.201913,1.962808,6.524891,-2.977615,-1.396471,-1.799531,-0.690726,3.786349,0.761845,5.58588,0.557757,-0.730177,3.837294,4.357531,-6.697311,-7.255068,-1.984715,7.628487,3.547276,0.777005,0.566338,1.195116,0.363199,4.775541,4.119388,0.804491,-4.277454,-0.13452,3.142918,4.572011,1.764152,-1.279205,1.444674,0.735858
2,17,-14.202744,16.76128,0.791032,0.45839,3.886578,0.377131,0.66368,11.278636,2.702947,2.184483,1.922641,1.802497,0.014205,784.835502,9.509911,4124.4,9.923556,5.255113,30.964024,39.143819,5.89217,6.030862,46.450439,0.590786,-0.427429,53.366119,41.192678,4.127763,1.710048,58.60717,42.353288,8.642889,2.570936,26.975615,59.958041,3.484929,1.141959,57.222812,72.448018,2.085327,0.429582,76.240782,73.533783,0.063837,-0.175525,352.0,15.0,15.0,3.247664,0.02484,4.0,75.0,0.6907,0.6907,2.7628,0.8297,0.0605,43.6,0.016,0.309914,-1.49029,0.881446,6.030863,0.095728,0.0,0.0345,2.0,0.0,0.0,0.0,0.6206,-2.790141,0.0,0.000377,0.323662,0.845784,0.335972,0.798601,3.18365,0.696152,1.0381,0.845784,0.775862,0.669229,0.427519,0.684981,1.2138,0.066402,3.80032,0.493005,7.88173,0.0,1.0,1.0,0,0,0,0,-7.832717,14.683493,-19.129198,15.423722,8.578852,0.193475,2.646153,3.214523,1.212961,-4.544296,0.695796,3.460874,4.996827,-0.503209,4.301032,1.040025,2.09292,5.364329,3.199021,-0.081578,-1.121603,-2.637786,10.461493,4.427483,-0.369331,-1.898143,0.430738,-3.697746,7.461872,4.322237,0.368021,-2.334563,1.454529,3.440466,5.131144,3.490031,-2.137666,1.150274,1.425184
3,23,-12.631923,28.061138,0.970396,0.465986,4.100713,2.177402,0.697639,11.305429,2.480364,2.003704,1.723367,1.821626,0.018182,876.027511,8.010597,8293.673,9.227223,9.467365,40.693061,41.934474,4.298245,0.776942,14.92649,-0.580529,-0.058688,2.371129,22.476077,1.429683,-0.000571,34.44773,14.240274,1.223998,0.468594,58.911457,62.709899,4.269139,1.41438,115.918277,70.371095,7.377841,2.409871,112.547521,123.413344,1.793238,0.953107,330.0,10.0,12.0,3.299337,-0.001116,4.0,98.0,0.6025,0.6025,3.0125,0.6533,0.1479,42.964,0.023,1.845038,3.078301,0.757434,0.776942,0.010791,0.0,0.0,2.0,0.0392,0.0,0.0,0.3845,-4.254661,0.0,0.01371,0.402374,0.90629,0.039595,0.918965,3.55265,0.934093,0.445288,0.90629,0.538462,0.709745,0.40574,1.51212,0.154437,0.664241,4.61269,0.415919,4.82121,0.0,1.0,1.0,0,0,0,0,-5.651117,26.62437,-20.850721,25.854324,15.809828,-0.666392,1.807202,6.525865,8.105404,0.318818,7.272845,21.129028,14.127387,-0.958548,6.854541,-0.398379,21.839384,9.283963,0.246119,14.91514,15.313518,-3.093347,7.404063,14.631269,-0.262982,2.126019,2.369858,0.586831,0.966333,8.069753,-1.239923,7.208114,-0.911246,0.985034,2.896326,4.842085,0.700856,2.143172,1.24637
4,34,-13.239577,124.475609,4.58007,0.301366,19.862714,4.537527,0.679312,11.365292,2.746784,2.210577,1.93079,1.798629,0.065341,47612.580669,9.140568,4815012.0,10.251332,101.128982,137.715186,30.068359,1.361778,65.064185,55.191226,7.457425,2.135457,202.641918,191.410916,16.39371,4.133929,425.940451,414.455381,15.206929,4.036551,368.228927,360.630612,15.386561,4.030676,377.744797,301.98395,15.302549,3.99274,172.564336,289.757599,12.507289,3.513977,352.0,24.0,52.0,5.054175,-0.026764,10.0,203.0,1.138964,1.138964,25.0572,0.4617,0.0122,42.054,0.023,0.391772,-1.993709,0.467367,65.064186,1.032765,0.1768,0.069,5.0,0.069,0.2253,0.05264,0.9136,-3.601765,3.0,0.005099,0.597406,1.02602,0.976849,0.393851,3.96443,0.359669,4.31107,1.02602,0.931034,0.813533,0.614606,5.15045,4.771,2.482,22.2476,0.534743,64.0387,0.0,1.0,1.0,0,0,0,0,-38.006542,91.693475,-58.217627,87.733245,-11.865965,53.806883,0.909528,-18.816458,-1.051308,-3.743664,-0.873642,-29.398564,-18.15145,-5.04837,-17.277809,54.58217,-32.303375,6.950493,-2.46789,41.095253,-13.486917,-1.458527,7.316738,-19.867766,-6.951712,-2.834136,24.852925,-2.968377,-0.090388,6.114955,-2.169479,-12.435609,-0.63083,1.312649,3.015982,0.895981,0.720189,5.839107,1.5406


In [167]:
%%time
train_metadata_kaggle.to_csv('mydata_train_metadata.csv',index=False)
test_metadata_kaggle.to_csv('mydata_test_metadata.csv',index=False)

CPU times: user 9min 55s, sys: 8.56 s, total: 10min 4s
Wall time: 10min 4s


In [170]:
imp_df.sort_values('gain',ascending=False).head()

Unnamed: 0,feature,gain,fold
57,distmod,1592,5
55,hostgal_photoz,1557,5
54,mjd_diff_det,1386,5
61,hostgal_photoz_certain,873,5
56,hostgal_photoz_err,752,5


In [171]:
temp_test = test_metadata_kaggle.copy()

In [172]:
del temp_test['object_id']

In [173]:
temp_test.fillna(0,inplace = True)

In [174]:
print(temp.shape,temp_test.shape)

(7848, 137) (3492890, 137)


In [175]:
list(temp.columns) == list(temp_test.columns)

True

In [176]:
%%time
test_pred0 = pd.DataFrame()
test_pred1 = pd.DataFrame()
test_pred2 = pd.DataFrame()
test_pred3 = pd.DataFrame()
test_pred4 = pd.DataFrame()

list_of_df = [test_pred0,test_pred1,test_pred2,test_pred3,test_pred4]

for num,c in enumerate(clfs):
    print(num)
    for k in range(0,len(temp_test),500000):
        test_pred = pd.DataFrame(c.predict_proba(temp_test[ k:k+500000] ))
        list_of_df[num] = pd.concat([list_of_df[num],test_pred],axis=0)
        del test_pred

0
1
2
3
4
CPU times: user 4h 29min 16s, sys: 42.2 s, total: 4h 29min 59s
Wall time: 39min 38s


In [177]:
test_pred2 = pd.DataFrame()
test_pred2 = (list_of_df[0] + list_of_df[1] + list_of_df[2] + list_of_df[3] + list_of_df[4])/5

In [178]:
print(test_pred2.shape)

(3492890, 14)


In [179]:
#test_pred2 = pd.DataFrame(np.random.rand(10,14))

In [180]:
test_pred2.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,0.000179,0.001038,0.000141,0.805593,0.047541,0.00035,0.074855,4.2e-05,0.000131,0.002381,0.000263,0.066049,0.00027,0.001166
1,0.000121,0.006711,0.000132,0.136203,0.039605,0.000256,0.033613,0.000156,0.00049,0.018491,0.001953,0.750983,0.00025,0.011037
2,0.000263,0.016892,0.000293,0.139133,0.096388,0.000566,0.03768,0.004083,0.000686,0.061668,0.008164,0.603198,0.000369,0.030618
3,0.000313,0.002297,0.000565,0.052961,0.022658,0.000584,0.090238,0.003904,0.000502,0.578036,0.001415,0.195498,0.000462,0.050568
4,7.1e-05,0.001033,7.9e-05,0.047133,0.209952,0.000136,0.009144,2.7e-05,9.2e-05,0.004313,9.6e-05,0.727727,0.000104,9.3e-05


In [181]:
temp_columns = ['object_id','class_6','class_15','class_16','class_42','class_52','class_53','class_62','class_64','class_65','class_67','class_88','class_90','class_92','class_95','class_99']

In [182]:
test_pred2.columns = temp_columns[1:15]

In [183]:
def getUnknown(data):
    return ((((((data["mymedian"]) + (((data["mymean"]) / 2.0)))/2.0)) + (((((1.0) - (((data["mymax"]) * (((data["mymax"]) * (data["mymax"]))))))) / 2.0)))/2.0)

feats = ['class_6', 'class_15', 'class_16', 'class_42', 'class_52', 'class_53',
         'class_62', 'class_64', 'class_65', 'class_67', 'class_88', 'class_90',
         'class_92', 'class_95']

In [184]:
klm = pd.DataFrame()
klm['mymean'] = test_pred2[feats].mean(axis=1)
klm['mymedian'] = test_pred2[feats].median(axis=1)
klm['mymax'] = test_pred2[feats].max(axis=1)

In [185]:
test_pred2['class_99'] = getUnknown(klm)

In [186]:
test_pred2.tail()

Unnamed: 0,class_6,class_15,class_16,class_42,class_52,class_53,class_62,class_64,class_65,class_67,class_88,class_90,class_92,class_95,class_99
492885,0.000286,0.384119,0.000529,0.204638,0.119041,0.000606,0.121809,0.000283,0.002518,0.016498,0.002954,0.145506,0.000406,0.000808,0.245444
492886,0.000296,0.009702,0.000319,0.036255,0.213757,0.000397,0.017968,0.387354,0.000531,0.022403,0.001201,0.305817,0.00037,0.003629,0.246065
492887,0.00079,0.019711,0.000207,0.915776,0.023825,0.000475,0.027487,7.2e-05,0.000249,0.001283,0.000521,0.007552,0.000329,0.001722,0.067185
492888,0.000115,0.664491,0.000184,0.15534,0.004457,0.000217,0.011132,0.128702,0.000631,0.00039,0.000379,0.03299,0.000187,0.000785,0.185754
492889,0.000343,0.101076,0.000465,0.499759,0.140719,0.000773,0.060824,0.000149,0.000944,0.023887,0.001747,0.16721,0.001007,0.001097,0.228079


In [187]:
test_pred2 = test_pred2.reset_index(drop=True)

In [188]:
print(test_pred2.shape,test_id.shape)

(3492890, 15) (3492890,)


In [189]:
test_id.tail()

3492885    130787966
3492886    130787971
3492887    130787974
3492888    130788053
3492889    130788054
Name: object_id, dtype: int64

In [190]:
test_id = test_id.reset_index(drop=True)

In [191]:
test_id.index == test_pred2.index

array([ True,  True,  True, ...,  True,  True,  True])

In [192]:
%%time
test_pred = pd.concat([test_id,test_pred2],axis=1)

CPU times: user 87.9 ms, sys: 148 ms, total: 236 ms
Wall time: 234 ms


In [193]:
test_pred = test_pred[temp_columns]

In [194]:
test_pred.head()

Unnamed: 0,object_id,class_6,class_15,class_16,class_42,class_52,class_53,class_62,class_64,class_65,class_67,class_88,class_90,class_92,class_95,class_99
0,13,0.000179,0.001038,0.000141,0.805593,0.047541,0.00035,0.074855,4.2e-05,0.000131,0.002381,0.000263,0.066049,0.00027,0.001166,0.128398
1,14,0.000121,0.006711,0.000132,0.136203,0.039605,0.000256,0.033613,0.000156,0.00049,0.018491,0.001953,0.750983,0.00025,0.011037,0.154128
2,17,0.000263,0.016892,0.000293,0.139133,0.096388,0.000566,0.03768,0.004083,0.000686,0.061668,0.008164,0.603198,0.000369,0.030618,0.207193
3,23,0.000313,0.002297,0.000565,0.052961,0.022658,0.000584,0.090238,0.003904,0.000502,0.578036,0.001415,0.195498,0.000462,0.050568,0.21142
4,34,7.1e-05,0.001033,7.9e-05,0.047133,0.209952,0.000136,0.009144,2.7e-05,9.2e-05,0.004313,9.6e-05,0.727727,0.000104,9.3e-05,0.16261


In [195]:
print(test_pred.shape)

(3492890, 16)


In [196]:
%%time
test_pred.to_csv('test_pred_32.csv',index=False)

CPU times: user 1min 40s, sys: 1.95 s, total: 1min 42s
Wall time: 1min 42s


In [None]:
#!kaggle competitions submit -c PLAsTiCC-2018 -f test_pred_27.csv -m "Message"