<a href="https://colab.research.google.com/github/TanimotoRui/signate/blob/main/17_lgb_tuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ライブラリのインポート / データの読み込み

In [None]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
!pip install optuna

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import numpy as np
import pandas as pd

# Use fully-qualified option keys. The short forms 'max_columns' and 'max_rows'
# are matched as patterns and, on pandas >= 1.4, also hit
# 'styler.render.max_columns' / 'styler.render.max_rows', which raises
# OptionError ("Pattern matched multiple keys").
pd.set_option('display.max_colwidth', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)

%matplotlib inline

from matplotlib import pyplot as plt
import matplotlib.ticker as mtick # For specifying the axes tick format 

import seaborn as sns
import re

import json, os, gc, math, time
import datetime
import collections
from tqdm import tqdm
import glob

from statistics import mean
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.preprocessing import LabelEncoder

from sklearn.model_selection import KFold, GroupKFold, StratifiedKFold

from sklearn import metrics
import time

import lightgbm as lgb

import warnings
warnings.filterwarnings("ignore")

In [None]:
from google.colab import drive
# Mount Google Drive into the Colab runtime at /content/drive.
drive.mount('/content/drive')
# Switch the working directory to the project folder on Drive; the relative
# 'input/...' paths read later in this notebook are resolved from here.
%cd "/content/drive/My Drive/00_datascience/19_ufj_bank"

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/My Drive/00_datascience/19_ufj_bank


In [None]:
# Load the training set, the test set and the sample submission
# (paths are relative to the current working directory).
train, test, sub = (
    pd.read_csv(csv_path)
    for csv_path in ('input/train.csv', 'input/test.csv', 'input/sample_submit.csv')
)

# bertによる特徴抽出

In [None]:
def remove_html(text):
    """Strip HTML tags from ``text``.

    Every non-greedy ``<...>`` match is deleted. The text between tags is
    kept unchanged, and nothing is inserted where a tag was removed.

    Args:
        text: Raw HTML string. Any non-string value (e.g. ``None`` or a
            float NaN from a missing cell) is treated as missing content.

    Returns:
        str: ``text`` without tags, or ``''`` when ``text`` is not a string.
    """
    # Missing cells arrive as None/NaN; re.sub would raise TypeError on them.
    if not isinstance(text, str):
        return ''
    return re.sub('<.*?>', r'', text)

In [None]:
# Strip HTML tags from every row in one column-wise pass. This replaces the
# per-row `.loc` assignment, which is slow and only works when the index is
# exactly 0..n-1.
train['cleaned_text'] = train['html_content'].map(remove_html)
test['cleaned_text'] = test['html_content'].map(remove_html)

In [None]:
# Use the text features that were already extracted and saved to CSV.
# The 'Unnamed: 0' column is discarded right after loading
# (presumably the index written by an earlier to_csv -- TODO confirm).
text_train_df = pd.read_csv('input/05_seq_train_df.csv').drop(columns=['Unnamed: 0'])
text_test_df = pd.read_csv('input/05_seq_test_df.csv').drop(columns=['Unnamed: 0'])

In [None]:
# Stack the train text features on top of the test ones and renumber rows from 0.
merge_text = pd.concat(
    (text_train_df, text_test_df),
    ignore_index=True,
)

# ラベルデータの前処理

In [None]:
# Combine train and test (train rows first) so preprocessing is applied to both at once;
# the index is renumbered from 0.
merge_df = pd.concat(
    (train, test),
    ignore_index=True,
)

In [None]:
# Turn the 'goal' label into numeric bounds. All digit runs are extracted in
# one vectorised pass instead of a per-row `.loc` loop.
# A label with a single number gets goal_min == goal_max; otherwise the first
# number is the minimum and the second the maximum.
# The values are still digit strings here; they are cast to int afterwards.
goal_numbers = merge_df['goal'].str.findall(r"\d+")
merge_df['goal_min'] = goal_numbers.map(lambda nums: nums[0])
merge_df['goal_max'] = goal_numbers.map(
    lambda nums: nums[0] if len(nums) == 1 else nums[1]
)

In [None]:
# Cast the parsed goal bounds to int.
goal_bound_cols = ('goal_max', 'goal_min')
for bound_col in goal_bound_cols:
    merge_df[bound_col] = merge_df[bound_col].astype(int)

# Goal amount per day: each bound divided by the campaign duration.
per_day_cols = ('goal_per_day_max', 'goal_per_day_min')
for bound_col, per_day_col in zip(goal_bound_cols, per_day_cols):
    merge_df[per_day_col] = merge_df[bound_col] / merge_df['duration']

In [None]:
# One-hot encoding

dummy_cols = ['goal', 'country', 'category1', 'category2']
from sklearn.preprocessing import OneHotEncoder

# Pin the indicator dtype: pandas >= 2.0 returns bool columns by default,
# whereas earlier versions returned uint8 0/1 values. Fixing it keeps the
# encoded columns numeric and identical across pandas versions.
dummy_df = pd.get_dummies(merge_df[dummy_cols], dtype='uint8')

In [None]:
# Attach the one-hot columns to the combined frame, aligning rows on the index.
merge_df_new = merge_df.merge(
    dummy_df,
    left_index=True,
    right_index=True,
)

In [None]:
#ラベルエンコーディング
from sklearn.preprocessing import LabelEncoder

le_cols = ['goal', 'country', 'category1', 'category2']

# Overwrite each categorical column with integer codes.
# A fresh LabelEncoder is fitted per column, so `le` ends up holding
# the encoder of the last column only.
for column in le_cols:
    le = LabelEncoder()
    merge_df_new[column] = le.fit_transform(merge_df_new[column])

# tfidfによる特徴抽出

In [None]:
#tfidfによる特徴抽出
from sklearn.feature_extraction.text import TfidfVectorizer
# Fit a default TfidfVectorizer on the cleaned text; missing texts are
# replaced with empty strings before vectorising.
tv = TfidfVectorizer()
cleaned_text_filled = merge_df_new["cleaned_text"].fillna("")
features = tv.fit_transform(cleaned_text_filled)

In [None]:
from sklearn.decomposition import TruncatedSVD, NMF, LatentDirichletAllocation
from sklearn.pipeline import Pipeline



# TF-IDF followed by truncated SVD down to 100 components (fixed random_state).
svd_steps = [
    ("TfidfVectorizer", TfidfVectorizer()),
    ("TruncatedSVD", TruncatedSVD(n_components=100, random_state=42)),
]
tfidf_svd = Pipeline(steps=svd_steps)

# Missing texts are replaced with empty strings before fitting.
svd_input_text = merge_df_new["cleaned_text"].fillna("")
features_svd = tfidf_svd.fit_transform(svd_input_text)

In [None]:
# Wrap the SVD output in a DataFrame (default integer row/column labels) and preview it.
svd_df_merge = pd.DataFrame(data=features_svd)
svd_df_merge.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99
0,0.294975,0.000612,0.023093,-0.043301,-0.00465,0.020541,-0.045336,0.02375,-0.013548,0.034169,-0.013015,0.006802,-0.034911,0.006794,-0.015047,0.005609,-0.006645,0.028476,0.020401,-0.021404,0.013798,0.013175,-0.021458,0.00786,-0.033133,0.021228,-0.028845,0.01822,0.040855,0.015536,-0.009352,-0.025442,-0.007781,0.021415,-0.003995,-0.002129,0.006594,-0.018173,-0.025319,-0.037783,0.027987,-0.042046,-0.017961,0.007527,-0.014963,0.028129,-0.023459,-0.019673,-0.003277,-0.039588,0.015771,-0.002947,0.044751,-0.01005,-0.024048,-0.011614,-0.02343,-0.030329,-0.001305,0.012928,0.024367,0.003222,0.029102,0.000107,0.024981,-0.015438,-0.007298,-0.024717,-0.01588,-0.007488,-0.009271,0.007005,0.014566,-0.031326,0.013497,-0.01296,-0.01224,-0.031298,-0.022263,0.014773,-0.003791,-0.008527,-0.014285,-2.7e-05,-0.010357,0.023047,-0.01188,-0.016816,-0.013937,-0.00833,0.016746,-0.007637,-0.006122,-0.005141,0.014197,-0.002046,0.005369,-0.0048,-0.021516,0.000758
1,0.163203,-0.003916,0.027252,0.018163,-0.00254,-0.027746,0.007868,0.058627,-0.076373,0.005526,0.001523,0.063076,0.065007,-0.036947,0.008218,-0.003941,0.08455,-0.047071,-0.007865,-0.024665,-0.022845,0.037984,0.000789,-0.017015,0.020834,-0.002827,0.013883,-0.020393,-0.010918,-0.013796,0.024166,0.002487,-0.000861,-0.031051,-0.004367,-0.01824,0.003508,-0.01332,-0.00116,0.019565,-0.003766,0.00677,0.0068,0.015419,-0.015384,0.009552,-0.021994,0.022982,0.007848,-0.000524,0.032245,-0.003441,0.021012,-0.015403,-0.023079,0.024831,-0.002129,-0.005817,-0.010186,0.009194,0.02607,0.012619,0.007577,0.002824,0.01972,-0.009614,0.014508,0.00881,0.012031,0.016641,-0.012554,-0.021404,-0.012819,0.021749,0.008886,-0.008196,0.005917,-0.008584,0.013794,-0.004741,-0.001814,-0.000274,0.018601,-0.005945,0.012709,-0.015027,0.023367,-0.002542,0.012418,-0.013684,0.009684,-0.030274,-0.001825,-0.025953,0.004676,-0.02431,-0.029303,0.023187,-0.023117,0.013129
2,0.416711,0.004678,-0.052108,-0.079082,-0.010637,0.082928,-0.018374,0.036624,-0.071622,-0.046464,-0.014945,0.041355,-0.02023,-0.01095,-0.087223,-0.007249,-0.030307,0.014107,-0.012528,-0.046732,-0.023661,0.035883,0.028221,-0.034219,-0.007161,-0.004547,0.046385,-0.009552,0.02552,-0.041551,0.013066,0.021094,0.029878,0.006288,-0.000849,-0.046704,-0.016616,-0.026287,-0.004538,0.00416,0.008717,0.002065,-0.04487,0.015324,0.036764,-0.038749,0.029809,-0.01962,0.012642,0.052022,0.012008,0.030016,0.04677,-0.004494,-0.028914,0.046057,0.028594,-0.005432,0.041683,-0.04366,-0.016415,-0.003372,0.040315,-0.002028,0.003212,0.026453,0.002681,0.016203,0.00495,-0.034195,0.01177,0.008469,-0.004297,0.001236,-0.025254,0.0113,-0.022994,0.02176,-0.022508,0.020674,-0.015725,-0.012703,-0.039726,-0.029907,-0.0099,0.017603,0.020235,0.002021,-0.010242,0.01385,-0.022286,-0.033044,0.070526,-0.007964,0.013953,0.036927,0.00291,-0.01023,-0.00497,-0.02813
3,0.354831,0.040357,-0.074435,-0.139759,-0.016493,0.005695,-0.074027,-0.054699,-0.033433,0.030256,0.012643,-0.062602,0.008236,-0.00022,-0.03251,-0.007413,-0.049098,0.035602,-0.013406,-0.027928,0.020493,-0.020341,0.06054,-0.026912,-0.018913,-0.024155,0.03455,-0.004458,-0.079302,0.025177,-0.036539,-0.026917,-0.002154,-0.015356,-0.00721,0.007557,0.001892,0.008017,-0.002577,0.028486,-0.033584,0.013869,-0.032748,0.004899,0.018109,0.003867,0.003197,-0.002485,-0.019856,-0.00288,0.02524,0.004297,-0.009714,0.023688,0.011628,0.002504,8e-06,0.000278,-0.022154,-0.011611,0.005533,-0.038461,0.021524,-0.008633,0.003877,-0.047406,0.027908,4.1e-05,0.038034,0.019338,0.013657,0.014267,0.05544,-0.032712,-0.025378,-0.009006,-0.012042,-0.034948,0.00029,-0.041465,0.001809,0.006383,0.014765,0.02563,0.01026,-0.040052,-0.014977,-0.003671,0.000366,-0.014751,0.012951,-0.014472,-0.012784,-0.003175,0.022642,-0.010334,0.008542,-0.005884,-0.028169,0.0009
4,0.260418,0.001437,0.023826,-0.039866,-0.012193,0.048334,0.003215,0.018025,0.000457,0.010451,-0.007498,0.005902,-0.013798,-0.001269,0.006445,-0.006212,-0.013353,-0.004642,-0.011441,0.004561,-0.020013,0.021934,-0.038513,-0.031869,0.021594,-0.043526,0.026722,0.028592,-0.051489,-0.027249,0.002086,0.008673,0.006422,0.004141,0.009734,-0.014023,0.000653,-0.018494,-0.017617,-0.011407,-0.015436,0.002867,-0.003034,-0.004306,-0.008132,0.010548,0.026769,0.00149,-0.004458,0.020306,-0.006473,0.003492,0.001217,0.01346,-0.032308,-0.006383,0.009282,0.01194,-0.003439,-0.003248,-0.005907,-0.015235,-0.008688,0.015696,-0.012893,-0.002453,0.012246,-0.021435,0.015108,0.019133,0.009312,0.011678,0.000823,-0.017628,0.000487,-0.023413,-0.001761,0.00352,0.03874,-0.009838,-0.013699,0.005064,0.013515,0.023001,-0.001761,-0.015497,0.030531,-0.009453,0.008935,0.00179,-0.006393,0.009243,0.004041,-0.001661,-0.004956,0.001733,0.001411,-0.000822,0.029325,0.038022


# doc2vecによる特徴抽出

https://qiita.com/propella/items/febc423998fd210800ca
よりフォーク

In [None]:
#単語に対して分散表現を与える
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
# TaggedDocument expects `words` to be a list of tokens. Passing the raw
# string makes gensim iterate over it character by character, so the model
# would learn from single characters instead of words. Lower-case and split
# on whitespace first; non-string entries (e.g. NaN) become empty documents.
# Each document is tagged with its row position i.
documents = [
    TaggedDocument(doc.lower().split() if isinstance(doc, str) else [], [i])
    for i, doc in enumerate(merge_df_new["cleaned_text"])
]
model = Doc2Vec(documents, vector_size=100, window=5, min_count=1, workers=4)



In [None]:
# For every document, store the tags of its n most similar documents
# (cosine similarity of the Doc2Vec vectors) in columns similar0 .. similar{n-1}.
n = 10

# gensim 4 exposes the document vectors as `model.dv`; older releases only
# provide `model.docvecs`.
doc_vectors = model.dv if hasattr(model, 'dv') else model.docvecs
similar_cols = ['similar{}'.format(j) for j in range(n)]

# Query once per document with an explicit topn=n, rather than once per
# output column with the default topn. tqdm shows progress without printing
# one line per row.
similar_tags = [
    [tag for tag, _ in doc_vectors.most_similar(i, topn=n)]
    for i in tqdm(range(len(merge_df_new)))
]

# Float dtype matches what per-row `.loc` assignment into new columns yields.
similar_df = pd.DataFrame(
    similar_tags, index=merge_df_new.index, columns=similar_cols, dtype=float
)
for col in similar_cols:
    merge_df_new[col] = similar_df[col]

[1;30;43mストリーミング出力は最後の 5000 行に切り捨てられました。[0m
14591
14592
14593
14594
14595
14596
14597
14598
14599
14600
14601
14602
14603
14604
14605
14606
14607
14608
14609
14610
14611
14612
14613
14614
14615
14616
14617
14618
14619
14620
14621
14622
14623
14624
14625
14626
14627
14628
14629
14630
14631
14632
14633
14634
14635
14636
14637
14638
14639
14640
14641
14642
14643
14644
14645
14646
14647
14648
14649
14650
14651
14652
14653
14654
14655
14656
14657
14658
14659
14660
14661
14662
14663
14664
14665
14666
14667
14668
14669
14670
14671
14672
14673
14674
14675
14676
14677
14678
14679
14680
14681
14682
14683
14684
14685
14686
14687
14688
14689
14690
14691
14692
14693
14694
14695
14696
14697
14698
14699
14700
14701
14702
14703
14704
14705
14706
14707
14708
14709
14710
14711
14712
14713
14714
14715
14716
14717
14718
14719
14720
14721
14722
14723
14724
14725
14726
14727
14728
14729
14730
14731
14732
14733
14734
14735
14736
14737
14738
14739
14740
14741
14742
14743
14744
14745
14746
14747
14748
14749


In [None]:
# Display the first rows of merge_df_new as a sanity check.
merge_df_new.head()

Unnamed: 0,id,goal,country,duration,category1,category2,html_content,state,cleaned_text,goal_min,goal_max,goal_per_day_max,goal_per_day_min,goal_1-1000,goal_100000+,goal_10001-11000,goal_1001-2000,goal_11001-12000,goal_12001-13000,goal_13001-14000,goal_14001-15000,goal_15001-16000,goal_16001-17000,goal_17001-18000,goal_18001-19000,goal_19001-20000,goal_20001-21000,goal_2001-3000,goal_21001-22000,goal_22001-23000,goal_23001-24000,goal_24001-25000,goal_25001-26000,goal_26001-27000,goal_27001-28000,goal_28001-29000,goal_29001-30000,goal_30001-31000,goal_3001-4000,goal_31001-32000,goal_32001-33000,goal_33001-34000,goal_34001-35000,goal_35001-36000,goal_36001-37000,goal_37001-38000,goal_38001-39000,goal_39001-40000,goal_40001-41000,goal_4001-5000,goal_41001-42000,goal_42001-43000,goal_43001-44000,goal_44001-45000,goal_45001-46000,goal_46001-47000,goal_47001-48000,goal_48001-49000,goal_49001-50000,goal_50001-51000,goal_5001-6000,goal_51001-52000,goal_52001-53000,goal_53001-54000,goal_54001-55000,goal_55001-56000,goal_56001-57000,goal_57001-58000,goal_58001-59000,goal_59001-60000,goal_60001-61000,goal_6001-7000,goal_61001-62000,goal_62001-63000,goal_63001-64000,goal_64001-65000,goal_65001-66000,goal_66001-67000,goal_67001-68000,goal_68001-69000,goal_69001-70000,goal_70001-71000,goal_7001-8000,goal_71001-72000,goal_72001-73000,goal_73001-74000,goal_74001-75000,goal_75001-76000,goal_76001-77000,goal_77001-78000,goal_78001-79000,goal_79001-80000,goal_80001-81000,goal_8001-9000,goal_81001-82000,goal_82001-83000,goal_83001-84000,goal_84001-85000,goal_85001-86000,goal_86001-87000,goal_87001-88000,goal_88001-89000,goal_89001-90000,goal_90001-91000,goal_9001-10000,goal_91001-92000,goal_92001-93000,goal_93001-94000,goal_94001-95000,goal_95001-96000,goal_96001-97000,goal_97001-98000,goal_98001-99000,goal_99001-100000,country_AT,country_AU,country_BE,country_CA,country_CH,country_DE,country_DK,country_ES,country_FR,country_GB,country_HK,country_IE,country_IT,country_JP,country_LU,cou
ntry_MX,country_NL,country_NO,country_NZ,country_SE,country_SG,country_US,category1_art,category1_comics,category1_crafts,category1_dance,category1_design,category1_fashion,category1_film & video,category1_food,category1_games,category1_journalism,category1_music,category1_photography,category1_publishing,category1_technology,category1_theater,category2_3d printing,category2_academic,category2_accessories,category2_action,category2_animals,category2_animation,category2_anthologies,category2_apparel,category2_apps,category2_architecture,category2_art books,category2_audio,category2_bacon,category2_blues,category2_calendars,category2_camera equipment,category2_candles,category2_ceramics,category2_children's books,category2_childrenswear,category2_chiptune,category2_civic design,category2_classical music,category2_comedy,category2_comic books,category2_community gardens,category2_conceptual art,category2_cookbooks,category2_country & folk,category2_couture,category2_crochet,category2_digital art,category2_diy,category2_diy electronics,category2_documentary,category2_drama,category2_drinks,category2_electronic music,category2_embroidery,category2_events,category2_experimental,category2_fabrication tools,category2_faith,category2_family,category2_fantasy,category2_farmer's markets,category2_farms,category2_festivals,category2_fiction,category2_fine art,category2_flight,category2_food trucks,category2_footwear,category2_gadgets,category2_gaming hardware,category2_glass,category2_graphic design,category2_graphic novels,category2_hardware,category2_hip-hop,category2_horror,category2_illustration,category2_immersive,category2_indie rock,category2_installations,category2_interactive design,category2_jazz,category2_jewelry,category2_kids,category2_knitting,category2_latin,category2_letterpress,category2_literary journals,category2_literary spaces,category2_live games,category2_makerspaces,category2_metal,category2_mixed media,category2_mobile games,category2_movie 
theaters,category2_music videos,category2_musical,category2_narrative film,category2_nature,category2_nonfiction,category2_painting,category2_people,category2_performance art,category2_performances,category2_periodicals,category2_pet fashion,category2_photo,category2_photobooks,category2_places,category2_playing cards,category2_plays,category2_poetry,category2_pop,category2_pottery,category2_print,category2_printing,category2_product design,category2_public art,category2_punk,category2_puzzles,category2_quilts,category2_r&b,category2_radio & podcasts,category2_ready-to-wear,category2_residencies,category2_restaurants,category2_robots,category2_rock,category2_romance,category2_science fiction,category2_sculpture,category2_shorts,category2_small batch,category2_social practice,category2_software,category2_sound,category2_space exploration,category2_spaces,category2_stationery,category2_tabletop games,category2_television,category2_textiles,category2_thrillers,category2_toys,category2_translations,category2_typography,category2_vegan,category2_video,category2_video art,category2_video games,category2_wearables,category2_weaving,category2_web,category2_webcomics,category2_webseries,category2_woodworking,category2_workshops,category2_world music,category2_young adult,category2_zines,similar0,similar1,similar2,similar3,similar4,similar5,similar6,similar7,similar8,similar9
0,train_00000,13,21,45,0,77,"<div class=""contents""><div><p><a href=""http://dummy.com"">http://dummy.com<p>In its first year, The Shillito's Elves Display won an international \ndesign award for Shillito's department store. The elves display is arts\n and crafts at its finest. The mixed media exhibit displays the talents\n of local fine arts graduates, and the display, while ""folksy"", is as \ntechnologically advanced as Disney World's famous ""It's a Small World"" \nride. </p><p>The Shillito's Elves attracted close to 100...",1.0,"http://dummy.comIn its first year, The Shillito's Elves Display won an international \ndesign award for Shillito's department store. The elves display is arts\n and crafts at its finest. The mixed media exhibit displays the talents\n of local fine arts graduates, and the display, while ""folksy"", is as \ntechnologically advanced as Disney World's famous ""It's a Small World"" \nride. The Shillito's Elves attracted close to 100,000 people each\n year. It was one of the most beloved Christmas ...",20001,21000,466.666667,444.466667,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,18507.0,18351.0,18941.0,393.0,14123.0,15004.0,16388.0,9817.0,16943.0,58.0
1,train_00001,12,21,59,7,110,"<div class=""contents""><div><p>Cultural Pretzel Sports Bar is a place where people can come and watch their favorite local and world wide sports teams, while enjoying their favorite ethnic foods on a soft pretzel. Our menu includes a variety of appetizers and soft pretzels. Our pretzels include - Mexican Taco, Mexican Steak Fajita, Greek Gyro, Italian Sausage and Peppers, and American Steak Philly Soft Pretzels. With more to be added to the menu as the business grows.</p><p>Cultural Pretze...",0.0,"Cultural Pretzel Sports Bar is a place where people can come and watch their favorite local and world wide sports teams, while enjoying their favorite ethnic foods on a soft pretzel. Our menu includes a variety of appetizers and soft pretzels. Our pretzels include - Mexican Taco, Mexican Steak Fajita, Greek Gyro, Italian Sausage and Peppers, and American Steak Philly Soft Pretzels. With more to be added to the menu as the business grows.Cultural Pretzel plans to start with one Sports Bar ...",19001,20000,338.983051,322.050847,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,19176.0,98.0,1146.0,15183.0,18387.0,18161.0,16815.0,17711.0,18409.0,19191.0
2,train_00002,14,21,38,0,87,"<div class=""contents""><div><p>I want to perform this piece guerilla style, off the back off a 24 Ft truck on Christmas Eve, amid last minute Christmas shopping in New York City.</p>\n<p>Our truck pulls in, the door rolls up and I perform. </p>\n<p>New York is a city where you just do it - no excuses. A place where the new becomes the normal real fast and where the artist is challenged to capture the attention and the hearts of the unshockable and unstoppable. In times of celebration and re...",0.0,"I want to perform this piece guerilla style, off the back off a 24 Ft truck on Christmas Eve, amid last minute Christmas shopping in New York City.\nOur truck pulls in, the door rolls up and I perform. \nNew York is a city where you just do it - no excuses. A place where the new becomes the normal real fast and where the artist is challenged to capture the attention and the hearts of the unshockable and unstoppable. In times of celebration and rest our intellectual faculties are more easil...",2001,3000,78.947368,52.657895,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,202.0,14.0,204.0,9.0,206.0,199.0,200.0,10.0,11.0,409.0
3,train_00003,3,21,30,0,77,"<div class=""contents""><div><div class=""template asset"" contenteditable=""false"" data-alt-text="""" data-caption="""" data-id=""_xxx_"">\n<figure>\n<img alt="""" class=""fit lazyload"" data-src=""http://dummy.com""/>\n</figure>\n</div>\n<h1 class=""page-anchor"" id=""_xxx_"">\n<figure>\n<img alt=""Canyon de Chelley, Dine' (Navajo) Reservation, Arizona. Photo by: Demian Dine' Yazhi'"" class=""fit lazyload"" data-src=""http://dummy.com""/>\n<figcaption class=""px2"">Canyon de Chelley, Dine' (Navajo) Reservation, Arizo...",1.0,"\n\n\n\n\n\n\n\nCanyon de Chelley, Dine' (Navajo) Reservation, Arizona. Photo by: Demian Dine' Yazhi'\n\n\nAnother goal of the project is to actively document the process through photography, digital video, and text/prose/poetry. By using a variety of media and content, I hope this will help the project to take a unique shape as the weeks unfold. Ultimately, the end goal of TRANSPLANT ///. is independent zine distribution and a potential gallery showing. At this point, my interests lie i...",1001,2000,66.666667,33.366667,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,222.0,432.0,651.0,190.0,220.0,228.0,392.0,310.0,312.0,25.0
4,train_00004,3,21,29,6,139,"<div class=""contents""><div><p>The story of the show, both on and off screen, is about daring to dream after something impossible.Even with an incredible amount of difficulty and opposition, the project continued to grow in size and depth. A year ago, we were finally able to release the pilot episode of Azusa Ghost Hunter's Society (check it out! <a href=""http://dummy.com"">\n<figure>\n<img alt="""" class=""fit lazyload"" data-src=""http://dummy.com""/>\n</figure>\n</div>\n<p>Which brings us to now...",1.0,"The story of the show, both on and off screen, is about daring to dream after something impossible.Even with an incredible amount of difficulty and opposition, the project continued to grow in size and depth. A year ago, we were finally able to release the pilot episode of Azusa Ghost Hunter's Society (check it out! \n\n\n\n\nWhich brings us to now! We have three more episodes in the works to finish out the season, and we need your help to bring them to life! All financial support will go t...",1001,2000,68.965517,34.517241,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,974.0,213.0,15984.0,4125.0,1755.0,17278.0,19041.0,18813.0,18173.0,3239.0


# データのマージ/kmeans用の前処理

In [None]:
# Columns that are not used as features.
non_use_cols = [
    'id',
    'html_content',
    'cleaned_text',
    'state',
]

# Columns selected for scaling.
sc_cols = [
    'duration',
    'goal_max',
    'goal_min',
    'goal_per_day_max',
    'goal_per_day_min',
]

In [None]:
# Append the features extracted with BERT (merge_text), aligned on the row index.
# The unused columns are dropped from the left-hand frame first.
feature_base_df = merge_df_new.drop(columns=non_use_cols)
full_merge_df = feature_base_df.merge(merge_text, left_index=True, right_index=True)

In [None]:
# Add the features extracted from TF-IDF, joined row-by-row on the index.
full_merge_df_fin = full_merge_df.merge(
    svd_df_merge,
    left_index=True,
    right_index=True,
)

In [None]:
# Preview the first five rows of the merged feature table.
full_merge_df_fin.head(n=5)

Unnamed: 0,goal,country,duration,category1,category2,goal_min,goal_max,goal_per_day_max,goal_per_day_min,goal_1-1000,goal_100000+,goal_10001-11000,goal_1001-2000,goal_11001-12000,goal_12001-13000,goal_13001-14000,goal_14001-15000,goal_15001-16000,goal_16001-17000,goal_17001-18000,goal_18001-19000,goal_19001-20000,goal_20001-21000,goal_2001-3000,goal_21001-22000,goal_22001-23000,goal_23001-24000,goal_24001-25000,goal_25001-26000,goal_26001-27000,goal_27001-28000,goal_28001-29000,goal_29001-30000,goal_30001-31000,goal_3001-4000,goal_31001-32000,goal_32001-33000,goal_33001-34000,goal_34001-35000,goal_35001-36000,goal_36001-37000,goal_37001-38000,goal_38001-39000,goal_39001-40000,goal_40001-41000,goal_4001-5000,goal_41001-42000,goal_42001-43000,goal_43001-44000,goal_44001-45000,goal_45001-46000,goal_46001-47000,goal_47001-48000,goal_48001-49000,goal_49001-50000,goal_50001-51000,goal_5001-6000,goal_51001-52000,goal_52001-53000,goal_53001-54000,goal_54001-55000,goal_55001-56000,goal_56001-57000,goal_57001-58000,goal_58001-59000,goal_59001-60000,goal_60001-61000,goal_6001-7000,goal_61001-62000,goal_62001-63000,goal_63001-64000,goal_64001-65000,goal_65001-66000,goal_66001-67000,goal_67001-68000,goal_68001-69000,goal_69001-70000,goal_70001-71000,goal_7001-8000,goal_71001-72000,goal_72001-73000,goal_73001-74000,goal_74001-75000,goal_75001-76000,goal_76001-77000,goal_77001-78000,goal_78001-79000,goal_79001-80000,goal_80001-81000,goal_8001-9000,goal_81001-82000,goal_82001-83000,goal_83001-84000,goal_84001-85000,goal_85001-86000,goal_86001-87000,goal_87001-88000,goal_88001-89000,goal_89001-90000,goal_90001-91000,goal_9001-10000,goal_91001-92000,goal_92001-93000,goal_93001-94000,goal_94001-95000,goal_95001-96000,goal_96001-97000,goal_97001-98000,goal_98001-99000,goal_99001-100000,country_AT,country_AU,country_BE,country_CA,country_CH,country_DE,country_DK,country_ES,country_FR,country_GB,country_HK,country_IE,country_IT,country_JP,country_LU,country_MX,country_NL,country_NO,count
ry_NZ,country_SE,country_SG,country_US,category1_art,category1_comics,category1_crafts,category1_dance,category1_design,category1_fashion,category1_film & video,category1_food,category1_games,category1_journalism,category1_music,category1_photography,category1_publishing,category1_technology,category1_theater,category2_3d printing,category2_academic,category2_accessories,category2_action,category2_animals,category2_animation,category2_anthologies,category2_apparel,category2_apps,category2_architecture,category2_art books,category2_audio,category2_bacon,category2_blues,category2_calendars,category2_camera equipment,category2_candles,category2_ceramics,category2_children's books,category2_childrenswear,category2_chiptune,category2_civic design,category2_classical music,category2_comedy,category2_comic books,category2_community gardens,category2_conceptual art,category2_cookbooks,category2_country & folk,category2_couture,category2_crochet,category2_digital art,category2_diy,category2_diy electronics,category2_documentary,category2_drama,category2_drinks,category2_electronic music,category2_embroidery,category2_events,category2_experimental,category2_fabrication tools,category2_faith,category2_family,category2_fantasy,category2_farmer's markets,category2_farms,category2_festivals,category2_fiction,category2_fine art,category2_flight,category2_food trucks,category2_footwear,category2_gadgets,category2_gaming hardware,category2_glass,category2_graphic design,category2_graphic novels,category2_hardware,category2_hip-hop,category2_horror,category2_illustration,category2_immersive,category2_indie rock,category2_installations,category2_interactive design,category2_jazz,category2_jewelry,category2_kids,category2_knitting,category2_latin,category2_letterpress,category2_literary journals,category2_literary spaces,category2_live games,category2_makerspaces,category2_metal,category2_mixed media,category2_mobile games,category2_movie theaters,category2_music 
videos,category2_musical,category2_narrative film,category2_nature,category2_nonfiction,category2_painting,category2_people,category2_performance art,category2_performances,category2_periodicals,category2_pet fashion,category2_photo,category2_photobooks,category2_places,category2_playing cards,category2_plays,category2_poetry,category2_pop,category2_pottery,category2_print,category2_printing,category2_product design,category2_public art,...,618,619,620,621,622,623,624,625,626,627,628,629,630,631,632,633,634,635,636,637,638,639,640,641,642,643,644,645,646,647,648,649,650,651,652,653,654,655,656,657,658,659,660,661,662,663,664,665,666,667,668,669,670,671,672,673,674,675,676,677,678,679,680,681,682,683,684,685,686,687,688,689,690,691,692,693,694,695,696,697,698,699,700,701,702,703,704,705,706,707,708,709,710,711,712,713,714,715,716,717,718,719,720,721,722,723,724,725,726,727,728,729,730,731,732,733,734,735,736,737,738,739,740,741,742,743,744,745,746,747,748,749,750,751,752,753,754,755,756,757,758,759,760,761,762,763,764,765,766,767,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99
0,13,21,45,0,77,20001,21000,466.666667,444.466667,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,-0.099016,-0.27689,0.432394,0.493577,-0.315419,-0.105057,-0.042713,-0.204413,-0.0653,0.050056,-0.029868,0.050241,0.272299,-0.072101,-0.081447,0.370532,-0.100922,-0.17103,0.023219,0.199858,-0.089891,0.492024,-0.169314,0.258255,0.045725,-0.120114,0.04264,0.040575,-0.100747,0.095728,0.043895,-0.016952,0.399043,0.316103,0.307979,0.151861,0.1196,-0.035822,0.532072,0.318842,0.169312,0.09744,0.037351,-0.058728,0.446208,0.114807,0.226325,0.105076,0.141254,0.558709,0.254437,0.223112,0.267748,-0.364477,-0.158809,-0.142169,0.094735,-0.123932,-0.123077,0.173399,0.084587,0.130867,-0.390244,-0.144823,0.017667,0.353554,-0.045962,-0.230578,-0.144372,0.172423,-0.01224,-0.084559,-0.281694,0.041238,-0.065108,-0.001608,0.004597,-0.102077,-0.024896,-0.224232,0.194239,-0.02384,0.034052,-0.087112,-0.001468,-0.163935,0.181688,0.06966,0.13559,-0.000271,0.286472,-0.167668,-0.170211,0.153974,0.140964,0.029608,0.163608,-0.138477,-0.26283,0.103486,-0.01619,0.348189,-0.282846,-0.099151,-0.202355,-0.119073,0.090693,0.160036,-0.095885,-0.360658,-0.144671,-0.282785,0.022838,0.098948,-0.155007,-0.055355,0.024589,0.01183,-0.020386,0.021952,-0.040813,0.125426,-0.077865,0.097894,-0.126713,-0.277077,-0.159203,-0.085657,0.065597,0.022468,0.020938,0.117016,0.039094,-0.148795,0.491972,-0.069884,-0.041319,0.079746,-0.269651,-0.151853,-0.194008,-0.131417,0.170059,-0.133536,0.008463,-0.054571,0.017246,0.081685,0.019783,-0.061422,0.294975,0.000612,0.023093,-0.043301,-0.00465,0.020541,
-0.045336,0.02375,-0.013548,0.034169,-0.013015,0.006802,-0.034911,0.006794,-0.015047,0.005609,-0.006645,0.028476,0.020401,-0.021404,0.013798,0.013175,-0.021458,0.00786,-0.033133,0.021228,-0.028845,0.01822,0.040855,0.015536,-0.009352,-0.025442,-0.007781,0.021415,-0.003995,-0.002129,0.006594,-0.018173,-0.025319,-0.037783,0.027987,-0.042046,-0.017961,0.007527,-0.014963,0.028129,-0.023459,-0.019673,-0.003277,-0.039588,0.015771,-0.002947,0.044751,-0.01005,-0.024048,-0.011614,-0.02343,-0.030329,-0.001305,0.012928,0.024367,0.003222,0.029102,0.000107,0.024981,-0.015438,-0.007298,-0.024717,-0.01588,-0.007488,-0.009271,0.007005,0.014566,-0.031326,0.013497,-0.01296,-0.01224,-0.031298,-0.022263,0.014773,-0.003791,-0.008527,-0.014285,-2.7e-05,-0.010357,0.023047,-0.01188,-0.016816,-0.013937,-0.00833,0.016746,-0.007637,-0.006122,-0.005141,0.014197,-0.002046,0.005369,-0.0048,-0.021516,0.000758
1,12,21,59,7,110,19001,20000,338.983051,322.050847,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0.048337,-0.314898,0.409534,0.356073,-0.272879,-0.021678,0.030351,-0.052478,-0.14031,0.097961,0.092207,-0.010259,0.403746,-0.060153,-0.061244,0.21352,-0.252267,-0.081116,-0.041176,0.028254,-0.086761,0.343052,-0.119474,0.124151,0.064441,-0.018781,-0.080492,0.125397,-0.2552,0.011655,-0.112948,0.096387,0.009225,0.212272,0.385499,0.118064,0.20665,-0.092136,0.444054,0.124852,-0.006826,-0.028825,-0.092556,0.061963,0.2655,-0.13708,0.121477,-0.020793,0.08658,0.389501,0.214367,0.291178,0.389282,-0.41088,-0.328601,0.012085,-0.014353,-0.253426,0.028698,0.404181,-0.056057,0.076601,-0.204445,0.07409,0.052101,0.192723,-0.199436,0.057413,-0.049738,0.360082,0.009528,-0.052249,-0.243141,-0.054949,0.075687,0.105598,-0.225232,-0.047159,-0.155283,-0.079932,-0.143969,-0.214284,0.222476,-0.236891,0.10003,-0.088848,0.222703,-0.016411,0.157261,-0.11518,0.254034,-0.277487,0.067499,0.004428,0.19827,-0.035823,-0.066045,0.139501,-0.251705,-0.011609,-0.129165,0.15137,-0.311153,-0.034188,-0.059417,-0.049409,0.072307,0.084273,-0.040405,-0.237866,-0.067265,-0.227875,0.017409,0.219331,-0.303999,0.08434,-0.01513,0.056093,-0.000501,0.039358,0.003777,-0.001044,0.040922,0.028158,-0.107737,-0.268793,-0.110241,0.119759,0.048302,0.077601,0.061099,0.14612,0.029014,0.057201,0.470359,-0.038782,-0.006706,-0.034757,-0.443099,-0.05153,-0.163835,-0.082303,0.05906,0.098551,-0.04358,-0.05922,-0.109869,0.091215,0.038218,-0.165437,0.163203,-0.003916,0.027252,0.018163,-0.00254,-0.027746,0.0
07868,0.058627,-0.076373,0.005526,0.001523,0.063076,0.065007,-0.036947,0.008218,-0.003941,0.08455,-0.047071,-0.007865,-0.024665,-0.022845,0.037984,0.000789,-0.017015,0.020834,-0.002827,0.013883,-0.020393,-0.010918,-0.013796,0.024166,0.002487,-0.000861,-0.031051,-0.004367,-0.01824,0.003508,-0.01332,-0.00116,0.019565,-0.003766,0.00677,0.0068,0.015419,-0.015384,0.009552,-0.021994,0.022982,0.007848,-0.000524,0.032245,-0.003441,0.021012,-0.015403,-0.023079,0.024831,-0.002129,-0.005817,-0.010186,0.009194,0.02607,0.012619,0.007577,0.002824,0.01972,-0.009614,0.014508,0.00881,0.012031,0.016641,-0.012554,-0.021404,-0.012819,0.021749,0.008886,-0.008196,0.005917,-0.008584,0.013794,-0.004741,-0.001814,-0.000274,0.018601,-0.005945,0.012709,-0.015027,0.023367,-0.002542,0.012418,-0.013684,0.009684,-0.030274,-0.001825,-0.025953,0.004676,-0.02431,-0.029303,0.023187,-0.023117,0.013129
2,14,21,38,0,87,2001,3000,78.947368,52.657895,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,-0.02017,-0.283541,0.464422,0.342346,-0.283308,-0.149057,-0.039157,-0.095827,-0.102801,0.093466,-0.021114,-0.049955,0.457888,-0.106103,0.174561,0.401628,-0.185652,-0.075269,-0.072373,0.176677,-0.131597,0.4035,-0.350533,0.437464,-0.003757,-0.094513,0.098958,0.031341,-0.008934,0.040028,-0.035853,0.049786,0.253934,0.417872,0.331323,0.109843,0.310389,0.196955,0.532525,0.309496,0.153459,0.283344,0.165395,-0.199057,0.28388,0.086808,0.360177,0.338971,-0.072918,0.687798,0.225192,0.373279,0.413552,-0.435887,-0.299448,0.085308,0.190575,-0.346351,-0.100857,-0.107976,0.105638,-0.000752,-0.409167,-0.208216,0.041769,0.432589,0.077675,-0.086776,-0.217319,0.193178,-0.019456,-0.100732,-0.432224,-0.131132,-0.025486,-0.100318,0.038829,-0.056264,-0.143085,-0.242891,-0.038077,0.183769,0.228161,-0.057335,-0.120349,-0.175932,0.200536,-0.013608,0.315079,-0.026398,0.23529,-0.043466,-0.197427,0.123099,0.189525,-0.103015,-0.005976,0.202459,-0.18738,-0.006165,0.088286,0.273363,-0.396318,0.072737,-0.199762,-0.040016,0.035863,0.000122,-0.091035,-0.254547,-0.336109,-0.343129,-0.056594,0.107794,-0.299168,0.003168,0.153805,0.274932,-0.057594,0.078457,-0.035131,0.332087,-0.006184,-0.033015,0.008405,-0.214106,-0.258577,0.124668,-0.177172,-0.009706,0.205013,0.303661,-0.056582,-0.082093,0.437609,-0.185092,-0.262578,-0.058233,-0.200374,-0.38604,-0.144589,-0.05326,0.115027,-0.190575,0.018579,-0.334871,0.025476,-0.040297,0.028787,-0.025234,0.416711,0.004678,-0.052108,-0.079082,-0.010
637,0.082928,-0.018374,0.036624,-0.071622,-0.046464,-0.014945,0.041355,-0.02023,-0.01095,-0.087223,-0.007249,-0.030307,0.014107,-0.012528,-0.046732,-0.023661,0.035883,0.028221,-0.034219,-0.007161,-0.004547,0.046385,-0.009552,0.02552,-0.041551,0.013066,0.021094,0.029878,0.006288,-0.000849,-0.046704,-0.016616,-0.026287,-0.004538,0.00416,0.008717,0.002065,-0.04487,0.015324,0.036764,-0.038749,0.029809,-0.01962,0.012642,0.052022,0.012008,0.030016,0.04677,-0.004494,-0.028914,0.046057,0.028594,-0.005432,0.041683,-0.04366,-0.016415,-0.003372,0.040315,-0.002028,0.003212,0.026453,0.002681,0.016203,0.00495,-0.034195,0.01177,0.008469,-0.004297,0.001236,-0.025254,0.0113,-0.022994,0.02176,-0.022508,0.020674,-0.015725,-0.012703,-0.039726,-0.029907,-0.0099,0.017603,0.020235,0.002021,-0.010242,0.01385,-0.022286,-0.033044,0.070526,-0.007964,0.013953,0.036927,0.00291,-0.01023,-0.00497,-0.02813
3,3,21,30,0,77,1001,2000,66.666667,33.366667,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0.084124,-0.114182,0.534585,0.501327,-0.291453,-0.174247,-0.175823,-0.19591,-0.080897,-0.088237,0.172489,-0.046042,0.497426,0.016538,0.003041,0.33433,-0.215311,-0.151867,0.031725,0.214983,-0.121652,0.336897,-0.251209,0.238166,0.039906,-0.011877,0.006651,0.051042,-0.009679,-0.068706,-0.270961,0.279052,0.203568,0.308887,0.520364,0.093526,0.312526,0.069493,0.502856,0.217217,0.2045,0.218722,-0.094464,-0.109029,0.280555,0.086552,0.423173,0.196197,0.02849,0.620524,0.325132,0.249123,0.370745,-0.504188,-0.291015,-0.107864,0.148325,-0.407765,0.032222,-0.029692,0.248889,0.238606,-0.444766,-0.170246,-0.161998,0.437545,0.123714,-0.063325,-0.088773,0.007023,-0.073526,-0.288811,-0.237271,-0.038173,0.082235,-0.283352,0.036181,-0.072218,-0.003539,-0.226531,0.022845,-0.039846,0.17423,-0.061211,-0.011614,-0.19911,0.288792,0.153029,0.381599,-0.024611,0.083327,-0.130643,-0.033748,0.200941,0.241217,0.098606,0.01998,0.246637,-0.313034,0.010752,0.08923,0.187483,-0.491927,0.01097,0.155175,0.081185,0.020089,0.160122,-0.172304,-0.268231,-0.203885,-0.23484,-0.01174,0.315742,-0.406544,-0.086321,0.083227,0.101767,-0.137597,0.075571,0.12997,0.212556,0.064413,0.153079,-0.195678,-0.45485,-0.217247,0.323315,-0.110531,0.049794,0.051333,0.201352,0.045719,-0.057521,0.406207,-0.169024,-0.152101,-0.187742,-0.298506,-0.254729,-0.033543,-0.07083,-0.021828,-0.386574,-0.217737,-0.196374,-0.018924,-0.112999,-0.021013,-0.094477,0.354831,0.040357,-0.074435,-0.139759,-0.016493,0.005695,-0.0
74027,-0.054699,-0.033433,0.030256,0.012643,-0.062602,0.008236,-0.00022,-0.03251,-0.007413,-0.049098,0.035602,-0.013406,-0.027928,0.020493,-0.020341,0.06054,-0.026912,-0.018913,-0.024155,0.03455,-0.004458,-0.079302,0.025177,-0.036539,-0.026917,-0.002154,-0.015356,-0.00721,0.007557,0.001892,0.008017,-0.002577,0.028486,-0.033584,0.013869,-0.032748,0.004899,0.018109,0.003867,0.003197,-0.002485,-0.019856,-0.00288,0.02524,0.004297,-0.009714,0.023688,0.011628,0.002504,8e-06,0.000278,-0.022154,-0.011611,0.005533,-0.038461,0.021524,-0.008633,0.003877,-0.047406,0.027908,4.1e-05,0.038034,0.019338,0.013657,0.014267,0.05544,-0.032712,-0.025378,-0.009006,-0.012042,-0.034948,0.00029,-0.041465,0.001809,0.006383,0.014765,0.02563,0.01026,-0.040052,-0.014977,-0.003671,0.000366,-0.014751,0.012951,-0.014472,-0.012784,-0.003175,0.022642,-0.010334,0.008542,-0.005884,-0.028169,0.0009
4,3,21,29,6,139,1001,2000,68.965517,34.517241,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0.078654,-0.185649,0.434834,0.376679,-0.240298,-0.185552,-0.061799,-0.244584,-0.093922,-0.074016,0.123118,-0.115381,0.361706,-0.043762,0.091367,0.342106,-0.042614,0.017608,0.016053,0.206972,0.058559,0.394547,-0.007321,0.125167,0.008275,-0.147875,0.174358,0.10849,0.119163,0.049783,-0.358355,0.176943,0.290512,0.499278,0.321621,0.240702,0.232741,-0.175735,0.449033,0.225973,0.05352,0.280643,-0.101096,-0.06822,0.192382,0.002725,0.2517,0.186946,0.015407,0.465305,0.179443,0.036956,0.351708,-0.327923,-0.201456,-0.02685,0.200876,-0.280625,0.022876,0.00977,0.207958,0.177905,-0.254566,-0.252076,0.049693,0.263227,0.175732,-0.16274,-0.242306,0.044779,-0.103326,0.015833,-0.432321,-0.161794,-0.040915,-0.086671,-0.027571,-0.008098,-0.008049,-0.293039,0.099849,-0.104121,0.110217,-0.004879,-0.151635,-0.194842,0.291665,0.154227,0.389608,-0.043151,0.178419,-0.002178,-0.196511,0.044351,0.174697,-0.004709,-0.022601,0.171543,-0.304055,-0.063062,0.072249,0.333934,-0.378038,0.080425,0.008151,-0.007922,0.172276,0.166345,-0.138192,-0.195835,-0.20264,-0.12837,0.076689,0.180041,-0.266202,0.127223,-0.073341,-0.002601,-0.15531,-0.127922,-0.028397,0.140795,-0.049614,0.201103,-0.138729,-0.424335,-0.16845,0.096977,-0.187073,-0.00348,0.102417,0.17349,0.009545,-0.016481,0.395774,0.09517,-0.239783,-0.127131,-0.151571,-0.25196,-0.230313,-0.175219,0.085709,-0.371372,-0.141627,-0.16327,-0.156299,0.161962,0.167632,-0.225799,0.260418,0.001437,0.023826,-0.039866,-0.012193,0.048334,0.003
215,0.018025,0.000457,0.010451,-0.007498,0.005902,-0.013798,-0.001269,0.006445,-0.006212,-0.013353,-0.004642,-0.011441,0.004561,-0.020013,0.021934,-0.038513,-0.031869,0.021594,-0.043526,0.026722,0.028592,-0.051489,-0.027249,0.002086,0.008673,0.006422,0.004141,0.009734,-0.014023,0.000653,-0.018494,-0.017617,-0.011407,-0.015436,0.002867,-0.003034,-0.004306,-0.008132,0.010548,0.026769,0.00149,-0.004458,0.020306,-0.006473,0.003492,0.001217,0.01346,-0.032308,-0.006383,0.009282,0.01194,-0.003439,-0.003248,-0.005907,-0.015235,-0.008688,0.015696,-0.012893,-0.002453,0.012246,-0.021435,0.015108,0.019133,0.009312,0.011678,0.000823,-0.017628,0.000487,-0.023413,-0.001761,0.00352,0.03874,-0.009838,-0.013699,0.005064,0.013515,0.023001,-0.001761,-0.015497,0.030531,-0.009453,0.008935,0.00179,-0.006393,0.009243,0.004041,-0.001661,-0.004956,0.001733,0.001411,-0.000822,0.029325,0.038022


In [None]:
# Scale every column listed in sc_cols with StandardScaler, overwriting the
# column in full_merge_df_fin.
from sklearn.preprocessing import StandardScaler

# A fresh scaler is fitted for each column, so the columns are scaled
# independently of one another.
# NOTE: the loop variable `c` stays bound at notebook scope after this loop
# (its last value is sc_cols[-1]); later cells should not rely on it.
for c in sc_cols:
  std = StandardScaler()
  full_merge_df_fin[c] = std.fit_transform(pd.DataFrame(full_merge_df_fin[c]))

# kmeansによるクラスタリング

In [None]:
from sklearn.cluster import KMeans
# Columns dropped from the frame before fitting KMeans:
# four named columns followed by similar0 .. similar9.
non_use_cols_for_km = (
    ['goal', 'country', 'category1', 'category2']
    + ['similar{}'.format(idx) for idx in range(10)]
)

In [None]:
# Elbow-method estimate: increase the number of clusters from 1 to 49
# (range(1, 50)) and record the summed distance (kmeans.inertia_) for each.
# NOTE: the code below is disabled by wrapping it in a string literal, so
# running this cell only echoes that string as its output.
"""
dist_list =[]
for i in range(1,50):
    print('今はクラスター数{}のクラスタリングを実行しています。'.format(i))
    kmeans= KMeans(n_clusters=i, init='random', random_state=0)
    kmeans.fit(full_merge_df_fin.drop(non_use_cols_for_km, axis=1))
    dist_list.append(kmeans.inertia_)
    
# グラフを表示
plt.plot(range(1,50), dist_list,marker='+')
plt.xlabel('Number of clusters')
plt.ylabel('Distortion')
"""

"\ndist_list =[]\nfor i in range(1,50):\n    print('今はクラスター数{}のクラスタリングを実行しています。'.format(i))\n    kmeans= KMeans(n_clusters=i, init='random', random_state=0)\n    kmeans.fit(full_merge_df_fin.drop(non_use_cols_for_km, axis=1))\n    dist_list.append(kmeans.inertia_)\n    \n# グラフを表示\nplt.plot(range(1,50), dist_list,marker='+')\nplt.xlabel('Number of clusters')\nplt.ylabel('Distortion')\n"

In [None]:
# Clustering: fit KMeans for several cluster counts and store each labelling
# in full_merge_df_fin as a `cluster_number_<k>` column.
clusters_list=[5, 10, 15, 20]

# Build the feature matrix once, before any label column is written.
# Previously each fit re-read full_merge_df_fin, so the cluster_number_*
# columns created by earlier iterations were fed into later fits as features.
# Label columns left over from an earlier run of this cell are excluded too,
# which keeps the cell idempotent.
stale_cluster_cols = [
  col for col in full_merge_df_fin.columns
  if str(col).startswith('cluster_number_')
]
kmeans_features = full_merge_df_fin.drop(non_use_cols_for_km + stale_cluster_cols, axis=1)

for n_clusters in clusters_list:
  print('今クラスター数[{}]のクラスタリングをしています。'.format(n_clusters))
  kmeans = KMeans(init='random', n_clusters=n_clusters, random_state=0)
  kmeans.fit(kmeans_features)
  # Assign the label array positionally. Wrapping it in a default-index
  # Series would make pandas align on index labels and silently produce NaN
  # whenever the frame's index is not 0..n-1.
  full_merge_df_fin['cluster_number_{}'.format(n_clusters)] = kmeans.labels_

今クラスター数[5]のクラスタリングをしています。
今クラスター数[10]のクラスタリングをしています。
今クラスター数[15]のクラスタリングをしています。
今クラスター数[20]のクラスタリングをしています。


# aggregation特徴量の作成

In [None]:
# Columns used as grouping keys when building the aggregation features.
group_cols = (
    ['goal', 'country', 'category1', 'category2']
    + ['cluster_number_{}'.format(k) for k in (5, 10, 15, 20)]
    + ['similar{}'.format(idx) for idx in range(10)]
)

In [None]:
#関数定義
def agg_func_duration(input_df, cols):
  """Broadcast per-group statistics of ``duration`` back onto every row.

  Args:
    input_df: DataFrame containing ``duration`` and the grouping column(s).
    cols: Name of the column to group by (a list of names is also accepted).

  Returns:
    DataFrame with one row per row of ``input_df``, in the same order, whose
    columns are ``mean``, ``median``, ``max``, ``min`` and ``std`` of
    ``duration`` within the row's group, each suffixed with
    ``_agg_func_duration``.
  """
  agg_func = ["mean", "median", "max", "min", "std"]
  # Group by the `cols` argument. The previous body read a notebook-global
  # `c` instead, so the result depended on whichever loop had run last.
  keys = cols if isinstance(cols, list) else [cols]
  tmp = input_df.groupby(keys)["duration"].agg(agg_func)
  # Only the key columns are needed on the left side of the join.
  output_df = pd.merge(input_df[keys], tmp, how="left", on=keys)[agg_func].add_suffix("_agg_func_duration")

  return output_df

def agg_func_state(input_df, cols):
  """Broadcast per-group statistics of ``state`` back onto every row.

  Args:
    input_df: DataFrame containing ``state`` and the grouping column(s).
    cols: Name of the column to group by (a list of names is also accepted).

  Returns:
    DataFrame with one row per row of ``input_df``, in the same order, whose
    columns are ``mean``, ``median``, ``max``, ``min`` and ``std`` of
    ``state`` within the row's group, each suffixed with ``_agg_func_state``.
    Missing ``state`` values are skipped by the aggregations.
  """
  # NOTE(review): these are statistics of the target computed over all rows
  # passed in; confirm this is intended rather than an out-of-fold encoding.
  agg_func = ["mean", "median", "max", "min", "std"]
  # Group by the `cols` argument (the previous body read a notebook-global
  # `c`) and aggregate `state`; the previous body aggregated `duration`,
  # which merely duplicated agg_func_duration under a different suffix.
  keys = cols if isinstance(cols, list) else [cols]
  tmp = input_df.groupby(keys)["state"].agg(agg_func)
  # Only the key columns are needed on the left side of the join.
  output_df = pd.merge(input_df[keys], tmp, how="left", on=keys)[agg_func].add_suffix("_agg_func_state")

  return output_df

def agg_func_goal_per_day_max(input_df, cols):
  """Broadcast per-group statistics of ``goal_per_day_max`` onto every row.

  Args:
    input_df: DataFrame containing ``goal_per_day_max`` and the grouping
      column(s).
    cols: Name of the column to group by (a list of names is also accepted).

  Returns:
    DataFrame with one row per row of ``input_df``, in the same order, whose
    columns are ``mean``, ``median``, ``max``, ``min`` and ``std`` of
    ``goal_per_day_max`` within the row's group, each suffixed with
    ``_agg_func_goal_per_day_max``.
  """
  agg_func = ["mean", "median", "max", "min", "std"]
  # Group by the `cols` argument. The previous body read a notebook-global
  # `c` instead, so the result depended on whichever loop had run last.
  keys = cols if isinstance(cols, list) else [cols]
  tmp = input_df.groupby(keys)["goal_per_day_max"].agg(agg_func)
  # The suffix names the aggregated column; it used to be the copy-pasted
  # "_agg_func_state", which mislabelled these features.
  output_df = pd.merge(input_df[keys], tmp, how="left", on=keys)[agg_func].add_suffix("_agg_func_goal_per_day_max")

  return output_df

def agg_func_goal_per_day_min(input_df, cols):
  """Broadcast per-group statistics of ``goal_per_day_min`` onto every row.

  Args:
    input_df: DataFrame containing ``goal_per_day_min`` and the grouping
      column(s).
    cols: Name of the column to group by (a list of names is also accepted).

  Returns:
    DataFrame with one row per row of ``input_df``, in the same order, whose
    columns are ``mean``, ``median``, ``max``, ``min`` and ``std`` of
    ``goal_per_day_min`` within the row's group, each suffixed with
    ``_agg_func_goal_per_day_min``.
  """
  agg_func = ["mean", "median", "max", "min", "std"]
  # Group by the `cols` argument. The previous body read a notebook-global
  # `c` instead, so the result depended on whichever loop had run last.
  keys = cols if isinstance(cols, list) else [cols]
  tmp = input_df.groupby(keys)["goal_per_day_min"].agg(agg_func)
  # The suffix names the aggregated column; it used to be the copy-pasted
  # "_agg_func_state", which mislabelled these features.
  output_df = pd.merge(input_df[keys], tmp, how="left", on=keys)[agg_func].add_suffix("_agg_func_goal_per_day_min")

  return output_df

In [None]:
# Preparation for the aggregation run.
# Attach the `state` column from `train`; pandas aligns the assignment on the
# index, so rows of full_merge_df_fin with no matching index in `train`
# receive NaN (presumably the test rows; verify against how the frame was built).
full_merge_df_fin["state"] = train['state']
# Seed agg_df with the result of agg_func_duration, called with the first
# entry of group_cols; the following cell merges the remaining results onto it.
agg_df = agg_func_duration(full_merge_df_fin, group_cols[0])

In [None]:
# Run the aggregation functions and assemble the feature frame.
#
# agg_df is rebuilt from scratch here (for every entry of group_cols, including
# the first one), so re-running this cell always yields the same frame.
# The frames are joined with a single pd.concat and given unique names of the
# form "<stat>_<target>_by_<group column>". The previous chain of index merges
# gave same-named columns "_x"/"_y" suffixes that repeated many times, and
# merge suffixes that produce duplicate columns are rejected by newer pandas.
agg_steps = [
  ("duration", agg_func_duration),
  ("state", agg_func_state),
  ("goal_per_day_max", agg_func_goal_per_day_max),
  ("goal_per_day_min", agg_func_goal_per_day_min),
]

agg_frames = []
for agg_label, agg_fn in agg_steps:
  # The loop variable keeps the name `c`, the name this cell has always
  # exposed at notebook scope.
  for c in group_cols:
    agg_frame = agg_fn(full_merge_df_fin, c)
    # Keep only the statistic name (the part before "_agg_func_") and make the
    # column unique by adding the aggregated target and the grouping column.
    agg_frame.columns = [
      "{}_{}_by_{}".format(name.split("_agg_func_")[0], agg_label, c)
      for name in agg_frame.columns
    ]
    agg_frames.append(agg_frame)

# All frames share the row index produced by the aggregation functions, so a
# column-wise concat lines the rows up exactly like the former index merges.
agg_df = pd.concat(agg_frames, axis=1)
assert agg_df.columns.is_unique, "aggregation feature names must be unique"

In [None]:
# Display the assembled aggregation-feature frame (the cell's last expression is rendered).
agg_df

Unnamed: 0,mean_agg_func_duration_x,median_agg_func_duration_x,max_agg_func_duration_x,min_agg_func_duration_x,std_agg_func_duration_x,mean_agg_func_duration_y,median_agg_func_duration_y,max_agg_func_duration_y,min_agg_func_duration_y,std_agg_func_duration_y,mean_agg_func_duration_x.1,median_agg_func_duration_x.1,max_agg_func_duration_x.1,min_agg_func_duration_x.1,std_agg_func_duration_x.1,mean_agg_func_duration_y.1,median_agg_func_duration_y.1,max_agg_func_duration_y.1,min_agg_func_duration_y.1,std_agg_func_duration_y.1,mean_agg_func_duration_x.2,median_agg_func_duration_x.2,max_agg_func_duration_x.2,min_agg_func_duration_x.2,std_agg_func_duration_x.2,mean_agg_func_duration_y.2,median_agg_func_duration_y.2,max_agg_func_duration_y.2,min_agg_func_duration_y.2,std_agg_func_duration_y.2,mean_agg_func_duration_x.3,median_agg_func_duration_x.3,max_agg_func_duration_x.3,min_agg_func_duration_x.3,std_agg_func_duration_x.3,mean_agg_func_duration_y.3,median_agg_func_duration_y.3,max_agg_func_duration_y.3,min_agg_func_duration_y.3,std_agg_func_duration_y.3,mean_agg_func_duration_x.4,median_agg_func_duration_x.4,max_agg_func_duration_x.4,min_agg_func_duration_x.4,std_agg_func_duration_x.4,mean_agg_func_duration_y.4,median_agg_func_duration_y.4,max_agg_func_duration_y.4,min_agg_func_duration_y.4,std_agg_func_duration_y.4,mean_agg_func_duration_x.5,median_agg_func_duration_x.5,max_agg_func_duration_x.5,min_agg_func_duration_x.5,std_agg_func_duration_x.5,mean_agg_func_duration_y.5,median_agg_func_duration_y.5,max_agg_func_duration_y.5,min_agg_func_duration_y.5,std_agg_func_duration_y.5,mean_agg_func_duration_x.6,median_agg_func_duration_x.6,max_agg_func_duration_x.6,min_agg_func_duration_x.6,std_agg_func_duration_x.6,mean_agg_func_duration_y.6,median_agg_func_duration_y.6,max_agg_func_duration_y.6,min_agg_func_duration_y.6,std_agg_func_duration_y.6,mean_agg_func_duration_x.7,median_agg_func_duration_x.7,max_agg_func_duration_x.7,min_agg_func_duration_x.7,std_agg_func_duration_x.7
,mean_agg_func_duration_y.7,median_agg_func_duration_y.7,max_agg_func_duration_y.7,min_agg_func_duration_y.7,std_agg_func_duration_y.7,mean_agg_func_duration_x.8,median_agg_func_duration_x.8,max_agg_func_duration_x.8,min_agg_func_duration_x.8,std_agg_func_duration_x.8,mean_agg_func_duration_y.8,median_agg_func_duration_y.8,max_agg_func_duration_y.8,min_agg_func_duration_y.8,std_agg_func_duration_y.8,mean_agg_func_state_x,median_agg_func_state_x,max_agg_func_state_x,min_agg_func_state_x,std_agg_func_state_x,mean_agg_func_state_y,median_agg_func_state_y,max_agg_func_state_y,min_agg_func_state_y,std_agg_func_state_y,mean_agg_func_state_x.1,median_agg_func_state_x.1,max_agg_func_state_x.1,min_agg_func_state_x.1,std_agg_func_state_x.1,mean_agg_func_state_y.1,median_agg_func_state_y.1,max_agg_func_state_y.1,min_agg_func_state_y.1,std_agg_func_state_y.1,mean_agg_func_state_x.2,median_agg_func_state_x.2,max_agg_func_state_x.2,min_agg_func_state_x.2,std_agg_func_state_x.2,mean_agg_func_state_y.2,median_agg_func_state_y.2,max_agg_func_state_y.2,min_agg_func_state_y.2,std_agg_func_state_y.2,mean_agg_func_state_x.3,median_agg_func_state_x.3,max_agg_func_state_x.3,min_agg_func_state_x.3,std_agg_func_state_x.3,mean_agg_func_state_y.3,median_agg_func_state_y.3,max_agg_func_state_y.3,min_agg_func_state_y.3,std_agg_func_state_y.3,mean_agg_func_state_x.4,median_agg_func_state_x.4,max_agg_func_state_x.4,min_agg_func_state_x.4,std_agg_func_state_x.4,mean_agg_func_state_y.4,median_agg_func_state_y.4,max_agg_func_state_y.4,min_agg_func_state_y.4,std_agg_func_state_y.4,mean_agg_func_state_x.5,median_agg_func_state_x.5,max_agg_func_state_x.5,min_agg_func_state_x.5,std_agg_func_state_x.5,mean_agg_func_state_y.5,median_agg_func_state_y.5,max_agg_func_state_y.5,min_agg_func_state_y.5,std_agg_func_state_y.5,mean_agg_func_state_x.6,median_agg_func_state_x.6,max_agg_func_state_x.6,min_agg_func_state_x.6,std_agg_func_state_x.6,mean_agg_func_state_y.6,median_agg_func_state_y.6,max_agg_func_state_y
.6,min_agg_func_state_y.6,std_agg_func_state_y.6,mean_agg_func_state_x.7,median_agg_func_state_x.7,max_agg_func_state_x.7,min_agg_func_state_x.7,std_agg_func_state_x.7,mean_agg_func_state_y.7,median_agg_func_state_y.7,max_agg_func_state_y.7,min_agg_func_state_y.7,std_agg_func_state_y.7,mean_agg_func_state_x.8,median_agg_func_state_x.8,max_agg_func_state_x.8,min_agg_func_state_x.8,std_agg_func_state_x.8,mean_agg_func_state_y.8,median_agg_func_state_y.8,max_agg_func_state_y.8,min_agg_func_state_y.8,std_agg_func_state_y.8,mean_agg_func_state_x.9,median_agg_func_state_x.9,max_agg_func_state_x.9,min_agg_func_state_x.9,std_agg_func_state_x.9,mean_agg_func_state_y.9,median_agg_func_state_y.9,max_agg_func_state_y.9,min_agg_func_state_y.9,std_agg_func_state_y.9,mean_agg_func_state_x.10,median_agg_func_state_x.10,max_agg_func_state_x.10,min_agg_func_state_x.10,std_agg_func_state_x.10,mean_agg_func_state_y.10,median_agg_func_state_y.10,max_agg_func_state_y.10,min_agg_func_state_y.10,std_agg_func_state_y.10,mean_agg_func_state_x.11,median_agg_func_state_x.11,max_agg_func_state_x.11,min_agg_func_state_x.11,std_agg_func_state_x.11,mean_agg_func_state_y.11,median_agg_func_state_y.11,max_agg_func_state_y.11,min_agg_func_state_y.11,std_agg_func_state_y.11,mean_agg_func_state_x.12,median_agg_func_state_x.12,max_agg_func_state_x.12,min_agg_func_state_x.12,std_agg_func_state_x.12,mean_agg_func_state_y.12,median_agg_func_state_y.12,max_agg_func_state_y.12,min_agg_func_state_y.12,std_agg_func_state_y.12,mean_agg_func_state_x.13,median_agg_func_state_x.13,max_agg_func_state_x.13,min_agg_func_state_x.13,std_agg_func_state_x.13,mean_agg_func_state_y.13,median_agg_func_state_y.13,max_agg_func_state_y.13,min_agg_func_state_y.13,std_agg_func_state_y.13,mean_agg_func_state_x.14,median_agg_func_state_x.14,max_agg_func_state_x.14,min_agg_func_state_x.14,std_agg_func_state_x.14,mean_agg_func_state_y.14,median_agg_func_state_y.14,max_agg_func_state_y.14,min_agg_func_state_y.14,std_agg_func_state_y.
14,mean_agg_func_state_x.15,median_agg_func_state_x.15,max_agg_func_state_x.15,min_agg_func_state_x.15,std_agg_func_state_x.15,mean_agg_func_state_y.15,median_agg_func_state_y.15,max_agg_func_state_y.15,min_agg_func_state_y.15,std_agg_func_state_y.15,mean_agg_func_state_x.16,median_agg_func_state_x.16,max_agg_func_state_x.16,min_agg_func_state_x.16,std_agg_func_state_x.16,mean_agg_func_state_y.16,median_agg_func_state_y.16,max_agg_func_state_y.16,min_agg_func_state_y.16,std_agg_func_state_y.16,mean_agg_func_state_x.17,median_agg_func_state_x.17,max_agg_func_state_x.17,min_agg_func_state_x.17,std_agg_func_state_x.17,mean_agg_func_state_y.17,median_agg_func_state_y.17,max_agg_func_state_y.17,min_agg_func_state_y.17,std_agg_func_state_y.17,mean_agg_func_state_x.18,median_agg_func_state_x.18,max_agg_func_state_x.18,min_agg_func_state_x.18,std_agg_func_state_x.18,mean_agg_func_state_y.18,median_agg_func_state_y.18,max_agg_func_state_y.18,min_agg_func_state_y.18,std_agg_func_state_y.18,mean_agg_func_state_x.19,median_agg_func_state_x.19,max_agg_func_state_x.19,min_agg_func_state_x.19,std_agg_func_state_x.19,mean_agg_func_state_y.19,median_agg_func_state_y.19,max_agg_func_state_y.19,min_agg_func_state_y.19,std_agg_func_state_y.19,mean_agg_func_state_x.20,median_agg_func_state_x.20,max_agg_func_state_x.20,min_agg_func_state_x.20,std_agg_func_state_x.20,mean_agg_func_state_y.20,median_agg_func_state_y.20,max_agg_func_state_y.20,min_agg_func_state_y.20,std_agg_func_state_y.20,mean_agg_func_state_x.21,median_agg_func_state_x.21,max_agg_func_state_x.21,min_agg_func_state_x.21,std_agg_func_state_x.21,mean_agg_func_state_y.21,median_agg_func_state_y.21,max_agg_func_state_y.21,min_agg_func_state_y.21,std_agg_func_state_y.21,mean_agg_func_state_x.22,median_agg_func_state_x.22,max_agg_func_state_x.22,min_agg_func_state_x.22,std_agg_func_state_x.22,mean_agg_func_state_y.22,median_agg_func_state_y.22,max_agg_func_state_y.22,min_agg_func_state_y.22,std_agg_func_state_y.22,mean_agg_func
_state_x.23,median_agg_func_state_x.23,max_agg_func_state_x.23,min_agg_func_state_x.23,std_agg_func_state_x.23,mean_agg_func_state_y.23,median_agg_func_state_y.23,max_agg_func_state_y.23,min_agg_func_state_y.23,std_agg_func_state_y.23,mean_agg_func_state_x.24,median_agg_func_state_x.24,max_agg_func_state_x.24,min_agg_func_state_x.24,std_agg_func_state_x.24,mean_agg_func_state_y.24,median_agg_func_state_y.24,max_agg_func_state_y.24,min_agg_func_state_y.24,std_agg_func_state_y.24,mean_agg_func_state_x.25,median_agg_func_state_x.25,max_agg_func_state_x.25,min_agg_func_state_x.25,std_agg_func_state_x.25,mean_agg_func_state_y.25,median_agg_func_state_y.25,max_agg_func_state_y.25,min_agg_func_state_y.25,std_agg_func_state_y.25,mean_agg_func_state_x.26,median_agg_func_state_x.26,max_agg_func_state_x.26,min_agg_func_state_x.26,std_agg_func_state_x.26,mean_agg_func_state_y.26,median_agg_func_state_y.26,max_agg_func_state_y.26,min_agg_func_state_y.26,std_agg_func_state_y.26
0,1.020501,1.020501,1.020501,1.020501,0.0,-0.019053,-0.220204,4.742618,-2.618901,0.983125,-0.090545,-0.220204,4.659904,-2.618901,1.047584,-0.111216,-0.220204,2.261207,-2.618901,0.911437,-0.059072,-0.220204,4.742618,-2.618901,0.992623,-0.052729,-0.220204,4.742618,-2.618901,1.024506,2.048082,2.261207,4.742618,0.606933,0.569379,0.790878,0.979145,3.171058,-2.618901,1.233982,1.020501,1.020501,1.020501,1.020501,,0.209907,-0.137490,1.020501,-0.220204,0.571264,0.606933,0.400149,2.261207,-0.633772,1.307818,1.020501,1.020501,1.020501,1.020501,,0.441506,0.441506,1.020501,-0.137490,0.818824,-0.267469,-0.385631,1.020501,-1.460909,0.978515,1.020501,1.020501,1.020501,1.020501,,-0.096133,-0.220204,1.020501,-0.964627,0.822991,-0.468345,-0.964627,1.020501,-1.460909,1.313039,1.020501,1.020501,1.020501,1.020501,,0.258354,-0.220204,2.261207,-0.964627,0.889830,-0.019053,-0.220204,4.742618,-2.618901,0.983125,-0.090545,-0.220204,4.659904,-2.618901,1.047584,-0.111216,-0.220204,2.261207,-2.618901,0.911437,-0.059072,-0.220204,4.742618,-2.618901,0.992623,-0.052729,-0.220204,4.742618,-2.618901,1.024506,2.048082,2.261207,4.742618,0.606933,0.569379,0.790878,0.979145,3.171058,-2.618901,1.233982,1.020501,1.020501,1.020501,1.020501,,0.209907,-0.137490,1.020501,-0.220204,0.571264,0.606933,0.400149,2.261207,-0.633772,1.307818,1.020501,1.020501,1.020501,1.020501,,0.441506,0.441506,1.020501,-0.137490,0.818824,-0.267469,-0.385631,1.020501,-1.460909,0.978515,1.020501,1.020501,1.020501,1.020501,,-0.096133,-0.220204,1.020501,-0.964627,0.822991,-0.468345,-0.964627,1.020501,-1.460909,1.313039,1.020501,1.020501,1.020501,1.020501,,0.196629,0.255796,0.521505,-0.054198,0.146281,-0.000641,-0.216576,17.349745,-0.354351,0.663338,-0.132644,-0.275622,10.707018,-0.353121,0.498883,-0.187126,-0.275622,3.178595,-0.349430,0.341657,-0.163605,-0.246099,2.799011,-0.354351,0.246785,-0.177129,-0.260861,2.799011,-0.354351,0.237668,-0.240465,-0.290384,0.329604,-0.354351,0.131072,-0.214905,-0.275622,2.588131,-0.34943,0.190420,0.04
9133,0.049133,0.049133,0.049133,,-0.096483,-0.221338,0.226273,-0.305145,0.225105,-0.185576,-0.231337,0.049133,-0.328764,0.173853,0.049133,0.049133,0.049133,0.049133,,-0.057531,-0.057531,0.049133,-0.164196,0.150846,-0.097475,-0.237664,0.324683,-0.305145,0.239107,0.049133,0.049133,0.049133,0.049133,,-0.191270,-0.239773,0.049133,-0.334669,0.179434,-0.108324,-0.068960,0.049133,-0.305145,0.180390,0.049133,0.049133,0.049133,0.049133,,0.202338,0.258802,0.512372,-0.037030,0.139598,-0.000649,-0.214506,17.416177,-0.332852,0.664832,-0.135602,-0.273669,10.760287,-0.332851,0.500612,-0.191517,-0.273669,3.181483,-0.332847,0.340144,-0.165326,-0.249636,2.709957,-0.332852,0.243766,-0.179635,-0.269441,2.709957,-0.332852,0.232586,-0.224548,-0.273684,0.347532,-0.332852,0.131067,-0.209434,-0.272162,2.477550,-0.332847,0.179261,0.061581,0.061581,0.061581,0.061581,,-0.090536,-0.218324,0.229220,-0.303251,0.225953,-0.178766,-0.221908,0.061581,-0.332827,0.181160,0.061581,0.061581,0.061581,0.061581,,-0.049744,-0.049744,0.061581,-0.161069,0.157437,-0.100361,-0.248301,0.337677,-0.332803,0.251095,0.061581,0.061581,0.061581,0.061581,,-0.189854,-0.244081,0.061581,-0.332833,0.187343,-0.116828,-0.079262,0.061581,-0.332803,0.199857,0.061581,0.061581,0.061581,0.061581,
1,2.178493,2.178493,2.178493,2.178493,0.0,-0.019053,-0.220204,4.742618,-2.618901,0.983125,0.107058,-0.220204,2.261207,-2.618901,1.016086,0.176685,-0.220204,2.261207,-2.618901,1.052416,-0.059072,-0.220204,4.742618,-2.618901,0.992623,-0.052729,-0.220204,4.742618,-2.618901,1.024506,2.048082,2.261207,4.742618,0.606933,0.569379,0.790878,0.979145,3.171058,-2.618901,1.233982,0.888160,0.524219,2.261207,-0.302918,1.257955,0.389810,-0.220204,2.261207,-1.378196,1.299525,2.178493,2.178493,2.178493,2.178493,,1.792496,2.178493,2.178493,1.020501,0.668567,0.896431,0.896431,2.178493,-0.385631,1.813110,0.979145,0.979145,2.178493,-0.220204,1.696135,0.094108,-0.137490,2.178493,-1.460909,1.314862,0.379470,-0.137490,2.178493,-0.385631,1.212586,0.648290,0.648290,2.178493,-0.881914,2.164034,2.178493,2.178493,2.178493,2.178493,,0.242664,-0.220204,4.329049,-2.618901,1.028299,-0.019053,-0.220204,4.742618,-2.618901,0.983125,0.107058,-0.220204,2.261207,-2.618901,1.016086,0.176685,-0.220204,2.261207,-2.618901,1.052416,-0.059072,-0.220204,4.742618,-2.618901,0.992623,-0.052729,-0.220204,4.742618,-2.618901,1.024506,2.048082,2.261207,4.742618,0.606933,0.569379,0.790878,0.979145,3.171058,-2.618901,1.233982,0.888160,0.524219,2.261207,-0.302918,1.257955,0.389810,-0.220204,2.261207,-1.378196,1.299525,2.178493,2.178493,2.178493,2.178493,,1.792496,2.178493,2.178493,1.020501,0.668567,0.896431,0.896431,2.178493,-0.385631,1.813110,0.979145,0.979145,2.178493,-0.220204,1.696135,0.094108,-0.137490,2.178493,-1.460909,1.314862,0.379470,-0.137490,2.178493,-0.385631,1.212586,0.648290,0.648290,2.178493,-0.881914,2.164034,2.178493,2.178493,2.178493,2.178493,,0.241578,0.226273,17.349745,-0.155793,0.857872,-0.000641,-0.216576,17.349745,-0.354351,0.663338,0.233995,-0.068960,58.977496,-0.349430,1.502638,0.650141,0.226273,58.977496,-0.336514,3.175402,-0.163605,-0.246099,2.799011,-0.354351,0.246785,-0.177129,-0.260861,2.799011,-0.354351,0.237668,-0.240465,-0.290384,0.329604,-0.354351,0.131072,-0.214905,-0.275622,2.588131,-
0.34943,0.190420,-0.117083,-0.216576,0.317113,-0.349430,0.264296,-0.095196,-0.152991,0.324683,-0.334669,0.258744,-0.063956,-0.063956,-0.063956,-0.063956,,0.416978,-0.063956,1.604023,-0.289133,1.034158,-0.198258,-0.198258,-0.063956,-0.332560,0.189932,0.450199,0.450199,0.964353,-0.063956,0.727124,-0.220628,-0.246099,-0.063956,-0.335621,0.103618,-0.157531,-0.122450,-0.047872,-0.337353,0.133748,-0.053038,-0.053038,-0.042120,-0.063956,0.015440,-0.063956,-0.063956,-0.063956,-0.063956,,0.243790,0.229220,16.529612,-0.134480,0.816636,-0.000649,-0.214506,17.416177,-0.332852,0.664832,0.236176,-0.066597,58.239854,-0.332847,1.490582,0.653640,0.229220,58.239854,-0.332834,3.139052,-0.165326,-0.249636,2.709957,-0.332852,0.243766,-0.179635,-0.269441,2.709957,-0.332852,0.232586,-0.224548,-0.273684,0.347532,-0.332852,0.131067,-0.209434,-0.272162,2.477550,-0.332847,0.179261,-0.107796,-0.214506,0.327061,-0.332847,0.265361,-0.091351,-0.144042,0.337677,-0.332833,0.259193,-0.047058,-0.047058,-0.047058,-0.047058,,0.439838,-0.047058,1.639253,-0.272681,1.044832,-0.189944,-0.189944,-0.047058,-0.332830,0.202072,0.460853,0.460853,0.968764,-0.047058,0.718294,-0.220685,-0.273640,-0.047058,-0.332834,0.112151,-0.151823,-0.113700,-0.047058,-0.332835,0.135883,-0.048754,-0.048754,-0.047058,-0.050451,0.002399,-0.047058,-0.047058,-0.047058,-0.047058,
2,0.441506,0.441506,0.441506,0.441506,0.0,-0.019053,-0.220204,4.742618,-2.618901,0.983125,-0.090545,-0.220204,4.659904,-2.618901,1.047584,0.102029,-0.220204,4.659904,-2.536187,1.200040,-0.059072,-0.220204,4.742618,-2.618901,0.992623,-0.088309,-0.220204,4.659904,-2.536187,0.935875,-0.285441,-0.220204,2.261207,-2.536187,0.652257,-0.285441,-0.220204,2.261207,-2.536187,0.652257,0.507677,0.193365,2.261207,-0.220204,1.015393,-0.137490,-0.178847,0.441506,-0.633772,0.536046,0.731004,0.731004,1.020501,0.441506,0.409412,0.441506,0.441506,0.441506,0.441506,,0.524219,0.524219,0.606933,0.441506,0.116975,0.027937,-0.054777,0.441506,-0.302918,0.379042,-0.165061,0.110651,0.606933,-1.460909,0.881072,0.276078,0.441506,0.606933,-0.220204,0.437680,0.999823,0.937788,2.261207,-0.137490,1.062208,0.469077,0.441506,0.772360,0.193365,0.290481,-0.074289,-0.220204,4.659904,-2.536187,0.931173,-0.019053,-0.220204,4.742618,-2.618901,0.983125,-0.090545,-0.220204,4.659904,-2.618901,1.047584,0.102029,-0.220204,4.659904,-2.536187,1.200040,-0.059072,-0.220204,4.742618,-2.618901,0.992623,-0.088309,-0.220204,4.659904,-2.536187,0.935875,-0.285441,-0.220204,2.261207,-2.536187,0.652257,-0.285441,-0.220204,2.261207,-2.536187,0.652257,0.507677,0.193365,2.261207,-0.220204,1.015393,-0.137490,-0.178847,0.441506,-0.633772,0.536046,0.731004,0.731004,1.020501,0.441506,0.409412,0.441506,0.441506,0.441506,0.441506,,0.524219,0.524219,0.606933,0.441506,0.116975,0.027937,-0.054777,0.441506,-0.302918,0.379042,-0.165061,0.110651,0.606933,-1.460909,0.881072,0.276078,0.441506,0.606933,-0.220204,0.437680,0.999823,0.937788,2.261207,-0.137490,1.062208,0.469077,0.441506,0.772360,0.193365,0.290481,-0.266549,-0.275622,0.964353,-0.334337,0.062541,-0.000641,-0.216576,17.349745,-0.354351,0.663338,-0.132644,-0.275622,10.707018,-0.353121,0.498883,-0.111421,-0.265781,2.588131,-0.353121,0.468439,-0.163605,-0.246099,2.799011,-0.354351,0.246785,-0.169354,-0.246099,1.407202,-0.353121,0.215317,-0.161751,-0.239884,1.407202,-0.349430,0.22040
0,-0.161751,-0.239884,1.407202,-0.34943,0.220400,0.041880,-0.278479,0.595313,-0.334669,0.472690,-0.081343,-0.258374,0.521505,-0.330127,0.404383,0.654877,0.654877,1.604023,-0.294268,1.342295,-0.294268,-0.294268,-0.294268,-0.294268,,-0.318159,-0.318159,-0.294268,-0.342049,0.033786,0.444917,0.466149,1.162872,-0.294268,0.728802,-0.234796,-0.299707,-0.068960,-0.342049,0.126654,-0.067794,-0.076340,0.167226,-0.294268,0.230866,-0.183976,-0.238718,0.035800,-0.294268,0.154702,-0.242473,-0.294268,-0.111136,-0.322016,0.114585,-0.267605,-0.273669,0.555034,-0.312909,0.041797,-0.000649,-0.214506,17.416177,-0.332852,0.664832,-0.135602,-0.273669,10.760287,-0.332851,0.500612,-0.114987,-0.273669,2.625311,-0.332851,0.467144,-0.165326,-0.249636,2.709957,-0.332852,0.243766,-0.170296,-0.244087,1.361445,-0.332851,0.213742,-0.164163,-0.244087,1.361445,-0.332847,0.219328,-0.164163,-0.244087,1.361445,-0.332847,0.219328,0.049697,-0.275579,0.613768,-0.332833,0.477773,-0.079008,-0.256232,0.529259,-0.332828,0.407847,0.676561,0.676561,1.639253,-0.286131,1.361452,-0.286131,-0.286131,-0.286131,-0.286131,,-0.309485,-0.309485,-0.286131,-0.332840,0.033028,0.450648,0.471419,1.166656,-0.286131,0.726616,-0.236299,-0.309467,-0.066597,-0.332840,0.132686,-0.060893,-0.066604,0.170057,-0.286131,0.228147,-0.173399,-0.223393,0.039324,-0.286131,0.150573,-0.234159,-0.286131,-0.104635,-0.311711,0.112898
3,-0.220204,-0.220204,-0.220204,-0.220204,0.0,-0.019053,-0.220204,4.742618,-2.618901,0.983125,-0.090545,-0.220204,4.659904,-2.618901,1.047584,-0.111216,-0.220204,2.261207,-2.618901,0.911437,-0.059072,-0.220204,4.742618,-2.618901,0.992623,-0.088309,-0.220204,4.659904,-2.536187,0.935875,-0.285441,-0.220204,2.261207,-2.536187,0.652257,-0.285441,-0.220204,2.261207,-2.536187,0.652257,-0.220204,-0.220204,-0.220204,-0.220204,0.000000,-0.137490,-0.220204,0.027937,-0.220204,0.143264,-0.220204,-0.220204,-0.220204,-0.220204,,0.606933,-0.220204,2.261207,-0.220204,1.432643,0.016121,-0.220204,1.020501,-0.220204,0.468943,-0.085794,-0.220204,2.261207,-1.543623,1.053643,0.087018,-0.220204,2.261207,-1.047341,1.029461,0.992930,1.020501,2.178493,-0.220204,1.199586,0.400149,-0.220204,2.261207,-0.220204,1.240705,0.069294,0.069294,0.358792,-0.220204,0.409412,-0.145838,-0.220204,4.659904,-2.618901,0.965364,-0.019053,-0.220204,4.742618,-2.618901,0.983125,-0.090545,-0.220204,4.659904,-2.618901,1.047584,-0.111216,-0.220204,2.261207,-2.618901,0.911437,-0.059072,-0.220204,4.742618,-2.618901,0.992623,-0.088309,-0.220204,4.659904,-2.536187,0.935875,-0.285441,-0.220204,2.261207,-2.536187,0.652257,-0.285441,-0.220204,2.261207,-2.536187,0.652257,-0.220204,-0.220204,-0.220204,-0.220204,0.000000,-0.137490,-0.220204,0.027937,-0.220204,0.143264,-0.220204,-0.220204,-0.220204,-0.220204,,0.606933,-0.220204,2.261207,-0.220204,1.432643,0.016121,-0.220204,1.020501,-0.220204,0.468943,-0.085794,-0.220204,2.261207,-1.543623,1.053643,0.087018,-0.220204,2.261207,-1.047341,1.029461,0.992930,1.020501,2.178493,-0.220204,1.199586,0.400149,-0.220204,2.261207,-0.220204,1.240705,0.069294,0.069294,0.358792,-0.220204,0.409412,-0.294839,-0.305145,1.407202,-0.344289,0.066360,-0.000641,-0.216576,17.349745,-0.354351,0.663338,-0.132644,-0.275622,10.707018,-0.353121,0.498883,-0.187126,-0.275622,3.178595,-0.349430,0.341657,-0.163605,-0.246099,2.799011,-0.354351,0.246785,-0.169354,-0.246099,1.407202,-0.353121,0.215317,-0.161751,-0
.239884,1.407202,-0.349430,0.220400,-0.161751,-0.239884,1.407202,-0.34943,0.220400,-0.216576,-0.260861,-0.009913,-0.305145,0.108876,-0.306935,-0.305145,-0.305145,-0.310513,0.003099,-0.305145,-0.305145,-0.305145,-0.305145,,-0.216576,-0.246099,-0.098483,-0.305145,0.106448,-0.089244,-0.275622,0.324683,-0.338886,0.289535,-0.219875,-0.253480,-0.068960,-0.333651,0.102046,-0.172689,-0.290384,0.192532,-0.319907,0.203877,0.336582,-0.289133,1.604023,-0.305145,1.097666,-0.279313,-0.283003,-0.246099,-0.305145,0.025210,-0.310731,-0.310731,-0.305145,-0.316316,0.007899,-0.298082,-0.303251,0.555477,-0.322881,0.033279,-0.000649,-0.214506,17.416177,-0.332852,0.664832,-0.135602,-0.273669,10.760287,-0.332851,0.500612,-0.191517,-0.273669,3.181483,-0.332847,0.340144,-0.165326,-0.249636,2.709957,-0.332852,0.243766,-0.170296,-0.244087,1.361445,-0.332851,0.213742,-0.164163,-0.244087,1.361445,-0.332847,0.219328,-0.164163,-0.244087,1.361445,-0.332847,0.219328,-0.214506,-0.258878,-0.007434,-0.303251,0.109092,-0.304148,-0.303251,-0.303251,-0.305943,0.001554,-0.303251,-0.303251,-0.303251,-0.303251,,-0.209580,-0.244087,-0.081403,-0.303251,0.114879,-0.084912,-0.273669,0.337677,-0.332837,0.291868,-0.220314,-0.244095,-0.066597,-0.332832,0.097213,-0.169793,-0.273684,0.199634,-0.332818,0.206235,0.354441,-0.272681,1.639253,-0.303251,1.112785,-0.273673,-0.273677,-0.244087,-0.303251,0.024153,-0.306052,-0.306052,-0.303251,-0.308853,0.003961
4,-0.302918,-0.302918,-0.302918,-0.302918,0.0,-0.019053,-0.220204,4.742618,-2.618901,0.983125,0.015340,-0.220204,4.659904,-2.536187,1.007883,-0.035978,-0.220204,3.998195,-2.039905,1.003031,-0.059072,-0.220204,4.742618,-2.618901,0.992623,-0.052729,-0.220204,4.742618,-2.618901,1.024506,-0.427167,-0.220204,1.103215,-2.536187,0.596541,-0.427784,-0.220204,1.020501,-2.536187,0.595302,-0.261561,-0.261561,-0.220204,-0.302918,0.058487,0.234721,0.234721,0.772360,-0.302918,0.760336,-0.302918,-0.302918,-0.302918,-0.302918,,-0.302918,-0.302918,-0.302918,-0.302918,,-0.302918,-0.302918,-0.302918,-0.302918,,-1.005984,-1.005984,-0.302918,-1.709050,0.994286,-0.261561,-0.261561,-0.220204,-0.302918,0.058487,-0.054777,-0.054777,0.193365,-0.302918,0.350924,-0.323596,-0.261561,-0.220204,-0.551059,0.156575,0.255400,-0.261561,2.013066,-0.468345,1.176310,-0.145838,-0.220204,4.659904,-2.618901,0.965364,-0.019053,-0.220204,4.742618,-2.618901,0.983125,0.015340,-0.220204,4.659904,-2.536187,1.007883,-0.035978,-0.220204,3.998195,-2.039905,1.003031,-0.059072,-0.220204,4.742618,-2.618901,0.992623,-0.052729,-0.220204,4.742618,-2.618901,1.024506,-0.427167,-0.220204,1.103215,-2.536187,0.596541,-0.427784,-0.220204,1.020501,-2.536187,0.595302,-0.261561,-0.261561,-0.220204,-0.302918,0.058487,0.234721,0.234721,0.772360,-0.302918,0.760336,-0.302918,-0.302918,-0.302918,-0.302918,,-0.302918,-0.302918,-0.302918,-0.302918,,-0.302918,-0.302918,-0.302918,-0.302918,,-1.005984,-1.005984,-0.302918,-1.709050,0.994286,-0.261561,-0.261561,-0.220204,-0.302918,0.058487,-0.054777,-0.054777,0.193365,-0.302918,0.350924,-0.323596,-0.261561,-0.220204,-0.551059,0.156575,0.255400,-0.261561,2.013066,-0.468345,1.176310,-0.294839,-0.305145,1.407202,-0.344289,0.066360,-0.000641,-0.216576,17.349745,-0.354351,0.663338,0.062893,-0.216576,17.349745,-0.349430,0.739416,-0.118436,-0.246099,3.267165,-0.349180,0.411444,-0.163605,-0.246099,2.799011,-0.354351,0.246785,-0.177129,-0.260861,2.799011,-0.354351,0.237668,-0.160608,-0.246099,2.79901
1,-0.344938,0.253790,-0.211674,-0.253480,0.742929,-0.34451,0.142472,-0.318889,-0.318889,-0.303109,-0.334669,0.022316,-0.302019,-0.302019,-0.300928,-0.303109,0.001543,-0.303109,-0.303109,-0.303109,-0.303109,,-0.303109,-0.303109,-0.303109,-0.303109,,-0.303109,-0.303109,-0.303109,-0.303109,,-0.296747,-0.296747,-0.290384,-0.303109,0.008998,-0.259843,-0.259843,-0.216576,-0.303109,0.061188,0.931631,0.931631,2.166371,-0.303109,1.746186,0.201801,-0.148445,1.407202,-0.303109,0.809507,-0.306536,-0.304127,-0.286499,-0.331388,0.018554,-0.298082,-0.303251,0.555477,-0.322881,0.033279,-0.000649,-0.214506,17.416177,-0.332852,0.664832,0.064095,-0.214506,17.416177,-0.332847,0.744667,-0.118431,-0.244087,3.261363,-0.332847,0.411999,-0.165326,-0.249636,2.709957,-0.332852,0.243766,-0.179635,-0.269441,2.709957,-0.332852,0.232586,-0.166615,-0.267092,2.709957,-0.332843,0.251191,-0.217667,-0.269425,0.665632,-0.332842,0.140352,-0.317531,-0.317531,-0.302230,-0.332833,0.021639,-0.296406,-0.296406,-0.290581,-0.302230,0.008237,-0.302230,-0.302230,-0.302230,-0.302230,,-0.302230,-0.302230,-0.302230,-0.302230,,-0.302230,-0.302230,-0.302230,-0.302230,,-0.317509,-0.317509,-0.302230,-0.332788,0.021608,-0.258368,-0.258368,-0.214506,-0.302230,0.062030,0.937577,0.937577,2.177384,-0.302230,1.753352,0.203308,-0.148513,1.412490,-0.302230,0.812130,-0.302220,-0.302740,-0.270569,-0.332829,0.025428
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19586,-0.302918,-0.302918,-0.302918,-0.302918,0.0,-0.019053,-0.220204,4.742618,-2.618901,0.983125,0.055309,-0.220204,4.742618,-2.618901,1.020367,0.156565,-0.220204,4.659904,-2.536187,1.082846,-0.047148,-0.220204,4.659904,-2.536187,0.881864,0.211079,-0.220204,4.659904,-2.536187,1.014902,0.200858,-0.220204,3.584626,-2.536187,0.993485,0.200858,-0.220204,3.584626,-2.536187,0.993485,-0.302918,-0.302918,-0.302918,-0.302918,,0.590390,-0.220204,2.261207,-0.964627,1.515490,-0.247775,-0.220204,-0.220204,-0.302918,0.047755,-0.302918,-0.302918,-0.302918,-0.302918,,-0.302918,-0.302918,-0.302918,-0.302918,,-0.302918,-0.302918,-0.302918,-0.302918,,-0.220204,-0.220204,-0.137490,-0.302918,0.116975,-0.302918,-0.302918,2.261207,-1.709050,1.572648,0.413934,-0.137490,1.682211,-0.302918,1.101470,-0.826771,-0.633772,-0.302918,-1.543623,0.642475,0.083215,-0.220204,4.659904,-2.536187,0.975628,-0.019053,-0.220204,4.742618,-2.618901,0.983125,0.055309,-0.220204,4.742618,-2.618901,1.020367,0.156565,-0.220204,4.659904,-2.536187,1.082846,-0.047148,-0.220204,4.659904,-2.536187,0.881864,0.211079,-0.220204,4.659904,-2.536187,1.014902,0.200858,-0.220204,3.584626,-2.536187,0.993485,0.200858,-0.220204,3.584626,-2.536187,0.993485,-0.302918,-0.302918,-0.302918,-0.302918,,0.590390,-0.220204,2.261207,-0.964627,1.515490,-0.247775,-0.220204,-0.220204,-0.302918,0.047755,-0.302918,-0.302918,-0.302918,-0.302918,,-0.302918,-0.302918,-0.302918,-0.302918,,-0.302918,-0.302918,-0.302918,-0.302918,,-0.220204,-0.220204,-0.137490,-0.302918,0.116975,-0.302918,-0.302918,2.261207,-1.709050,1.572648,0.413934,-0.137490,1.682211,-0.302918,1.101470,-0.826771,-0.633772,-0.302918,-1.543623,0.642475,-0.210986,-0.216576,1.850050,-0.314434,0.118223,-0.000641,-0.216576,17.349745,-0.354351,0.663338,-0.143427,-0.253480,9.476884,-0.350133,0.447636,-0.083593,-0.231337,3.042334,-0.349430,0.459714,-0.053585,-0.187052,2.625035,-0.349430,0.335958,-0.124736,-0.209195,1.407202,-0.349430,0.224995,-0.124345,-0.208806,1.407202,-0.349430,0.22510
2,-0.124345,-0.208806,1.407202,-0.34943,0.225102,-0.211485,-0.211485,-0.211485,-0.211485,,-0.266090,-0.275622,-0.211485,-0.349430,0.056817,-0.224720,-0.216576,-0.211485,-0.246099,0.018689,-0.211485,-0.211485,-0.211485,-0.211485,,-0.211485,-0.211485,-0.211485,-0.211485,,-0.211485,-0.211485,-0.211485,-0.211485,,-0.244982,-0.244982,-0.211485,-0.278479,0.047372,-0.047988,-0.246099,0.857459,-0.349430,0.508782,-0.142583,-0.211485,0.064371,-0.280636,0.182532,-0.235686,-0.237664,-0.211485,-0.257908,0.023274,-0.210024,-0.214506,1.442485,-0.292967,0.094789,-0.000649,-0.214506,17.416177,-0.332852,0.664832,-0.143601,-0.258883,9.527715,-0.332848,0.447000,-0.083550,-0.237746,3.046316,-0.332847,0.455785,-0.053366,-0.184924,2.551468,-0.332847,0.334947,-0.122183,-0.199730,1.353385,-0.332847,0.223518,-0.121836,-0.199730,1.353385,-0.332847,0.223654,-0.121836,-0.199730,1.353385,-0.332847,0.223654,-0.210424,-0.210424,-0.210424,-0.210424,,-0.260995,-0.273669,-0.197473,-0.332847,0.056546,-0.223006,-0.214506,-0.210424,-0.244087,0.018371,-0.210424,-0.210424,-0.210424,-0.210424,,-0.210424,-0.210424,-0.210424,-0.210424,,-0.210424,-0.210424,-0.210424,-0.210424,,-0.243001,-0.243001,-0.210424,-0.275579,0.046071,-0.057812,-0.273640,0.860638,-0.332847,0.515917,-0.136114,-0.210424,0.067951,-0.265868,0.178886,-0.247221,-0.261831,-0.210424,-0.269409,0.032092
19587,-0.220204,-0.220204,-0.220204,-0.220204,0.0,-0.019053,-0.220204,4.742618,-2.618901,0.983125,-0.017138,-0.220204,4.659904,-2.536187,0.925905,-0.086383,-0.220204,2.261207,-2.536187,0.710982,-0.047148,-0.220204,4.659904,-2.536187,0.881864,-0.225204,-0.220204,4.659904,-2.536187,0.744234,-0.225204,-0.220204,4.659904,-2.536187,0.744234,-0.259872,-0.220204,3.419199,-2.536187,0.742253,-0.137490,-0.220204,0.193365,-0.385631,0.298228,0.193365,-0.220204,2.261207,-0.302918,1.014382,-0.096133,-0.220204,0.606933,-0.633772,0.392968,-0.468345,-0.220204,-0.220204,-0.964627,0.429793,0.510434,-0.220204,2.261207,-0.633772,1.303014,0.027937,-0.220204,2.261207,-0.633772,0.997149,-0.302918,-0.302918,-0.220204,-0.385631,0.116975,-0.220204,-0.220204,-0.220204,-0.220204,0.000000,0.379470,-0.220204,2.178493,-0.220204,1.199349,-0.178847,-0.178847,-0.137490,-0.220204,0.058487,0.049235,-0.220204,3.419199,-2.122619,0.895374,-0.019053,-0.220204,4.742618,-2.618901,0.983125,-0.017138,-0.220204,4.659904,-2.536187,0.925905,-0.086383,-0.220204,2.261207,-2.536187,0.710982,-0.047148,-0.220204,4.659904,-2.536187,0.881864,-0.225204,-0.220204,4.659904,-2.536187,0.744234,-0.225204,-0.220204,4.659904,-2.536187,0.744234,-0.259872,-0.220204,3.419199,-2.536187,0.742253,-0.137490,-0.220204,0.193365,-0.385631,0.298228,0.193365,-0.220204,2.261207,-0.302918,1.014382,-0.096133,-0.220204,0.606933,-0.633772,0.392968,-0.468345,-0.220204,-0.220204,-0.964627,0.429793,0.510434,-0.220204,2.261207,-0.633772,1.303014,0.027937,-0.220204,2.261207,-0.633772,0.997149,-0.302918,-0.302918,-0.220204,-0.385631,0.116975,-0.220204,-0.220204,-0.220204,-0.220204,0.000000,0.379470,-0.220204,2.178493,-0.220204,1.199349,-0.178847,-0.178847,-0.137490,-0.220204,0.058487,-0.036354,-0.039436,1.027617,-0.232534,0.148217,-0.000641,-0.216576,17.349745,-0.354351,0.663338,-0.141905,-0.256835,5.540454,-0.349430,0.395621,-0.183626,-0.242027,1.850050,-0.349430,0.213956,-0.053585,-0.187052,2.625035,-0.349430,0.335958,-0.118067,-0.199149,1.850050,-
0.349430,0.228613,-0.118067,-0.199149,1.850050,-0.349430,0.228613,-0.105126,-0.187052,1.850050,-0.34943,0.235016,-0.066148,-0.047872,-0.039436,-0.111136,0.039188,0.009049,-0.157529,0.949083,-0.290384,0.472000,0.080528,-0.173488,1.850050,-0.334669,0.730408,-0.163153,-0.128006,-0.039436,-0.322016,0.144531,-0.244691,-0.308301,-0.039436,-0.349430,0.129415,-0.180513,-0.265781,0.093927,-0.328764,0.161091,-0.075286,-0.075286,-0.039436,-0.111136,0.050699,0.388650,0.137703,1.407202,-0.128006,0.705475,-0.257108,-0.319907,-0.039436,-0.349180,0.146266,-0.116101,-0.116101,-0.039436,-0.192767,0.108421,-0.034207,-0.037015,0.935053,-0.212924,0.135023,-0.000649,-0.214506,17.416177,-0.332852,0.664832,-0.141473,-0.254531,5.583484,-0.332847,0.395369,-0.183670,-0.241026,1.442485,-0.332847,0.197070,-0.053366,-0.184924,2.551468,-0.332847,0.334947,-0.119858,-0.206020,1.505495,-0.332847,0.225916,-0.119858,-0.206020,1.505495,-0.332847,0.225916,-0.107786,-0.184924,1.442485,-0.332847,0.231289,-0.063076,-0.047578,-0.037015,-0.104635,0.036376,0.014018,-0.155342,0.952444,-0.273684,0.470583,0.084062,-0.168537,1.863604,-0.332833,0.734629,-0.165199,-0.125760,-0.037015,-0.332820,0.151795,-0.238821,-0.302235,-0.037015,-0.332847,0.127359,-0.177719,-0.267092,0.095594,-0.332827,0.160318,-0.073991,-0.073991,-0.037015,-0.110967,0.052292,0.391920,0.140475,1.412490,-0.125760,0.706873,-0.251486,-0.318042,-0.037015,-0.332847,0.143660,-0.113356,-0.113356,-0.037015,-0.189696,0.107962
19588,-0.220204,-0.220204,-0.220204,-0.220204,0.0,-0.019053,-0.220204,4.742618,-2.618901,0.983125,0.055309,-0.220204,4.742618,-2.618901,1.020367,0.089972,-0.220204,2.261207,-2.618901,1.104731,-0.020491,-0.220204,4.742618,-2.618901,1.037749,0.115076,-0.220204,2.261207,-2.122619,1.061762,0.105572,-0.220204,2.261207,-2.122619,1.054277,0.105572,-0.220204,2.261207,-2.122619,1.054277,-0.085794,-0.220204,0.937788,-0.551059,0.437540,-0.009284,-0.220204,2.261207,-2.122619,1.175422,-0.006527,-0.220204,1.847638,-1.709050,0.854112,-0.132625,-0.220204,2.261207,-2.370760,1.373978,0.193365,-0.220204,2.261207,-0.220204,0.884247,-0.385631,-0.220204,-0.220204,-1.709050,0.496282,0.288803,-0.220204,2.261207,-1.543623,1.066144,0.193365,0.193365,0.606933,-0.220204,0.584874,0.097591,-0.220204,2.261207,-0.964627,0.987290,-0.633772,-0.220204,-0.220204,-1.460909,0.716322,-0.074289,-0.220204,4.659904,-2.536187,0.931173,-0.019053,-0.220204,4.742618,-2.618901,0.983125,0.055309,-0.220204,4.742618,-2.618901,1.020367,0.089972,-0.220204,2.261207,-2.618901,1.104731,-0.020491,-0.220204,4.742618,-2.618901,1.037749,0.115076,-0.220204,2.261207,-2.122619,1.061762,0.105572,-0.220204,2.261207,-2.122619,1.054277,0.105572,-0.220204,2.261207,-2.122619,1.054277,-0.085794,-0.220204,0.937788,-0.551059,0.437540,-0.009284,-0.220204,2.261207,-2.122619,1.175422,-0.006527,-0.220204,1.847638,-1.709050,0.854112,-0.132625,-0.220204,2.261207,-2.370760,1.373978,0.193365,-0.220204,2.261207,-0.220204,0.884247,-0.385631,-0.220204,-0.220204,-1.709050,0.496282,0.288803,-0.220204,2.261207,-1.543623,1.066144,0.193365,0.193365,0.606933,-0.220204,0.584874,0.097591,-0.220204,2.261207,-0.964627,0.987290,-0.633772,-0.220204,-0.220204,-1.460909,0.716322,-0.266549,-0.275622,0.964353,-0.334337,0.062541,-0.000641,-0.216576,17.349745,-0.354351,0.663338,-0.143427,-0.253480,9.476884,-0.350133,0.447636,-0.029176,-0.268371,9.476884,-0.349430,0.923141,-0.170765,-0.275622,2.912886,-0.349430,0.272097,-0.033110,-0.214074,2.588131,-0.349430,0.4643
49,-0.033271,-0.206383,2.588131,-0.349430,0.457915,-0.033271,-0.206383,2.588131,-0.34943,0.457915,0.365733,-0.277051,2.588131,-0.330127,1.216394,-0.186855,-0.239845,0.399340,-0.334669,0.199187,-0.055266,-0.283003,2.056713,-0.342049,0.674820,-0.005540,-0.275622,2.056713,-0.338886,0.657716,-0.189820,-0.305145,0.300081,-0.334669,0.236776,0.286959,-0.157529,2.588131,-0.334669,1.021874,-0.123835,-0.300928,1.111970,-0.342049,0.404122,-0.198124,-0.198124,-0.120625,-0.275622,0.109599,0.165495,-0.272568,2.588131,-0.334669,0.872230,-0.236258,-0.275622,-0.128006,-0.305145,0.094904,-0.267605,-0.273669,0.555034,-0.312909,0.041797,-0.000649,-0.214506,17.416177,-0.332852,0.664832,-0.143601,-0.258883,9.527715,-0.332848,0.447000,-0.030915,-0.273160,9.527715,-0.332847,0.925431,-0.173118,-0.273669,2.862054,-0.332847,0.269945,-0.031519,-0.206052,2.625311,-0.332847,0.467148,-0.031761,-0.197473,2.625311,-0.332847,0.460757,-0.031761,-0.197473,2.625311,-0.332847,0.460757,0.373608,-0.274624,2.625311,-0.332828,1.225976,-0.192370,-0.258392,0.401611,-0.332833,0.203314,-0.054074,-0.283086,2.063288,-0.332840,0.676692,-0.018294,-0.303266,2.063288,-0.332837,0.665222,-0.184927,-0.303251,0.317951,-0.332833,0.241357,0.288385,-0.155342,2.625311,-0.332833,1.035455,-0.119947,-0.302230,1.116672,-0.332840,0.405992,-0.192323,-0.192323,-0.110977,-0.273669,0.115041,0.169461,-0.271628,2.595759,-0.332833,0.872874,-0.244078,-0.273669,-0.125760,-0.332803,0.106646
19589,-0.220204,-0.220204,-0.220204,-0.220204,0.0,-0.019053,-0.220204,4.742618,-2.618901,0.983125,-0.141818,-0.220204,3.088344,-2.453474,1.024294,-0.180314,-0.220204,2.261207,-2.453474,1.011184,-0.059072,-0.220204,4.742618,-2.618901,0.992623,-0.088309,-0.220204,4.659904,-2.536187,0.935875,-0.285441,-0.220204,2.261207,-2.536187,0.652257,-0.285441,-0.220204,2.261207,-2.536187,0.652257,0.524219,-0.220204,2.261207,-1.460909,1.664581,0.400149,0.400149,1.020501,-0.220204,0.877311,-0.799200,-0.799200,-0.220204,-1.378196,0.818824,-0.220204,-0.220204,-0.220204,-0.220204,,-0.275346,-0.220204,-0.220204,-0.385631,0.095510,0.122467,-0.220204,2.178493,-0.220204,0.906622,0.055508,-0.220204,1.020501,-0.633772,0.860911,-0.385631,-0.385631,-0.220204,-0.551059,0.233950,-0.220204,-0.220204,-0.220204,-0.220204,,-0.484888,-0.220204,-0.220204,-1.543623,0.591851,0.094585,-0.220204,4.577190,-2.288046,0.958105,-0.019053,-0.220204,4.742618,-2.618901,0.983125,-0.141818,-0.220204,3.088344,-2.453474,1.024294,-0.180314,-0.220204,2.261207,-2.453474,1.011184,-0.059072,-0.220204,4.742618,-2.618901,0.992623,-0.088309,-0.220204,4.659904,-2.536187,0.935875,-0.285441,-0.220204,2.261207,-2.536187,0.652257,-0.285441,-0.220204,2.261207,-2.536187,0.652257,0.524219,-0.220204,2.261207,-1.460909,1.664581,0.400149,0.400149,1.020501,-0.220204,0.877311,-0.799200,-0.799200,-0.220204,-1.378196,0.818824,-0.220204,-0.220204,-0.220204,-0.220204,,-0.275346,-0.220204,-0.220204,-0.385631,0.095510,0.122467,-0.220204,2.178493,-0.220204,0.906622,0.055508,-0.220204,1.020501,-0.633772,0.860911,-0.385631,-0.385631,-0.220204,-0.551059,0.233950,-0.220204,-0.220204,-0.220204,-0.220204,,-0.484888,-0.220204,-0.220204,-1.543623,0.591851,-0.128432,-0.128006,1.052923,-0.283674,0.102653,-0.000641,-0.216576,17.349745,-0.354351,0.663338,-0.122974,-0.255157,4.696933,-0.349430,0.435576,-0.178605,-0.272568,2.292899,-0.349430,0.289099,-0.163605,-0.246099,2.799011,-0.354351,0.246785,-0.169354,-0.246099,1.407202,-0.353121,0.215317,-0.161751,-0
.239884,1.407202,-0.349430,0.220400,-0.161751,-0.239884,1.407202,-0.34943,0.220400,-0.098483,-0.128006,0.019610,-0.260861,0.114818,-0.206735,-0.206735,-0.128006,-0.285463,0.111339,-0.218421,-0.218421,-0.128006,-0.308836,0.127866,-0.128006,-0.128006,-0.128006,-0.128006,,0.719732,-0.128006,2.588131,-0.300928,1.620389,0.017609,-0.128006,1.111970,-0.319156,0.508313,0.303689,-0.128006,1.265490,-0.226417,0.834396,0.179717,0.179717,0.487440,-0.128006,0.435186,-0.128006,-0.128006,-0.128006,-0.128006,,-0.180304,-0.157529,-0.009913,-0.305145,0.124922,-0.126134,-0.125760,0.909748,-0.262259,0.090012,-0.000649,-0.214506,17.416177,-0.332852,0.664832,-0.125558,-0.258893,4.611640,-0.332847,0.436518,-0.182705,-0.272634,2.299942,-0.332847,0.287048,-0.165326,-0.249636,2.709957,-0.332852,0.243766,-0.170296,-0.244087,1.361445,-0.332851,0.213742,-0.164163,-0.244087,1.361445,-0.332847,0.219328,-0.164163,-0.244087,1.361445,-0.332847,0.219328,-0.096179,-0.125760,0.022148,-0.244102,0.116461,-0.199720,-0.199720,-0.125760,-0.273679,0.104594,-0.229284,-0.229284,-0.125760,-0.332807,0.146404,-0.125760,-0.125760,-0.125760,-0.125760,,0.722954,-0.125760,2.595759,-0.301136,1.624265,0.022218,-0.125760,1.116672,-0.303251,0.507744,0.308104,-0.125760,1.264587,-0.214516,0.829527,0.180299,0.180299,0.486358,-0.125760,0.432833,-0.125760,-0.125760,-0.125760,-0.125760,,-0.184917,-0.155342,-0.007434,-0.332799,0.133928


In [None]:
# Merge the aggregated features onto the feature table by row index,
# then remove the target column so it cannot leak into the model inputs.
full_merge_df_fin = (
    full_merge_df_fin
    .merge(agg_df, left_index=True, right_index=True)
    .drop(columns=['state'])
)

# データセットの整理

In [None]:
# Split the merged feature table back into its train and test parts.
# The first len(train) rows are the training rows; everything after them is
# test. Slicing by position avoids hard-coding the last row label (19591),
# which silently breaks as soon as the input size changes.
train_df = full_merge_df_fin.iloc[:len(train)].copy()
test_df = full_merge_df_fin.iloc[len(train):].copy()

# Fail loudly here rather than later if the merged table is not train + test.
assert len(train_df) == len(train) and len(test_df) == len(test), \
    'merged feature table does not match len(train) + len(test)'

In [None]:
# Renumber the test rows from 0. drop=True discards the old row labels
# directly instead of materialising them as an 'index' column that then has
# to be dropped again.
test_df = test_df.reset_index(drop=True)
test_df.head()

Unnamed: 0,goal,country,duration,category1,category2,goal_min,goal_max,goal_per_day_max,goal_per_day_min,goal_1-1000,goal_100000+,goal_10001-11000,goal_1001-2000,goal_11001-12000,goal_12001-13000,goal_13001-14000,goal_14001-15000,goal_15001-16000,goal_16001-17000,goal_17001-18000,goal_18001-19000,goal_19001-20000,goal_20001-21000,goal_2001-3000,goal_21001-22000,goal_22001-23000,goal_23001-24000,goal_24001-25000,goal_25001-26000,goal_26001-27000,goal_27001-28000,goal_28001-29000,goal_29001-30000,goal_30001-31000,goal_3001-4000,goal_31001-32000,goal_32001-33000,goal_33001-34000,goal_34001-35000,goal_35001-36000,goal_36001-37000,goal_37001-38000,goal_38001-39000,goal_39001-40000,goal_40001-41000,goal_4001-5000,goal_41001-42000,goal_42001-43000,goal_43001-44000,goal_44001-45000,goal_45001-46000,goal_46001-47000,goal_47001-48000,goal_48001-49000,goal_49001-50000,goal_50001-51000,goal_5001-6000,goal_51001-52000,goal_52001-53000,goal_53001-54000,goal_54001-55000,goal_55001-56000,goal_56001-57000,goal_57001-58000,goal_58001-59000,goal_59001-60000,goal_60001-61000,goal_6001-7000,goal_61001-62000,goal_62001-63000,goal_63001-64000,goal_64001-65000,goal_65001-66000,goal_66001-67000,goal_67001-68000,goal_68001-69000,goal_69001-70000,goal_70001-71000,goal_7001-8000,goal_71001-72000,goal_72001-73000,goal_73001-74000,goal_74001-75000,goal_75001-76000,goal_76001-77000,goal_77001-78000,goal_78001-79000,goal_79001-80000,goal_80001-81000,goal_8001-9000,goal_81001-82000,goal_82001-83000,goal_83001-84000,goal_84001-85000,goal_85001-86000,goal_86001-87000,goal_87001-88000,goal_88001-89000,goal_89001-90000,goal_90001-91000,goal_9001-10000,goal_91001-92000,goal_92001-93000,goal_93001-94000,goal_94001-95000,goal_95001-96000,goal_96001-97000,goal_97001-98000,goal_98001-99000,goal_99001-100000,country_AT,country_AU,country_BE,country_CA,country_CH,country_DE,country_DK,country_ES,country_FR,country_GB,country_HK,country_IE,country_IT,country_JP,country_LU,country_MX,country_NL,country_NO,count
ry_NZ,country_SE,country_SG,country_US,category1_art,category1_comics,category1_crafts,category1_dance,category1_design,category1_fashion,category1_film & video,category1_food,category1_games,category1_journalism,category1_music,category1_photography,category1_publishing,category1_technology,category1_theater,category2_3d printing,category2_academic,category2_accessories,category2_action,category2_animals,category2_animation,category2_anthologies,category2_apparel,category2_apps,category2_architecture,category2_art books,category2_audio,category2_bacon,category2_blues,category2_calendars,category2_camera equipment,category2_candles,category2_ceramics,category2_children's books,category2_childrenswear,category2_chiptune,category2_civic design,category2_classical music,category2_comedy,category2_comic books,category2_community gardens,category2_conceptual art,category2_cookbooks,category2_country & folk,category2_couture,category2_crochet,category2_digital art,category2_diy,category2_diy electronics,category2_documentary,category2_drama,category2_drinks,category2_electronic music,category2_embroidery,category2_events,category2_experimental,category2_fabrication tools,category2_faith,category2_family,category2_fantasy,category2_farmer's markets,category2_farms,category2_festivals,category2_fiction,category2_fine art,category2_flight,category2_food trucks,category2_footwear,category2_gadgets,category2_gaming hardware,category2_glass,category2_graphic design,category2_graphic novels,category2_hardware,category2_hip-hop,category2_horror,category2_illustration,category2_immersive,category2_indie rock,category2_installations,category2_interactive design,category2_jazz,category2_jewelry,category2_kids,category2_knitting,category2_latin,category2_letterpress,category2_literary journals,category2_literary spaces,category2_live games,category2_makerspaces,category2_metal,category2_mixed media,category2_mobile games,category2_movie theaters,category2_music 
videos,category2_musical,category2_narrative film,category2_nature,category2_nonfiction,category2_painting,category2_people,category2_performance art,category2_performances,category2_periodicals,category2_pet fashion,category2_photo,category2_photobooks,category2_places,category2_playing cards,category2_plays,category2_poetry,category2_pop,category2_pottery,category2_print,category2_printing,category2_product design,category2_public art,...,mean_agg_func_state_x,median_agg_func_state_x,max_agg_func_state_x,min_agg_func_state_x,std_agg_func_state_x,mean_agg_func_state_y,median_agg_func_state_y,max_agg_func_state_y,min_agg_func_state_y,std_agg_func_state_y,mean_agg_func_state_x.1,median_agg_func_state_x.1,max_agg_func_state_x.1,min_agg_func_state_x.1,std_agg_func_state_x.1,mean_agg_func_state_y.1,median_agg_func_state_y.1,max_agg_func_state_y.1,min_agg_func_state_y.1,std_agg_func_state_y.1,mean_agg_func_state_x.2,median_agg_func_state_x.2,max_agg_func_state_x.2,min_agg_func_state_x.2,std_agg_func_state_x.2,mean_agg_func_state_y.2,median_agg_func_state_y.2,max_agg_func_state_y.2,min_agg_func_state_y.2,std_agg_func_state_y.2,mean_agg_func_state_x.3,median_agg_func_state_x.3,max_agg_func_state_x.3,min_agg_func_state_x.3,std_agg_func_state_x.3,mean_agg_func_state_y.3,median_agg_func_state_y.3,max_agg_func_state_y.3,min_agg_func_state_y.3,std_agg_func_state_y.3,mean_agg_func_state_x.4,median_agg_func_state_x.4,max_agg_func_state_x.4,min_agg_func_state_x.4,std_agg_func_state_x.4,mean_agg_func_state_y.4,median_agg_func_state_y.4,max_agg_func_state_y.4,min_agg_func_state_y.4,std_agg_func_state_y.4,mean_agg_func_state_x.5,median_agg_func_state_x.5,max_agg_func_state_x.5,min_agg_func_state_x.5,std_agg_func_state_x.5,mean_agg_func_state_y.5,median_agg_func_state_y.5,max_agg_func_state_y.5,min_agg_func_state_y.5,std_agg_func_state_y.5,mean_agg_func_state_x.6,median_agg_func_state_x.6,max_agg_func_state_x.6,min_agg_func_state_x.6,std_agg_func_state_x.6,mean_agg_func_state_y.6,medi
an_agg_func_state_y.6,max_agg_func_state_y.6,min_agg_func_state_y.6,std_agg_func_state_y.6,mean_agg_func_state_x.7,median_agg_func_state_x.7,max_agg_func_state_x.7,min_agg_func_state_x.7,std_agg_func_state_x.7,mean_agg_func_state_y.7,median_agg_func_state_y.7,max_agg_func_state_y.7,min_agg_func_state_y.7,std_agg_func_state_y.7,mean_agg_func_state_x.8,median_agg_func_state_x.8,max_agg_func_state_x.8,min_agg_func_state_x.8,std_agg_func_state_x.8,mean_agg_func_state_y.8,median_agg_func_state_y.8,max_agg_func_state_y.8,min_agg_func_state_y.8,std_agg_func_state_y.8,mean_agg_func_state_x.9,median_agg_func_state_x.9,max_agg_func_state_x.9,min_agg_func_state_x.9,std_agg_func_state_x.9,mean_agg_func_state_y.9,median_agg_func_state_y.9,max_agg_func_state_y.9,min_agg_func_state_y.9,std_agg_func_state_y.9,mean_agg_func_state_x.10,median_agg_func_state_x.10,max_agg_func_state_x.10,min_agg_func_state_x.10,std_agg_func_state_x.10,mean_agg_func_state_y.10,median_agg_func_state_y.10,max_agg_func_state_y.10,min_agg_func_state_y.10,std_agg_func_state_y.10,mean_agg_func_state_x.11,median_agg_func_state_x.11,max_agg_func_state_x.11,min_agg_func_state_x.11,std_agg_func_state_x.11,mean_agg_func_state_y.11,median_agg_func_state_y.11,max_agg_func_state_y.11,min_agg_func_state_y.11,std_agg_func_state_y.11,mean_agg_func_state_x.12,median_agg_func_state_x.12,max_agg_func_state_x.12,min_agg_func_state_x.12,std_agg_func_state_x.12,mean_agg_func_state_y.12,median_agg_func_state_y.12,max_agg_func_state_y.12,min_agg_func_state_y.12,std_agg_func_state_y.12,mean_agg_func_state_x.13,median_agg_func_state_x.13,max_agg_func_state_x.13,min_agg_func_state_x.13,std_agg_func_state_x.13,mean_agg_func_state_y.13,median_agg_func_state_y.13,max_agg_func_state_y.13,min_agg_func_state_y.13,std_agg_func_state_y.13,mean_agg_func_state_x.14,median_agg_func_state_x.14,max_agg_func_state_x.14,min_agg_func_state_x.14,std_agg_func_state_x.14,mean_agg_func_state_y.14,median_agg_func_state_y.14,max_agg_func_state_y.14,min
_agg_func_state_y.14,std_agg_func_state_y.14,mean_agg_func_state_x.15,median_agg_func_state_x.15,max_agg_func_state_x.15,min_agg_func_state_x.15,std_agg_func_state_x.15,mean_agg_func_state_y.15,median_agg_func_state_y.15,max_agg_func_state_y.15,min_agg_func_state_y.15,std_agg_func_state_y.15,mean_agg_func_state_x.16,median_agg_func_state_x.16,max_agg_func_state_x.16,min_agg_func_state_x.16,std_agg_func_state_x.16,mean_agg_func_state_y.16,median_agg_func_state_y.16,max_agg_func_state_y.16,min_agg_func_state_y.16,std_agg_func_state_y.16,mean_agg_func_state_x.17,median_agg_func_state_x.17,max_agg_func_state_x.17,min_agg_func_state_x.17,std_agg_func_state_x.17,mean_agg_func_state_y.17,median_agg_func_state_y.17,max_agg_func_state_y.17,min_agg_func_state_y.17,std_agg_func_state_y.17,mean_agg_func_state_x.18,median_agg_func_state_x.18,max_agg_func_state_x.18,min_agg_func_state_x.18,std_agg_func_state_x.18,mean_agg_func_state_y.18,median_agg_func_state_y.18,max_agg_func_state_y.18,min_agg_func_state_y.18,std_agg_func_state_y.18,mean_agg_func_state_x.19,median_agg_func_state_x.19,max_agg_func_state_x.19,min_agg_func_state_x.19,std_agg_func_state_x.19,mean_agg_func_state_y.19,median_agg_func_state_y.19,max_agg_func_state_y.19,min_agg_func_state_y.19,std_agg_func_state_y.19,mean_agg_func_state_x.20,median_agg_func_state_x.20,max_agg_func_state_x.20,min_agg_func_state_x.20,std_agg_func_state_x.20,mean_agg_func_state_y.20,median_agg_func_state_y.20,max_agg_func_state_y.20,min_agg_func_state_y.20,std_agg_func_state_y.20,mean_agg_func_state_x.21,median_agg_func_state_x.21,max_agg_func_state_x.21,min_agg_func_state_x.21,std_agg_func_state_x.21,mean_agg_func_state_y.21,median_agg_func_state_y.21,max_agg_func_state_y.21,min_agg_func_state_y.21,std_agg_func_state_y.21,mean_agg_func_state_x.22,median_agg_func_state_x.22,max_agg_func_state_x.22,min_agg_func_state_x.22,std_agg_func_state_x.22,mean_agg_func_state_y.22,median_agg_func_state_y.22,max_agg_func_state_y.22,min_agg_func_state_
y.22,std_agg_func_state_y.22,mean_agg_func_state_x.23,median_agg_func_state_x.23,max_agg_func_state_x.23,min_agg_func_state_x.23,std_agg_func_state_x.23,mean_agg_func_state_y.23,median_agg_func_state_y.23,max_agg_func_state_y.23,min_agg_func_state_y.23,std_agg_func_state_y.23,mean_agg_func_state_x.24,median_agg_func_state_x.24,max_agg_func_state_x.24,min_agg_func_state_x.24,std_agg_func_state_x.24,mean_agg_func_state_y.24,median_agg_func_state_y.24,max_agg_func_state_y.24,min_agg_func_state_y.24,std_agg_func_state_y.24
0,47,8,-0.220204,3,88,-0.314798,-0.315146,-0.187052,-0.184924,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0.29915,-0.220204,3.088344,-2.536187,1.115818,0.321821,-0.220204,3.088344,-2.536187,1.145955,0.317954,-0.220204,2.261207,-1.957192,1.0814,0.317954,-0.220204,2.261207,-1.957192,1.0814,-0.633772,-0.633772,-0.220204,-1.047341,0.584874,0.37947,-0.220204,2.178493,-0.220204,1.199349,0.276078,-0.220204,2.178493,-1.543623,1.220319,0.958466,0.896431,2.261207,-0.220204,1.166576,0.88816,0.606933,2.178493,-0.220204,1.189468,0.606933,-0.220204,2.261207,-0.220204,1.432643,-0.220204,-0.220204,-0.220204,-0.220204,,-0.413203,-0.220204,0.027937,-1.047341,0.56302,0.259535,-0.220204,1.020501,-0.302918,0.695484,0.346976,-0.220204,2.261207,-1.047341,1.313411,-0.174965,-0.187052,4.949989,-0.275622,0.224284,0.074193,-0.187052,3.267165,-0.34943,0.649347,-0.159056,-0.237664,2.689935,-0.34943,0.299519,-0.186524,-0.246099,1.620991,-0.34943,0.228696,-0.094796,-0.246099,3.267165,-0.34943,0.391222,-0.103516,-0.249908,3.267165,-0.34943,0.393253,-0.07807,-0.246099,3.267165,-0.34943,0.433627,-0.07807,-0.246099,3.267165,-0.34943,0.433627,-0.231337,-0.231337,-0.187052,-0.275622,0.062628,0.26718,-0.238093,1.879573,-0.334669,1.0767,0.379024,-0.111136,2.588131,-0.34918,1.043723,-0.201855,-0.179672,-0.1598,-0.288275,0.05868,-0.157225,-0.246099,0.338257,-0.34918,0.285174,-0.280543,-0.305145,-0.187052,-0.34943,0.083938,-0.187052,-0.187052,-0.187052,-0.187052,,-0.189736,-0.187052,-0.098483,-0.283674,0.092625,0.1996,0.137703,0.949083,-0.305145,0.502833,-0.258134,-0.246099
,-0.187052,-0.34918,0.061807,-0.174829,-0.184924,4.105285,-0.258893,0.187311,0.078461,-0.184924,3.261363,-0.332847,0.655767,-0.161297,-0.24408,2.696746,-0.332847,0.299845,-0.188704,-0.244087,1.625683,-0.332847,0.228872,-0.091671,-0.244087,3.261363,-0.332847,0.391581,-0.10048,-0.249636,3.261363,-0.332847,0.393469,-0.073847,-0.244087,3.261363,-0.332847,0.435466,-0.073847,-0.244087,3.261363,-0.332847,0.435466,-0.236685,-0.236685,-0.184924,-0.288445,0.073201,0.27384,-0.228802,1.885797,-0.332833,1.076353,0.386439,-0.14263,2.625311,-0.332847,1.056673,-0.19188,-0.17014,-0.145115,-0.282125,0.062486,-0.147801,-0.244087,0.355693,-0.332847,0.288374,-0.273674,-0.303251,-0.184924,-0.332847,0.078271,-0.184924,-0.184924,-0.184924,-0.184924,,-0.191643,-0.184924,-0.110955,-0.27905,0.084249,0.206231,0.140475,0.952444,-0.2934,0.502003,-0.254217,-0.244087,-0.184924,-0.332847,0.060913
1,58,9,-0.7992,12,18,-0.268232,-0.268351,-0.094632,-0.101314,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,-0.047148,-0.220204,4.659904,-2.536187,0.881864,0.211079,-0.220204,4.659904,-2.536187,1.014902,0.200858,-0.220204,3.584626,-2.536187,0.993485,0.200858,-0.220204,3.584626,-2.536187,0.993485,-0.7992,-0.7992,-0.7992,-0.7992,,-0.716486,-0.7992,-0.220204,-1.130055,0.46053,-0.413203,-0.220204,-0.220204,-0.7992,0.334283,0.689647,0.689647,2.178493,-0.7992,2.105547,-0.364953,-0.220204,-0.220204,-0.7992,0.289498,-0.551059,-0.551059,-0.302918,-0.7992,0.350924,-0.075455,-0.509702,2.178493,-1.460909,1.585825,-0.7992,-0.7992,-0.7992,-0.7992,,-0.336003,-0.220204,-0.13749,-0.7992,0.265458,-0.509702,-0.509702,-0.220204,-0.7992,0.409412,-0.153734,-0.157529,1.185778,-0.295304,0.098587,0.008182,-0.260861,88.205491,-0.34943,2.449515,-0.141905,-0.256835,5.540454,-0.34943,0.395621,-0.183626,-0.242027,1.85005,-0.34943,0.213956,-0.053585,-0.187052,2.625035,-0.34943,0.335958,-0.124736,-0.209195,1.407202,-0.34943,0.224995,-0.124345,-0.208806,1.407202,-0.34943,0.225102,-0.124345,-0.208806,1.407202,-0.34943,0.225102,-0.094632,-0.094632,-0.094632,-0.094632,,-0.17023,-0.098483,-0.094632,-0.317576,0.12762,0.227556,-0.094632,1.052923,-0.275622,0.720494,0.521179,0.521179,1.136989,-0.094632,0.870888,-0.134424,-0.199889,0.196749,-0.334669,0.245291,-0.168329,-0.168329,-0.094632,-0.242027,0.104224,0.194084,0.006774,1.11197,-0.34918,0.639892,-0.094632,-0.094632,-0.094632,-0.094632,,0.119754,-0.094632,1.285037,-0.335621,0.670212,-0.067034,-0.067034,-0.039436,-0.094632,
0.039029,-0.152082,-0.155342,0.998538,-0.273689,0.084684,0.005496,-0.272681,88.412333,-0.332847,2.445671,-0.141473,-0.254531,5.583484,-0.332847,0.395369,-0.18367,-0.241026,1.442485,-0.332847,0.19707,-0.053366,-0.184924,2.551468,-0.332847,0.334947,-0.122183,-0.19973,1.353385,-0.332847,0.223518,-0.121836,-0.19973,1.353385,-0.332847,0.223654,-0.121836,-0.19973,1.353385,-0.332847,0.223654,-0.101314,-0.101314,-0.101314,-0.101314,,-0.17677,-0.101314,-0.096179,-0.332815,0.135164,0.227508,-0.101314,1.057509,-0.273669,0.723949,0.53499,0.53499,1.171294,-0.101314,0.89987,-0.13444,-0.202283,0.199639,-0.332833,0.245331,-0.17117,-0.17117,-0.101314,-0.241026,0.098791,0.190963,-0.009987,1.116672,-0.332847,0.639989,-0.101314,-0.101314,-0.101314,-0.101314,,0.120679,-0.101314,1.289063,-0.332834,0.671667,-0.069165,-0.069165,-0.037015,-0.101314,0.045466
2,58,9,-0.220204,14,95,-0.268232,-0.268351,-0.157529,-0.155342,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,...,-0.047148,-0.220204,4.659904,-2.536187,0.881864,-0.225204,-0.220204,4.659904,-2.536187,0.744234,-0.225204,-0.220204,4.659904,-2.536187,0.744234,-0.184549,-0.220204,4.659904,-2.536187,0.744767,-0.220204,-0.220204,-0.220204,-0.220204,,0.965359,0.855074,2.261207,-0.220204,1.244376,-0.344274,-0.344274,-0.220204,-0.468345,0.175462,-0.220204,-0.220204,-0.220204,-0.220204,,-0.034098,-0.220204,2.261207,-1.295482,1.033803,-0.220204,-0.220204,-0.220204,-0.220204,,-0.495916,-0.220204,-0.220204,-1.047341,0.477548,-0.220204,-0.220204,-0.220204,-0.220204,,-0.220204,-0.220204,-0.220204,-0.220204,,-0.220204,-0.220204,-0.220204,-0.220204,,-0.153734,-0.157529,1.185778,-0.295304,0.098587,0.008182,-0.260861,88.205491,-0.34943,2.449515,-0.122974,-0.255157,4.696933,-0.34943,0.435576,-0.178605,-0.272568,2.292899,-0.34943,0.289099,-0.053585,-0.187052,2.625035,-0.34943,0.335958,-0.118067,-0.199149,1.85005,-0.34943,0.228613,-0.118067,-0.199149,1.85005,-0.34943,0.228613,-0.133243,-0.216576,1.53373,-0.34943,0.219985,-0.157529,-0.157529,-0.157529,-0.157529,,-0.214859,-0.157529,-0.137618,-0.34943,0.116966,-0.146048,-0.146048,-0.134567,-0.157529,0.016237,-0.157529,-0.157529,-0.157529,-0.157529,,0.055041,-0.157529,1.29649,-0.305145,0.543758,-0.157529,-0.157529,-0.157529,-0.157529,,0.073736,0.078657,0.300081,-0.157529,0.228845,-0.157529,-0.157529,-0.157529,-0.157529,,-0.157529,-0.157529,-0.157529,-0.157529,,-0.157529,-0.157529,-0.157529,-0.157529,,-0.152082,-0
.155342,0.998538,-0.273689,0.084684,0.005496,-0.272681,88.412333,-0.332847,2.445671,-0.125558,-0.258893,4.61164,-0.332847,0.436518,-0.182705,-0.272634,2.299942,-0.332847,0.287048,-0.053366,-0.184924,2.551468,-0.332847,0.334947,-0.119858,-0.20602,1.505495,-0.332847,0.225916,-0.119858,-0.20602,1.505495,-0.332847,0.225916,-0.134016,-0.214506,1.505495,-0.332847,0.218683,-0.155342,-0.155342,-0.155342,-0.155342,,-0.204882,-0.155342,-0.126457,-0.332847,0.111758,-0.14548,-0.14548,-0.135618,-0.155342,0.013947,-0.155342,-0.155342,-0.155342,-0.155342,,0.056507,-0.155342,1.29417,-0.288475,0.542574,-0.155342,-0.155342,-0.155342,-0.155342,,0.071456,0.081312,0.288399,-0.155342,0.222034,-0.155342,-0.155342,-0.155342,-0.155342,,-0.155342,-0.155342,-0.155342,-0.155342,,-0.155342,-0.155342,-0.155342,-0.155342,
3,3,3,-1.543623,0,31,-0.501058,-0.502326,-0.237664,-0.269409,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,-0.059072,-0.220204,4.742618,-2.618901,0.992623,-0.052729,-0.220204,4.742618,-2.618901,1.024506,-0.427167,-0.220204,1.103215,-2.536187,0.596541,-0.498896,-0.220204,1.103215,-2.453474,0.630735,-0.837375,-0.633772,0.606933,-2.536187,0.973199,-1.58498,-1.58498,-1.543623,-1.626337,0.058487,-0.040991,-0.220204,2.178493,-1.543623,1.209337,0.011394,-0.220204,2.261207,-1.543623,1.382086,-0.170576,-0.385631,2.261207,-1.543623,1.455855,-0.881914,-0.881914,-0.220204,-1.543623,0.935799,-1.212768,-1.543623,-0.220204,-1.874478,0.875359,-0.740119,-0.468345,-0.220204,-1.626337,0.623953,-0.367251,-0.964627,2.261207,-1.874478,1.604293,-1.543623,-1.543623,-1.543623,-1.543623,,-0.294839,-0.305145,1.407202,-0.344289,0.06636,-0.058953,-0.242027,3.853412,-0.34943,0.50206,-0.132644,-0.275622,10.707018,-0.353121,0.498883,-0.134178,-0.275622,5.146811,-0.34943,0.54822,-0.163605,-0.246099,2.799011,-0.354351,0.246785,-0.177129,-0.260861,2.799011,-0.354351,0.237668,-0.160608,-0.246099,2.799011,-0.344938,0.25379,-0.259316,-0.303109,0.875784,-0.344938,0.113551,-0.017447,-0.237664,1.513485,-0.340254,0.519858,-0.266863,-0.266863,-0.237664,-0.296061,0.041293,-0.130687,-0.128006,-0.009913,-0.237664,0.094708,-0.291649,-0.305145,-0.237664,-0.334669,0.036696,0.325386,-0.237664,2.588131,-0.33256,1.269308,-0.286166,-0.286166,-0.237664,-0.334669,0.068593,2.65983,-0.237664,8.492776,-0.275622,5.051515,-0.241594,-0.296061,0.078657,-0.334669,0.146668,0.788483,-0.237664,8.492
776,-0.322016,2.901384,-0.237664,-0.237664,-0.237664,-0.237664,,-0.298082,-0.303251,0.555477,-0.322881,0.033279,-0.058638,-0.244087,3.8931,-0.332847,0.505488,-0.135602,-0.273669,10.760287,-0.332851,0.500612,-0.137779,-0.297329,5.090554,-0.332847,0.548592,-0.165326,-0.249636,2.709957,-0.332852,0.243766,-0.179635,-0.269441,2.709957,-0.332852,0.232586,-0.166615,-0.267092,2.709957,-0.332843,0.251191,-0.267679,-0.305943,0.820914,-0.332843,0.107693,-0.064175,-0.303251,1.513073,-0.332838,0.528998,-0.301102,-0.301102,-0.269409,-0.332794,0.04482,-0.131654,-0.12576,-0.007434,-0.269409,0.105355,-0.293527,-0.303251,-0.269409,-0.318057,0.021017,0.327686,-0.269409,2.625311,-0.33283,1.289591,-0.301121,-0.301121,-0.269409,-0.332833,0.044847,2.666193,-0.269409,8.541657,-0.273669,5.088302,-0.251923,-0.332794,0.081312,-0.332833,0.151432,0.785629,-0.269409,8.541657,-0.33282,2.919237,-0.269409,-0.269409,-0.269409,-0.269409,
4,0,21,-0.220204,10,59,-0.547623,-0.549121,-0.334669,-0.332833,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,-0.020491,-0.220204,4.742618,-2.618901,1.037749,-0.025554,-0.220204,4.742618,-2.618901,1.036467,-0.432277,-0.220204,0.937788,-2.618901,0.557513,-0.389085,-0.220204,0.937788,-2.618901,0.508175,0.122467,-0.220204,2.261207,-0.302918,0.943599,0.465138,-0.220204,2.178493,-0.220204,1.170444,-0.007512,-0.220204,2.178493,-1.047341,1.010617,-0.120948,-0.220204,2.261207,-1.543623,1.090232,-0.385631,-0.220204,-0.220204,-0.881914,0.330855,-0.40631,-0.261561,-0.220204,-1.460909,0.428084,0.827503,0.441506,2.261207,-0.220204,1.28495,-0.220204,-0.220204,-0.220204,-0.220204,0.0,0.234721,-0.220204,2.178493,-0.385631,0.978332,-0.985306,-0.840557,-0.13749,-2.122619,0.970198,-0.323382,-0.334669,0.521505,-0.354351,0.04346,-0.000641,-0.216576,17.349745,-0.354351,0.663338,-0.143427,-0.25348,9.476884,-0.350133,0.447636,-0.029176,-0.268371,9.476884,-0.34943,0.923141,-0.170765,-0.275622,2.912886,-0.34943,0.272097,-0.174007,-0.275622,2.912886,-0.34943,0.269344,-0.162935,-0.275622,2.292899,-0.344062,0.278372,-0.112338,-0.246099,1.976578,-0.344062,0.322856,-0.097537,-0.334669,1.162872,-0.34943,0.557512,0.108394,-0.334669,2.588131,-0.34918,1.09489,-0.096726,-0.319907,1.136989,-0.334669,0.546575,-0.272549,-0.328792,-0.054198,-0.334669,0.111954,-0.314539,-0.319907,-0.283674,-0.334669,0.024841,-0.079649,-0.275622,1.162872,-0.334669,0.511157,-0.247135,-0.275622,-0.131114,-0.334669,0.104725,-0.314986,-0.334669,-0.275622,-0.334669,0.03409,0.222097,-0.297765,2.7990
11,-0.34918,1.263713,0.362018,-0.191814,2.166371,-0.334669,1.208333,-0.332821,-0.332833,-0.331975,-0.332852,4.4e-05,-0.000649,-0.214506,17.416177,-0.332852,0.664832,-0.143601,-0.258883,9.527715,-0.332848,0.447,-0.030915,-0.27316,9.527715,-0.332847,0.925431,-0.173118,-0.273669,2.862054,-0.332847,0.269945,-0.176493,-0.273669,2.862054,-0.332847,0.267042,-0.168982,-0.273669,2.152181,-0.332842,0.276061,-0.116592,-0.244087,1.949221,-0.332842,0.321197,-0.093266,-0.332833,1.166656,-0.332847,0.557147,0.115258,-0.332833,2.595759,-0.332847,1.09519,-0.0946,-0.332818,1.156267,-0.332833,0.554219,-0.274606,-0.332815,-0.03703,-0.332833,0.111004,-0.31535,-0.318042,-0.292483,-0.332833,0.02066,-0.081384,-0.288445,1.166656,-0.332833,0.513359,-0.238126,-0.258893,-0.122653,-0.332833,0.106618,-0.313111,-0.332833,-0.273669,-0.332833,0.034158,0.227926,-0.293393,2.804946,-0.332847,1.263656,0.33381,-0.203995,2.076063,-0.332833,1.167834


# ハイパラの調整

In [None]:
# Inspect the raw training table: column dtypes and non-null counts.
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9791 entries, 0 to 9790
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   id            9791 non-null   object
 1   goal          9791 non-null   object
 2   country       9791 non-null   object
 3   duration      9791 non-null   int64 
 4   category1     9791 non-null   object
 5   category2     9791 non-null   object
 6   html_content  9791 non-null   object
 7   state         9791 non-null   int64 
 8   cleaned_text  9791 non-null   object
dtypes: int64(2), object(7)
memory usage: 688.6+ KB


In [None]:
# Inspect the engineered training features: shape, dtypes and memory usage.
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9791 entries, 0 to 9790
Columns: 1534 entries, goal to std_agg_func_state_y
dtypes: float64(1243), int32(4), int64(4), uint8(283)
memory usage: 95.9 MB


In [None]:
from sklearn.model_selection import train_test_split

def objective(trial):
    """Optuna objective: hold-out accuracy of a LightGBM binary classifier.

    Splits `train_df` / `train['state']` into a stratified 75/25 hold-out
    (fixed random_state, so every trial sees the same split), trains a
    LightGBM booster with the hyperparameters suggested by `trial` and
    LightGBM's default number of boosting rounds, and scores it on the
    held-out quarter.

    Args:
        trial: the Optuna trial used to sample the hyperparameters.

    Returns:
        Accuracy of the rounded predictions on the hold-out set
        (higher is better, so the study must maximize it).
    """
    train_x, test_x, train_y, test_y = train_test_split(
        train_df, train['state'],
        test_size=0.25, random_state=0, stratify=train['state'])
    dtrain = lgb.Dataset(train_x, label=train_y)

    param = {
        'objective': 'binary',
        # 'binary_logloss' is the documented LightGBM metric name
        'metric': 'binary_logloss',
        'max_depth': trial.suggest_int('max_depth', 3, 9),
        # suggest_float(log=True) replaces the deprecated suggest_loguniform
        'lambda_l1': trial.suggest_float('lambda_l1', 1e-8, 10.0, log=True),
        'lambda_l2': trial.suggest_float('lambda_l2', 1e-8, 10.0, log=True),
        'num_leaves': trial.suggest_int('num_leaves', 2, 256),
        # suggest_float replaces the deprecated suggest_uniform
        'feature_fraction': trial.suggest_float('feature_fraction', 0.4, 1.0),
        'bagging_fraction': trial.suggest_float('bagging_fraction', 0.4, 1.0),
        'bagging_freq': trial.suggest_int('bagging_freq', 1, 7),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
    }

    gbm = lgb.train(param, dtrain)
    preds = gbm.predict(test_x)
    # The booster returns positive-class probabilities; round them to 0/1 labels.
    pred_labels = np.rint(preds)
    # Use the `metrics` module imported at the top of the notebook: the bare
    # `accuracy_score` name is only imported in a later cell, so relying on it
    # here breaks a fresh "Restart & Run All".
    accuracy = metrics.accuracy_score(test_y, pred_labels)
    return accuracy

In [None]:
import optuna

# Maximize the hold-out accuracy returned by `objective`.
# The sampler is seeded so that re-running the notebook explores the same
# sequence of trials instead of a different random search each time.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=0),
)
study.optimize(objective, n_trials=100)

[32m[I 2022-09-12 14:16:25,201][0m A new study created in memory with name: no-name-8604d12c-3791-45e0-91c5-4a8d3fd132ca[0m
[32m[I 2022-09-12 14:16:30,948][0m Trial 0 finished with value: 0.7912581699346405 and parameters: {'max_depth': 3, 'lambda_l1': 9.644425933827309e-05, 'lambda_l2': 0.0005124688550371716, 'num_leaves': 122, 'feature_fraction': 0.8123492581591932, 'bagging_fraction': 0.5671002656745363, 'bagging_freq': 7, 'min_child_samples': 43}. Best is trial 0 with value: 0.7912581699346405.[0m
[32m[I 2022-09-12 14:16:50,821][0m Trial 1 finished with value: 0.8022875816993464 and parameters: {'max_depth': 8, 'lambda_l1': 3.109833389148149, 'lambda_l2': 6.330992829362742e-08, 'num_leaves': 252, 'feature_fraction': 0.44517196642004536, 'bagging_fraction': 0.9459149491560128, 'bagging_freq': 5, 'min_child_samples': 9}. Best is trial 1 with value: 0.8022875816993464.[0m
[32m[I 2022-09-12 14:17:10,025][0m Trial 2 finished with value: 0.7961601307189542 and parameters: {'ma

In [None]:
# Summarise the finished study: trial count and the winning hyperparameters.
for label, value in (
    ('Number of finished trials:', len(study.trials)),
    ('Best trial:', study.best_trial.params),
):
    print(label, value)

Number of finished trials: 100
Best trial: {'max_depth': 7, 'lambda_l1': 0.000574426100948834, 'lambda_l2': 2.304131739420853e-07, 'num_leaves': 172, 'feature_fraction': 0.4351792178840505, 'bagging_fraction': 0.7222477701017944, 'bagging_freq': 5, 'min_child_samples': 88}


Number of finished trials: 100
Best trial: {

'max_depth': 7, 

'lambda_l1': 0.000574426100948834, 

'lambda_l2': 2.304131739420853e-07, 

'num_leaves': 172, 

'feature_fraction': 0.4351792178840505, 

'bagging_fraction': 0.7222477701017944, 

'bagging_freq': 5, 

'min_child_samples': 88}

# lgbによる予測

In [None]:
# Hyperparameters of the best Optuna trial (dict: parameter name -> value).
params = study.best_trial.params

In [None]:
# Target labels: the `state` column of the raw training table.
target = train['state']

In [None]:
# Stratified 5-fold cross-validation of LightGBM with the tuned hyperparameters.
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.metrics import accuracy_score

kf = KFold(n_splits=5, shuffle=True, random_state=0)  # plain 5-fold splitter (not used by the loop below)
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)  # splitter that stratifies on the target

# Per-fold validation accuracy (in %) and the fitted model of every fold
score_list = []
models = []

print('input size is {}行, {}カラム'.format(len(train_df), len(train_df.columns)))
for fold_, (train_index, valid_index) in enumerate(skf.split(train, target)):
    # The splitter yields positional indices, so select by position (.iloc)
    # for both the features and the target.
    train_x = train_df.iloc[train_index]
    valid_x = train_df.iloc[valid_index]
    train_y = target.iloc[train_index]
    valid_y = target.iloc[valid_index]

    print(f'fold{fold_ + 1} start')

    # Small learning rate with a very large round budget; early stopping on the
    # validation fold decides the actual number of trees. The tree/regularisation
    # settings come from the best Optuna trial (`params`) rather than a
    # hand-copied snapshot that goes stale whenever the study is re-run.
    gbm = lgb.LGBMClassifier(
        objective='binary',
        num_boost_round=50000,
        learning_rate=0.005,
        **params
        )
    gbm.fit(train_x, train_y, eval_set = [(valid_x, valid_y)],
                early_stopping_rounds=100,  # stop once the validation metric has not improved for 100 rounds
                verbose= 100)  # log the validation metric every 100 rounds

    # Hard-label predictions on the validation fold at the best iteration
    oof = gbm.predict(valid_x, num_iteration=gbm.best_iteration_)
    score_list.append(round(accuracy_score(valid_y, oof)*100,2))
    models.append(gbm)  # keep the fitted model for test-time ensembling
    print(f'fold{fold_ + 1} end\n' )
print(score_list, '平均score', np.mean(score_list), "%")

input size is 9791行, 1534カラム
fold1 start
Training until validation scores don't improve for 100 rounds.
[100]	valid_0's binary_logloss: 0.586686
[200]	valid_0's binary_logloss: 0.53127
[300]	valid_0's binary_logloss: 0.497455
[400]	valid_0's binary_logloss: 0.476276
[500]	valid_0's binary_logloss: 0.46126
[600]	valid_0's binary_logloss: 0.450839
[700]	valid_0's binary_logloss: 0.443566
[800]	valid_0's binary_logloss: 0.437904
[900]	valid_0's binary_logloss: 0.433129
[1000]	valid_0's binary_logloss: 0.429885
[1100]	valid_0's binary_logloss: 0.427212
[1200]	valid_0's binary_logloss: 0.424508
[1300]	valid_0's binary_logloss: 0.422463
[1400]	valid_0's binary_logloss: 0.421092
[1500]	valid_0's binary_logloss: 0.419622
[1600]	valid_0's binary_logloss: 0.418491
[1700]	valid_0's binary_logloss: 0.417365
[1800]	valid_0's binary_logloss: 0.416485
[1900]	valid_0's binary_logloss: 0.415578
[2000]	valid_0's binary_logloss: 0.414675
[2100]	valid_0's binary_logloss: 0.414383
[2200]	valid_0's binary_l

In [None]:
# Predict the test set with every fold model and ensemble the results.
# One column per fold model: shape (len(test), len(models)).
test_pred = np.zeros((len(test), len(models)))

for fold_, gbm in enumerate(models):  # iterate over the fitted fold models
    # LGBMClassifier.predict returns hard 0/1 labels; take the positive-class
    # probability instead so that the mean below is a real probability average
    # rather than a majority vote over already-thresholded labels.
    pred_ = gbm.predict_proba(test_df, num_iteration=gbm.best_iteration_)[:, 1]
    test_pred[:, fold_] = pred_  # fold k fills column k

# Average the fold probabilities and threshold at 0.5 to get the 0/1 label.
pred = (np.mean(test_pred, axis=1) > 0.5).astype(int)

# submission

In [None]:
# Start the submission table from the test ids (single 'id' column).
sub = test['id'].to_frame()

In [None]:
# Attach the ensembled 0/1 predictions as the 'state' column and preview.
sub = sub.assign(state=pred)
sub.head()

Unnamed: 0,id,state
0,test_00000,1
1,test_00001,1
2,test_00002,1
3,test_00003,0
4,test_00004,0


In [None]:
# Write the submission file without the row index and without a header row.
sub.to_csv('sub/17_lgb_tuning_lr0.005.csv',index=False, header=None)