# ライブラリのインポート / データの読み込み

In [45]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [46]:
import numpy as np
import pandas as pd

pd.set_option('max_colwidth', 500)
pd.set_option('max_columns', 500)
pd.set_option('max_rows', 500)

%matplotlib inline

from matplotlib import pyplot as plt
import matplotlib.ticker as mtick # For specifying the axes tick format 

import seaborn as sns
import re

import json, os, gc, math, time
import datetime
import collections
from tqdm import tqdm
import glob

from statistics import mean
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.preprocessing import LabelEncoder

from sklearn.model_selection import KFold, GroupKFold, StratifiedKFold

from sklearn import metrics

import lightgbm as lgb

import warnings
warnings.filterwarnings("ignore")

In [47]:
#ドライブへのアクセス
from google.colab import drive
drive.mount('/content/drive')
%cd "/content/drive/My Drive/00_datascience/19_ufj_bank"

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/My Drive/00_datascience/19_ufj_bank


In [48]:
train = pd.read_csv('input/train.csv')
test = pd.read_csv('input/test.csv')
sub = pd.read_csv('input/sample_submit.csv')

In [49]:
train.head()

Unnamed: 0,id,goal,country,duration,category1,category2,html_content,state
0,train_00000,20001-21000,US,45,art,mixed media,"<div class=""contents""><div><p><a href=""http://dummy.com"">http://dummy.com<p>In its first year, The Shillito's Elves Display won an international \ndesign award for Shillito's department store. The elves display is arts\n and crafts at its finest. The mixed media exhibit displays the talents\n of local fine arts graduates, and the display, while ""folksy"", is as \ntechnologically advanced as Disney World's famous ""It's a Small World"" \nride. </p><p>The Shillito's Elves attracted close to 100...",1
1,train_00001,19001-20000,US,59,food,restaurants,"<div class=""contents""><div><p>Cultural Pretzel Sports Bar is a place where people can come and watch their favorite local and world wide sports teams, while enjoying their favorite ethnic foods on a soft pretzel. Our menu includes a variety of appetizers and soft pretzels. Our pretzels include - Mexican Taco, Mexican Steak Fajita, Greek Gyro, Italian Sausage and Peppers, and American Steak Philly Soft Pretzels. With more to be added to the menu as the business grows.</p><p>Cultural Pretze...",0
2,train_00002,2001-3000,US,38,art,performance art,"<div class=""contents""><div><p>I want to perform this piece guerilla style, off the back off a 24 Ft truck on Christmas Eve, amid last minute Christmas shopping in New York City.</p>\n<p>Our truck pulls in, the door rolls up and I perform. </p>\n<p>New York is a city where you just do it - no excuses. A place where the new becomes the normal real fast and where the artist is challenged to capture the attention and the hearts of the unshockable and unstoppable. In times of celebration and re...",0
3,train_00003,1001-2000,US,30,art,mixed media,"<div class=""contents""><div><div class=""template asset"" contenteditable=""false"" data-alt-text="""" data-caption="""" data-id=""_xxx_"">\n<figure>\n<img alt="""" class=""fit lazyload"" data-src=""http://dummy.com""/>\n</figure>\n</div>\n<h1 class=""page-anchor"" id=""_xxx_"">\n<figure>\n<img alt=""Canyon de Chelley, Dine' (Navajo) Reservation, Arizona. Photo by: Demian Dine' Yazhi'"" class=""fit lazyload"" data-src=""http://dummy.com""/>\n<figcaption class=""px2"">Canyon de Chelley, Dine' (Navajo) Reservation, Arizo...",1
4,train_00004,1001-2000,US,29,film & video,webseries,"<div class=""contents""><div><p>The story of the show, both on and off screen, is about daring to dream after something impossible.Even with an incredible amount of difficulty and opposition, the project continued to grow in size and depth. A year ago, we were finally able to release the pilot episode of Azusa Ghost Hunter's Society (check it out! <a href=""http://dummy.com"">\n<figure>\n<img alt="""" class=""fit lazyload"" data-src=""http://dummy.com""/>\n</figure>\n</div>\n<p>Which brings us to now...",1


# bertによる特徴抽出

In [50]:
def remove_html(text):
    html_pattern = re.compile('<.*?>')
    return html_pattern.sub(r'', text)

In [51]:
for i in range(len(train)):
  train.loc[i, 'cleaned_text'] = remove_html(train.loc[i, 'html_content'])

for i in range(len(test)):
  test.loc[i, 'cleaned_text'] = remove_html(test.loc[i, 'html_content'])

In [52]:
#すでに抽出済みの特徴量を使用
text_train_df = pd.read_csv('input/05_seq_train_df.csv')
text_train_df.drop(['Unnamed: 0'], axis=1, inplace=True)
text_test_df = pd.read_csv('input/05_seq_test_df.csv')
text_test_df.drop(['Unnamed: 0'], axis=1, inplace=True)

In [53]:
merge_text = pd.concat([text_train_df, text_test_df], ignore_index=True)

# ラベルデータの前処理

In [54]:
# ワンホットエンコーディング
merge_df = pd.concat([train, test], ignore_index=True)
dummy_cols = ['goal',	'country', 'category1',	'category2']
from sklearn.preprocessing import OneHotEncoder

dummy_df = pd.get_dummies(merge_df[dummy_cols])

In [55]:
merge_df_new = pd.merge(merge_df, dummy_df, left_index=True, right_index=True)

In [56]:
#ラベルエンコーディング
from sklearn.preprocessing import LabelEncoder

le_cols = ['goal', 'country', 'category1', 'category2']

#カテゴリ変数をループしてlabel encoding
for c in le_cols:
  le = LabelEncoder()
  le.fit(merge_df_new[c])
  merge_df_new[c] = le.transform(merge_df_new[c])

# tfidfによる特徴抽出

In [57]:
#tfidfによる特徴抽出
from sklearn.feature_extraction.text import TfidfVectorizer
tv = TfidfVectorizer()
features = tv.fit_transform(merge_df_new["cleaned_text"].fillna(""))

In [58]:
from sklearn.decomposition import TruncatedSVD, NMF, LatentDirichletAllocation
from sklearn.pipeline import Pipeline



tfidf_svd = Pipeline(steps=[
    ("TfidfVectorizer", TfidfVectorizer()),
    ("TruncatedSVD", TruncatedSVD(n_components=500, random_state=42))
])

features_svd = tfidf_svd.fit_transform(merge_df_new["cleaned_text"].fillna(""))

In [59]:
svd_df_merge = pd.DataFrame(features_svd)
svd_df_merge.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313,314,315,316,317,318,319,320,321,322,323,324,325,326,327,328,329,330,331,332,333,334,335,336,337,338,339,340,341,342,343,344,345,346,347,348,349,350,351,352,353,354,355,356,357,358,359,360,361,362,363,364,365,366,367,368,369,370,371,372,373,374,375,376,377,378,379,380,381,382,383,384,385,386,387,388,389,390,391,392,393,394,395,396,397,398,399,400,401,402,403,404,405,406,407,408,409,410,411,412,413,414,415,416,417,418,419,420,421,422,423,424,425,426,427,428,429,430,431,432,433,434,435,436,437,438,439,440,441,442,443,444,445,446,447,448,449,450,451,452,453,454,455,456,457,458,459,460,461,462,463,464,465,466,467,468,469,470,471,472,473,474,475,476,477,478,479,480,481,482,483,484,485,486,487,488,489,490,491,492,493,494,495,496,497,498,499
0,0.294975,0.000612,0.023093,-0.043301,-0.00465,0.020541,-0.045337,0.023749,-0.013548,0.034168,-0.013013,0.0068,-0.034913,0.006792,-0.015054,0.005607,-0.006633,0.028449,0.020394,-0.021417,0.013802,0.013191,-0.021499,0.007816,-0.03327,0.021218,-0.028719,0.018365,0.040831,0.015447,-0.009399,-0.024765,-0.008271,0.021418,-0.004099,-0.001797,0.006575,-0.017908,-0.022702,-0.039143,0.027667,-0.042095,-0.018312,0.007398,-0.015835,0.025729,0.019487,-0.021705,-0.00419,-0.040916,-0.011882,-0.005396,0.044293,0.00076,-0.025608,-0.012093,-0.016194,-0.032678,0.000566,0.013225,0.023136,0.008637,0.019843,0.001818,0.028825,0.006231,-0.017139,-0.031669,-0.003159,-0.015533,-0.026399,0.001604,-0.009792,-0.020801,0.005663,-0.031367,-0.01407,0.006072,-0.016627,0.021234,-0.019897,-0.013858,-0.013586,0.004694,0.024429,0.002641,0.007775,0.010932,-0.033794,-0.00871,-0.018305,-0.010747,-0.014438,-0.011521,0.015122,0.008698,0.013868,0.007188,-0.011071,-0.010312,-0.004397,-0.017054,-0.010418,-0.003893,-0.010911,-0.005076,0.010106,-0.017227,-0.008574,0.014692,0.016253,-0.012105,-0.000688,-0.00348,-0.005211,-0.002206,0.005471,-0.006412,0.010589,-0.000371,0.015491,-0.016458,-0.009723,-0.028978,0.002319,-0.006822,-0.03718,-0.015665,-0.001428,0.006145,-0.001786,0.03406,0.014847,0.011332,0.010278,-0.006819,-0.00299,-0.006963,0.007242,-0.02888,-0.018141,-0.017128,-0.000992,-0.021067,0.015777,-0.016691,0.016007,-0.012234,-0.012678,-0.034903,-0.011944,-0.018759,-0.019489,-0.013064,-0.005295,0.002942,0.004412,-0.01015,-0.008466,-0.013912,-0.026573,-0.00369,0.001547,0.010385,-0.013997,-0.002729,0.017165,0.00435,-0.004193,-0.015273,0.018009,-0.006203,0.004206,-0.001148,0.015162,-0.013855,0.006764,0.000489,0.018619,-0.00767,-0.014569,0.001622,-0.005735,-0.014777,-0.007956,0.029095,-0.01024,-0.008644,0.012785,-0.00442,0.013195,0.001313,0.01344,-0.006807,-0.014144,-0.001044,0.005804,0.027092,0.000716,-0.009523,-0.01269,0.006916,0.002845,0.006958,-0.013795,-0.004799,0.027741,-0.031408,0.013027,-0.006342,-0.060845,0.016103,0.008326,0.004297,-0.014746,-0.041556,0.000378,-0.013922,0.00497,0.005157,-0.014959,0.004819,-0.020723,-0.003985,-0.012231,0.010716,0.016451,-0.013748,-0.007052,-0.029646,0.006565,-0.003556,0.003022,0.037371,-0.030016,0.003034,-0.001198,-0.009237,0.000968,-0.001527,0.004755,0.005212,-0.001713,-0.006552,0.001258,0.000342,6.8e-05,-0.003477,0.001752,-0.016281,0.014294,-0.004885,0.004468,0.013405,0.020139,0.003945,0.02102,0.007873,0.019889,-0.002567,0.013357,0.022455,-0.007579,-0.012372,-0.032077,-0.01353,-0.005403,0.000266,-0.001648,-0.001639,0.001863,-0.004335,-0.018393,-0.016931,-0.00434,0.002289,0.004362,0.002906,-0.012399,-7.2e-05,-0.003963,0.01955,0.018587,0.033418,0.013801,0.013333,0.014,0.022792,0.000807,-0.010167,0.052441,-0.006166,0.007954,-0.000552,-0.027045,-0.023912,0.013811,-0.010591,-0.009543,-0.000384,-0.003568,0.008797,0.003528,0.015528,0.00656,0.010887,-0.009435,0.014344,-0.020655,0.011157,-0.016251,0.016177,-0.027015,0.00643,-0.001237,-0.015847,-0.006423,-0.007497,0.008301,0.008716,0.006046,-0.00318,-0.029342,0.01883,-0.027225,-0.014363,0.003581,0.014993,0.018762,-0.013809,-0.002406,-0.009149,0.03002,-0.000566,-0.005241,0.010009,0.007114,0.005898,-0.004784,0.011938,0.000898,-0.008663,-0.008263,0.015382,-0.006097,-0.009161,0.019624,0.012683,0.011944,0.001941,0.008756,-0.000588,-0.005878,0.008793,-0.012284,-0.002123,0.015048,0.025482,-0.029234,-0.003705,-0.000455,-0.001011,0.018637,-0.001712,-0.000938,-0.02447,-0.004795,-0.025978,-0.01362,0.003612,0.008791,0.00405,-0.015871,-0.024235,-0.01486,-0.0071,-0.005585,-0.002856,-0.017646,-0.008371,-0.006473,-0.009879,-0.031699,-0.021614,-0.009414,0.002213,-0.00412,0.005586,0.006856,0.013171,-0.008106,0.007111,-0.003575,-0.015716,-0.00462,0.014409,-0.002802,0.013884,0.0097,0.014885,0.003749,0.023841,0.009653,0.0043,-0.003412,0.012885,-0.002451,-0.001222,-0.01816,-0.004087,0.008275,0.015173,0.010978,-0.015076,-0.00136,-0.005419,0.012152,0.024587,0.015563,0.003678,0.0095,0.013368,-0.013611,-0.023924,-0.003298,-0.018314,0.005287,0.007713,-0.000185,0.006374,-0.021484,0.022783,0.019443,-0.012504,-0.001829,0.01025,-0.002145,0.01432,-0.013286,-0.026239,-0.005647,0.005303,0.017564,0.01405,-0.004702,-0.021328,-0.0160364,0.02611,-0.021427,0.002637,-0.006516,0.011613,-0.009936,-0.01627,-0.018194,-0.012301,-0.014083,0.016163,-0.002235,0.010676,0.003555,0.002712,-0.014064,0.025697,-0.017604,-0.022284,0.001311,-0.004207,0.002415,-0.008744,0.007851,-0.005618,0.003442,-0.004662,-0.001124,-0.012321,0.001146,0.019111,-0.002842,-0.011646,-0.020073,0.021683,-0.014214,-0.016948,-0.002338,-0.000872,0.000387,-0.016319,0.00903,-0.011798,0.004316,-0.008867,-0.004078,0.006976,-0.011516,0.006324,-0.010501,0.005502,0.009052,0.005084
1,0.163203,-0.003916,0.027252,0.018163,-0.00254,-0.027746,0.007868,0.058626,-0.076373,0.005528,0.001529,0.063075,0.065003,-0.036947,0.008215,-0.003938,0.08456,-0.047096,-0.007873,-0.02466,-0.022861,0.038012,0.000873,-0.016923,0.020854,-0.002867,0.014033,-0.02027,-0.010988,-0.01371,0.023681,0.002177,-0.000968,-0.03104,-0.004757,-0.01855,0.003879,-0.013603,-0.001063,0.018527,-0.004305,0.007286,0.00377,0.014798,-0.016288,0.008545,0.024345,0.020833,0.004565,0.00035,-0.032647,-0.005683,0.02349,0.003013,-0.029089,0.02436,-0.002233,-0.004603,0.009921,0.008094,0.024851,0.008649,-0.002607,0.013226,0.019922,-0.012876,0.008,0.000682,-0.000246,0.016946,-0.000475,-0.014519,0.012636,0.012495,-0.021521,-0.008485,-0.003045,-0.00824,0.01308,-0.009576,0.000829,-0.017572,0.001136,0.007649,-0.001622,0.020451,-0.005796,-0.022923,0.02404,-0.007969,0.00523,-0.011854,0.001565,-0.01626,-0.008913,-0.011386,0.003382,0.028817,-0.02837,0.029832,0.010804,-0.013527,-0.002565,-0.006638,-0.01167,-0.01906,-0.005656,-0.01753,-0.002393,0.000225,0.009694,0.045953,-0.01934,-0.005644,-0.015246,0.000525,-0.040187,-0.005839,0.010005,0.024856,-0.016898,-0.012195,0.002411,0.011934,0.003365,0.010953,0.00403,0.021191,0.014574,0.005837,-0.014046,0.031644,-0.011582,-0.030638,-0.025228,-0.033206,0.028505,-0.021628,-0.006221,0.012131,0.013309,0.039541,0.018346,0.018283,0.002877,0.016564,-0.003646,-0.019043,0.023225,-0.022494,0.003549,0.014568,0.003112,-0.002357,-0.008913,0.01902,-0.020946,-0.024492,0.009354,0.002656,-0.015591,0.017118,0.021084,-0.004921,-0.005941,-0.002955,0.05288,-0.003666,-0.012794,9.3e-05,0.007546,-0.022651,0.003892,-0.011438,-0.004441,0.00048,0.03776,-0.009038,0.003876,0.013199,-0.018694,0.004078,0.042374,0.009532,-0.043667,-0.017096,0.005908,-0.010793,-0.033263,-0.004856,-0.015815,0.023328,-0.016211,-0.02785,-0.001642,0.049523,0.005503,-0.012195,-0.007389,-0.043093,0.009126,0.031461,0.030042,0.013987,0.046849,0.021333,0.014741,-0.045962,-0.023969,0.016307,-0.031548,0.02434,0.004228,-0.031803,0.025462,0.016845,-0.016227,-0.01154,0.016406,0.019283,0.006523,-0.008536,0.00195,0.036439,-0.012838,0.005726,0.003573,-0.007836,-0.02565,-0.031807,-0.019683,0.04071,0.034222,-0.027157,-0.014437,0.022464,-0.00402,-0.015753,0.032523,0.025401,-0.037151,0.020653,-0.015938,0.007957,0.037757,0.006351,0.008967,-0.038524,-0.006782,-0.026913,0.016541,0.004788,-0.012117,0.021256,3e-06,-0.013761,0.021737,0.019281,0.016999,-0.002109,0.00832,0.018685,0.008279,-0.023927,-0.018144,0.012725,0.002486,0.006239,0.005073,0.012673,-0.029764,-0.000522,-0.001175,0.004413,0.028199,0.005689,0.006939,-0.019899,0.02318,-0.010107,0.005676,-0.01094,0.007925,-0.010685,-0.00539,0.005124,0.021634,-0.029288,0.005428,0.002416,-0.015848,-0.024929,0.008211,0.0002,0.000293,-0.000218,-0.01967,-0.030645,-0.019649,0.006452,-0.009745,0.019987,0.01397,0.016201,-0.042355,-0.005079,-0.006166,-0.004707,-0.014276,0.016393,-0.010306,0.010931,0.007244,0.010991,0.028234,-0.017243,-0.005196,-0.01451,-0.027061,-0.02503,0.011892,0.00234,-0.001981,0.019402,0.032823,0.029139,0.015925,-0.002637,0.014175,-0.005639,-0.000723,0.00627,-0.022748,0.01237,0.001545,-0.01774,-0.012656,0.008712,0.000928,-0.030875,0.046253,0.002935,-0.015183,-0.024055,-0.029806,-0.017497,-0.012935,-0.012333,-0.016854,-0.017971,0.00601,-0.003935,0.008416,0.024333,0.003605,-0.018292,-0.024853,-0.009193,0.002504,0.003071,-0.002511,0.000973,-0.007891,0.015673,-0.010888,0.012386,-0.032803,0.004542,0.008547,-0.018531,-0.012993,-0.008504,-0.021,0.014683,0.032697,0.003251,-0.011389,-0.009819,0.005538,-0.001515,0.035216,-0.011462,-0.008868,-0.007126,0.012498,-0.002032,0.007925,-0.005448,-0.020716,0.019146,0.009727,0.012121,0.02454,0.013121,-0.025732,-0.002138,-0.00189,-0.034328,-0.010266,-0.003276,-0.006531,-0.008554,-0.002528,0.020719,0.00667,0.00424,0.000356,-0.01345,-0.021289,-0.020766,-0.001384,-0.015603,0.001364,0.032275,-0.001883,-0.023385,-0.003541,0.015579,0.010252,0.006481,0.011071,0.021234,-0.016587,-0.005293,-0.007954,0.007871,-0.005281,0.01404,-0.00139,0.00379,-0.014881,-0.026262,-0.036046,-0.007239,0.005636,0.009053,0.010804,-0.008195,-0.013957,-0.011335,-0.007369,0.022719,-0.017823,-0.037795,-0.003011,0.001076,4.714273e-07,0.015475,0.007725,0.002392,0.001555,0.017252,-0.001264,0.01878,-0.002006,0.005697,0.004452,-0.021312,-0.003056,-0.013419,0.022158,-0.017318,0.004456,-0.007307,0.01308,0.000356,0.017817,0.018172,-0.004561,-0.006131,0.019148,-0.002744,0.005585,-0.017044,0.01028,-0.017277,0.016206,0.001739,0.00452,-0.00361,0.01032,0.013295,0.001114,0.001249,0.000192,-0.026204,-0.016621,-0.000688,0.010627,-0.008542,-0.005051,0.033543,0.013254,-0.008066,-0.021986,0.006248,-0.017169,0.009594,0.007042,0.006819
2,0.416711,0.004678,-0.052108,-0.079082,-0.010637,0.082928,-0.018374,0.036624,-0.071621,-0.046461,-0.014937,0.041353,-0.02023,-0.010954,-0.087217,-0.007253,-0.03034,0.014084,-0.012513,-0.046767,-0.023714,0.035796,0.028166,-0.034384,-0.007149,-0.004731,0.046319,-0.009197,0.025746,-0.041982,0.013382,0.020958,0.029541,0.004973,-0.000511,-0.047432,-0.016302,-0.02589,-0.005019,0.002922,0.008939,-0.000865,-0.043802,0.011422,0.037997,-0.043122,-0.028676,-0.011596,0.021939,0.053449,-0.013177,0.027549,0.046695,-0.003099,-0.026838,0.043723,0.032587,-0.001128,-0.045294,-0.050798,-0.007419,0.016965,0.045479,-0.009131,-0.000773,0.007529,0.015699,0.010032,0.010353,-0.009434,0.01744,-0.00659,-0.015586,-0.004963,0.021632,0.019516,0.00119,0.019708,0.013269,0.016196,-0.003847,-0.000941,-0.049537,0.011238,0.066236,-0.001825,0.0161,-0.012459,-0.015586,0.014275,-0.032636,0.014214,0.005725,0.024562,-0.00203,-0.02557,0.020855,-0.024961,0.064168,0.055461,0.0062,0.018694,-0.025961,0.033112,0.008628,0.020298,-0.027144,0.040469,-0.044832,-0.018133,0.026143,-0.016903,-0.032076,-0.012957,0.04677,-0.02796,-0.035402,-0.019049,-0.028994,-0.033588,-0.014547,-0.016935,0.002285,-0.038101,-0.021981,0.007214,-0.012034,-0.044096,0.019688,-0.002578,0.010702,-0.032435,-0.004485,0.015117,0.003119,0.01512,0.007052,-0.046848,-0.010559,-0.021625,0.015734,0.005995,-0.009572,-0.01867,-0.004736,0.031105,-0.009167,-0.027622,-0.018912,-0.009616,-0.002921,-0.018524,0.001327,-0.012179,-0.000535,-0.000384,-0.024862,-0.020086,0.011901,0.011606,-0.053787,-0.006526,0.005081,-0.004517,0.018513,0.019313,-0.007772,0.008577,0.023937,-0.041133,0.010662,-0.012692,0.007205,0.000821,-0.036698,-0.006034,0.020031,-0.000474,0.009866,-0.005828,0.012019,-0.040584,0.030534,0.02771,0.003791,-0.01424,1e-05,-0.037155,0.007686,-0.032839,-0.011155,-0.011176,-0.011985,-0.002684,-0.014027,-0.019259,0.015369,0.011905,-0.006236,-0.001598,-0.002746,-0.034319,-0.013997,-0.014515,-0.022494,-0.023983,0.025608,-0.0109,0.005046,-0.01155,-0.012999,0.024968,-0.002846,0.011762,-0.029039,-0.043177,-0.017873,-0.014646,-0.001778,0.006254,0.025576,0.024578,0.009829,-0.035391,-0.008229,-0.001958,-0.031896,0.007234,-0.014329,-0.03672,-0.009861,-0.042158,0.020778,0.012274,0.020284,0.014637,-0.00182,0.027715,0.025725,0.013417,-0.00165,0.017763,-0.013467,-0.005221,-0.003739,0.026277,0.023894,-0.01814,0.022531,0.042347,0.005695,0.010298,0.044754,-0.024227,0.032113,0.000779,-0.006008,-0.008161,0.003728,-0.041648,0.008986,-0.00662,0.026379,-0.025085,-0.036784,0.020585,0.007251,0.005557,0.024249,-0.034646,-0.006971,-0.019999,-0.00112,0.006644,0.025517,0.016149,0.005249,-0.010356,0.00051,0.015814,0.016852,4e-06,-0.020663,0.018196,-0.005793,-0.009618,0.008372,0.010481,0.000549,0.032855,-0.019208,-0.046766,0.005028,0.00717,0.013061,0.019756,0.018297,-0.02169,0.009322,0.029644,0.002638,0.018161,-0.003457,0.033547,0.038493,0.005133,0.014484,0.026074,-0.041976,0.007695,-0.007938,0.007477,-0.008414,0.01528,0.005897,0.01853,0.005575,-0.013063,0.01618,-0.011104,0.022551,0.018623,-0.024943,-0.014583,-0.012556,-0.004766,0.00186,-0.023306,-0.037035,-0.032982,0.043201,0.037304,-0.033895,-0.022355,0.005235,-0.033008,-0.020016,0.007864,0.043307,0.015291,-0.009681,0.011159,-0.005273,-0.027743,-0.016334,-0.005559,0.014514,-0.025069,0.000105,0.017149,-0.019453,-0.012469,-0.019947,0.004873,-0.001885,0.002591,0.004008,0.005445,-0.020952,-0.010144,0.024594,0.045085,-0.00848,0.002506,0.011595,-0.008353,0.017114,-0.027139,-0.012849,0.009386,0.077026,0.004757,4.7e-05,0.036307,-0.011677,0.00736,-0.009034,0.023818,-0.003165,-0.003504,0.011629,-0.029717,-0.043607,2.1e-05,0.005406,-0.003784,0.015078,-0.005777,-0.003253,0.009667,-0.019872,-0.012448,0.02027,0.012987,0.007573,-0.012635,0.020261,0.001495,0.045272,-0.021419,0.01618,0.030131,-0.014402,-0.015198,-0.00334,-0.025329,0.000904,-0.029412,0.020132,0.021303,-0.015698,-0.007585,-0.018838,-0.001595,-0.008014,-0.001727,-0.010263,0.049696,-0.009258,-0.010381,-0.025934,0.001771,0.019181,0.002674,-0.011474,-0.00572,-0.015797,0.01034,0.039958,0.007382,-0.015279,0.001134,-0.001955,0.022133,-0.011363,-0.016983,0.007063,0.00236,-0.015427,-0.006122,0.006671,-0.036774,-0.005293,0.003517,0.030654,0.026613,-0.00755688,-0.000707,0.02102,-0.006722,-0.016838,0.007289,-0.029916,0.030361,-0.009817,0.003396,0.011102,0.008599,-0.002127,0.016893,0.009369,0.020381,-0.002918,0.013979,-0.013861,0.000155,-0.023244,-0.018794,0.025925,0.011097,0.000688,0.009036,-0.012488,-0.006586,-0.020339,-0.00303,0.016266,0.016593,0.002791,-0.038743,0.014228,-0.0079,-0.009394,-0.01368,-0.005283,-0.039301,0.014781,-0.008397,-0.011523,0.015543,-0.019541,-0.009885,-0.012131,0.022324,-0.011334,-0.006569,0.005135,-0.015688,0.013161,-0.003512
3,0.354831,0.040357,-0.074435,-0.139759,-0.016493,0.005695,-0.074026,-0.054699,-0.033434,0.03026,0.012646,-0.062608,0.00824,-0.000222,-0.032508,-0.007403,-0.049113,0.0356,-0.013439,-0.027953,0.020412,-0.020298,0.060604,-0.026912,-0.018858,-0.024159,0.034556,-0.004683,-0.079385,0.025448,-0.036581,-0.026747,-0.002423,-0.015612,-0.007602,0.007287,0.002839,0.006738,-0.003804,0.029368,-0.034215,0.013292,-0.032836,0.006217,0.019828,0.005241,-0.005085,-0.00415,-0.019404,-0.003929,-0.022902,0.005331,-0.006389,-0.016689,0.020357,0.000582,-0.00532,0.007639,0.015448,-0.014888,0.000521,-0.025076,0.038087,0.022262,0.025705,-0.044096,-0.013231,-0.014316,0.032864,0.004621,0.002945,0.052842,0.040737,0.002059,0.038488,-0.037978,0.009172,0.009892,-0.012467,-0.019212,-0.020698,-0.010479,0.004287,-0.031291,-0.039945,0.013086,0.006317,0.009953,0.026441,0.012802,-0.003667,0.01875,-0.009488,-0.006205,-0.005345,-0.000559,0.016989,0.013227,-0.019872,0.016459,-0.02179,-0.00951,-0.004089,0.001418,-0.009934,-0.009391,0.03079,-0.002691,-0.024361,0.013045,0.010555,0.006948,0.028237,-0.038113,0.00116,-0.001459,0.013188,-0.01362,0.017495,-0.023579,-0.008491,0.001827,0.006913,0.016032,0.011843,-0.005407,0.019339,-0.000751,0.008897,-0.030374,0.030516,0.013669,0.041349,-0.001878,0.011404,-0.001557,0.010462,0.023036,0.016788,0.020912,-0.009942,-0.012143,0.016048,-0.029292,0.000447,-0.005448,-0.013171,-0.006557,-0.009413,0.009934,0.009915,0.01145,-0.027782,0.00863,0.013706,-0.020153,0.011279,0.015792,-0.011998,-0.021094,0.000469,-0.015561,0.000536,-0.007997,-0.019697,-0.005308,0.008136,-0.017284,-0.00829,0.015782,-0.020305,0.001216,-0.033561,0.017286,0.001876,-0.035552,0.015706,0.000222,0.033662,0.023748,-0.013216,-0.003463,-0.011064,0.022489,0.008623,-0.016677,0.014512,0.006448,-0.019827,-0.00865,-0.021224,-0.003542,0.030123,0.022537,0.003275,-0.000253,-0.011975,0.004794,0.026627,0.009897,-0.019277,0.001149,0.009113,-0.001032,-0.00803,0.005746,-0.008653,-0.023495,0.014285,-0.016517,-0.021277,0.011487,0.007164,-0.007075,-0.017173,0.020269,0.017688,-0.014773,-0.013666,0.019013,-0.002892,0.003116,-0.03422,-0.007748,0.007538,0.00282,0.002202,-0.004167,-0.002674,0.007304,0.007023,-0.011342,0.010735,0.006714,0.019606,-0.018577,0.005583,0.006814,0.023859,-0.013558,-0.000103,0.003953,0.016533,0.000904,-0.004267,-0.007525,0.004428,0.016958,-0.008531,0.027352,0.004049,-0.006038,0.004034,-0.001441,0.0111,-0.014403,0.014592,-0.00619,0.012402,0.027039,-0.006931,-0.001878,-0.026267,0.002947,-0.033158,0.021464,-0.008212,0.011683,-0.032685,0.000458,-0.007037,-0.007415,0.00934,-0.006216,0.001893,-0.014707,-0.019533,-0.018242,0.003353,0.015347,-0.010529,-0.005416,0.025286,-0.015554,0.013915,0.013167,0.015025,0.00218,-0.010488,0.000337,0.01722,-0.010039,-0.01247,-0.018949,0.006092,0.000475,-0.000587,0.020428,-0.004066,0.023327,-0.009817,0.018736,1e-05,0.001126,-0.014062,-0.003334,0.007868,0.014951,-0.010219,-0.009413,-0.005275,-0.010779,0.022728,0.015429,0.002963,-0.026022,-0.019337,0.000312,-0.001269,0.007533,0.02089,-0.006465,0.036752,-0.001587,-0.009914,0.009966,0.003732,-0.010779,0.016677,-0.041823,-4e-06,0.014445,-0.002821,0.00105,-0.003082,-0.018107,-0.007295,0.00285,-0.003589,0.006856,-0.019039,-0.004432,0.008062,0.023686,0.017801,-0.010789,0.001843,0.015105,0.006665,0.005698,8.4e-05,0.021123,0.00643,-0.00299,-0.000183,-0.016929,0.018624,-0.000756,-0.011305,0.002496,0.019163,0.01074,-0.002725,0.007965,-0.018767,-0.005796,-0.017752,0.005388,-0.013781,-0.008096,-0.018079,0.006756,-0.015755,-0.002736,0.016875,0.003336,-0.013373,0.020895,0.007974,0.009954,-0.010444,0.002428,-0.002522,0.011646,0.004362,-0.019228,-0.027572,-0.006912,-0.007894,-0.002023,-0.005837,0.011133,0.00548,-0.01036,-0.007423,0.005391,0.004326,0.032522,-0.001628,-0.023527,-0.003269,-0.018909,0.024599,0.008075,-0.013349,0.028762,-0.034327,0.015246,-0.01524,-0.001913,-0.000255,0.027467,0.016686,-0.026613,-0.005726,-0.011795,0.020901,-0.004121,0.002957,0.002943,0.003828,-0.018446,0.023796,-0.009864,0.017569,-0.008431,-0.024315,0.007072,0.034808,-0.016353,-0.003044,0.002943,0.004316,0.010644,0.016936,0.017667,0.01954,0.012871,-0.00918,0.009369,-0.000286,-0.002292,0.018703,-0.01543,0.014991,-0.029858,-8.588587e-05,0.010829,0.011917,-0.013044,0.013034,-0.00048,-0.018934,0.01922,-0.005806,0.015186,0.027785,-0.00624,0.001674,0.004535,0.008004,-0.000805,0.020634,-0.000361,0.012462,-0.013387,0.001803,-0.020761,0.01184,-0.002967,0.027195,0.00379,-0.008744,-0.001571,-0.016794,-0.003851,0.007929,-0.013881,0.003489,-0.00907,-0.000835,0.002673,0.040385,0.003692,0.012645,-0.021813,-0.00941,-0.005674,0.006167,0.017744,-0.008828,-0.012534,-0.017766,0.022759,0.01466,0.044888,-0.002659,0.008327,-0.009394,-0.018082
4,0.260418,0.001437,0.023826,-0.039866,-0.012193,0.048333,0.003216,0.018025,0.000457,0.010451,-0.007496,0.005901,-0.013798,-0.00127,0.006453,-0.006204,-0.013362,-0.00464,-0.01144,0.004546,-0.020052,0.021943,-0.038509,-0.0319,0.02149,-0.043579,0.026812,0.028346,-0.051293,-0.026967,0.002227,0.008723,0.00662,0.004065,0.009596,-0.014261,0.000765,-0.018833,-0.017231,-0.012618,-0.015423,0.001258,-0.002933,-0.005061,-0.00674,0.008867,-0.026713,0.007756,-0.002776,0.019599,0.008397,0.002028,0.007649,-0.024634,-0.019784,-0.009064,0.010434,0.014564,-0.003895,-0.001753,-0.012107,-0.014641,0.004904,0.020135,-0.02085,-0.003541,-0.001184,-0.02159,0.006594,0.014379,0.002408,0.00865,0.005318,-0.006423,0.00739,-0.007457,-0.008768,-0.00864,0.027918,0.004054,0.004594,-0.009446,0.040813,-0.002356,-0.031032,-0.02789,0.018143,0.000333,0.027121,-0.014695,-0.010057,0.025408,-0.010409,-0.002975,0.008334,0.020217,0.018589,-0.015231,0.011245,0.001105,-0.012736,0.011274,-0.00341,-0.015058,0.018288,0.010034,0.004646,0.008461,-0.009941,0.010253,0.003188,0.008066,0.006937,0.045064,-0.003993,0.00577,-0.006901,-0.005887,-0.005552,0.002637,-0.019318,0.002805,-0.024114,0.020565,0.008457,-0.004133,-0.014176,-0.013541,-0.001297,0.030879,0.035692,-0.008506,0.0108,-0.006036,-0.002304,-0.015982,-0.020343,-0.002846,-0.006962,0.0317,-0.022908,0.006891,0.017149,-0.02068,-0.005889,0.007604,-0.008802,-0.00637,0.010047,0.008678,-0.006044,0.003622,0.001312,0.016231,-0.007665,0.003251,-0.004181,-0.012262,0.000458,0.003016,-0.002282,0.00123,0.003563,-0.000769,0.020573,0.005027,-0.014633,0.015533,-0.00745,-0.013152,0.008435,0.013685,0.002897,0.039675,-0.003571,-0.018255,0.006045,-0.005061,0.002901,0.013776,0.027175,-0.008659,0.007394,-0.018849,0.006122,0.000626,0.011531,-0.002204,0.004674,-0.016593,0.003873,-0.012619,-0.019066,0.022105,0.025796,-0.014594,0.011715,0.00463,0.009163,0.007722,0.003037,-0.016506,0.013027,-0.00343,0.004873,-0.001665,-0.003126,-0.016031,0.002284,-0.027859,0.002107,0.02508,0.012516,-0.023872,0.002675,-0.011824,0.020882,0.009732,0.006867,-0.017582,0.007815,-0.000986,-0.005172,0.01622,-0.012228,0.008582,-0.002592,-0.008454,0.002447,0.011193,-0.001445,0.004244,-0.020522,-0.038089,-0.017452,0.008621,-0.006169,0.012156,-0.015329,-0.005669,0.0115,0.01546,0.001894,-0.003761,0.006966,0.001154,-0.003163,0.008764,-0.008616,0.010483,-0.000457,-0.001965,-0.015256,0.011931,-0.031299,-0.010267,-0.016724,-0.002138,0.016869,0.009225,-0.012465,0.012027,0.009985,0.002319,0.016437,-0.002284,-0.012909,-0.00422,-0.000938,-0.00467,-0.016009,-0.002243,0.006482,-0.000195,-0.00697,0.004978,-0.001258,-0.009581,0.004593,0.001434,-0.001297,-0.009678,-0.016876,-0.003917,-0.025173,-0.009554,0.001588,-0.007054,0.00341,0.022288,0.002558,-0.00791,-0.018633,0.012878,0.02167,-0.005756,-0.00153,-0.012761,-0.003776,-0.001991,0.008219,0.012252,-0.005256,-0.008483,-0.001369,0.01795,0.019696,-0.009164,-0.021164,-0.02204,0.002948,-0.015015,0.022819,0.019183,-0.002651,0.003276,0.007757,-0.003845,-0.007368,0.006752,0.011267,-0.016656,-0.010215,-0.000316,0.01614,-0.003184,0.012232,-0.030604,0.001437,0.004717,-0.010084,0.010494,0.003037,-0.003839,0.008754,0.005568,-0.016936,0.015956,0.009839,0.00744,-0.009875,-0.011552,0.000144,0.007243,0.011861,0.014954,0.017318,0.004108,-0.000639,-0.020738,-0.008773,0.006685,0.012808,-0.022173,0.004114,-0.00526,0.000198,0.00086,-0.020039,0.000113,0.008501,-0.011265,-0.002424,0.011266,-0.00523,0.015275,0.003584,-0.008259,0.00234,0.009022,0.002105,0.015345,-0.008046,-0.004741,-0.000243,0.028338,-0.015266,0.015384,0.004887,-0.010762,-0.005476,0.01322,0.00414,0.00618,0.007176,-0.008658,-0.006878,-0.011759,0.003479,-0.010079,0.015003,0.013979,0.002924,0.006796,-0.016258,0.030353,0.023436,0.0199,-0.00925,0.005807,-0.008369,-0.020353,-0.001805,-0.009361,0.009599,0.000622,0.012485,0.002214,-0.001465,-0.014402,-0.005783,-0.006651,-0.004762,0.009574,0.0109,-0.028568,0.009592,0.004414,0.002296,0.007348,0.004907,-0.002238,0.000877,-0.003586,-0.014354,-0.017334,-0.012713,0.009228,-0.009545,0.009783,-0.004882,-0.014955,0.001368,0.008745,-0.024429,0.001143,0.002591,0.010161,-0.005573,0.021094,0.010992,0.004344,0.003589,0.003988,0.019216,-0.014599,-0.009969547,0.018305,-0.024202,-0.006769,0.011122,0.001481,0.023116,0.016563,0.003465,-0.017765,0.003124,0.014014,-0.012569,0.015038,-0.007238,-0.003678,0.0107,-0.005455,-0.009988,0.004074,-0.022681,-0.012758,-0.002296,-0.00613,0.010012,-0.01287,-0.033552,-0.001691,0.008593,-0.001631,-0.008076,0.013342,-0.012104,-0.004876,-0.001151,0.014823,-0.006489,-0.002622,-0.002942,-0.008487,-0.009415,0.008186,-0.01102,-0.005486,-0.006148,-0.001046,0.018317,0.002917,-0.00206,0.02289,0.00874,0.019489,-0.023703,-0.005283


# データのマージ/kmeans用の前処理

In [60]:
#不要なカラムの定義
non_use_cols = ['id',	'html_content',	'cleaned_text', 'state']

In [61]:
#bertから抽出した特徴量を追加
full_merge_df = pd.merge(merge_df_new.drop(non_use_cols, axis=1), merge_text, left_index=True, right_index=True)

In [62]:
#tfidfから抽出した特徴量を追加
full_merge_df_fin = pd.merge(full_merge_df, svd_df_merge, left_index=True, right_index=True)

In [63]:
full_merge_df_fin.head()

Unnamed: 0,goal,country,duration,category1,category2,goal_1-1000,goal_100000+,goal_10001-11000,goal_1001-2000,goal_11001-12000,goal_12001-13000,goal_13001-14000,goal_14001-15000,goal_15001-16000,goal_16001-17000,goal_17001-18000,goal_18001-19000,goal_19001-20000,goal_20001-21000,goal_2001-3000,goal_21001-22000,goal_22001-23000,goal_23001-24000,goal_24001-25000,goal_25001-26000,goal_26001-27000,goal_27001-28000,goal_28001-29000,goal_29001-30000,goal_30001-31000,goal_3001-4000,goal_31001-32000,goal_32001-33000,goal_33001-34000,goal_34001-35000,goal_35001-36000,goal_36001-37000,goal_37001-38000,goal_38001-39000,goal_39001-40000,goal_40001-41000,goal_4001-5000,goal_41001-42000,goal_42001-43000,goal_43001-44000,goal_44001-45000,goal_45001-46000,goal_46001-47000,goal_47001-48000,goal_48001-49000,goal_49001-50000,goal_50001-51000,goal_5001-6000,goal_51001-52000,goal_52001-53000,goal_53001-54000,goal_54001-55000,goal_55001-56000,goal_56001-57000,goal_57001-58000,goal_58001-59000,goal_59001-60000,goal_60001-61000,goal_6001-7000,goal_61001-62000,goal_62001-63000,goal_63001-64000,goal_64001-65000,goal_65001-66000,goal_66001-67000,goal_67001-68000,goal_68001-69000,goal_69001-70000,goal_70001-71000,goal_7001-8000,goal_71001-72000,goal_72001-73000,goal_73001-74000,goal_74001-75000,goal_75001-76000,goal_76001-77000,goal_77001-78000,goal_78001-79000,goal_79001-80000,goal_80001-81000,goal_8001-9000,goal_81001-82000,goal_82001-83000,goal_83001-84000,goal_84001-85000,goal_85001-86000,goal_86001-87000,goal_87001-88000,goal_88001-89000,goal_89001-90000,goal_90001-91000,goal_9001-10000,goal_91001-92000,goal_92001-93000,goal_93001-94000,goal_94001-95000,goal_95001-96000,goal_96001-97000,goal_97001-98000,goal_98001-99000,goal_99001-100000,country_AT,country_AU,country_BE,country_CA,country_CH,country_DE,country_DK,country_ES,country_FR,country_GB,country_HK,country_IE,country_IT,country_JP,country_LU,country_MX,country_NL,country_NO,country_NZ,country_SE,country_SG,country_US,category1_art,category1_comics,category1_crafts,category1_dance,category1_design,category1_fashion,category1_film & video,category1_food,category1_games,category1_journalism,category1_music,category1_photography,category1_publishing,category1_technology,category1_theater,category2_3d printing,category2_academic,category2_accessories,category2_action,category2_animals,category2_animation,category2_anthologies,category2_apparel,category2_apps,category2_architecture,category2_art books,category2_audio,category2_bacon,category2_blues,category2_calendars,category2_camera equipment,category2_candles,category2_ceramics,category2_children's books,category2_childrenswear,category2_chiptune,category2_civic design,category2_classical music,category2_comedy,category2_comic books,category2_community gardens,category2_conceptual art,category2_cookbooks,category2_country & folk,category2_couture,category2_crochet,category2_digital art,category2_diy,category2_diy electronics,category2_documentary,category2_drama,category2_drinks,category2_electronic music,category2_embroidery,category2_events,category2_experimental,category2_fabrication tools,category2_faith,category2_family,category2_fantasy,category2_farmer's markets,category2_farms,category2_festivals,category2_fiction,category2_fine art,category2_flight,category2_food trucks,category2_footwear,category2_gadgets,category2_gaming hardware,category2_glass,category2_graphic design,category2_graphic novels,category2_hardware,category2_hip-hop,category2_horror,category2_illustration,category2_immersive,category2_indie rock,category2_installations,category2_interactive design,category2_jazz,category2_jewelry,category2_kids,category2_knitting,category2_latin,category2_letterpress,category2_literary journals,category2_literary spaces,category2_live games,category2_makerspaces,category2_metal,category2_mixed media,category2_mobile games,category2_movie theaters,category2_music videos,category2_musical,category2_narrative film,category2_nature,category2_nonfiction,category2_painting,category2_people,category2_performance art,category2_performances,category2_periodicals,category2_pet fashion,category2_photo,category2_photobooks,category2_places,category2_playing cards,category2_plays,category2_poetry,category2_pop,category2_pottery,category2_print,category2_printing,category2_product design,category2_public art,category2_punk,category2_puzzles,category2_quilts,category2_r&b,...,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313,314,315,316,317,318,319,320,321,322,323,324,325,326,327,328,329,330,331,332,333,334,335,336,337,338,339,340,341,342,343,344,345,346,347,348,349,350,351,352,353,354,355,356,357,358,359,360,361,362,363,364,365,366,367,368,369,370,371,372,373,374,375,376,377,378,379,380,381,382,383,384,385,386,387,388,389,390,391,392,393,394,395,396,397,398,399,400,401,402,403,404,405,406,407,408,409,410,411,412,413,414,415,416,417,418,419,420,421,422,423,424,425,426,427,428,429,430,431,432,433,434,435,436,437,438,439,440,441,442,443,444,445,446,447,448,449,450,451,452,453,454,455,456,457,458,459,460,461,462,463,464,465,466,467,468,469,470,471,472,473,474,475,476,477,478,479,480,481,482,483,484,485,486,487,488,489,490,491,492,493,494,495,496,497,498,499
0,13,21,45,0,77,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0.014294,-0.004885,0.004468,0.013405,0.020139,0.003945,0.02102,0.007873,0.019889,-0.002567,0.013357,0.022455,-0.007579,-0.012372,-0.032077,-0.01353,-0.005403,0.000266,-0.001648,-0.001639,0.001863,-0.004335,-0.018393,-0.016931,-0.00434,0.002289,0.004362,0.002906,-0.012399,-7.2e-05,-0.003963,0.01955,0.018587,0.033418,0.013801,0.013333,0.014,0.022792,0.000807,-0.010167,0.052441,-0.006166,0.007954,-0.000552,-0.027045,-0.023912,0.013811,-0.010591,-0.009543,-0.000384,-0.003568,0.008797,0.003528,0.015528,0.00656,0.010887,-0.009435,0.014344,-0.020655,0.011157,-0.016251,0.016177,-0.027015,0.00643,-0.001237,-0.015847,-0.006423,-0.007497,0.008301,0.008716,0.006046,-0.00318,-0.029342,0.01883,-0.027225,-0.014363,0.003581,0.014993,0.018762,-0.013809,-0.002406,-0.009149,0.03002,-0.000566,-0.005241,0.010009,0.007114,0.005898,-0.004784,0.011938,0.000898,-0.008663,-0.008263,0.015382,-0.006097,-0.009161,0.019624,0.012683,0.011944,0.001941,0.008756,-0.000588,-0.005878,0.008793,-0.012284,-0.002123,0.015048,0.025482,-0.029234,-0.003705,-0.000455,-0.001011,0.018637,-0.001712,-0.000938,-0.02447,-0.004795,-0.025978,-0.01362,0.003612,0.008791,0.00405,-0.015871,-0.024235,-0.01486,-0.0071,-0.005585,-0.002856,-0.017646,-0.008371,-0.006473,-0.009879,-0.031699,-0.021614,-0.009414,0.002213,-0.00412,0.005586,0.006856,0.013171,-0.008106,0.007111,-0.003575,-0.015716,-0.00462,0.014409,-0.002802,0.013884,0.0097,0.014885,0.003749,0.023841,0.009653,0.0043,-0.003412,0.012885,-0.002451,-0.001222,-0.01816,-0.004087,0.008275,0.015173,0.010978,-0.015076,-0.00136,-0.005419,0.012152,0.024587,0.015563,0.003678,0.0095,0.013368,-0.013611,-0.023924,-0.003298,-0.018314,0.005287,0.007713,-0.000185,0.006374,-0.021484,0.022783,0.019443,-0.012504,-0.001829,0.01025,-0.002145,0.01432,-0.013286,-0.026239,-0.005647,0.005303,0.017564,0.01405,-0.004702,-0.021328,-0.0160364,0.02611,-0.021427,0.002637,-0.006516,0.011613,-0.009936,-0.01627,-0.018194,-0.012301,-0.014083,0.016163,-0.002235,0.010676,0.003555,0.002712,-0.014064,0.025697,-0.017604,-0.022284,0.001311,-0.004207,0.002415,-0.008744,0.007851,-0.005618,0.003442,-0.004662,-0.001124,-0.012321,0.001146,0.019111,-0.002842,-0.011646,-0.020073,0.021683,-0.014214,-0.016948,-0.002338,-0.000872,0.000387,-0.016319,0.00903,-0.011798,0.004316,-0.008867,-0.004078,0.006976,-0.011516,0.006324,-0.010501,0.005502,0.009052,0.005084
1,12,21,59,7,110,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0.016541,0.004788,-0.012117,0.021256,3e-06,-0.013761,0.021737,0.019281,0.016999,-0.002109,0.00832,0.018685,0.008279,-0.023927,-0.018144,0.012725,0.002486,0.006239,0.005073,0.012673,-0.029764,-0.000522,-0.001175,0.004413,0.028199,0.005689,0.006939,-0.019899,0.02318,-0.010107,0.005676,-0.01094,0.007925,-0.010685,-0.00539,0.005124,0.021634,-0.029288,0.005428,0.002416,-0.015848,-0.024929,0.008211,0.0002,0.000293,-0.000218,-0.01967,-0.030645,-0.019649,0.006452,-0.009745,0.019987,0.01397,0.016201,-0.042355,-0.005079,-0.006166,-0.004707,-0.014276,0.016393,-0.010306,0.010931,0.007244,0.010991,0.028234,-0.017243,-0.005196,-0.01451,-0.027061,-0.02503,0.011892,0.00234,-0.001981,0.019402,0.032823,0.029139,0.015925,-0.002637,0.014175,-0.005639,-0.000723,0.00627,-0.022748,0.01237,0.001545,-0.01774,-0.012656,0.008712,0.000928,-0.030875,0.046253,0.002935,-0.015183,-0.024055,-0.029806,-0.017497,-0.012935,-0.012333,-0.016854,-0.017971,0.00601,-0.003935,0.008416,0.024333,0.003605,-0.018292,-0.024853,-0.009193,0.002504,0.003071,-0.002511,0.000973,-0.007891,0.015673,-0.010888,0.012386,-0.032803,0.004542,0.008547,-0.018531,-0.012993,-0.008504,-0.021,0.014683,0.032697,0.003251,-0.011389,-0.009819,0.005538,-0.001515,0.035216,-0.011462,-0.008868,-0.007126,0.012498,-0.002032,0.007925,-0.005448,-0.020716,0.019146,0.009727,0.012121,0.02454,0.013121,-0.025732,-0.002138,-0.00189,-0.034328,-0.010266,-0.003276,-0.006531,-0.008554,-0.002528,0.020719,0.00667,0.00424,0.000356,-0.01345,-0.021289,-0.020766,-0.001384,-0.015603,0.001364,0.032275,-0.001883,-0.023385,-0.003541,0.015579,0.010252,0.006481,0.011071,0.021234,-0.016587,-0.005293,-0.007954,0.007871,-0.005281,0.01404,-0.00139,0.00379,-0.014881,-0.026262,-0.036046,-0.007239,0.005636,0.009053,0.010804,-0.008195,-0.013957,-0.011335,-0.007369,0.022719,-0.017823,-0.037795,-0.003011,0.001076,4.714273e-07,0.015475,0.007725,0.002392,0.001555,0.017252,-0.001264,0.01878,-0.002006,0.005697,0.004452,-0.021312,-0.003056,-0.013419,0.022158,-0.017318,0.004456,-0.007307,0.01308,0.000356,0.017817,0.018172,-0.004561,-0.006131,0.019148,-0.002744,0.005585,-0.017044,0.01028,-0.017277,0.016206,0.001739,0.00452,-0.00361,0.01032,0.013295,0.001114,0.001249,0.000192,-0.026204,-0.016621,-0.000688,0.010627,-0.008542,-0.005051,0.033543,0.013254,-0.008066,-0.021986,0.006248,-0.017169,0.009594,0.007042,0.006819
2,14,21,38,0,87,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0.005695,0.010298,0.044754,-0.024227,0.032113,0.000779,-0.006008,-0.008161,0.003728,-0.041648,0.008986,-0.00662,0.026379,-0.025085,-0.036784,0.020585,0.007251,0.005557,0.024249,-0.034646,-0.006971,-0.019999,-0.00112,0.006644,0.025517,0.016149,0.005249,-0.010356,0.00051,0.015814,0.016852,4e-06,-0.020663,0.018196,-0.005793,-0.009618,0.008372,0.010481,0.000549,0.032855,-0.019208,-0.046766,0.005028,0.00717,0.013061,0.019756,0.018297,-0.02169,0.009322,0.029644,0.002638,0.018161,-0.003457,0.033547,0.038493,0.005133,0.014484,0.026074,-0.041976,0.007695,-0.007938,0.007477,-0.008414,0.01528,0.005897,0.01853,0.005575,-0.013063,0.01618,-0.011104,0.022551,0.018623,-0.024943,-0.014583,-0.012556,-0.004766,0.00186,-0.023306,-0.037035,-0.032982,0.043201,0.037304,-0.033895,-0.022355,0.005235,-0.033008,-0.020016,0.007864,0.043307,0.015291,-0.009681,0.011159,-0.005273,-0.027743,-0.016334,-0.005559,0.014514,-0.025069,0.000105,0.017149,-0.019453,-0.012469,-0.019947,0.004873,-0.001885,0.002591,0.004008,0.005445,-0.020952,-0.010144,0.024594,0.045085,-0.00848,0.002506,0.011595,-0.008353,0.017114,-0.027139,-0.012849,0.009386,0.077026,0.004757,4.7e-05,0.036307,-0.011677,0.00736,-0.009034,0.023818,-0.003165,-0.003504,0.011629,-0.029717,-0.043607,2.1e-05,0.005406,-0.003784,0.015078,-0.005777,-0.003253,0.009667,-0.019872,-0.012448,0.02027,0.012987,0.007573,-0.012635,0.020261,0.001495,0.045272,-0.021419,0.01618,0.030131,-0.014402,-0.015198,-0.00334,-0.025329,0.000904,-0.029412,0.020132,0.021303,-0.015698,-0.007585,-0.018838,-0.001595,-0.008014,-0.001727,-0.010263,0.049696,-0.009258,-0.010381,-0.025934,0.001771,0.019181,0.002674,-0.011474,-0.00572,-0.015797,0.01034,0.039958,0.007382,-0.015279,0.001134,-0.001955,0.022133,-0.011363,-0.016983,0.007063,0.00236,-0.015427,-0.006122,0.006671,-0.036774,-0.005293,0.003517,0.030654,0.026613,-0.00755688,-0.000707,0.02102,-0.006722,-0.016838,0.007289,-0.029916,0.030361,-0.009817,0.003396,0.011102,0.008599,-0.002127,0.016893,0.009369,0.020381,-0.002918,0.013979,-0.013861,0.000155,-0.023244,-0.018794,0.025925,0.011097,0.000688,0.009036,-0.012488,-0.006586,-0.020339,-0.00303,0.016266,0.016593,0.002791,-0.038743,0.014228,-0.0079,-0.009394,-0.01368,-0.005283,-0.039301,0.014781,-0.008397,-0.011523,0.015543,-0.019541,-0.009885,-0.012131,0.022324,-0.011334,-0.006569,0.005135,-0.015688,0.013161,-0.003512
3,3,21,30,0,77,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0.004049,-0.006038,0.004034,-0.001441,0.0111,-0.014403,0.014592,-0.00619,0.012402,0.027039,-0.006931,-0.001878,-0.026267,0.002947,-0.033158,0.021464,-0.008212,0.011683,-0.032685,0.000458,-0.007037,-0.007415,0.00934,-0.006216,0.001893,-0.014707,-0.019533,-0.018242,0.003353,0.015347,-0.010529,-0.005416,0.025286,-0.015554,0.013915,0.013167,0.015025,0.00218,-0.010488,0.000337,0.01722,-0.010039,-0.01247,-0.018949,0.006092,0.000475,-0.000587,0.020428,-0.004066,0.023327,-0.009817,0.018736,1e-05,0.001126,-0.014062,-0.003334,0.007868,0.014951,-0.010219,-0.009413,-0.005275,-0.010779,0.022728,0.015429,0.002963,-0.026022,-0.019337,0.000312,-0.001269,0.007533,0.02089,-0.006465,0.036752,-0.001587,-0.009914,0.009966,0.003732,-0.010779,0.016677,-0.041823,-4e-06,0.014445,-0.002821,0.00105,-0.003082,-0.018107,-0.007295,0.00285,-0.003589,0.006856,-0.019039,-0.004432,0.008062,0.023686,0.017801,-0.010789,0.001843,0.015105,0.006665,0.005698,8.4e-05,0.021123,0.00643,-0.00299,-0.000183,-0.016929,0.018624,-0.000756,-0.011305,0.002496,0.019163,0.01074,-0.002725,0.007965,-0.018767,-0.005796,-0.017752,0.005388,-0.013781,-0.008096,-0.018079,0.006756,-0.015755,-0.002736,0.016875,0.003336,-0.013373,0.020895,0.007974,0.009954,-0.010444,0.002428,-0.002522,0.011646,0.004362,-0.019228,-0.027572,-0.006912,-0.007894,-0.002023,-0.005837,0.011133,0.00548,-0.01036,-0.007423,0.005391,0.004326,0.032522,-0.001628,-0.023527,-0.003269,-0.018909,0.024599,0.008075,-0.013349,0.028762,-0.034327,0.015246,-0.01524,-0.001913,-0.000255,0.027467,0.016686,-0.026613,-0.005726,-0.011795,0.020901,-0.004121,0.002957,0.002943,0.003828,-0.018446,0.023796,-0.009864,0.017569,-0.008431,-0.024315,0.007072,0.034808,-0.016353,-0.003044,0.002943,0.004316,0.010644,0.016936,0.017667,0.01954,0.012871,-0.00918,0.009369,-0.000286,-0.002292,0.018703,-0.01543,0.014991,-0.029858,-8.588587e-05,0.010829,0.011917,-0.013044,0.013034,-0.00048,-0.018934,0.01922,-0.005806,0.015186,0.027785,-0.00624,0.001674,0.004535,0.008004,-0.000805,0.020634,-0.000361,0.012462,-0.013387,0.001803,-0.020761,0.01184,-0.002967,0.027195,0.00379,-0.008744,-0.001571,-0.016794,-0.003851,0.007929,-0.013881,0.003489,-0.00907,-0.000835,0.002673,0.040385,0.003692,0.012645,-0.021813,-0.00941,-0.005674,0.006167,0.017744,-0.008828,-0.012534,-0.017766,0.022759,0.01466,0.044888,-0.002659,0.008327,-0.009394,-0.018082
4,3,21,29,6,139,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,-0.000457,-0.001965,-0.015256,0.011931,-0.031299,-0.010267,-0.016724,-0.002138,0.016869,0.009225,-0.012465,0.012027,0.009985,0.002319,0.016437,-0.002284,-0.012909,-0.00422,-0.000938,-0.00467,-0.016009,-0.002243,0.006482,-0.000195,-0.00697,0.004978,-0.001258,-0.009581,0.004593,0.001434,-0.001297,-0.009678,-0.016876,-0.003917,-0.025173,-0.009554,0.001588,-0.007054,0.00341,0.022288,0.002558,-0.00791,-0.018633,0.012878,0.02167,-0.005756,-0.00153,-0.012761,-0.003776,-0.001991,0.008219,0.012252,-0.005256,-0.008483,-0.001369,0.01795,0.019696,-0.009164,-0.021164,-0.02204,0.002948,-0.015015,0.022819,0.019183,-0.002651,0.003276,0.007757,-0.003845,-0.007368,0.006752,0.011267,-0.016656,-0.010215,-0.000316,0.01614,-0.003184,0.012232,-0.030604,0.001437,0.004717,-0.010084,0.010494,0.003037,-0.003839,0.008754,0.005568,-0.016936,0.015956,0.009839,0.00744,-0.009875,-0.011552,0.000144,0.007243,0.011861,0.014954,0.017318,0.004108,-0.000639,-0.020738,-0.008773,0.006685,0.012808,-0.022173,0.004114,-0.00526,0.000198,0.00086,-0.020039,0.000113,0.008501,-0.011265,-0.002424,0.011266,-0.00523,0.015275,0.003584,-0.008259,0.00234,0.009022,0.002105,0.015345,-0.008046,-0.004741,-0.000243,0.028338,-0.015266,0.015384,0.004887,-0.010762,-0.005476,0.01322,0.00414,0.00618,0.007176,-0.008658,-0.006878,-0.011759,0.003479,-0.010079,0.015003,0.013979,0.002924,0.006796,-0.016258,0.030353,0.023436,0.0199,-0.00925,0.005807,-0.008369,-0.020353,-0.001805,-0.009361,0.009599,0.000622,0.012485,0.002214,-0.001465,-0.014402,-0.005783,-0.006651,-0.004762,0.009574,0.0109,-0.028568,0.009592,0.004414,0.002296,0.007348,0.004907,-0.002238,0.000877,-0.003586,-0.014354,-0.017334,-0.012713,0.009228,-0.009545,0.009783,-0.004882,-0.014955,0.001368,0.008745,-0.024429,0.001143,0.002591,0.010161,-0.005573,0.021094,0.010992,0.004344,0.003589,0.003988,0.019216,-0.014599,-0.009969547,0.018305,-0.024202,-0.006769,0.011122,0.001481,0.023116,0.016563,0.003465,-0.017765,0.003124,0.014014,-0.012569,0.015038,-0.007238,-0.003678,0.0107,-0.005455,-0.009988,0.004074,-0.022681,-0.012758,-0.002296,-0.00613,0.010012,-0.01287,-0.033552,-0.001691,0.008593,-0.001631,-0.008076,0.013342,-0.012104,-0.004876,-0.001151,0.014823,-0.006489,-0.002622,-0.002942,-0.008487,-0.009415,0.008186,-0.01102,-0.005486,-0.006148,-0.001046,0.018317,0.002917,-0.00206,0.02289,0.00874,0.019489,-0.023703,-0.005283


In [64]:
#durationのスケーリング
from sklearn.preprocessing import StandardScaler

std = StandardScaler()
full_merge_df_fin['duration'] = std.fit_transform(pd.DataFrame(full_merge_df_fin['duration']))

# kmeansによるクラスタリング

In [65]:
from sklearn.cluster import KMeans
non_use_cols_for_km = ['goal', 'country', 'category1', 'category2']

In [66]:
# エルボー方による推定。クラスター数を1から20に増やして、それぞれの距離の総和を求める
"""
dist_list =[]
for i in range(1,50):
    print('今はクラスター数{}のクラスタリングを実行しています。'.format(i))
    kmeans= KMeans(n_clusters=i, init='random', random_state=0)
    kmeans.fit(full_merge_df_fin.drop(non_use_cols_for_km, axis=1))
    dist_list.append(kmeans.inertia_)
    
# グラフを表示
plt.plot(range(1,50), dist_list,marker='+')
plt.xlabel('Number of clusters')
plt.ylabel('Distortion')
"""

"\ndist_list =[]\nfor i in range(1,50):\n    print('今はクラスター数{}のクラスタリングを実行しています。'.format(i))\n    kmeans= KMeans(n_clusters=i, init='random', random_state=0)\n    kmeans.fit(full_merge_df_fin.drop(non_use_cols_for_km, axis=1))\n    dist_list.append(kmeans.inertia_)\n    \n# グラフを表示\nplt.plot(range(1,50), dist_list,marker='+')\nplt.xlabel('Number of clusters')\nplt.ylabel('Distortion')\n"

In [67]:
#クラスタリング
clusters_list=[5, 10, 15, 20, 50]

for n_clusters in clusters_list:
  print('今クラスター数「{}」のクラスタリングをしています'.format(n_clusters))
  kmeans = KMeans(init='random', n_clusters=n_clusters, random_state=0)
  kmeans.fit(full_merge_df_fin.drop(non_use_cols_for_km, axis=1))
  full_merge_df_fin['cluster_number_{}'.format(n_clusters)] =pd.Series(kmeans.labels_, name='cluster_number_{}'.format(5))

今クラスター数「5」のクラスタリングをしています
今クラスター数「10」のクラスタリングをしています
今クラスター数「15」のクラスタリングをしています
今クラスター数「20」のクラスタリングをしています
今クラスター数「50」のクラスタリングをしています


# aggrigation特徴量の作成

In [68]:
#aggrigationの際にgroup化するカラム
group_cols = ['goal',	'country', 'category1',	'category2', 'cluster_number_5', 'cluster_number_10', 'cluster_number_15', 'cluster_number_20', 'cluster_number_50']

In [69]:
#関数定義
def agg_func_duration(input_df, cols):
  agg_func = ["mean", "median", "max", "min", "std"]
  tmp = input_df.groupby([c])["duration"].agg(agg_func)
  output_df = pd.merge(input_df, tmp, how="left", on=[c])[agg_func].add_suffix("_agg_func_duration")

  return output_df

def agg_func_state(input_df, cols):
  agg_func = ["mean", "median", "max", "min", "std"]
  tmp = input_df.groupby([c])["duration"].agg(agg_func)
  output_df = pd.merge(input_df, tmp, how="left", on=[c])[agg_func].add_suffix("_agg_func_state")

  return output_df

In [70]:
#実行の準備
full_merge_df_fin["state"] = train['state']
agg_df = agg_func_duration(full_merge_df_fin, group_cols[0])

In [71]:
#関数の実行
for c in group_cols[1:len(group_cols)]:
  agg_df = pd.merge(agg_df, agg_func_duration(full_merge_df_fin, c), left_index=True, right_index=True)

for c in group_cols:
  agg_df = pd.merge(agg_df, agg_func_state(full_merge_df_fin, c), left_index=True, right_index=True)

In [72]:
agg_df

Unnamed: 0,mean_agg_func_duration_x,median_agg_func_duration_x,max_agg_func_duration_x,min_agg_func_duration_x,std_agg_func_duration_x,mean_agg_func_duration_y,median_agg_func_duration_y,max_agg_func_duration_y,min_agg_func_duration_y,std_agg_func_duration_y,mean_agg_func_duration_x.1,median_agg_func_duration_x.1,max_agg_func_duration_x.1,min_agg_func_duration_x.1,std_agg_func_duration_x.1,mean_agg_func_duration_y.1,median_agg_func_duration_y.1,max_agg_func_duration_y.1,min_agg_func_duration_y.1,std_agg_func_duration_y.1,mean_agg_func_duration_x.2,median_agg_func_duration_x.2,max_agg_func_duration_x.2,min_agg_func_duration_x.2,std_agg_func_duration_x.2,mean_agg_func_duration_y.2,median_agg_func_duration_y.2,max_agg_func_duration_y.2,min_agg_func_duration_y.2,std_agg_func_duration_y.2,mean_agg_func_duration_x.3,median_agg_func_duration_x.3,max_agg_func_duration_x.3,min_agg_func_duration_x.3,std_agg_func_duration_x.3,mean_agg_func_duration_y.3,median_agg_func_duration_y.3,max_agg_func_duration_y.3,min_agg_func_duration_y.3,std_agg_func_duration_y.3,mean_agg_func_duration,median_agg_func_duration,max_agg_func_duration,min_agg_func_duration,std_agg_func_duration,mean_agg_func_state_x,median_agg_func_state_x,max_agg_func_state_x,min_agg_func_state_x,std_agg_func_state_x,mean_agg_func_state_y,median_agg_func_state_y,max_agg_func_state_y,min_agg_func_state_y,std_agg_func_state_y,mean_agg_func_state_x.1,median_agg_func_state_x.1,max_agg_func_state_x.1,min_agg_func_state_x.1,std_agg_func_state_x.1,mean_agg_func_state_y.1,median_agg_func_state_y.1,max_agg_func_state_y.1,min_agg_func_state_y.1,std_agg_func_state_y.1,mean_agg_func_state_x.2,median_agg_func_state_x.2,max_agg_func_state_x.2,min_agg_func_state_x.2,std_agg_func_state_x.2,mean_agg_func_state_y.2,median_agg_func_state_y.2,max_agg_func_state_y.2,min_agg_func_state_y.2,std_agg_func_state_y.2,mean_agg_func_state_x.3,median_agg_func_state_x.3,max_agg_func_state_x.3,min_agg_func_state_x.3,std_agg_func_state_x.3,mean_agg_func_state_y.3,median_agg_func_state_y.3,max_agg_func_state_y.3,min_agg_func_state_y.3,std_agg_func_state_y.3,mean_agg_func_state,median_agg_func_state,max_agg_func_state,min_agg_func_state,std_agg_func_state
0,-0.111216,-0.220204,2.261207,-2.618901,0.911437,-0.019053,-0.220204,4.742618,-2.618901,0.983125,-0.090545,-0.220204,4.659904,-2.618901,1.047584,-0.111216,-0.220204,2.261207,-2.618901,0.911437,-0.027740,-0.220204,4.742618,-2.618901,1.007448,1.838584,2.178493,4.742618,0.606933,0.667446,1.838584,2.178493,4.742618,0.606933,0.667446,1.817935,2.178493,4.659904,0.606933,0.648946,1.864998,2.178493,4.659904,0.689647,0.714734,0.258354,-0.220204,2.261207,-0.964627,0.889830,-0.019053,-0.220204,4.742618,-2.618901,0.983125,-0.090545,-0.220204,4.659904,-2.618901,1.047584,-0.111216,-0.220204,2.261207,-2.618901,0.911437,-0.027740,-0.220204,4.742618,-2.618901,1.007448,1.838584,2.178493,4.742618,0.606933,0.667446,1.838584,2.178493,4.742618,0.606933,0.667446,1.817935,2.178493,4.659904,0.606933,0.648946,1.864998,2.178493,4.659904,0.689647,0.714734
1,0.176685,-0.220204,2.261207,-2.618901,1.052416,-0.019053,-0.220204,4.742618,-2.618901,0.983125,0.107058,-0.220204,2.261207,-2.618901,1.016086,0.176685,-0.220204,2.261207,-2.618901,1.052416,-0.003448,-0.220204,4.742618,-2.618901,1.040187,1.818256,2.178493,4.742618,0.606933,0.582256,1.821722,2.178493,4.742618,0.606933,0.581507,1.821722,2.178493,4.742618,0.606933,0.581507,1.821117,2.178493,4.659904,0.606933,0.571680,0.242664,-0.220204,4.329049,-2.618901,1.028299,-0.019053,-0.220204,4.742618,-2.618901,0.983125,0.107058,-0.220204,2.261207,-2.618901,1.016086,0.176685,-0.220204,2.261207,-2.618901,1.052416,-0.003448,-0.220204,4.742618,-2.618901,1.040187,1.818256,2.178493,4.742618,0.606933,0.582256,1.821722,2.178493,4.742618,0.606933,0.581507,1.821722,2.178493,4.742618,0.606933,0.581507,1.821117,2.178493,4.659904,0.606933,0.571680
2,0.102029,-0.220204,4.659904,-2.536187,1.200040,-0.019053,-0.220204,4.742618,-2.618901,0.983125,-0.090545,-0.220204,4.659904,-2.618901,1.047584,0.102029,-0.220204,4.659904,-2.536187,1.200040,-0.027740,-0.220204,4.742618,-2.618901,1.007448,-0.413034,-0.220204,0.772360,-2.618901,0.541647,-0.388376,-0.220204,0.772360,-2.536187,0.518117,-0.388376,-0.220204,0.772360,-2.536187,0.518117,-0.384908,-0.220204,0.772360,-2.536187,0.507846,-0.074289,-0.220204,4.659904,-2.536187,0.931173,-0.019053,-0.220204,4.742618,-2.618901,0.983125,-0.090545,-0.220204,4.659904,-2.618901,1.047584,0.102029,-0.220204,4.659904,-2.536187,1.200040,-0.027740,-0.220204,4.742618,-2.618901,1.007448,-0.413034,-0.220204,0.772360,-2.618901,0.541647,-0.388376,-0.220204,0.772360,-2.536187,0.518117,-0.388376,-0.220204,0.772360,-2.536187,0.518117,-0.384908,-0.220204,0.772360,-2.536187,0.507846
3,-0.111216,-0.220204,2.261207,-2.618901,0.911437,-0.019053,-0.220204,4.742618,-2.618901,0.983125,-0.090545,-0.220204,4.659904,-2.618901,1.047584,-0.111216,-0.220204,2.261207,-2.618901,0.911437,-0.027740,-0.220204,4.742618,-2.618901,1.007448,-0.413034,-0.220204,0.772360,-2.618901,0.541647,-0.388376,-0.220204,0.772360,-2.536187,0.518117,-0.388376,-0.220204,0.772360,-2.536187,0.518117,-0.384908,-0.220204,0.772360,-2.536187,0.507846,-0.145838,-0.220204,4.659904,-2.618901,0.965364,-0.019053,-0.220204,4.742618,-2.618901,0.983125,-0.090545,-0.220204,4.659904,-2.618901,1.047584,-0.111216,-0.220204,2.261207,-2.618901,0.911437,-0.027740,-0.220204,4.742618,-2.618901,1.007448,-0.413034,-0.220204,0.772360,-2.618901,0.541647,-0.388376,-0.220204,0.772360,-2.536187,0.518117,-0.388376,-0.220204,0.772360,-2.536187,0.518117,-0.384908,-0.220204,0.772360,-2.536187,0.507846
4,-0.035978,-0.220204,3.998195,-2.039905,1.003031,-0.019053,-0.220204,4.742618,-2.618901,0.983125,0.015340,-0.220204,4.659904,-2.536187,1.007883,-0.035978,-0.220204,3.998195,-2.039905,1.003031,-0.027740,-0.220204,4.742618,-2.618901,1.007448,-0.413034,-0.220204,0.772360,-2.618901,0.541647,-0.448559,-0.220204,0.689647,-2.618901,0.568386,-0.448559,-0.220204,0.689647,-2.618901,0.568386,-0.488092,-0.220204,0.689647,-2.536187,0.565389,-0.145838,-0.220204,4.659904,-2.618901,0.965364,-0.019053,-0.220204,4.742618,-2.618901,0.983125,0.015340,-0.220204,4.659904,-2.536187,1.007883,-0.035978,-0.220204,3.998195,-2.039905,1.003031,-0.027740,-0.220204,4.742618,-2.618901,1.007448,-0.413034,-0.220204,0.772360,-2.618901,0.541647,-0.448559,-0.220204,0.689647,-2.618901,0.568386,-0.448559,-0.220204,0.689647,-2.618901,0.568386,-0.488092,-0.220204,0.689647,-2.536187,0.565389
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19586,0.156565,-0.220204,4.659904,-2.536187,1.082846,-0.019053,-0.220204,4.742618,-2.618901,0.983125,0.055309,-0.220204,4.742618,-2.618901,1.020367,0.156565,-0.220204,4.659904,-2.536187,1.082846,-0.015999,-0.220204,4.659904,-2.618901,0.901310,-0.273286,-0.220204,2.261207,-2.618901,0.675451,-0.273286,-0.220204,2.261207,-2.618901,0.675451,-0.273286,-0.220204,2.261207,-2.618901,0.675451,-0.145463,-0.220204,1.434070,-2.288046,0.609274,0.083215,-0.220204,4.659904,-2.536187,0.975628,-0.019053,-0.220204,4.742618,-2.618901,0.983125,0.055309,-0.220204,4.742618,-2.618901,1.020367,0.156565,-0.220204,4.659904,-2.536187,1.082846,-0.015999,-0.220204,4.659904,-2.618901,0.901310,-0.273286,-0.220204,2.261207,-2.618901,0.675451,-0.273286,-0.220204,2.261207,-2.618901,0.675451,-0.273286,-0.220204,2.261207,-2.618901,0.675451,-0.145463,-0.220204,1.434070,-2.288046,0.609274
19587,-0.086383,-0.220204,2.261207,-2.536187,0.710982,-0.019053,-0.220204,4.742618,-2.618901,0.983125,-0.017138,-0.220204,4.659904,-2.536187,0.925905,-0.086383,-0.220204,2.261207,-2.536187,0.710982,-0.015999,-0.220204,4.659904,-2.618901,0.901310,-0.273286,-0.220204,2.261207,-2.618901,0.675451,-0.273286,-0.220204,2.261207,-2.618901,0.675451,-0.273286,-0.220204,2.261207,-2.618901,0.675451,-0.292947,-0.220204,1.020501,-2.122619,0.401782,0.049235,-0.220204,3.419199,-2.122619,0.895374,-0.019053,-0.220204,4.742618,-2.618901,0.983125,-0.017138,-0.220204,4.659904,-2.536187,0.925905,-0.086383,-0.220204,2.261207,-2.536187,0.710982,-0.015999,-0.220204,4.659904,-2.618901,0.901310,-0.273286,-0.220204,2.261207,-2.618901,0.675451,-0.273286,-0.220204,2.261207,-2.618901,0.675451,-0.273286,-0.220204,2.261207,-2.618901,0.675451,-0.292947,-0.220204,1.020501,-2.122619,0.401782
19588,0.089972,-0.220204,2.261207,-2.618901,1.104731,-0.019053,-0.220204,4.742618,-2.618901,0.983125,0.055309,-0.220204,4.742618,-2.618901,1.020367,0.089972,-0.220204,2.261207,-2.618901,1.104731,0.125690,-0.220204,2.261207,-2.122619,1.079338,0.117570,-0.220204,2.261207,-2.122619,1.073287,0.195600,-0.220204,2.261207,-2.453474,1.242297,0.195600,-0.220204,2.261207,-2.453474,1.242297,0.195600,-0.220204,2.261207,-2.453474,1.242297,-0.074289,-0.220204,4.659904,-2.536187,0.931173,-0.019053,-0.220204,4.742618,-2.618901,0.983125,0.055309,-0.220204,4.742618,-2.618901,1.020367,0.089972,-0.220204,2.261207,-2.618901,1.104731,0.125690,-0.220204,2.261207,-2.122619,1.079338,0.117570,-0.220204,2.261207,-2.122619,1.073287,0.195600,-0.220204,2.261207,-2.453474,1.242297,0.195600,-0.220204,2.261207,-2.453474,1.242297,0.195600,-0.220204,2.261207,-2.453474,1.242297
19589,-0.180314,-0.220204,2.261207,-2.453474,1.011184,-0.019053,-0.220204,4.742618,-2.618901,0.983125,-0.141818,-0.220204,3.088344,-2.453474,1.024294,-0.180314,-0.220204,2.261207,-2.453474,1.011184,-0.027740,-0.220204,4.742618,-2.618901,1.007448,-0.413034,-0.220204,0.772360,-2.618901,0.541647,-0.388376,-0.220204,0.772360,-2.536187,0.518117,-0.388376,-0.220204,0.772360,-2.536187,0.518117,-0.384908,-0.220204,0.772360,-2.536187,0.507846,0.094585,-0.220204,4.577190,-2.288046,0.958105,-0.019053,-0.220204,4.742618,-2.618901,0.983125,-0.141818,-0.220204,3.088344,-2.453474,1.024294,-0.180314,-0.220204,2.261207,-2.453474,1.011184,-0.027740,-0.220204,4.742618,-2.618901,1.007448,-0.413034,-0.220204,0.772360,-2.618901,0.541647,-0.388376,-0.220204,0.772360,-2.536187,0.518117,-0.388376,-0.220204,0.772360,-2.536187,0.518117,-0.384908,-0.220204,0.772360,-2.536187,0.507846


In [73]:
#データのマージ
full_merge_df_fin = pd.merge(full_merge_df_fin, agg_df, left_index=True, right_index=True)
full_merge_df_fin.drop(['state'], axis=1, inplace=True)

# データセットの整理

In [74]:
train_df = pd.DataFrame(full_merge_df_fin.loc[:len(train),])
test_df = pd.DataFrame(full_merge_df_fin.loc[len(train):19591,])

In [75]:
test_df = test_df.reset_index()
test_df.drop(['index'], axis=1, inplace=True)
test_df.head()

Unnamed: 0,goal,country,duration,category1,category2,goal_1-1000,goal_100000+,goal_10001-11000,goal_1001-2000,goal_11001-12000,goal_12001-13000,goal_13001-14000,goal_14001-15000,goal_15001-16000,goal_16001-17000,goal_17001-18000,goal_18001-19000,goal_19001-20000,goal_20001-21000,goal_2001-3000,goal_21001-22000,goal_22001-23000,goal_23001-24000,goal_24001-25000,goal_25001-26000,goal_26001-27000,goal_27001-28000,goal_28001-29000,goal_29001-30000,goal_30001-31000,goal_3001-4000,goal_31001-32000,goal_32001-33000,goal_33001-34000,goal_34001-35000,goal_35001-36000,goal_36001-37000,goal_37001-38000,goal_38001-39000,goal_39001-40000,goal_40001-41000,goal_4001-5000,goal_41001-42000,goal_42001-43000,goal_43001-44000,goal_44001-45000,goal_45001-46000,goal_46001-47000,goal_47001-48000,goal_48001-49000,goal_49001-50000,goal_50001-51000,goal_5001-6000,goal_51001-52000,goal_52001-53000,goal_53001-54000,goal_54001-55000,goal_55001-56000,goal_56001-57000,goal_57001-58000,goal_58001-59000,goal_59001-60000,goal_60001-61000,goal_6001-7000,goal_61001-62000,goal_62001-63000,goal_63001-64000,goal_64001-65000,goal_65001-66000,goal_66001-67000,goal_67001-68000,goal_68001-69000,goal_69001-70000,goal_70001-71000,goal_7001-8000,goal_71001-72000,goal_72001-73000,goal_73001-74000,goal_74001-75000,goal_75001-76000,goal_76001-77000,goal_77001-78000,goal_78001-79000,goal_79001-80000,goal_80001-81000,goal_8001-9000,goal_81001-82000,goal_82001-83000,goal_83001-84000,goal_84001-85000,goal_85001-86000,goal_86001-87000,goal_87001-88000,goal_88001-89000,goal_89001-90000,goal_90001-91000,goal_9001-10000,goal_91001-92000,goal_92001-93000,goal_93001-94000,goal_94001-95000,goal_95001-96000,goal_96001-97000,goal_97001-98000,goal_98001-99000,goal_99001-100000,country_AT,country_AU,country_BE,country_CA,country_CH,country_DE,country_DK,country_ES,country_FR,country_GB,country_HK,country_IE,country_IT,country_JP,country_LU,country_MX,country_NL,country_NO,country_NZ,country_SE,country_SG,country_US,category1_art,category1_comics,category1_crafts,category1_dance,category1_design,category1_fashion,category1_film & video,category1_food,category1_games,category1_journalism,category1_music,category1_photography,category1_publishing,category1_technology,category1_theater,category2_3d printing,category2_academic,category2_accessories,category2_action,category2_animals,category2_animation,category2_anthologies,category2_apparel,category2_apps,category2_architecture,category2_art books,category2_audio,category2_bacon,category2_blues,category2_calendars,category2_camera equipment,category2_candles,category2_ceramics,category2_children's books,category2_childrenswear,category2_chiptune,category2_civic design,category2_classical music,category2_comedy,category2_comic books,category2_community gardens,category2_conceptual art,category2_cookbooks,category2_country & folk,category2_couture,category2_crochet,category2_digital art,category2_diy,category2_diy electronics,category2_documentary,category2_drama,category2_drinks,category2_electronic music,category2_embroidery,category2_events,category2_experimental,category2_fabrication tools,category2_faith,category2_family,category2_fantasy,category2_farmer's markets,category2_farms,category2_festivals,category2_fiction,category2_fine art,category2_flight,category2_food trucks,category2_footwear,category2_gadgets,category2_gaming hardware,category2_glass,category2_graphic design,category2_graphic novels,category2_hardware,category2_hip-hop,category2_horror,category2_illustration,category2_immersive,category2_indie rock,category2_installations,category2_interactive design,category2_jazz,category2_jewelry,category2_kids,category2_knitting,category2_latin,category2_letterpress,category2_literary journals,category2_literary spaces,category2_live games,category2_makerspaces,category2_metal,category2_mixed media,category2_mobile games,category2_movie theaters,category2_music videos,category2_musical,category2_narrative film,category2_nature,category2_nonfiction,category2_painting,category2_people,category2_performance art,category2_performances,category2_periodicals,category2_pet fashion,category2_photo,category2_photobooks,category2_places,category2_playing cards,category2_plays,category2_poetry,category2_pop,category2_pottery,category2_print,category2_printing,category2_product design,category2_public art,category2_punk,category2_puzzles,category2_quilts,category2_r&b,...,345,346,347,348,349,350,351,352,353,354,355,356,357,358,359,360,361,362,363,364,365,366,367,368,369,370,371,372,373,374,375,376,377,378,379,380,381,382,383,384,385,386,387,388,389,390,391,392,393,394,395,396,397,398,399,400,401,402,403,404,405,406,407,408,409,410,411,412,413,414,415,416,417,418,419,420,421,422,423,424,425,426,427,428,429,430,431,432,433,434,435,436,437,438,439,440,441,442,443,444,445,446,447,448,449,450,451,452,453,454,455,456,457,458,459,460,461,462,463,464,465,466,467,468,469,470,471,472,473,474,475,476,477,478,479,480,481,482,483,484,485,486,487,488,489,490,491,492,493,494,495,496,497,498,499,cluster_number_5,cluster_number_10,cluster_number_15,cluster_number_20,cluster_number_50,mean_agg_func_duration_x,median_agg_func_duration_x,max_agg_func_duration_x,min_agg_func_duration_x,std_agg_func_duration_x,mean_agg_func_duration_y,median_agg_func_duration_y,max_agg_func_duration_y,min_agg_func_duration_y,std_agg_func_duration_y,mean_agg_func_duration_x.1,median_agg_func_duration_x.1,max_agg_func_duration_x.1,min_agg_func_duration_x.1,std_agg_func_duration_x.1,mean_agg_func_duration_y.1,median_agg_func_duration_y.1,max_agg_func_duration_y.1,min_agg_func_duration_y.1,std_agg_func_duration_y.1,mean_agg_func_duration_x.2,median_agg_func_duration_x.2,max_agg_func_duration_x.2,min_agg_func_duration_x.2,std_agg_func_duration_x.2,mean_agg_func_duration_y.2,median_agg_func_duration_y.2,max_agg_func_duration_y.2,min_agg_func_duration_y.2,std_agg_func_duration_y.2,mean_agg_func_duration_x.3,median_agg_func_duration_x.3,max_agg_func_duration_x.3,min_agg_func_duration_x.3,std_agg_func_duration_x.3,mean_agg_func_duration_y.3,median_agg_func_duration_y.3,max_agg_func_duration_y.3,min_agg_func_duration_y.3,std_agg_func_duration_y.3,mean_agg_func_duration,median_agg_func_duration,max_agg_func_duration,min_agg_func_duration,std_agg_func_duration,mean_agg_func_state_x,median_agg_func_state_x,max_agg_func_state_x,min_agg_func_state_x,std_agg_func_state_x,mean_agg_func_state_y,median_agg_func_state_y,max_agg_func_state_y,min_agg_func_state_y,std_agg_func_state_y,mean_agg_func_state_x.1,median_agg_func_state_x.1,max_agg_func_state_x.1,min_agg_func_state_x.1,std_agg_func_state_x.1,mean_agg_func_state_y.1,median_agg_func_state_y.1,max_agg_func_state_y.1,min_agg_func_state_y.1,std_agg_func_state_y.1,mean_agg_func_state_x.2,median_agg_func_state_x.2,max_agg_func_state_x.2,min_agg_func_state_x.2,std_agg_func_state_x.2,mean_agg_func_state_y.2,median_agg_func_state_y.2,max_agg_func_state_y.2,min_agg_func_state_y.2,std_agg_func_state_y.2,mean_agg_func_state_x.3,median_agg_func_state_x.3,max_agg_func_state_x.3,min_agg_func_state_x.3,std_agg_func_state_x.3,mean_agg_func_state_y.3,median_agg_func_state_y.3,max_agg_func_state_y.3,min_agg_func_state_y.3,std_agg_func_state_y.3,mean_agg_func_state,median_agg_func_state,max_agg_func_state,min_agg_func_state,std_agg_func_state
0,47,8,-0.220204,3,88,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,-0.000984,0.034838,-0.003573,0.039774,0.013612,-0.003691,0.008234,0.006419,0.001161,0.015044,-0.002207,0.000528,0.025927,0.026037,0.0011,-0.002247,-0.017118,-0.010721,-0.00691,-0.000629,-0.010969,-0.002555,-0.014714,-0.009069,-0.021026,-0.006836,-0.017208,-0.009586,-0.001602,0.01088,-0.014753,-0.003327,0.009237,0.004904,-0.007608,-0.002172,0.018232,0.005846,0.016005,0.003866,-0.014537,-0.011469,-0.009764,-0.013541,0.010665,0.008069,0.001892,0.009173,-0.002305,0.006347,0.004324,0.007947,0.007222,-0.005955,-9.1e-05,0.004159,0.006389,0.013287,-0.001887,0.009689,0.007668,-0.003981,-0.003275,-0.012006,-0.000299,-0.00356,-0.007357,-0.003438,0.004014,-0.02047,-0.006904,-0.022393,0.008644,0.004803,0.001542,-0.003995,-0.015452,-0.014763,-0.014546,-0.019564,-0.005173,0.006941,0.00603,-0.001419,-0.006549,-0.001794,0.001814,0.017786,-0.001935,-0.015466,-0.002798,-0.003622,0.007062,-0.002658,-0.004427,0.009385,-0.001799,0.014488,0.007751,0.004521,0.015259,0.014296,0.012405,0.010204,0.006897,0.006402,-0.009508,0.01676,-0.011879,-0.016996,0.003875,0.002927,-0.003975,-0.00359,-0.001091,-0.008191,-0.001526,-0.012201,-0.018957,0.00861,-0.010145,-0.001563,-0.00635,0.006497,0.00115,0.028544,0.008174,-0.016028,0.022184,0.005073,-0.003294,0.001676,-0.004215,-0.012513,0.003857,-0.008361,-0.006642,0.000648,0.018217,0.003146,0.003786,-0.000696,0.006335,-0.008617,0.019251,-0.002178,0.007918,-0.021131,-0.01538,0.007986,-0.00238,0.007986,-0.00579,-0.005702,-0.003097,1,8,9,6,20,-0.151761,-0.220204,2.261207,-2.122619,0.971921,0.241281,-0.220204,3.419199,-2.122619,1.11337,-0.150302,-0.220204,2.261207,-2.288046,0.986859,-0.151761,-0.220204,2.261207,-2.122619,0.971921,0.317648,-0.220204,3.088344,-2.536187,1.146353,0.309751,-0.220204,2.261207,-1.957192,1.07743,0.309751,-0.220204,2.261207,-1.957192,1.07743,0.309751,-0.220204,2.261207,-1.957192,1.07743,0.35689,-0.220204,2.261207,-1.957192,1.090922,0.02402,-0.220204,2.261207,-2.618901,0.906296,0.241281,-0.220204,3.419199,-2.122619,1.11337,-0.150302,-0.220204,2.261207,-2.288046,0.986859,-0.151761,-0.220204,2.261207,-2.122619,0.971921,0.317648,-0.220204,3.088344,-2.536187,1.146353,0.309751,-0.220204,2.261207,-1.957192,1.07743,0.309751,-0.220204,2.261207,-1.957192,1.07743,0.309751,-0.220204,2.261207,-1.957192,1.07743,0.35689,-0.220204,2.261207,-1.957192,1.090922
1,58,9,-0.7992,12,18,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,-0.004427,-0.021476,-0.000363,-0.016897,0.002961,-0.009606,0.014002,0.00245,0.002283,-0.016825,-0.018373,-0.001403,-0.012852,-0.005385,-0.01222,0.008674,0.012852,-0.001967,0.008863,-0.012112,0.004895,0.005733,-0.01456,0.003802,-0.018187,0.00912,-0.019918,-0.002888,-0.015073,-0.007412,0.005342,0.010986,-0.01007,0.00168,0.008104,-0.014282,0.0052,-0.005159,0.000828,-0.006135,-0.006217,0.003806,0.005566,-0.006975,-0.005214,0.007915,0.00196,0.001975,0.009425,-0.011753,0.001016,0.00068,0.001778,-0.007286,0.00437,-0.009839,0.012124,-0.020491,0.006225,0.020799,-0.002585,0.006019,-0.005409,-0.002034,0.01694,-0.013216,0.007841,-0.002075,-0.000428,0.01535,-0.005985,-0.006671,-0.003464,4e-06,0.006677,-0.000846,0.008479,0.015513,-0.001816,0.007031,-0.012606,0.010732,-0.001324,-0.003121,-0.010269,-0.003563,0.022143,-0.016486,-0.000289,-0.007878,-0.004669,-0.0175,-0.000761,0.009635,0.017621,-0.00405,-0.014347,-0.002864,-0.000261,0.007115,-0.017447,-0.013971,0.014491,-0.005646,0.00273,-0.004822,0.013541,0.003747,-0.004766,0.003685,0.006431,-0.000133,-0.000455,-0.019458,-0.013538,0.007135,-0.001954,0.007295,0.008438,-0.013609,-0.008611,-0.004883,0.003007,0.011974,-0.002643,-0.000423,-0.003229,-0.005538,0.002357,-0.014187,0.006038,0.007101,-0.002598,0.002047,-0.003335,0.001796,-0.000256,-0.004129,-0.005541,0.011379,-0.011103,0.009465,-0.002834,-0.01872,-0.01426,-0.001821,-0.008361,-0.003036,-0.001925,0.011722,-0.019124,0.005313,0.008361,-0.00589,-0.003665,3,3,5,7,3,-0.086383,-0.220204,2.261207,-2.536187,0.710982,-0.101162,-0.220204,2.261207,-2.618901,0.983602,-0.017138,-0.220204,4.659904,-2.536187,0.925905,-0.086383,-0.220204,2.261207,-2.536187,0.710982,-0.015999,-0.220204,4.659904,-2.618901,0.90131,0.450567,-0.13749,3.419199,-2.536187,1.037515,0.450567,-0.13749,3.419199,-2.536187,1.037515,-0.151722,-0.220204,1.43407,-2.122619,0.472888,-0.151722,-0.220204,1.43407,-2.122619,0.472888,0.060212,-0.220204,4.742618,-2.37076,0.970213,-0.101162,-0.220204,2.261207,-2.618901,0.983602,-0.017138,-0.220204,4.659904,-2.536187,0.925905,-0.086383,-0.220204,2.261207,-2.536187,0.710982,-0.015999,-0.220204,4.659904,-2.618901,0.90131,0.450567,-0.13749,3.419199,-2.536187,1.037515,0.450567,-0.13749,3.419199,-2.536187,1.037515,-0.151722,-0.220204,1.43407,-2.122619,0.472888,-0.151722,-0.220204,1.43407,-2.122619,0.472888
2,58,9,-0.220204,14,95,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,...,-0.00505,-0.005621,-0.015093,0.007518,-0.011758,-0.009981,-0.005099,-0.004647,0.032871,0.004337,-0.023483,-0.008003,-0.000499,-0.014075,-0.023528,0.002561,-0.004719,0.00333,-0.021491,0.00677,0.012718,0.002521,0.0026,-0.014251,0.010222,-0.007143,-0.007016,0.007863,-0.016672,-0.018133,-0.003722,0.014673,0.011161,-0.000587,-0.002127,0.002376,-0.001329,-0.008304,-0.007994,-0.004381,-0.001098,-0.011307,-0.003666,-0.0201,0.010839,-0.009389,0.007506,0.01138,0.016563,0.008548,0.017225,0.006264,0.001164,-0.015692,-0.015505,0.010201,0.014067,-0.001855,0.01788,0.010706,-0.018697,0.018996,0.006903,0.031233,-0.017406,-0.008674,0.007426,0.002886,-0.004278,0.012441,-0.006212,-0.019287,-0.013915,0.01824,-0.010401,0.005165,0.003006,0.007866,-0.001346,-0.005582,-0.001336,0.008723,0.007638,0.007637,-0.007417,-0.019595,0.006391,0.006051,-0.000788,-0.018177,-0.017427,-0.002496,-0.01457,-0.009951,-0.004448,-0.014534,-0.00224,0.014056,0.011623,0.002721,0.014224,0.005354,-0.025698,0.005029,-0.004947,-0.006009,0.006167,-0.017886,-0.021811,0.00119,0.01547,-0.012132,-0.007061,-0.017092,0.002337,-0.000158,-0.003466,-0.00586,0.007229,0.000675,0.024082,0.001725,-0.005766,0.011966,0.002577,-0.009901,0.002275,0.003417,0.008306,-0.006081,-0.009071,0.003467,0.006103,0.006602,0.004079,-0.011015,-0.016418,0.001936,0.00546,-0.008011,0.027154,-0.015082,-0.010121,-0.01861,-0.011247,0.012284,0.011037,-0.028226,0.005114,0.001663,0.009339,-0.013712,-0.003702,0.003392,0.011048,3,9,6,11,4,-0.180314,-0.220204,2.261207,-2.453474,1.011184,-0.101162,-0.220204,2.261207,-2.618901,0.983602,-0.141818,-0.220204,3.088344,-2.453474,1.024294,-0.180314,-0.220204,2.261207,-2.453474,1.011184,-0.015999,-0.220204,4.659904,-2.618901,0.90131,-0.273286,-0.220204,2.261207,-2.618901,0.675451,-0.273286,-0.220204,2.261207,-2.618901,0.675451,-0.273286,-0.220204,2.261207,-2.618901,0.675451,-0.363166,-0.220204,1.103215,-1.874478,0.461796,0.060212,-0.220204,4.742618,-2.37076,0.970213,-0.101162,-0.220204,2.261207,-2.618901,0.983602,-0.141818,-0.220204,3.088344,-2.453474,1.024294,-0.180314,-0.220204,2.261207,-2.453474,1.011184,-0.015999,-0.220204,4.659904,-2.618901,0.90131,-0.273286,-0.220204,2.261207,-2.618901,0.675451,-0.273286,-0.220204,2.261207,-2.618901,0.675451,-0.273286,-0.220204,2.261207,-2.618901,0.675451,-0.363166,-0.220204,1.103215,-1.874478,0.461796
3,3,3,-1.543623,0,31,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,-0.031185,0.022542,0.009058,0.001666,-0.002277,0.006856,-0.026917,-0.003566,-0.027549,-0.020131,0.013719,0.007109,0.001986,-0.036465,0.006719,-0.003669,-0.004686,0.034341,0.018795,0.01324,-0.001255,-0.006027,-0.023697,0.015554,-0.000161,-0.044869,-0.002674,0.022245,-0.010389,-0.000319,-0.016969,0.007949,-0.012524,-0.006436,0.036737,-0.003718,-0.011202,-0.019697,-0.00996,-0.030478,0.00187,-0.024705,0.005181,0.003832,-0.035118,-0.010653,-0.042911,-0.003827,0.021104,0.028652,0.021851,-0.022589,-0.034504,-0.007385,0.003182,-0.003543,0.015521,0.032358,-0.039422,-0.013262,-0.024199,-0.016294,-0.018157,-0.025486,0.016119,0.037032,0.005298,-0.021354,0.060613,0.008291,-0.022712,-0.060061,-0.020397,0.005724,-0.009062,0.017477,-0.022576,-0.000176,0.008565,-0.020921,0.020849,0.011925,-0.022973,-0.021528,0.010917,0.005882,0.020515,-0.02758,-0.006974,-0.039889,0.004776,-0.018095,0.021756,-0.008392,0.010151,-0.010901,0.058467,0.029071,0.005004,0.02351,0.050721,-0.010991,-0.013098,0.018155,0.011685,-0.040871,-0.012607,0.000503,0.020739,-0.044655,0.051952,0.01638,0.002817,0.031982,0.004975,-0.042538,-0.036755,-0.033942,0.057078,-0.01419,-0.038603,-0.013453,-0.029779,-0.000445,-0.001317,0.01642,0.008169,-0.000342,0.012848,0.021852,0.039255,0.018981,-0.018713,0.001178,0.004314,0.041813,-0.003889,0.023964,0.001506,-0.006007,0.000776,-0.003807,-0.008462,-0.006664,-0.002673,0.022643,0.032676,0.032503,0.006117,-0.012206,-0.00731,-0.005235,-0.009406,0.022069,0.033493,4,1,12,17,8,-0.171919,-0.220204,2.261207,-2.453474,1.011236,0.072088,-0.220204,2.261207,-2.618901,1.027182,-0.090545,-0.220204,4.659904,-2.618901,1.047584,-0.171919,-0.220204,2.261207,-2.453474,1.011236,-0.02774,-0.220204,4.742618,-2.618901,1.007448,-0.413034,-0.220204,0.77236,-2.618901,0.541647,-0.448559,-0.220204,0.689647,-2.618901,0.568386,-0.448559,-0.220204,0.689647,-2.618901,0.568386,-0.439177,-0.220204,0.606933,-2.618901,0.550042,-0.145838,-0.220204,4.659904,-2.618901,0.965364,0.072088,-0.220204,2.261207,-2.618901,1.027182,-0.090545,-0.220204,4.659904,-2.618901,1.047584,-0.171919,-0.220204,2.261207,-2.453474,1.011236,-0.02774,-0.220204,4.742618,-2.618901,1.007448,-0.413034,-0.220204,0.77236,-2.618901,0.541647,-0.448559,-0.220204,0.689647,-2.618901,0.568386,-0.448559,-0.220204,0.689647,-2.618901,0.568386,-0.439177,-0.220204,0.606933,-2.618901,0.550042
4,0,21,-0.220204,10,59,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0.00869,0.020394,-0.021942,-0.022788,0.028972,-0.011574,0.020109,0.002627,-0.027968,-0.022335,-0.030729,-0.015611,-0.005944,0.024974,-0.007787,-0.043168,-0.01236,-0.024761,-0.01958,0.013891,0.048989,0.011552,0.017412,-0.011547,0.004126,0.005096,-0.003496,0.000716,-0.006616,-0.002785,-0.0238,-0.013594,-0.019666,-0.012395,-0.001074,0.007937,-0.004769,-0.001745,0.021968,-0.006099,-0.009071,0.015685,0.010367,0.026728,0.0023,-0.000561,0.002871,0.02024,-0.006905,-0.020723,0.017639,0.001256,-0.014582,0.034066,-0.011293,-0.021551,-0.002663,0.010491,0.014295,-0.002426,0.023669,0.004986,0.010939,0.009246,-0.003665,0.000505,-0.002528,-0.004668,-0.018369,-0.014524,-0.022574,0.001604,0.006496,0.014886,0.014642,-0.001321,0.007074,-0.006293,0.005441,-0.027756,0.012668,0.011319,-0.001354,-0.013355,-0.003236,-0.009066,-0.013782,-0.009224,0.006058,0.032272,0.038926,-0.006236,0.028205,-0.017737,-0.023855,-0.004413,0.011766,0.01747,-0.005495,-0.019668,-0.00428,0.013034,-0.0116,-0.049219,-0.00356,-0.012499,0.013318,0.017074,-0.009058,-0.008936,-0.00943,0.002706,-0.005553,0.017459,-0.002502,0.011882,0.013102,-0.009108,-0.012176,0.001199,0.005413,0.013283,-0.002595,-0.004682,-0.003104,-0.015987,-0.006103,0.002453,-0.024784,0.003137,0.015224,0.021127,-0.006963,0.019253,0.001227,0.01923,-0.00273,0.007624,0.015083,-0.002168,0.001353,0.02418,0.008388,-0.020112,0.000475,-0.014402,0.01728,-0.011894,-0.005065,-0.029549,0.007371,0.009992,-0.013016,0.002926,-0.006856,2,7,8,5,10,0.089972,-0.220204,2.261207,-2.618901,1.104731,-0.019053,-0.220204,4.742618,-2.618901,0.983125,0.055309,-0.220204,4.742618,-2.618901,1.020367,0.089972,-0.220204,2.261207,-2.618901,1.104731,-0.003448,-0.220204,4.742618,-2.618901,1.040187,-0.425558,-0.220204,0.77236,-2.618901,0.553843,-0.398615,-0.220204,0.689647,-2.618901,0.515184,-0.398615,-0.220204,0.689647,-2.618901,0.515184,-0.395971,-0.220204,0.606933,-2.618901,0.530381,-0.271099,-0.220204,4.742618,-2.618901,1.0622,-0.019053,-0.220204,4.742618,-2.618901,0.983125,0.055309,-0.220204,4.742618,-2.618901,1.020367,0.089972,-0.220204,2.261207,-2.618901,1.104731,-0.003448,-0.220204,4.742618,-2.618901,1.040187,-0.425558,-0.220204,0.77236,-2.618901,0.553843,-0.398615,-0.220204,0.689647,-2.618901,0.515184,-0.398615,-0.220204,0.689647,-2.618901,0.515184,-0.395971,-0.220204,0.606933,-2.618901,0.530381


# lgbによる予測

In [76]:
target = train['state']

In [82]:
# 3分割交差検証を指定し、インスタンス化
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score

kf = KFold(n_splits=5)  # 3分割交差検証のためにインスタンス化

# スコアとモデルを格納するリスト
score_list = []
models = []

for fold_, (train_index, valid_index) in enumerate(kf.split(train, target)):
    train_x = train_df.iloc[train_index]
    valid_x = train_df.iloc[valid_index]
    train_y = target[train_index]
    valid_y = target[valid_index]
    
    print(f'fold{fold_ + 1} start')

    gbm = lgb.LGBMClassifier(objective='binary',num_boost_round=50000, learning_rate=0.01)
    gbm.fit(train_x, train_y, eval_set = [(valid_x, valid_y)],
                early_stopping_rounds=100,
                verbose= 100) # 学習の状況を表示しない
    
    oof = gbm.predict(valid_x, num_iteration=gbm.best_iteration_)
    score_list.append(round(accuracy_score(valid_y, oof)*100,2))
    models.append(gbm)  # 学習が終わったモデルをリストに入れておく
    print(f'fold{fold_ + 1} end\n' )
print(score_list, '平均score', np.mean(score_list), "%")  

fold1 start
Training until validation scores don't improve for 100 rounds.
[100]	valid_0's binary_logloss: 0.542027
[200]	valid_0's binary_logloss: 0.486342
[300]	valid_0's binary_logloss: 0.459796
[400]	valid_0's binary_logloss: 0.446184
[500]	valid_0's binary_logloss: 0.436001
[600]	valid_0's binary_logloss: 0.429973
[700]	valid_0's binary_logloss: 0.425442
[800]	valid_0's binary_logloss: 0.422946
[900]	valid_0's binary_logloss: 0.420731
[1000]	valid_0's binary_logloss: 0.419986
[1100]	valid_0's binary_logloss: 0.419328
[1200]	valid_0's binary_logloss: 0.418863
[1300]	valid_0's binary_logloss: 0.418669
Early stopping, best iteration is:
[1254]	valid_0's binary_logloss: 0.418446
fold1 end

fold2 start
Training until validation scores don't improve for 100 rounds.
[100]	valid_0's binary_logloss: 0.541932
[200]	valid_0's binary_logloss: 0.486481
[300]	valid_0's binary_logloss: 0.461731
[400]	valid_0's binary_logloss: 0.446175
[500]	valid_0's binary_logloss: 0.435845
[600]	valid_0's bina

In [83]:
# testの予測
test_pred = np.zeros((len(test), 5))  # 行:len(test), 列:3のall zeroの配列を用意

for fold_, gbm in enumerate(models):  # 学習ずみのmodelをgbmに入れる
    pred_ = gbm.predict(test_df, num_iteration=gbm.best_iteration_)  # testの予測
    test_pred[:, fold_] = pred_  # １回目は0列目、2回目は1列目、2回目は3列目に格納

pred = (np.mean(test_pred, axis=1) > 0.5).astype(int)  # 平均をとって、0と１に変換

# submission

In [84]:
sub = pd.DataFrame(test['id'])

In [85]:
sub['state'] = pred
sub.head()

Unnamed: 0,id,state
0,test_00000,0
1,test_00001,1
2,test_00002,1
3,test_00003,0
4,test_00004,0


In [86]:
sub.to_csv('sub/11_lgb4.csv',index=False, header=None)