## import library

In [57]:
# data analysis and wrangling
import pandas as pd
import numpy as np
import random as rnd

# visualization
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# using machine learning tensorflow
import tensorflow as tf
import tensorflow_decision_forests as tfdf

print(f"Found TF-DF {tfdf.__version__}")

Found TF-DF 1.5.0


## Collection Data

In [58]:
gender = pd.read_csv("/kaggle/input/titanic/gender_submission.csv",delimiter=',')
test = pd.read_csv("/kaggle/input/titanic/test.csv",delimiter=',')
train = pd.read_csv("/kaggle/input/titanic/train.csv",delimiter=',')

In [59]:
gender.head()

Unnamed: 0,PassengerId,Survived
0,892,0
1,893,1
2,894,0
3,895,0
4,896,1


In [60]:
train.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [61]:
test.head()

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S


# Data Understanding

In [62]:
print(train.columns.values)

['PassengerId' 'Survived' 'Pclass' 'Name' 'Sex' 'Age' 'SibSp' 'Parch'
 'Ticket' 'Fare' 'Cabin' 'Embarked']


Fitur manakah yang termasuk data kategorial?

Nilai-nilai ini mengklasifikasikan sampel ke dalam kumpulan sampel serupa. Dalam fitur kategoris apakah nilai didasarkan pada nominal, ordinal, rasio, atau interval? Hal ini antara lain membantu kami memilih plot yang sesuai untuk visualisasi.

Kategorikal: Survived, Sex, and Embarked. Ordinal: Pclass.

Fitur manakah yang termauk data numerik?

Fitur manakah yang bersifat numerik? Nilai-nilai ini berubah dari sampel ke sampel. Dalam fitur numerik, apakah nilainya bersifat diskrit, kontinu, atau berdasarkan deret waktu? Hal ini antara lain membantu kami memilih plot yang sesuai untuk visualisasi.

Data kontinu & diskrit: Age, Fare. Discrete: SibSp, Parch.

In [63]:
# preview the data
train.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


Fitur yang memiliki tipe data campuran adalah fitur yang mengandung kombinasi data numerik dan data alfanumerik (teks) dalam satu fitur. Ini adalah kandidat yang perlu diperbaiki.

"Ticket" adalah campuran dari tipe data numerik dan alfanumerik. "Cabin" adalah alfanumerik/character

Fitur-fitur yang mungkin mengandung kesalahan atau typo adalah fitur-fitur di mana data tidak konsisten atau mungkin dimasukkan secara tidak akurat. Ini bisa sulit untuk ditinjau dalam dataset besar, tetapi dengan meninjau beberapa sampel dari dataset yang lebih kecil, kita mungkin bisa mengetahui dengan jelas fitur-fitur mana yang memerlukan koreksi.

Fitur "Name" mungkin mengandung kesalahan atau typo karena ada beberapa cara yang digunakan untuk mendeskripsikan nama, termasuk gelar, tanda kurung bulat, dan tanda kutip yang digunakan untuk nama alternatif atau singkat. Fitur ini bisa rentan terhadap inkonsistensi dan variasi dalam penginputan data, sehingga penting untuk meninjau dan potensialnya memperbaikinya.

In [64]:
train.tail()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.45,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0,C148,C
890,891,0,3,"Dooley, Mr. Patrick",male,32.0,0,0,370376,7.75,,Q


Fitur-fitur yang mengandung nilai kosong, null, atau kosong adalah fitur-fitur yang perlu diperbaiki.

Pada dataset pelatihan, fitur-fitur "Cabin" > "Age" > "Embarked" mengandung sejumlah nilai null, dalam urutan tersebut.
Pada dataset uji, fitur "Cabin" > "Age" tidak lengkap.

Berikut adalah tipe data untuk berbagai fitur:

Membantu kita selama proses konversi.

Tujuh fitur adalah integer atau float. Enam fitur dalam kasus dataset uji.
Lima fitur adalah string (objek).

# Exploring Data And Glimpsy Data

In [65]:
#info data

In [66]:
train.info()
print("----")
test.info()
print("----")
gender.info()
print("----")

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Survived     891 non-null    int64  
 2   Pclass       891 non-null    int64  
 3   Name         891 non-null    object 
 4   Sex          891 non-null    object 
 5   Age          714 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Cabin        204 non-null    object 
 11  Embarked     889 non-null    object 
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB
----
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 418 entries, 0 to 417
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  418 non-null    int64  
 1   Pcl

# Duplicated Data

In [67]:
train.duplicated().sum()

0

In [68]:
test.duplicated().sum()

0

# Missing Value

In [69]:
train.isnull().value_counts()

PassengerId  Survived  Pclass  Name   Sex    Age    SibSp  Parch  Ticket  Fare   Cabin  Embarked
False        False     False   False  False  False  False  False  False   False  True   False       529
                                                                                 False  False       183
                                             True   False  False  False   False  True   False       158
                                                                                 False  False        19
                                             False  False  False  False   False  False  True          2
Name: count, dtype: int64

In [70]:
test.isnull().value_counts()

PassengerId  Pclass  Name   Sex    Age    SibSp  Parch  Ticket  Fare   Cabin  Embarked
False        False   False  False  False  False  False  False   False  True   False       244
                                                                       False  False        87
                                   True   False  False  False   False  True   False        82
                                                                       False  False         4
                                   False  False  False  False   True   True   False         1
Name: count, dtype: int64

In [71]:
gender.isnull().value_counts()

PassengerId  Survived
False        False       418
Name: count, dtype: int64

# Unvariate Analyst

## Stastistik Despritive

In [72]:
train.describe()

Unnamed: 0,PassengerId,Survived,Pclass,Age,SibSp,Parch,Fare
count,891.0,891.0,891.0,714.0,891.0,891.0,891.0
mean,446.0,0.383838,2.308642,29.699118,0.523008,0.381594,32.204208
std,257.353842,0.486592,0.836071,14.526497,1.102743,0.806057,49.693429
min,1.0,0.0,1.0,0.42,0.0,0.0,0.0
25%,223.5,0.0,2.0,20.125,0.0,0.0,7.9104
50%,446.0,0.0,3.0,28.0,0.0,0.0,14.4542
75%,668.5,1.0,3.0,38.0,1.0,0.0,31.0
max,891.0,1.0,3.0,80.0,8.0,6.0,512.3292


Nama (Name): Nama-nama unik di seluruh dataset (jumlah=unik=891).

Jenis Kelamin (Sex): Variabel jenis kelamin memiliki dua nilai mungkin, dengan 65% pria (terbanyak=laki-laki, frekuensi=577/jumlah=891).

Kabin (Cabin): Nilai-nilai Kabin memiliki beberapa duplikasi di seluruh sampel. Alternatifnya, beberapa penumpang berbagi kabin.

Tempat Berangkat (Embarked): Tempat berangkat memiliki tiga nilai mungkin. Tempat S digunakan oleh sebagian besar penumpang (terbanyak=S).

Tiket (Ticket): Fitur Tiket memiliki rasio duplikasi yang tinggi (22%), dengan nilai unik sebanyak 681.

In [73]:
train.describe(include=['O'])

Unnamed: 0,Name,Sex,Ticket,Cabin,Embarked
count,891,891,891,204,889
unique,891,2,681,147,3
top,"Braund, Mr. Owen Harris",male,347082,B96 B98,S
freq,1,577,7,4,644


In [74]:
test.describe()

Unnamed: 0,PassengerId,Pclass,Age,SibSp,Parch,Fare
count,418.0,418.0,332.0,418.0,418.0,417.0
mean,1100.5,2.26555,30.27259,0.447368,0.392344,35.627188
std,120.810458,0.841838,14.181209,0.89676,0.981429,55.907576
min,892.0,1.0,0.17,0.0,0.0,0.0
25%,996.25,1.0,21.0,0.0,0.0,7.8958
50%,1100.5,3.0,27.0,0.0,0.0,14.4542
75%,1204.75,3.0,39.0,1.0,0.0,31.5
max,1309.0,3.0,76.0,8.0,9.0,512.3292


In [75]:
#gender.describe()

# Biverate Analyst

# Menganalisis dengan memutar fitur-fitur

Untuk mengonfirmasi beberapa observasi dan asumsi kita, kita dapat dengan cepat menganalisis korelasi fitur dengan memutar fitur satu sama lain. Kita hanya bisa melakukannya pada tahap ini untuk fitur-fitur yang tidak memiliki nilai yang kosong. Juga, hanya masuk akal untuk melakukan ini pada fitur-fitur yang bersifat kategori (Sex), ordinal (Pclass), atau diskrit (SibSp, Parch).

Pclass
Kita mengamati korelasi signifikan (>0.5) antara Pclass=1 dan Survived (mengklasifikasikan #3). Kita memutuskan untuk menyertakan fitur ini dalam model kita.

Sex
Kita mengkonfirmasi observasi selama perumusan masalah bahwa Sex=perempuan memiliki tingkat kelangsungan hidup yang sangat tinggi, yaitu 74% (mengklasifikasikan #1).

SibSp dan Parch
Fitur-fitur ini memiliki korelasi nol untuk beberapa nilai tertentu. Mungkin yang terbaik adalah mendapatkan fitur atau serangkaian fitur dari fitur-fitur individu ini (menciptakan #1).

In [76]:
train[['Pclass', 'Survived']].groupby(['Pclass'], as_index=False).mean().sort_values(by='Survived', ascending=False)

Unnamed: 0,Pclass,Survived
0,1,0.62963
1,2,0.472826
2,3,0.242363


In [77]:
train[["SibSp","Survived"]].groupby(['SibSp'], as_index=False).mean().sort_values(by='Survived',ascending=False)

Unnamed: 0,SibSp,Survived
1,1,0.535885
2,2,0.464286
0,0,0.345395
3,3,0.25
4,4,0.166667
5,5,0.0
6,8,0.0


In [78]:
train[["SibSp","Survived"]].groupby(['SibSp'],as_index=False).mean().sort_values(by='Survived',ascending=False)

Unnamed: 0,SibSp,Survived
1,1,0.535885
2,2,0.464286
0,0,0.345395
3,3,0.25
4,4,0.166667
5,5,0.0
6,8,0.0


In [79]:
train[['Parch','Survived']].groupby(['Parch'],as_index=False).mean().sort_values(by='Survived', ascending=False)

Unnamed: 0,Parch,Survived
3,3,0.6
1,1,0.550847
2,2,0.5
0,0,0.343658
5,5,0.2
4,4,0.0
6,6,0.0


# Bivarate Analyze by visualizing data


## Korelasi antar Fitur Numerik
Grafik histogram berguna untuk menganalisis variabel numerik kontinu seperti Usia di mana pengelompokan atau rentang akan membantu mengidentifikasi pola yang berguna. Histogram dapat mengindikasikan distribusi sampel menggunakan interval yang didefinisikan secara otomatis atau interval yang sama. Ini membantu kita menjawab pertanyaan yang berkaitan dengan interval tertentu (Apakah bayi memiliki tingkat kelangsungan hidup yang lebih tinggi?)

Perhatikan bahwa sumbu x dalam visualisasi histogram mewakili jumlah sampel atau penumpang.

Observasi.

Bayi (Usia <=4) memiliki tingkat kelangsungan hidup yang tinggi.
Penumpang tertua (Usia = 80) selamat.
Sejumlah besar orang berusia 15-25 tahun tidak selamat.
Sebagian besar penumpang berada dalam rentang usia 15-35 tahun.
Keputusan.

Analisis sederhana ini mengkonfirmasi asumsi kita sebagai dasar keputusan untuk tahap selanjutnya dalam alur kerja.

Kita harus mempertimbangkan Usia (asumsi kita dalam mengklasifikasikan #2) dalam pelatihan model kita.
Lengkapkan fitur Usia untuk nilai-nilai yang hilang (melengkapi #1).
Kita harus mengelompokkan kelompok usia (menciptakan #3).

In [80]:
fig = px.histogram(train, x='Age', color='Survived',
                   barmode='overlay',
                   nbins=20,
                   title='Distribusi Usia berdasarkan Status Selamat',
                   labels={'Age': 'Usia', 'Survived': 'Selamat'})

fig.show()

## Mengkorelasikan fitur numerik dan ordinal

can combine multiple features for identifying correlations using a single plot. This can be done with numerical and categorical features which have numeric values.

Observations.

Pclass=3 had most passengers, however most did not survive. Confirms our classifying assumption #2.
Infant passengers in Pclass=2 and Pclass=3 mostly survived. Further qualifies our classifying assumption #2.
Most passengers in Pclass=1 survived. Confirms our classifying assumption #3.
Pclass varies in terms of Age distribution of passengers.
Decisions.

Consider Pclass for model training.

In [81]:
# Menyiapkan subplot. Ada tiga kelas ('Pclass') dan dua kategori dalam 'Survived'
rows = len(train['Pclass'].unique())
cols = len(train['Survived'].unique())

fig = make_subplots(rows=rows, cols=cols, subplot_titles=('Survived: 0', 'Survived: 1'))

# Mengisi subplot dengan histogram
for i, pclass in enumerate(train['Pclass'].unique()):
    for j, survived in enumerate(train['Survived'].unique()):
        filtered_df = train[(train['Pclass'] == pclass) & (train['Survived'] == survived)]
        fig.add_trace(
            go.Histogram(x=filtered_df['Age'], nbinsx=20, name=f'Pclass {pclass} - Survived {survived}', opacity=0.5),
            row=i+1, col=j+1
        )

# Menambahkan judul dan legenda
fig.update_layout(height=600, width=800, title_text="Distribusi Usia berdasarkan Kelas dan Status Selamat")
fig.update_traces(marker=dict(line=dict(width=0.5, color='black')))
fig.show()

# Korelasi fitur data kategorial

Pengamatan.

Penumpang perempuan memiliki tingkat kelangsungan hidup yang jauh lebih baik dibandingkan laki-laki. Mengonfirmasi klasifikasi (#1).
Pengecualian di Embarked=C di mana laki-laki memiliki tingkat kelangsungan hidup yang lebih tinggi. Ini bisa jadi merupakan korelasi antara Pclass dan Embarked dan pada gilirannya Pclass dan Survived, belum tentu merupakan korelasi langsung antara Embarked dan Survived.
Laki-laki memiliki tingkat kelangsungan hidup yang lebih baik di kelas P=3 jika dibandingkan dengan kelas P=2 untuk port C dan Q. Menyelesaikan (#2).
Pelabuhan embarkasi memiliki tingkat kelangsungan hidup yang bervariasi untuk kelas P=3 dan di antara penumpang pria. Berkorelasi (#1).

Keputusan.

Tambahkan fitur Seks ke pelatihan model.
Selesaikan dan tambahkan fitur Embarked ke pelatihan model.

In [82]:
# First, we need to prepare the data for plotting
df = train.groupby(['Embarked', 'Pclass', 'Sex'])['Survived'].mean().reset_index()

# Identify unique 'Embarked' values for creating subplots
embarked_values = df['Embarked'].unique()

# Create subplots
fig = make_subplots(rows=len(embarked_values), cols=1, subplot_titles=embarked_values)

# Add traces
for i, embarked in enumerate(embarked_values, 1):
    df_embarked = df[df['Embarked'] == embarked]
    for sex in df['Sex'].unique():
        df_sex = df_embarked[df_embarked['Sex'] == sex]
        fig.add_trace(go.Scatter(x=df_sex['Pclass'], y=df_sex['Survived'], 
                                 mode='lines+markers', name=sex), 
                      row=i, col=1)

# Update layout
fig.update_layout(height=600, width=800, title_text="Survival Rate by Class and Embarked Port")
fig.show()

## Mengkorelasikan data kategorial dan numerik
Kami mungkin juga ingin mengkorelasikan fitur kategoris (dengan nilai non-numerik) dan fitur numerik. Kita dapat mempertimbangkan untuk mengkorelasikan Embarked (Non-numerik kategoris), Jenis Kelamin (Non-numerik kategoris), Tarif (Numerik kontinu), dengan Bertahan (Numerik kategoris).

Pengamatan.

Penumpang yang membayar tarif lebih tinggi memiliki kelangsungan hidup yang lebih baik. Mengonfirmasi asumsi kami untuk membuat rentang tarif (#4).
Pelabuhan embarkasi berkorelasi dengan tingkat kelangsungan hidup. Konfirmasi korelasi (#1) dan penyelesaian (#2).
Keputusan.

Pertimbangkan fitur banding Tarif.

In [83]:
df = train.groupby(['Embarked', 'Survived', 'Sex'])['Fare'].mean().reset_index()

# Create the facet grid plot
fig = px.bar(df, x='Sex', y='Fare', facet_row='Embarked', facet_col='Survived', 
             color='Sex', barmode='group', 
             category_orders={"Embarked": sorted(df['Embarked'].unique())},
             height=600, width=800)

# Update layout
fig.update_layout(title_text="Average Fare by Sex, Embarked, and Survival Status")
fig.update_traces(marker=dict(line=dict(width=0), opacity=0.5))
fig.show()

# Data Preprocessing

saya akan menerapkan preprocessing dan transformasi berikut pada kumpulan data titanic ini

Tokenisasi nama-namanya. Misalnya, "Braund, Mr. Owen Harris" akan menjadi ["Braund", "Mr.", "Owen", "Harris"].
Ekstrak awalan apa pun di tiket. Misalnya tiket "STON/O2.3101282" akan menjadi "STON/O2." dan 3101282.

In [129]:
def preprocess(df):
    df = train.copy()
    
    def normalize_name(x):
        return " ".join([v.strip(",()[].\"'") for v in x.split(" ")])
    
    def ticket_number(x):
        return x.split(" ")[-1]
        
    def ticket_item(x):
        items = x.split(" ")
        if len(items) == 1:
            return "NONE"
        return "_".join(items[0:-1])
    
    df["Name"] = df["Name"].apply(normalize_name)
    df["Ticket_number"] = df["Ticket"].apply(ticket_number)
    df["Ticket_item"] = df["Ticket"].apply(ticket_item)                     
    return df
    
preprocessed_train_df = preprocess(train)
preprocessed_test_df = preprocess(test)

preprocessed_train_df.head(5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Ticket_number,Ticket_item
0,1,0,3,Braund Mr Owen Harris,male,22.0,1,0,A/5 21171,7.25,,S,21171,A/5
1,2,1,1,Cumings Mrs John Bradley Florence Briggs Thayer,female,38.0,1,0,PC 17599,71.2833,C85,C,17599,PC
2,3,1,3,Heikkinen Miss Laina,female,26.0,0,0,STON/O2. 3101282,7.925,,S,3101282,STON/O2.
3,4,1,1,Futrelle Mrs Jacques Heath Lily May Peel,female,35.0,1,0,113803,53.1,C123,S,113803,NONE
4,5,0,3,Allen Mr William Henry,male,35.0,0,0,373450,8.05,,S,373450,NONE


In [130]:
input_features = list(preprocessed_train_df.columns)
input_features.remove("Ticket")
input_features.remove("PassengerId")
input_features.remove("Survived")
#input_features.remove("Ticket_number")

print(f"Input features: {input_features}")

Input features: ['Pclass', 'Name', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Cabin', 'Embarked', 'Ticket_number', 'Ticket_item']


# Convert dataset to TensorFlow Dataset

In [131]:
def tokenize_names(features, labels=None):
    """Divite the names into tokens. TF-DF can consume text tokens natively."""
    features["Name"] =  tf.strings.split(features["Name"])
    return features, labels

train_ds = tfdf.keras.pd_dataframe_to_tf_dataset(preprocessed_train_df,label="Survived").map(tokenize_names)
test_ds = tfdf.keras.pd_dataframe_to_tf_dataset(preprocessed_serving_df).map(tokenize_names)

kode ini adalah bagian dari pipeline prapemrosesan data untuk model pembelajaran mesin menggunakan TensorFlow. Hal ini terutama berfokus pada pemrosesan fitur 'Nama' dengan melakukan tokenisasi, yang dapat bermanfaat untuk model yang struktur atau komponen namanya relevan (seperti dalam beberapa tugas pemrosesan bahasa alami). train_ds dimaksudkan untuk melatih model, dan serve_ds kemungkinan dimaksudkan untuk membuat prediksi dengan model yang dilatih.

# Train model using default parameters

In [132]:
model = tfdf.keras.GradientBoostedTreesModel(
    verbose=0, # Very few logs
    features=[tfdf.keras.FeatureUsage(name=n) for n in input_features],
    exclude_non_specified_features=True, # Only use the features in "features"
    random_seed=1234,
)
model.fit(train_ds)

self_evaluation = model.make_inspector().evaluation()
print(f"Accuracy: {self_evaluation.accuracy} Loss:{self_evaluation.loss}")

[INFO 23-12-15 05:25:05.7512 UTC kernel.cc:1243] Loading model from path /tmp/tmpn_8dd7oj/model/ with prefix 18cee2e4b3c14fd5
[INFO 23-12-15 05:25:05.7582 UTC abstract_model.cc:1311] Engine "GradientBoostedTreesQuickScorerExtended" built
[INFO 23-12-15 05:25:05.7583 UTC kernel.cc:1075] Use fast generic engine


Accuracy: 0.8260869383811951 Loss:0.8608942627906799


# Train model using improved default parameters

In [133]:
model = tfdf.keras.GradientBoostedTreesModel(
    verbose=0, # Very few logs
    features=[tfdf.keras.FeatureUsage(name=n) for n in input_features],
    exclude_non_specified_features=True, # Only use the features in "features"
    
    #num_trees=2000,
    
    # Only for GBT.
    # A bit slower, but great to understand the model.
    # compute_permutation_variable_importance=True,
    
    # Change the default hyper-parameters
    # hyperparameter_template="benchmark_rank1@v1",
    
    #num_trees=1000,
    #tuner=tuner
    
    min_examples=1,
    categorical_algorithm="RANDOM",
    #max_depth=4,
    shrinkage=0.05,
    #num_candidate_attributes_ratio=0.2,
    split_axis="SPARSE_OBLIQUE",
    sparse_oblique_normalization="MIN_MAX",
    sparse_oblique_num_projections_exponent=2.0,
    num_trees=2000,
    #validation_ratio=0.0,
    random_seed=1234,
    
)
model.fit(train_ds)

self_evaluation = model.make_inspector().evaluation()
print(f"Accuracy: {self_evaluation.accuracy} Loss:{self_evaluation.loss}")

[INFO 23-12-15 05:25:11.2408 UTC kernel.cc:1243] Loading model from path /tmp/tmpaqebq_o6/model/ with prefix abc8fe94f6024f60
[INFO 23-12-15 05:25:11.2514 UTC decision_forest.cc:660] Model loaded with 42 root(s), 2212 node(s), and 10 input feature(s).
[INFO 23-12-15 05:25:11.2515 UTC kernel.cc:1075] Use fast generic engine


Accuracy: 0.782608687877655 Loss:1.060815453529358


# Model SUMMARY

In [134]:
model.summary()

Model: "gradient_boosted_trees_model_113"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
Total params: 1 (1.00 Byte)
Trainable params: 0 (0.00 Byte)
Non-trainable params: 1 (1.00 Byte)
_________________________________________________________________
Type: "GRADIENT_BOOSTED_TREES"
Task: CLASSIFICATION
Label: "__LABEL"

Input Features (11):
	Age
	Cabin
	Embarked
	Fare
	Name
	Parch
	Pclass
	Sex
	SibSp
	Ticket_item
	Ticket_number

No weights

Variable Importance: INV_MEAN_MIN_DEPTH:
    1.           "Sex"  0.597073 ################
    2.           "Age"  0.363764 #######
    3.          "Fare"  0.264018 ###
    4.          "Name"  0.207843 #
    5.        "Pclass"  0.178906 
    6. "Ticket_number"  0.178488 
    7.   "Ticket_item"  0.177907 
    8.      "Embarked"  0.177237 
    9.         "Parch"  0.175481 
   10.         "SibSp"  0.171800 

Variable Importance: NUM_AS_ROOT:
    1.  "Sex" 36.000000 ####

# Prediction

In [107]:
pip install tensorflow-decision-forests

Note: you may need to restart the kernel to use updated packages.


In [108]:
import tensorflow as tf
print(tf.__version__)

2.13.0


In [140]:
def prediction_to_kaggle_format(model, test_ds, threshold=0.5):
    # Make predictions
    predictions = model.predict(test_ds)

    # In binary classification, TFDF models usually output the probability of the positive class directly
    proba_survive = predictions[:, 0]

    # Convert predictions to binary format based on the threshold
    survived = (proba_survive >= threshold).astype(int)

    return pd.DataFrame({
        "PassengerId": preprocessed_test_df["PassengerId"],
        "Survived": survived
    })

# Generate predictions for submission
kaggle_predictions = prediction_to_kaggle_format(model, test_ds)



In [141]:
def make_submission(kaggle_predictions):
    path = "/kaggle/working/submission.csv"
    kaggle_predictions.to_csv(path, index=False)
    print(f"Submission exported to {path}")

# Generate the submission file
make_submission(kaggle_predictions)

Submission exported to /kaggle/working/submission.csv


# Train Model Using Hyperparameter Tuning

In [142]:
tuner = tfdf.tuner.RandomSearch(num_trials=1000)
tuner.choice("min_examples", [2, 5, 7, 10])
tuner.choice("categorical_algorithm", ["CART", "RANDOM"])

local_search_space = tuner.choice("growing_strategy", ["LOCAL"])
local_search_space.choice("max_depth", [3, 4, 5, 6, 8])

global_search_space = tuner.choice("growing_strategy", ["BEST_FIRST_GLOBAL"], merge=True)
global_search_space.choice("max_num_nodes", [16, 32, 64, 128, 256])

#tuner.choice("use_hessian_gain", [True, False])
tuner.choice("shrinkage", [0.02, 0.05, 0.10, 0.15])
tuner.choice("num_candidate_attributes_ratio", [0.2, 0.5, 0.9, 1.0])


tuner.choice("split_axis", ["AXIS_ALIGNED"])
oblique_space = tuner.choice("split_axis", ["SPARSE_OBLIQUE"], merge=True)
oblique_space.choice("sparse_oblique_normalization",
                     ["NONE", "STANDARD_DEVIATION", "MIN_MAX"])
oblique_space.choice("sparse_oblique_weights", ["BINARY", "CONTINUOUS"])
oblique_space.choice("sparse_oblique_num_projections_exponent", [1.0, 1.5])

# Tune the model. Notice the `tuner=tuner`.
tuned_model = tfdf.keras.GradientBoostedTreesModel(tuner=tuner)
tuned_model.fit(train_ds, verbose=0)

tuned_self_evaluation = tuned_model.make_inspector().evaluation()
print(f"Accuracy: {tuned_self_evaluation.accuracy} Loss:{tuned_self_evaluation.loss}")

Use /tmp/tmpqtn2ivkl as temporary training directory


[INFO 23-12-15 05:33:21.4787 UTC kernel.cc:1243] Loading model from path /tmp/tmpqtn2ivkl/model/ with prefix a6a1ecfc72af4358
[INFO 23-12-15 05:33:21.4901 UTC decision_forest.cc:660] Model loaded with 32 root(s), 852 node(s), and 12 input feature(s).
[INFO 23-12-15 05:33:21.4901 UTC abstract_model.cc:1311] Engine "GradientBoostedTreesGeneric" built
[INFO 23-12-15 05:33:21.4902 UTC kernel.cc:1075] Use fast generic engine


Accuracy: 0.8630136847496033 Loss:0.6804219484329224


Model Mengalami peninggkatan 
Accuracy: 0.8630136847496033 Loss:0.6804219484329224
add Codeadd Markdown

# Make ML model with Ensemble

Di sini saya akan membuat 100 model dengan benih berbeda dan menggabungkan hasilnya

Pendekatan ini menghilangkan sedikit aspek acak yang terkait dengan pembuatan model ML

Dalam pembuatan GBT digunakan parameter jujur. Ini akan menggunakan contoh pelatihan yang berbeda untuk menyimpulkan struktur dan nilai daun. Teknik regularisasi ini menukar contoh dengan perkiraan bias.

In [143]:
predictions = None
num_predictions = 0

for i in range(100):
    print(f"i:{i}")
    # Possible models: GradientBoostedTreesModel or RandomForestModel
    model = tfdf.keras.GradientBoostedTreesModel(
        verbose=0, # Very few logs
        features=[tfdf.keras.FeatureUsage(name=n) for n in input_features],
        exclude_non_specified_features=True, # Only use the features in "features"

        #min_examples=1,
        #categorical_algorithm="RANDOM",
        ##max_depth=4,
        #shrinkage=0.05,
        ##num_candidate_attributes_ratio=0.2,
        #split_axis="SPARSE_OBLIQUE",
        #sparse_oblique_normalization="MIN_MAX",
        #sparse_oblique_num_projections_exponent=2.0,
        #num_trees=2000,
        ##validation_ratio=0.0,
        random_seed=i,
        honest=True,
    )
    model.fit(train_ds)
    
    sub_predictions = model.predict(test_ds, verbose=0)[:,0]
    if predictions is None:
        predictions = sub_predictions
    else:
        predictions += sub_predictions
    num_predictions += 1

predictions/=num_predictions

kaggle_predictions = pd.DataFrame({
        "PassengerId": preprocessed_test_df["PassengerId"],
        "Survived": (predictions >= 0.5).astype(int)
    })

make_submission(kaggle_predictions)

i:0


[INFO 23-12-15 05:33:22.3556 UTC kernel.cc:1243] Loading model from path /tmp/tmp6hduypey/model/ with prefix fa021c7a0d0b40f3
[INFO 23-12-15 05:33:22.3594 UTC kernel.cc:1075] Use fast generic engine


i:1


[INFO 23-12-15 05:33:23.7119 UTC kernel.cc:1243] Loading model from path /tmp/tmp1128qosk/model/ with prefix 75a05556cf5b4d30
[INFO 23-12-15 05:33:23.7294 UTC kernel.cc:1075] Use fast generic engine


i:2


[INFO 23-12-15 05:33:24.7954 UTC kernel.cc:1243] Loading model from path /tmp/tmpy00ewkem/model/ with prefix 7010fa2f3e274a33
[INFO 23-12-15 05:33:24.7994 UTC kernel.cc:1075] Use fast generic engine


i:3


[INFO 23-12-15 05:33:27.6978 UTC kernel.cc:1243] Loading model from path /tmp/tmpb5eapna_/model/ with prefix f5b5f4f0332f4de1
[INFO 23-12-15 05:33:27.7291 UTC kernel.cc:1075] Use fast generic engine


i:4


[INFO 23-12-15 05:33:28.9567 UTC kernel.cc:1243] Loading model from path /tmp/tmp7duzot2f/model/ with prefix ec215db3f5c745e7
[INFO 23-12-15 05:33:28.9629 UTC kernel.cc:1075] Use fast generic engine


i:5


[INFO 23-12-15 05:33:30.0530 UTC kernel.cc:1243] Loading model from path /tmp/tmpryu5xbjz/model/ with prefix eb44e1fad0bc45c1
[INFO 23-12-15 05:33:30.0560 UTC kernel.cc:1075] Use fast generic engine


i:6


[INFO 23-12-15 05:33:31.2130 UTC kernel.cc:1243] Loading model from path /tmp/tmp6t3t8dsh/model/ with prefix a0fad890effc4deb
[INFO 23-12-15 05:33:31.2202 UTC kernel.cc:1075] Use fast generic engine


i:7


[INFO 23-12-15 05:33:32.6535 UTC kernel.cc:1243] Loading model from path /tmp/tmpsjusk9t9/model/ with prefix 48f348c514614c46
[INFO 23-12-15 05:33:32.6727 UTC abstract_model.cc:1311] Engine "GradientBoostedTreesQuickScorerExtended" built
[INFO 23-12-15 05:33:32.6728 UTC kernel.cc:1075] Use fast generic engine


i:8


[INFO 23-12-15 05:33:33.8827 UTC kernel.cc:1243] Loading model from path /tmp/tmpxmiy7650/model/ with prefix 7892ae0251e94421
[INFO 23-12-15 05:33:33.8919 UTC kernel.cc:1075] Use fast generic engine


i:9


[INFO 23-12-15 05:33:35.1843 UTC kernel.cc:1243] Loading model from path /tmp/tmpe6222tju/model/ with prefix 5f6d93b2dc3e4ad0
[INFO 23-12-15 05:33:35.1976 UTC kernel.cc:1075] Use fast generic engine


i:10


[INFO 23-12-15 05:33:36.3045 UTC kernel.cc:1243] Loading model from path /tmp/tmp6n6yg6yv/model/ with prefix 39ca798ca1534156
[INFO 23-12-15 05:33:36.3096 UTC kernel.cc:1075] Use fast generic engine


i:11


[INFO 23-12-15 05:33:37.5975 UTC kernel.cc:1243] Loading model from path /tmp/tmp09yl7yj0/model/ with prefix 98a7692d99d14360
[INFO 23-12-15 05:33:37.6110 UTC kernel.cc:1075] Use fast generic engine


i:12


[INFO 23-12-15 05:33:38.7713 UTC kernel.cc:1243] Loading model from path /tmp/tmpw26qiwoj/model/ with prefix 391880a49cf94cfd
[INFO 23-12-15 05:33:38.7769 UTC kernel.cc:1075] Use fast generic engine


i:13


[INFO 23-12-15 05:33:39.9875 UTC kernel.cc:1243] Loading model from path /tmp/tmpm7brs_m7/model/ with prefix ed78eaadf59c4b1d
[INFO 23-12-15 05:33:39.9978 UTC kernel.cc:1075] Use fast generic engine


i:14


[INFO 23-12-15 05:33:41.0855 UTC kernel.cc:1243] Loading model from path /tmp/tmp_4hayc0b/model/ with prefix a4829e0dddab4a8d
[INFO 23-12-15 05:33:41.0910 UTC kernel.cc:1075] Use fast generic engine


i:15


[INFO 23-12-15 05:33:42.2134 UTC kernel.cc:1243] Loading model from path /tmp/tmpm9sozny4/model/ with prefix 23b7532f57244d67
[INFO 23-12-15 05:33:42.2205 UTC kernel.cc:1075] Use fast generic engine


i:16


[INFO 23-12-15 05:33:43.5602 UTC kernel.cc:1243] Loading model from path /tmp/tmpbo2qc2o_/model/ with prefix cf12893716a74563
[INFO 23-12-15 05:33:43.5716 UTC abstract_model.cc:1311] Engine "GradientBoostedTreesQuickScorerExtended" built
[INFO 23-12-15 05:33:43.5717 UTC kernel.cc:1075] Use fast generic engine


i:17


[INFO 23-12-15 05:33:45.3684 UTC kernel.cc:1243] Loading model from path /tmp/tmpwqw7eqbz/model/ with prefix 6e9ae993068b4c29
[INFO 23-12-15 05:33:45.3802 UTC kernel.cc:1075] Use fast generic engine


i:18


[INFO 23-12-15 05:33:46.5975 UTC kernel.cc:1243] Loading model from path /tmp/tmpf_o6qc2m/model/ with prefix 5c3cca69b61e46fc
[INFO 23-12-15 05:33:46.6082 UTC kernel.cc:1075] Use fast generic engine


i:19


[INFO 23-12-15 05:33:47.9607 UTC kernel.cc:1243] Loading model from path /tmp/tmpg0uxab41/model/ with prefix c9415383358c4487
[INFO 23-12-15 05:33:47.9768 UTC kernel.cc:1075] Use fast generic engine


i:20


[INFO 23-12-15 05:33:49.2885 UTC kernel.cc:1243] Loading model from path /tmp/tmpibgj4nss/model/ with prefix dca3c31666e042c5
[INFO 23-12-15 05:33:49.3020 UTC kernel.cc:1075] Use fast generic engine


i:21


[INFO 23-12-15 05:33:50.4756 UTC kernel.cc:1243] Loading model from path /tmp/tmpyaed0npg/model/ with prefix 21417e68a9b94b51
[INFO 23-12-15 05:33:50.4806 UTC kernel.cc:1075] Use fast generic engine


i:22


[INFO 23-12-15 05:33:51.6058 UTC kernel.cc:1243] Loading model from path /tmp/tmpy5cct7t5/model/ with prefix 3c1d33d4768b4e57
[INFO 23-12-15 05:33:51.6110 UTC kernel.cc:1075] Use fast generic engine


i:23


[INFO 23-12-15 05:33:52.7586 UTC kernel.cc:1243] Loading model from path /tmp/tmps03ahj_e/model/ with prefix 451ab22b52d44dc8
[INFO 23-12-15 05:33:52.7665 UTC kernel.cc:1075] Use fast generic engine


i:24


[INFO 23-12-15 05:33:53.8653 UTC kernel.cc:1243] Loading model from path /tmp/tmpjvjzptt4/model/ with prefix 7468e0ea7aed4780
[INFO 23-12-15 05:33:53.8705 UTC abstract_model.cc:1311] Engine "GradientBoostedTreesQuickScorerExtended" built
[INFO 23-12-15 05:33:53.8705 UTC kernel.cc:1075] Use fast generic engine


i:25


[INFO 23-12-15 05:33:55.0961 UTC kernel.cc:1243] Loading model from path /tmp/tmptlqfd1sm/model/ with prefix 37d21ba3320d4c44
[INFO 23-12-15 05:33:55.1064 UTC kernel.cc:1075] Use fast generic engine


i:26


[INFO 23-12-15 05:33:56.3032 UTC kernel.cc:1243] Loading model from path /tmp/tmp7_og_gga/model/ with prefix 38e1970110fc4e59
[INFO 23-12-15 05:33:56.3124 UTC kernel.cc:1075] Use fast generic engine


i:27


[INFO 23-12-15 05:33:57.4283 UTC kernel.cc:1243] Loading model from path /tmp/tmp07lr7mam/model/ with prefix 1ad34892a979449d
[INFO 23-12-15 05:33:57.4340 UTC kernel.cc:1075] Use fast generic engine


i:28


[INFO 23-12-15 05:33:58.5477 UTC kernel.cc:1243] Loading model from path /tmp/tmp7pmdoi_l/model/ with prefix a97bbfdf33374bed
[INFO 23-12-15 05:33:58.5523 UTC kernel.cc:1075] Use fast generic engine


i:29


[INFO 23-12-15 05:33:59.7947 UTC kernel.cc:1243] Loading model from path /tmp/tmp1vz2y108/model/ with prefix 7b5d2ae7756d4bb9
[INFO 23-12-15 05:33:59.8066 UTC kernel.cc:1075] Use fast generic engine


i:30


[INFO 23-12-15 05:34:01.3491 UTC kernel.cc:1243] Loading model from path /tmp/tmpgnsqidsi/model/ with prefix effd4e05d2614c19
[INFO 23-12-15 05:34:01.3738 UTC kernel.cc:1075] Use fast generic engine


i:31


[INFO 23-12-15 05:34:02.5472 UTC kernel.cc:1243] Loading model from path /tmp/tmpr8nwiwl0/model/ with prefix e1a852cac4384cf7
[INFO 23-12-15 05:34:02.5556 UTC kernel.cc:1075] Use fast generic engine


i:32


[INFO 23-12-15 05:34:03.6339 UTC kernel.cc:1243] Loading model from path /tmp/tmpk5di8rbx/model/ with prefix 7e0efd7845254f12
[INFO 23-12-15 05:34:03.6390 UTC kernel.cc:1075] Use fast generic engine


i:33


[INFO 23-12-15 05:34:04.9073 UTC kernel.cc:1243] Loading model from path /tmp/tmp3bm4f5yg/model/ with prefix ea2c39db4aaf4e91
[INFO 23-12-15 05:34:04.9191 UTC abstract_model.cc:1311] Engine "GradientBoostedTreesQuickScorerExtended" built
[INFO 23-12-15 05:34:04.9191 UTC kernel.cc:1075] Use fast generic engine


i:34


[INFO 23-12-15 05:34:06.0744 UTC kernel.cc:1243] Loading model from path /tmp/tmpxmtk6tt1/model/ with prefix 2a2754018fa2459e
[INFO 23-12-15 05:34:06.0820 UTC kernel.cc:1075] Use fast generic engine


i:35


[INFO 23-12-15 05:34:07.2195 UTC kernel.cc:1243] Loading model from path /tmp/tmprqmzt9i5/model/ with prefix 4019348b1c6745cd
[INFO 23-12-15 05:34:07.2267 UTC kernel.cc:1075] Use fast generic engine


i:36


[INFO 23-12-15 05:34:08.5369 UTC kernel.cc:1243] Loading model from path /tmp/tmpb2rhq9zr/model/ with prefix c0bcca5631784396
[INFO 23-12-15 05:34:08.5501 UTC kernel.cc:1075] Use fast generic engine


i:37


[INFO 23-12-15 05:34:09.6972 UTC kernel.cc:1243] Loading model from path /tmp/tmptsi63kfy/model/ with prefix 4fe31e0a9a0c4364
[INFO 23-12-15 05:34:09.7049 UTC kernel.cc:1075] Use fast generic engine


i:38


[INFO 23-12-15 05:34:10.9603 UTC kernel.cc:1243] Loading model from path /tmp/tmp7lpquru8/model/ with prefix f8c4359e7c464d78
[INFO 23-12-15 05:34:10.9723 UTC kernel.cc:1075] Use fast generic engine


i:39


[INFO 23-12-15 05:34:12.2223 UTC kernel.cc:1243] Loading model from path /tmp/tmpin6_xapu/model/ with prefix 9002f7857a9340d8
[INFO 23-12-15 05:34:12.2338 UTC kernel.cc:1075] Use fast generic engine


i:40


[INFO 23-12-15 05:34:13.2733 UTC kernel.cc:1243] Loading model from path /tmp/tmpt14e5xm9/model/ with prefix c1bcd6ad6d4a4037
[INFO 23-12-15 05:34:13.2771 UTC kernel.cc:1075] Use fast generic engine


i:41


[INFO 23-12-15 05:34:14.6142 UTC kernel.cc:1243] Loading model from path /tmp/tmpid4ok1fj/model/ with prefix 5316aefae483424b
[INFO 23-12-15 05:34:14.6285 UTC kernel.cc:1075] Use fast generic engine


i:42


[INFO 23-12-15 05:34:15.8861 UTC kernel.cc:1243] Loading model from path /tmp/tmpf_8vdbnv/model/ with prefix 0e365f8187434d9a
[INFO 23-12-15 05:34:15.8948 UTC abstract_model.cc:1311] Engine "GradientBoostedTreesQuickScorerExtended" built
[INFO 23-12-15 05:34:15.8949 UTC kernel.cc:1075] Use fast generic engine


i:43


[INFO 23-12-15 05:34:17.2592 UTC kernel.cc:1243] Loading model from path /tmp/tmpeqk_of9c/model/ with prefix ea6e7aed7f7f4f4e
[INFO 23-12-15 05:34:17.2748 UTC kernel.cc:1075] Use fast generic engine


i:44


[INFO 23-12-15 05:34:18.4789 UTC kernel.cc:1243] Loading model from path /tmp/tmpyy9les81/model/ with prefix bd122df51d3942da
[INFO 23-12-15 05:34:18.4876 UTC kernel.cc:1075] Use fast generic engine


i:45


[INFO 23-12-15 05:34:19.5517 UTC kernel.cc:1243] Loading model from path /tmp/tmp08yphq1h/model/ with prefix fe823bb9f206492e
[INFO 23-12-15 05:34:19.5550 UTC kernel.cc:1075] Use fast generic engine


i:46


[INFO 23-12-15 05:34:20.9767 UTC kernel.cc:1243] Loading model from path /tmp/tmpoibiq5om/model/ with prefix 609bdb9ba3ec432b
[INFO 23-12-15 05:34:20.9919 UTC kernel.cc:1075] Use fast generic engine


i:47


[INFO 23-12-15 05:34:22.4090 UTC kernel.cc:1243] Loading model from path /tmp/tmpwrwuz_n8/model/ with prefix e3e08ee066da4c30
[INFO 23-12-15 05:34:22.4212 UTC kernel.cc:1075] Use fast generic engine


i:48


[INFO 23-12-15 05:34:23.4949 UTC kernel.cc:1243] Loading model from path /tmp/tmpb3nifsvf/model/ with prefix 547a91a13e464824
[INFO 23-12-15 05:34:23.4990 UTC kernel.cc:1075] Use fast generic engine


i:49


[INFO 23-12-15 05:34:24.6320 UTC kernel.cc:1243] Loading model from path /tmp/tmp2r2u4c10/model/ with prefix 317990f65a0f4b56
[INFO 23-12-15 05:34:24.6381 UTC kernel.cc:1075] Use fast generic engine


i:50


[INFO 23-12-15 05:34:27.4892 UTC kernel.cc:1243] Loading model from path /tmp/tmpf33ttdap/model/ with prefix a5db3cfffe5b47b9
[INFO 23-12-15 05:34:27.5038 UTC abstract_model.cc:1311] Engine "GradientBoostedTreesQuickScorerExtended" built
[INFO 23-12-15 05:34:27.5039 UTC kernel.cc:1075] Use fast generic engine


i:51


[INFO 23-12-15 05:34:28.9602 UTC kernel.cc:1243] Loading model from path /tmp/tmpcy6yu3qj/model/ with prefix 158d7e7a3eb143db
[INFO 23-12-15 05:34:28.9769 UTC kernel.cc:1075] Use fast generic engine


i:52


[INFO 23-12-15 05:34:30.1889 UTC kernel.cc:1243] Loading model from path /tmp/tmpmu4c3j05/model/ with prefix 33e0b0355e4641ec
[INFO 23-12-15 05:34:30.1968 UTC kernel.cc:1075] Use fast generic engine


i:53


[INFO 23-12-15 05:34:31.3816 UTC kernel.cc:1243] Loading model from path /tmp/tmpxkdvckz6/model/ with prefix b8b8266a36f64475
[INFO 23-12-15 05:34:31.3890 UTC kernel.cc:1075] Use fast generic engine


i:54


[INFO 23-12-15 05:34:32.4817 UTC kernel.cc:1243] Loading model from path /tmp/tmprualj4iu/model/ with prefix 03e457f4b2674f05
[INFO 23-12-15 05:34:32.4849 UTC kernel.cc:1075] Use fast generic engine


i:55


[INFO 23-12-15 05:34:33.8067 UTC kernel.cc:1243] Loading model from path /tmp/tmpuqslh95w/model/ with prefix bdbccdc824cc4031
[INFO 23-12-15 05:34:33.8206 UTC kernel.cc:1075] Use fast generic engine


i:56


[INFO 23-12-15 05:34:35.0620 UTC kernel.cc:1243] Loading model from path /tmp/tmp3aqoe9o_/model/ with prefix 545921e5e3fe4c86
[INFO 23-12-15 05:34:35.0720 UTC kernel.cc:1075] Use fast generic engine


i:57


[INFO 23-12-15 05:34:36.1777 UTC kernel.cc:1243] Loading model from path /tmp/tmpis_gkv79/model/ with prefix 9bb31358ac104b92
[INFO 23-12-15 05:34:36.1815 UTC kernel.cc:1075] Use fast generic engine


i:58


[INFO 23-12-15 05:34:37.3152 UTC kernel.cc:1243] Loading model from path /tmp/tmp0iirjhcj/model/ with prefix f44c1ea3acbc4a21
[INFO 23-12-15 05:34:37.3211 UTC kernel.cc:1075] Use fast generic engine


i:59


[INFO 23-12-15 05:34:38.5335 UTC kernel.cc:1243] Loading model from path /tmp/tmpg13kx5w3/model/ with prefix 1104ec8446d14e05
[INFO 23-12-15 05:34:38.5419 UTC abstract_model.cc:1311] Engine "GradientBoostedTreesQuickScorerExtended" built
[INFO 23-12-15 05:34:38.5420 UTC kernel.cc:1075] Use fast generic engine


i:60


[INFO 23-12-15 05:34:39.7760 UTC kernel.cc:1243] Loading model from path /tmp/tmpivos4wcv/model/ with prefix ceba52bb88864873
[INFO 23-12-15 05:34:39.7849 UTC kernel.cc:1075] Use fast generic engine


i:61


[INFO 23-12-15 05:34:40.8788 UTC kernel.cc:1243] Loading model from path /tmp/tmphw309nu1/model/ with prefix eacc888f102c4f8b
[INFO 23-12-15 05:34:40.8831 UTC kernel.cc:1075] Use fast generic engine


i:62


[INFO 23-12-15 05:34:42.4511 UTC kernel.cc:1243] Loading model from path /tmp/tmpo9xcivj9/model/ with prefix 8043c86fd4e34d1e
[INFO 23-12-15 05:34:42.4745 UTC kernel.cc:1075] Use fast generic engine


i:63


[INFO 23-12-15 05:34:43.6577 UTC kernel.cc:1243] Loading model from path /tmp/tmpnupm3562/model/ with prefix 57023e0a781240ba
[INFO 23-12-15 05:34:43.6663 UTC kernel.cc:1075] Use fast generic engine


i:64


[INFO 23-12-15 05:34:44.7879 UTC kernel.cc:1243] Loading model from path /tmp/tmpcu75bmmi/model/ with prefix ebc93228e9b24045
[INFO 23-12-15 05:34:44.7948 UTC kernel.cc:1075] Use fast generic engine


i:65


[INFO 23-12-15 05:34:45.8716 UTC kernel.cc:1243] Loading model from path /tmp/tmpsr7vdzo3/model/ with prefix 1b4872da25864317
[INFO 23-12-15 05:34:45.8761 UTC kernel.cc:1075] Use fast generic engine


i:66


[INFO 23-12-15 05:34:47.0797 UTC kernel.cc:1243] Loading model from path /tmp/tmpbjyafrvr/model/ with prefix 9b9f94ab2581489b
[INFO 23-12-15 05:34:47.0856 UTC kernel.cc:1075] Use fast generic engine


i:67


[INFO 23-12-15 05:34:48.5053 UTC kernel.cc:1243] Loading model from path /tmp/tmpa2a03z2d/model/ with prefix 628ba46cbd48422e
[INFO 23-12-15 05:34:48.5222 UTC kernel.cc:1075] Use fast generic engine


i:68


[INFO 23-12-15 05:34:49.7679 UTC kernel.cc:1243] Loading model from path /tmp/tmp8gbdotg7/model/ with prefix 7f4c7cf85b71428f
[INFO 23-12-15 05:34:49.7783 UTC abstract_model.cc:1311] Engine "GradientBoostedTreesQuickScorerExtended" built
[INFO 23-12-15 05:34:49.7783 UTC kernel.cc:1075] Use fast generic engine


i:69


[INFO 23-12-15 05:34:50.9660 UTC kernel.cc:1243] Loading model from path /tmp/tmpyp43w05q/model/ with prefix 552b1a7cf3ab4968
[INFO 23-12-15 05:34:50.9723 UTC kernel.cc:1075] Use fast generic engine


i:70


[INFO 23-12-15 05:34:52.2150 UTC kernel.cc:1243] Loading model from path /tmp/tmp70mn76h5/model/ with prefix 310d22fa3ada4db6
[INFO 23-12-15 05:34:52.2227 UTC kernel.cc:1075] Use fast generic engine


i:71


[INFO 23-12-15 05:34:53.3690 UTC kernel.cc:1243] Loading model from path /tmp/tmp317bqaa0/model/ with prefix 12fd2406a86243fa
[INFO 23-12-15 05:34:53.3756 UTC kernel.cc:1075] Use fast generic engine


i:72


[INFO 23-12-15 05:34:54.6784 UTC kernel.cc:1243] Loading model from path /tmp/tmpyrcoibiq/model/ with prefix ed5fed1c53294a5c
[INFO 23-12-15 05:34:54.6924 UTC kernel.cc:1075] Use fast generic engine


i:73


[INFO 23-12-15 05:34:55.8123 UTC kernel.cc:1243] Loading model from path /tmp/tmps5rpv0tc/model/ with prefix 23a4e47ee89a4065
[INFO 23-12-15 05:34:55.8186 UTC kernel.cc:1075] Use fast generic engine


i:74


[INFO 23-12-15 05:34:57.0548 UTC kernel.cc:1243] Loading model from path /tmp/tmpdxhu7ndn/model/ with prefix 59832a32c3114aea
[INFO 23-12-15 05:34:57.0652 UTC kernel.cc:1075] Use fast generic engine


i:75


[INFO 23-12-15 05:34:58.2204 UTC kernel.cc:1243] Loading model from path /tmp/tmpdz0e7uhl/model/ with prefix 69665c10bb1545a2
[INFO 23-12-15 05:34:58.2275 UTC kernel.cc:1075] Use fast generic engine


i:76


[INFO 23-12-15 05:34:59.3206 UTC kernel.cc:1243] Loading model from path /tmp/tmp2pymq96e/model/ with prefix badc5aee90554db0
[INFO 23-12-15 05:34:59.3243 UTC kernel.cc:1075] Use fast generic engine


i:77


[INFO 23-12-15 05:35:00.3995 UTC kernel.cc:1243] Loading model from path /tmp/tmp8bjc5kik/model/ with prefix 257fae1f1e9a4fc2
[INFO 23-12-15 05:35:00.4036 UTC abstract_model.cc:1311] Engine "GradientBoostedTreesQuickScorerExtended" built
[INFO 23-12-15 05:35:00.4037 UTC kernel.cc:1075] Use fast generic engine


i:78


[INFO 23-12-15 05:35:01.5222 UTC kernel.cc:1243] Loading model from path /tmp/tmpq9cm3lmn/model/ with prefix deb3a2b350b24147
[INFO 23-12-15 05:35:01.5283 UTC kernel.cc:1075] Use fast generic engine


i:79


[INFO 23-12-15 05:35:02.6638 UTC kernel.cc:1243] Loading model from path /tmp/tmpsg9o0zte/model/ with prefix ab945531de6542c7
[INFO 23-12-15 05:35:02.6701 UTC kernel.cc:1075] Use fast generic engine


i:80


[INFO 23-12-15 05:35:03.8461 UTC kernel.cc:1243] Loading model from path /tmp/tmp0knlxuqu/model/ with prefix 05b32cd8f20e4ded
[INFO 23-12-15 05:35:03.8548 UTC kernel.cc:1075] Use fast generic engine


i:81


[INFO 23-12-15 05:35:05.0720 UTC kernel.cc:1243] Loading model from path /tmp/tmp0pxntnqn/model/ with prefix 5a5e2e7683874a77
[INFO 23-12-15 05:35:05.0819 UTC kernel.cc:1075] Use fast generic engine


i:82


[INFO 23-12-15 05:35:06.2570 UTC kernel.cc:1243] Loading model from path /tmp/tmpme1opqxi/model/ with prefix ed1f10794403459c
[INFO 23-12-15 05:35:06.2659 UTC kernel.cc:1075] Use fast generic engine


i:83


[INFO 23-12-15 05:35:07.4468 UTC kernel.cc:1243] Loading model from path /tmp/tmppyc37au6/model/ with prefix 3005f092d5bc4a74
[INFO 23-12-15 05:35:07.4545 UTC kernel.cc:1075] Use fast generic engine


i:84


[INFO 23-12-15 05:35:08.8136 UTC kernel.cc:1243] Loading model from path /tmp/tmpmrrx62w1/model/ with prefix d545773db1be413f
[INFO 23-12-15 05:35:08.8276 UTC kernel.cc:1075] Use fast generic engine


i:85


[INFO 23-12-15 05:35:09.9378 UTC kernel.cc:1243] Loading model from path /tmp/tmpv93_24xx/model/ with prefix 07535426c5024330
[INFO 23-12-15 05:35:09.9434 UTC kernel.cc:1075] Use fast generic engine


i:86


[INFO 23-12-15 05:35:11.2701 UTC kernel.cc:1243] Loading model from path /tmp/tmp2_qgb_al/model/ with prefix 210841659c9b4146
[INFO 23-12-15 05:35:11.2846 UTC abstract_model.cc:1311] Engine "GradientBoostedTreesQuickScorerExtended" built
[INFO 23-12-15 05:35:11.2846 UTC kernel.cc:1075] Use fast generic engine


i:87


[INFO 23-12-15 05:35:12.6655 UTC kernel.cc:1243] Loading model from path /tmp/tmp123rmlr1/model/ with prefix 7a6f1c80dde24b16
[INFO 23-12-15 05:35:12.6819 UTC kernel.cc:1075] Use fast generic engine


i:88


[INFO 23-12-15 05:35:13.9024 UTC kernel.cc:1243] Loading model from path /tmp/tmpdix0nrd4/model/ with prefix e36254fa43854295
[INFO 23-12-15 05:35:13.9128 UTC kernel.cc:1075] Use fast generic engine


i:89


[INFO 23-12-15 05:35:15.0174 UTC kernel.cc:1243] Loading model from path /tmp/tmps23o3gfy/model/ with prefix 1de020ec38f04461
[INFO 23-12-15 05:35:15.0219 UTC kernel.cc:1075] Use fast generic engine


i:90


[INFO 23-12-15 05:35:16.2201 UTC kernel.cc:1243] Loading model from path /tmp/tmpwy76bkiw/model/ with prefix 5ff800ffe9a749ec
[INFO 23-12-15 05:35:16.2292 UTC kernel.cc:1075] Use fast generic engine


i:91


[INFO 23-12-15 05:35:17.3540 UTC kernel.cc:1243] Loading model from path /tmp/tmpxoc2a8sq/model/ with prefix 9bdb510710604cae
[INFO 23-12-15 05:35:17.3600 UTC kernel.cc:1075] Use fast generic engine


i:92


[INFO 23-12-15 05:35:18.8615 UTC kernel.cc:1243] Loading model from path /tmp/tmpr1aydf84/model/ with prefix 763667397b4a4843
[INFO 23-12-15 05:35:18.8780 UTC kernel.cc:1075] Use fast generic engine


i:93


[INFO 23-12-15 05:35:20.0886 UTC kernel.cc:1243] Loading model from path /tmp/tmpstrk1ray/model/ with prefix d05071922f6549f2
[INFO 23-12-15 05:35:20.0976 UTC kernel.cc:1075] Use fast generic engine


i:94


[INFO 23-12-15 05:35:21.2561 UTC kernel.cc:1243] Loading model from path /tmp/tmp6s3mlusr/model/ with prefix 1dce20fcee6b4bed
[INFO 23-12-15 05:35:21.2620 UTC kernel.cc:1075] Use fast generic engine


i:95


[INFO 23-12-15 05:35:22.4380 UTC kernel.cc:1243] Loading model from path /tmp/tmpvddse22m/model/ with prefix 9029a1967d184943
[INFO 23-12-15 05:35:22.4456 UTC abstract_model.cc:1311] Engine "GradientBoostedTreesQuickScorerExtended" built
[INFO 23-12-15 05:35:22.4456 UTC kernel.cc:1075] Use fast generic engine


i:96


[INFO 23-12-15 05:35:23.6512 UTC kernel.cc:1243] Loading model from path /tmp/tmp4u0ok1b2/model/ with prefix 7ae7893c19574dba
[INFO 23-12-15 05:35:23.6602 UTC kernel.cc:1075] Use fast generic engine


i:97


[INFO 23-12-15 05:35:24.7571 UTC kernel.cc:1243] Loading model from path /tmp/tmp7zrueulo/model/ with prefix 8d889ba57a454be1
[INFO 23-12-15 05:35:24.7617 UTC kernel.cc:1075] Use fast generic engine


i:98


[INFO 23-12-15 05:35:25.9103 UTC kernel.cc:1243] Loading model from path /tmp/tmp3wlolqtf/model/ with prefix 6cb058577b7b42ff
[INFO 23-12-15 05:35:25.9171 UTC kernel.cc:1075] Use fast generic engine


i:99


[INFO 23-12-15 05:35:27.2280 UTC kernel.cc:1243] Loading model from path /tmp/tmp5_b2chag/model/ with prefix b6dd2775a47a49ad
[INFO 23-12-15 05:35:27.2410 UTC kernel.cc:1075] Use fast generic engine


Submission exported to /kaggle/working/submission.csv
