In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.preprocessing import OneHotEncoder

# # 可視化ツールのimport
# import plotly.express as px
# import plotly.graph_objects as go
# from plotly.subplots import make_subplots
# import plotly.figure_factory as ff

import tensorflow as tf
from keras import models, layers, callbacks, initializers, regularizers
from keras.layers import ELU, Dense, Dropout
from keras.utils import plot_model

# import optuna

# temp=dict(layout=go.Layout(font=dict(family="Franklin Gothic", size=12), 
#                            height=500, width=1000))
# #色コード
# class CLR:
#     map_1 = 'GnBu'
#     blue_thin = '#88CAC9'
#     blue_dark = '#38A6A5'
#     pink_thin = '#eb8792'
#     pink_dark = '#ec616e'
#     orange = '#f8b287'
#     orange_dark = '#ee572a'

In [2]:
df_profile = pd.read_csv('./data/profile.csv')
df_recipe = pd.read_csv('./data/recipe.csv')
df_profile.head()

Unnamed: 0,Number,Ave_depth,Std_depth,Ave_top,Std_top,Ave_min,Std_min,Collapse,Distortion,Mask,Roughness
0,1,96.695526,0.0,0.0,0.0,0.0,0.0,2,0,1,1
1,2,68.230583,10.478008,11.66111,0.58781,10.610055,0.593178,0,1,0,1
2,3,84.559617,5.306303,11.611535,0.411439,11.611535,0.411439,0,1,0,1
3,4,77.775023,3.08021,10.764922,0.394361,10.764922,0.394361,0,0,0,0
4,5,33.637337,3.592776,12.181741,0.522975,12.181741,0.522975,0,2,0,0


<div style="padding:20px;color:white;margin:0;font-size:200%;text-align:center;display:fill;border-radius:5px;background-color:#38A6A5;overflow:hidden;font-weight:500">データの前処理</div>

In [3]:
from get_objective import profile

df_rsme = profile(df_profile)

# print(df_rsme)

df_rsme

Unnamed: 0,0
0,1.668418
1,2.211165
2,1.552452
3,1.510181
4,2.059738


In [4]:
df_recipe = df_recipe.drop(columns='Number')

In [5]:
# 欠損値がないか確認
df_recipe.isnull().sum(axis=0)#データの欠測値を一覧表示

Ar              0
HBr             0
N2              0
Cl2             0
SF6             0
NF3             0
CO2             0
Pressure        0
Source_power    0
Bias_power      0
dtype: int64

In [6]:
from sklearn.preprocessing import StandardScaler

scaler=StandardScaler()
df_scaled =pd.DataFrame(scaler.fit_transform(df_recipe),columns=df_recipe.columns)
df_scaled

Unnamed: 0,Ar,HBr,N2,Cl2,SF6,NF3,CO2,Pressure,Source_power,Bias_power
0,-0.278543,-0.278543,2.270436,-1.37969,-0.62361,1.998781,-0.278543,0.0,0.0,0.0
1,-0.278543,-0.278543,-0.806036,1.960964,-0.62361,-1.583264,-0.278543,0.0,0.0,0.0
2,2.042649,-0.278543,-0.036918,-0.962108,-0.62361,-0.687752,-0.278543,0.0,0.0,0.0
3,4.36384,-0.278543,-0.806036,-2.214853,-0.62361,-1.583264,-0.278543,0.0,0.0,0.0
4,-0.278543,-0.278543,1.193671,1.042284,-0.62361,-1.941468,-0.278543,0.0,0.0,0.0
5,-0.278543,-0.278543,0.27073,-0.46101,-0.62361,2.356985,-0.278543,0.0,0.0,0.0
6,-0.278543,-0.278543,0.424553,-0.210461,1.603567,-0.150446,-0.278543,0.0,0.0,0.0
7,-0.278543,-0.278543,-1.113683,1.042284,-0.62361,0.745065,-0.278543,0.0,0.0,0.0
8,-0.278543,-0.278543,2.578083,-0.46101,-0.62361,-0.329548,-0.278543,0.0,0.0,0.0
9,-0.278543,-0.278543,0.424553,-0.210461,-0.62361,-0.150446,2.042649,0.0,0.0,0.0


In [7]:
from sklearn.model_selection import train_test_split
df_train_x = df_recipe
df_train_y = df_rsme
x_train, x_val, y_train, y_val = train_test_split(df_train_x, df_train_y, test_size=0.15, random_state=42)

ValueError: Found input variables with inconsistent numbers of samples: [25, 26]

In [None]:
# モデルの構築関数
def build_model(input_shape):
    model = models.Sequential()
    
    model.add(Dense(128, activation = "elu", input_shape = input_shape, name = "layer_1", kernel_regularizer=regularizers.l2(0.001)))
    model.add(Dropout(0.25))
    model.add(Dense(64, activation = "relu", name = "layer_2", kernel_regularizer=regularizers.l2(0.001)))
    model.add(Dropout(0.25))
    model.add(Dense(32, activation = "sigmoid", name = "layer_3" , kernel_regularizer=regularizers.l2(0.001)))
    model.add(Dense(1, activation = "sigmoid", name ="output_layer")) 

    model.compile(optimizer='adam', loss='binary_crossentropy')
    return model

In [None]:
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)

input_shape = x_train.shape[1:]

model = build_model(input_shape)

plot_model(model, show_shapes=True)

In [None]:
y_train.head()

In [None]:
# 早期終了コールバックの設定
early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=3, verbose=1, restore_best_weights=True)

# 学習率スケジューリングコールバックの設定
lr_scheduler = callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=2, verbose=1)

# モデルの訓練
final = model.fit(
    x_train, y_train,
    validation_data=(x_val, y_val),
    batch_size=12,
    epochs=50,
    callbacks=[early_stopping, lr_scheduler]
)

In [None]:
import matplotlib.pyplot as plt
# lossの変化を取得
train_loss = final.history['loss']
val_loss = final.history['val_loss']

# グラフのプロット
plt.figure(figsize=(8, 6))
plt.plot(range(1, len(train_loss) + 1), train_loss, label='Training Loss')
plt.plot(range(1, len(val_loss) + 1), val_loss, label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training and Validation Loss')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
df_processed_test =preprocess(df_test)
df_processed_test =pd.DataFrame(scaler.fit_transform(df_processed_test),columns=df_processed_test.columns)
df_processed_test.head()

In [None]:
y_pred = model.predict(df_processed_test)

print(y_pred)

df_submit = pd.DataFrame(predictions,columns=['Survived'])#新たな提出用dfを定義
df_submit.Survived = round(df_submit.Survived)
df_submit

In [None]:
df_answer = pd.read_csv('./kaggle/input/titanic/answer.csv')

Y_test = df_answer['Survived']

Y_test.head()

from sklearn.metrics import roc_curve, roc_auc_score
from sklearn.metrics import f1_score
import matplotlib.pyplot as plt

fpr, tpr, thresholds = roc_curve(Y_test, y_pred)
auc_score = roc_auc_score(Y_test, y_pred)
plt.plot(fpr, tpr, label='AUC = %.3f' % (auc_score))
plt.legend()
plt.title('ROC curve')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.grid(True)


# accuracy: 0.8208955223880597
# f1_score: 0.7446808510638298
