## Imports

In [1]:
# Install the notebook's Python dependencies into the kernel's environment.
# NOTE(review): versions are unpinned, so a fresh install may resolve to different releases.
# plot_model also requires graphviz to be installed (see the plot_model message in the
# "visualize Model architecture" output); it is not part of this package list.
%pip install pandas scikit-learn tensorflow keras tqdm numpy matplotlib pydot IPython

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout, BatchNormalization
from tensorflow.keras.utils import plot_model
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np

import pydot
from IPython.display import Image

import time
import os

2023-04-10 20:26:27.586965: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-04-10 20:26:28.005858: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-04-10 20:26:28.007978: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
# Report how many GPUs TensorFlow can see. The label promises a count, so print
# len(...) rather than the raw device list, and use the stable
# tf.config.list_physical_devices API instead of the `experimental` alias.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))

Num GPUs Available:  []


## Clean Data

In [4]:
if not os.path.exists("clean_train.csv"):
    import csv

    def to_int(s):
        """Best-effort conversion of a raw CSV field to a non-negative ``int``.

        The value is first passed to ``int()`` unchanged. If that fails, every
        character other than the ASCII digits and ``.`` is discarded and the
        remainder is interpreted as follows:

        * no digits left (empty, or dots only such as ``"Mr."``) -> ``0``
        * more than one dot -> dots are removed and the digits are joined
          (``"W./C. 6608"`` -> ``6608``)
        * at most one dot -> the part before the dot is used, i.e. the value
          is truncated (``"3.7"`` -> ``3``, ``".5"`` -> ``0``)

        Args:
            s: Any value; non-strings are converted with ``str()`` for the
                fallback path.

        Returns:
            int: The parsed integer, or ``0`` when nothing numeric remains.
        """
        try:
            return int(s)
        except (TypeError, ValueError, OverflowError):
            # Only conversion failures are expected here; anything else
            # (e.g. KeyboardInterrupt) must propagate.
            pass

        kept = "".join(ch for ch in str(s) if ch in "0123456789.")

        # A remainder made only of dots used to reach int(float(".")) and raise.
        if not kept.strip("."):
            return 0

        if kept.count(".") > 1:
            return int(kept.replace(".", ""))

        # Truncate at the decimal point without a float round-trip, so long
        # digit strings keep full precision.
        whole_part = kept.partition(".")[0]
        return int(whole_part) if whole_part else 0

    # Column positions in train.csv. Per the cleaned header these are
    # PassengerId, Survived, Pclass, Sex, Age, SibSp, Parch, Ticket, Fare, Embarked;
    # positions 3 and 10 are dropped.
    header_columns = (0, 1, 2, 4, 5, 6, 7, 8, 9, 11)
    numeric_columns = (0, 1, 2, 5, 6, 7, 8, 9)
    sex_column = 4
    embarked_column = 11

    output_csv_array = []

    # `with` guarantees the input file is closed even if a row fails to parse;
    # newline="" is what the csv module expects for files it reads.
    with open("train.csv", "r", newline="") as input_file:
        for row in csv.reader(input_file):
            if not row:
                # Blank line in the input: nothing to clean, and emitting an
                # empty record would only add noise to the output.
                continue

            if row[0] == "PassengerId":
                # Header row: keep the selected column names verbatim.
                print("header")
                output_csv_array.append(
                    [row[i] for i in header_columns if i < len(row)]
                )
                continue

            output_row = []
            for i, value in enumerate(row):
                if i in numeric_columns:
                    try:
                        output_row.append(to_int(value))
                    except (TypeError, ValueError, OverflowError):
                        # Keep the row aligned with the header: report the bad
                        # cell and store 0 instead of silently dropping it.
                        print("Error: ", value)
                        output_row.append(0)
                elif i == sex_column:
                    # male -> 0, anything else -> 1
                    output_row.append(0 if value == "male" else 1)
                elif i == embarked_column:
                    # S -> 0, C -> 1, anything else (including missing) -> 2
                    output_row.append(0 if value == "S" else 1 if value == "C" else 2)

            output_csv_array.append(output_row)

    with open("clean_train.csv", "w", newline="") as output_file:
        writer = csv.writer(output_file)
        writer.writerows(output_csv_array)

else:
    print("clean_train.csv already exists")


clean_train.csv already exists


## Import Data

In [8]:
# Load the cleaned data written by the "Clean Data" cell.
df = pd.read_csv("clean_train.csv")

# Column 0 is PassengerId (an identifier, not a feature), column 1 is the
# Survived label, and everything from column 2 onward is used as input.
X, y = df.iloc[:, 2:], df.iloc[:, 1]

# Fix the seed so the split is reproducible. Without it every run draws a new
# split, and because the model is reloaded from model.h5 and trained further,
# rows it was previously trained on could end up in the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=42
)

df.describe()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,Age,SibSp,Parch,Ticket,Fare,Embarked
count,891.0,891.0,891.0,891.0,891.0,891.0,891.0,891.0,891.0,891.0
mean,446.0,0.383838,2.308642,0.352413,23.783389,0.523008,0.381594,284612.6,31.785634,0.365881
std,257.353842,0.486592,0.836071,0.47799,17.597344,1.102743,0.806057,1193290.0,49.70373,0.640157
min,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,223.5,0.0,2.0,0.0,6.0,0.0,0.0,13508.0,7.0,0.0
50%,446.0,0.0,3.0,0.0,24.0,0.0,0.0,111361.0,14.0,0.0
75%,668.5,1.0,3.0,1.0,35.0,1.0,0.0,347077.0,31.0,1.0
max,891.0,1.0,3.0,1.0,80.0,8.0,6.0,23101290.0,512.0,2.0


## AI Model

In [23]:
def train_model(X_train, y_train, epochs=100, batch_size=32, model_arg=None):
    """Fit a binary classifier on the training data and return it.

    When ``model_arg`` is given, that model is trained further as-is
    (NOTE(review): it is assumed to be compiled already; nothing here
    compiles it). Otherwise a new ``Sequential`` network is built with
    Dense(64, relu) -> Dense(8, relu) -> Dense(1, sigmoid), its summary is
    printed, and it is compiled with the Adam optimizer, binary cross-entropy
    loss and an accuracy metric.

    Args:
        X_train: Training features; ``X_train.shape[1]`` sets the input width
            when a new model is built.
        y_train: Training labels.
        epochs: Number of epochs passed to ``model.fit``.
        batch_size: Batch size passed to ``model.fit``.
        model_arg: Optional existing model to continue training.

    Returns:
        The trained model (the same object as ``model_arg`` when provided).
    """
    if model_arg is not None:
        model = model_arg
    else:
        n_features = X_train.shape[1]
        model = Sequential()
        model.add(Dense(64, activation="relu", input_shape=(n_features,)))
        model.add(Dense(8, activation="relu"))
        model.add(Dense(1, activation="sigmoid"))
        # summary() prints the table itself and returns None, so wrapping it
        # in print() only appended a stray "None" to the output.
        model.summary()
        model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size)
    return model

In [7]:
def plot_model_architecture(model, to_file='model.png'):
    """Render a Keras model's architecture to an image and return it for display.

    Args:
        model: The Keras model to draw.
        to_file: Path of the image file written by ``plot_model`` and then
            loaded for display. Defaults to ``'model.png'``.

    Returns:
        IPython.display.Image: Image object for ``to_file``. It is only
        rendered in the notebook when it is the last expression of a cell
        (or is passed to ``display``).
    """
    plot_model(model, to_file=to_file, show_shapes=True, show_layer_names=True)
    return Image(filename=to_file)

## User Input

In [20]:
# Training hyper-parameters consumed by the "Run Model" cell:
# number of passes over the training set, and samples per gradient step.
epochs, batch_size = 1500, 16

## Load Model

In [5]:
# Resume from a previously saved model when one exists on disk; otherwise leave
# `model` as None so that train_model builds a fresh network.
checkpoint_found = os.path.exists("model.h5")
model = load_model("model.h5") if checkpoint_found else None
print("model loaded" if checkpoint_found else "model not loaded")

print(model)

model loaded
<keras.engine.sequential.Sequential object at 0x7f66c4365de0>


## Run Model

In [31]:
# Train (or continue training) the model and report the wall-clock time taken.
started_at = time.time()

model_trained = train_model(
    X_train,
    y_train,
    epochs=epochs,
    batch_size=batch_size,
    model_arg=model,
)
model = model_trained

elapsed_ms = int((time.time() - started_at) * 1000)
print("time:", f"{elapsed_ms}ms")

# Score the trained model on the held-out split.
loss, accuracy = model.evaluate(X_test, y_test)
print("loss:", loss, "accuracy:", accuracy)

Epoch 1/1500
Epoch 2/1500
Epoch 3/1500
Epoch 4/1500
Epoch 5/1500
Epoch 6/1500
Epoch 7/1500
Epoch 8/1500
Epoch 9/1500
Epoch 10/1500
Epoch 11/1500
Epoch 12/1500
Epoch 13/1500
Epoch 14/1500
Epoch 15/1500
Epoch 16/1500
Epoch 17/1500
Epoch 18/1500
Epoch 19/1500
Epoch 20/1500
Epoch 21/1500
Epoch 22/1500
Epoch 23/1500
Epoch 24/1500
Epoch 25/1500
Epoch 26/1500
Epoch 27/1500
Epoch 28/1500
Epoch 29/1500
Epoch 30/1500
Epoch 31/1500
Epoch 32/1500
Epoch 33/1500
Epoch 34/1500
Epoch 35/1500
Epoch 36/1500
Epoch 37/1500
Epoch 38/1500
Epoch 39/1500
Epoch 40/1500
Epoch 41/1500
Epoch 42/1500
Epoch 43/1500
Epoch 44/1500
Epoch 45/1500
Epoch 46/1500
Epoch 47/1500
Epoch 48/1500
Epoch 49/1500
Epoch 50/1500
Epoch 51/1500
Epoch 52/1500
Epoch 53/1500
Epoch 54/1500
Epoch 55/1500
Epoch 56/1500
Epoch 57/1500
Epoch 58/1500
Epoch 59/1500
Epoch 60/1500
Epoch 61/1500
Epoch 62/1500
Epoch 63/1500
Epoch 64/1500
Epoch 65/1500
Epoch 66/1500
Epoch 67/1500
Epoch 68/1500
Epoch 69/1500
Epoch 70/1500
Epoch 71/1500
Epoch 72/1500
E

## Visualize Model Architecture

In [34]:
# summary() prints the layer table itself and returns None, so it is called
# directly rather than wrapped in print() (which appended a stray "None").
model.summary()

# Keep the diagram as the cell's last expression: the Image returned by
# plot_model_architecture is only rendered when it is the cell's result, and
# it was previously discarded.
plot_model_architecture(model)

You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model to work.
Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_15 (Dense)            (None, 64)                576       
                                                                 
 dense_16 (Dense)            (None, 8)                 520       
                                                                 
 dense_17 (Dense)            (None, 1)                 9         
                                                                 
Total params: 1,105
Trainable params: 1,105
Non-trainable params: 0
_________________________________________________________________
None


In [None]:
# Persist the trained model to "model.h5", the same path the "Load Model" cell
# checks for, so a later run can resume training from it.
model.save("model.h5")

In [13]:
# Re-score the current model on the held-out split.
loss, accuracy = model.evaluate(X_test, y_test)
print("loss:", loss, "accuracy:", accuracy)

# summary() prints the table itself and returns None; wrapping it in print()
# only added a trailing "None" line to the output.
model.summary()

loss: 0.632318377494812 accuracy: 0.6442577242851257
Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_15 (Dense)            (None, 64)                576       
                                                                 
 dense_16 (Dense)            (None, 8)                 520       
                                                                 
 dense_17 (Dense)            (None, 1)                 9         
                                                                 
Total params: 1,105
Trainable params: 1,105
Non-trainable params: 0
_________________________________________________________________
None
