<a href="https://colab.research.google.com/github/SemihAkkoc/machine_learning/blob/main/tragedy_to_ml.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [3]:
# Titanic passenger CSVs: one for fitting the model, one held out for evaluation.
TRAIN_CSV_URL = 'https://storage.googleapis.com/tf-datasets/titanic/train.csv'
EVAL_CSV_URL = 'https://storage.googleapis.com/tf-datasets/titanic/eval.csv'

dftrain, dftest = (pd.read_csv(csv_url) for csv_url in (TRAIN_CSV_URL, EVAL_CSV_URL))

# pop() removes the 'survived' label column from each feature frame and returns it.
y_train, y_test = dftrain.pop('survived'), dftest.pop('survived')

In [5]:
# Preview the first five training rows; some columns (e.g. deck) hold the placeholder value 'unknown'.
dftrain.head(n=5)

Unnamed: 0,sex,age,n_siblings_spouses,parch,fare,class,deck,embark_town,alone
0,male,22.0,1,0,7.25,Third,unknown,Southampton,n
1,female,38.0,1,0,71.2833,First,C,Cherbourg,n
2,female,26.0,0,0,7.925,Third,unknown,Southampton,y
3,female,35.0,1,0,53.1,First,C,Southampton,n
4,male,28.0,0,0,8.4583,Third,unknown,Queenstown,y


In [11]:
# Creating our feature columns

CATEGORICAL_COLUMNS = ['sex', 'n_siblings_spouses', 'parch', 'class', 'deck', 'embark_town', 'alone']
NUMERIC_COLUMNS = ['age', 'fare']

# Categorical columns: the vocabulary is every distinct value seen in the training frame.
categorical_features = [
    tf.feature_column.categorical_column_with_vocabulary_list(
        column_name, dftrain[column_name].unique()
    )
    for column_name in CATEGORICAL_COLUMNS
]

# Numeric columns are passed through as float32 values.
numeric_features = [
    tf.feature_column.numeric_column(column_name, dtype=tf.float32)
    for column_name in NUMERIC_COLUMNS
]

# Same ordering as before: categorical columns first, numeric columns after.
feature_columns = categorical_features + numeric_features

print(feature_columns)

[VocabularyListCategoricalColumn(key='sex', vocabulary_list=('male', 'female'), dtype=tf.string, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='n_siblings_spouses', vocabulary_list=(1, 0, 3, 4, 2, 5, 8), dtype=tf.int64, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='parch', vocabulary_list=(0, 1, 2, 5, 3, 4), dtype=tf.int64, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='class', vocabulary_list=('Third', 'First', 'Second'), dtype=tf.string, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='deck', vocabulary_list=('unknown', 'C', 'G', 'A', 'B', 'D', 'F', 'E'), dtype=tf.string, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='embark_town', vocabulary_list=('Southampton', 'Cherbourg', 'Queenstown', 'unknown'), dtype=tf.string, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='alone', vocabulary_list=('n', 'y'), dtype=tf.string, def

In [15]:
# input function (if you know a way to feed the data without writing an input function like this, please reach out to me)

def make_input_fn(data_df, label_df, num_epochs=10, shuffle=True, batch_size=32,
                  shuffle_buffer_size=1000):
    """Build a zero-argument function that produces a batched ``tf.data.Dataset``.

    The returned callable is what this notebook hands to the estimator's
    ``train``/``evaluate``/``predict`` methods.

    Args:
        data_df: Feature table; ``dict(data_df)`` is taken so each column is
            keyed by its name.
        label_df: Labels sliced in step with the rows of ``data_df``.
        num_epochs: Number of times the batched dataset is repeated.
        shuffle: When true, the examples are shuffled before batching.
        batch_size: Number of examples per batch.
        shuffle_buffer_size: Buffer size passed to ``Dataset.shuffle``; only
            used when ``shuffle`` is true. Defaults to the previously
            hard-coded value of 1000.

    Returns:
        A function taking no arguments that builds and returns the dataset.
    """
    def input_function():
        # Pair the name-keyed feature columns with their labels, one example per row.
        ds = tf.data.Dataset.from_tensor_slices((dict(data_df), label_df))
        if shuffle:
            ds = ds.shuffle(shuffle_buffer_size)
        # Batch first, then repeat the batched dataset num_epochs times.
        return ds.batch(batch_size).repeat(num_epochs)
    return input_function

# Training input: default settings (shuffled, repeated for 10 epochs).
train_input_fn = make_input_fn(data_df=dftrain, label_df=y_train)
# Evaluation input: a single ordered pass over the held-out data.
test_input_fn = make_input_fn(
    data_df=dftest,
    label_df=y_test,
    num_epochs=1,
    shuffle=False,
)

In [None]:
# Instantiate the linear classifier estimator over the feature columns defined earlier.
model = tf.estimator.LinearClassifier(
    feature_columns=feature_columns,
)

In [None]:
# Fit the estimator on the dataset produced by the training input function.
model.train(input_fn=train_input_fn)

In [17]:
# Score the trained model on the held-out data; `result` is indexed by metric name.
result = model.evaluate(input_fn=test_input_fn)
print(result['accuracy'])  # fraction of test passengers classified correctly

INFO:tensorflow:Calling model_fn.


  getter=tf.compat.v1.get_variable)


INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2022-01-19T14:52:17
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmp8nsc_s_7/model.ckpt-200
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Inference Time : 0.73107s
INFO:tensorflow:Finished evaluation at 2022-01-19-14:52:17
INFO:tensorflow:Saving dict for global step 200: accuracy = 0.7537879, accuracy_baseline = 0.625, auc = 0.8324763, auc_precision_recall = 0.794043, average_loss = 0.4724371, global_step = 200, label/mean = 0.375, loss = 0.4639116, precision = 0.68085104, prediction/mean = 0.3765846, recall = 0.64646465
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 200: /tmp/tmp8nsc_s_7/model.ckpt-200
0.7537879


In [None]:
# Collect the model's predictions for the test set into a list so they can be indexed by position.
prediction_stream = model.predict(input_fn=test_input_fn)
predict = list(prediction_stream)

In [30]:
# a function that shows the model's prediction for a single passenger

def show_prediction(num=0, dftest=None, y_test=None, predict=None):
    """Print one passenger's features, the predicted chance of living, and the true outcome.

    Args:
        num: Zero-based position of the passenger within the evaluation data.
        dftest: Feature table to read the passenger row from. When omitted,
            the notebook-level ``dftest`` is looked up at call time.
        y_test: True labels, row-aligned with ``dftest``; a truthy value is
            reported as 'alive'. When omitted, the notebook-level ``y_test``
            is used.
        predict: Indexable collection of prediction dicts; entry ``num`` must
            have a 'probabilities' item whose element 1 is printed as the
            probability to live. When omitted, the notebook-level ``predict``
            is used.
    """
    # Resolve omitted arguments when called rather than when defined, so that
    # re-running the prediction cell is picked up without redefining this function.
    notebook_globals = globals()
    if dftest is None:
        dftest = notebook_globals['dftest']
    if y_test is None:
        y_test = notebook_globals['y_test']
    if predict is None:
        predict = notebook_globals['predict']

    # English ordinal suffix: 1st, 2nd, 3rd, 4th, ... with 11th-13th as the exception.
    position = num + 1
    if 10 <= position % 100 <= 20:
        suffix = 'th'
    else:
        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(position % 10, 'th')
    print(f'{position}{suffix} passenger status:\n')

    # `predict` is indexed by position, so the row and label are fetched by
    # position too (iloc) to stay aligned even if the index is not 0..n-1.
    print(dftest.iloc[num])
    print('\n\nModels prediction of this passengers probability to live is:', end=' ')
    print(predict[num]['probabilities'][1])
    outcome = 'alive' if int(y_test.iloc[num]) else 'dead'
    print(f'In reality this person is {outcome}')

In [34]:
show_prediction(num=0)  # first passenger in the test set

1th passenger status:

sex                          male
age                            35
n_siblings_spouses              0
parch                           0
fare                         8.05
class                       Third
deck                      unknown
embark_town           Southampton
alone                           y
Name: 0, dtype: object


Models prediction of this passengers probability to live is: 0.07677981
In reality this person is dead
