In [3]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf

from sklearn.model_selection import train_test_split

In [5]:
df = pd.read_csv("/kaggle/input/titanic/train.csv")
df.head(n=8)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S
5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
7,8,0,3,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.075,,S


In [6]:
train_df = df.drop(["PassengerId", "Name", "Ticket"], axis=1)
print(train_df.isnull().sum())

Survived      0
Pclass        0
Sex           0
Age         177
SibSp         0
Parch         0
Fare          0
Cabin       687
Embarked      2
dtype: int64


In [7]:
man_mean_age = train_df[train_df["Sex"]=="male"]["Age"].mean()
women_mean_age = train_df[train_df["Sex"]=="female"]["Age"].mean()

print("%1.0f"%man_mean_age)
print("%1.0f"%women_mean_age)

31
28


In [8]:
train_df.loc[(train_df["Sex"] == "male") & (train_df["Age"].isnull()), "Age"] = man_mean_age
train_df.loc[(train_df["Sex"] == "female") & (train_df["Age"].isnull()), "Age"] = women_mean_age

In [15]:
train_df["Cabin"] = train_df["Cabin"].fillna("X")
train_df["Embarked"] = train_df["Embarked"].fillna("S")

In [16]:
print(train_df.isnull().sum())

Survived    0
Pclass      0
Sex         0
Age         0
SibSp       0
Parch       0
Fare        0
Cabin       0
Embarked    0
dtype: int64


In [17]:
y = train_df.Survived
x = train_df.drop(["Survived"], axis=1)

In [20]:
x.head(n=8)

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Cabin,Embarked
0,3,male,22.0,1,0,7.25,X,S
1,1,female,38.0,1,0,71.2833,C85,C
2,3,female,26.0,0,0,7.925,X,S
3,1,female,35.0,1,0,53.1,C123,S
4,3,male,35.0,0,0,8.05,X,S
5,3,male,30.726645,0,0,8.4583,X,Q
6,1,male,54.0,0,0,51.8625,E46,S
7,3,male,2.0,3,1,21.075,X,S


In [22]:
X_train,X_test,Y_train,Y_test = train_test_split(x,y, train_size=0.75, random_state=101)

In [23]:
# numeric feature
pclass_feature = tf.feature_column.numeric_column("Pclass")
parch_feature = tf.feature_column.numeric_column("Parch")
fare_feature = tf.feature_column.numeric_column("Fare")
age_feature = tf.feature_column.numeric_column("Age")

# แบ่งช่วงอายุ เด็ก ผู้ใหญ่ คนแก่
age_bucket_feature = tf.feature_column.bucketized_column(age_feature, [12,21,60])

# แบ่งเพศ ชาย หญิง
sex_feature = tf.feature_column.categorical_column_with_vocabulary_list("Sex", ["female","male"])

# แยกตัวแปร
embarked_feature = tf.feature_column.categorical_column_with_hash_bucket("Embarked", 3)
cabin_feature = tf.feature_column.categorical_column_with_hash_bucket("Cabin", 100)

feature_columns = [pclass_feature,parch_feature,fare_feature,age_feature,age_bucket_feature,sex_feature,embarked_feature,cabin_feature]

In [52]:
def train_input_fn(features, labels, epochs, shuffle=True, batch_size = 32): 
    def input_function(): 
        dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels)) 
        if shuffle: 
            dataset.shuffle(100) 
        dataset = dataset.batch(batch_size).repeat(epochs) 
        return dataset 
    return input_function 

def eval_input_fn(features, labels, batch_size = 32): 
    def input_function(): 
        input_features = dict(features) 
        if labels is None: 
            inputs = input_features 
        else: 
            inputs = (input_features, labels) 
            
        dataset = tf.data.Dataset.from_tensor_slices(inputs) 
        dataset = dataset.batch(batch_size) 
        return dataset 
    return input_function 


In [53]:
train_input_fn = train_input_fn(X_train, Y_train, epochs=None)

In [54]:
LinearClassifier = tf.estimator.LinearClassifier(feature_columns = feature_columns)

In [55]:
LinearClassifier.train(input_fn = train_input_fn, steps=2000)

<tensorflow_estimator.python.estimator.canned.linear.LinearClassifierV2 at 0x7f76f810c210>

In [56]:
eval_input_fn = eval_input_fn(X_test, Y_test)

In [57]:
LinearClassifier.evaluate(input_fn = eval_input_fn)

{'accuracy': 0.7892377,
 'accuracy_baseline': 0.5695067,
 'auc': 0.8442831,
 'auc_precision_recall': 0.8478156,
 'average_loss': 0.5017562,
 'label/mean': 0.43049327,
 'loss': 0.5024658,
 'precision': 0.8356164,
 'prediction/mean': 0.33014998,
 'recall': 0.6354167,
 'global_step': 2000}

In [58]:
pred = list(LinearClassifier.predict(input_fn = eval_input_fn))
pred



[{'logits': array([-1.6939456], dtype=float32),
  'logistic': array([0.15525764], dtype=float32),
  'probabilities': array([0.8447423 , 0.15525764], dtype=float32),
  'class_ids': array([0]),
  'classes': array([b'0'], dtype=object),
  'all_class_ids': array([0, 1], dtype=int32),
  'all_classes': array([b'0', b'1'], dtype=object)},
 {'logits': array([2.3225577], dtype=float32),
  'logistic': array([0.91072816], dtype=float32),
  'probabilities': array([0.08927189, 0.9107281 ], dtype=float32),
  'class_ids': array([1]),
  'classes': array([b'1'], dtype=object),
  'all_class_ids': array([0, 1], dtype=int32),
  'all_classes': array([b'0', b'1'], dtype=object)},
 {'logits': array([-0.09617531], dtype=float32),
  'logistic': array([0.47597468], dtype=float32),
  'probabilities': array([0.5240253 , 0.47597468], dtype=float32),
  'class_ids': array([0]),
  'classes': array([b'0'], dtype=object),
  'all_class_ids': array([0, 1], dtype=int32),
  'all_classes': array([b'0', b'1'], dtype=object)}

In [59]:
pred = [np.argmax(p["probabilities"], axis=0) for p in pred]
print(pred)

[0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0]
