In [10]:
import numpy as np 
import pandas as pd 
import tensorflow as tf
from sklearn.model_selection import train_test_split

### Import and clean the data

In [11]:
# Columns removed from both CSVs before modelling.
DROPPED_COLUMNS = ['Cabin', 'Embarked', 'Ticket', 'Name']


def _load_passengers(csv_path):
  """Read one Titanic CSV and drop the columns listed in DROPPED_COLUMNS.

  Args:
    csv_path: path of the CSV file to read.

  Returns:
    A DataFrame holding the remaining columns, with missing values untouched.
  """
  return pd.read_csv(csv_path).drop(columns=DROPPED_COLUMNS)


dftrain = _load_passengers('../input/titanic/train.csv')
dfvalid = _load_passengers('../input/titanic/test.csv')

# Impute column by column. Passing a scalar to fillna() would fill the NaNs of
# *every* column with that scalar, so a missing Fare would silently be replaced
# by the mean age. The statistics come from the training frame only and are
# reused for the test frame, so nothing about the test data leaks into the fit.
# NOTE(review): assumes Age and Fare are the only columns left with missing
# values; the assertions below fail loudly if that is not the case.
fill_values = {column: dftrain[column].mean() for column in ('Age', 'Fare')}
dftrain = dftrain.fillna(fill_values)
dfvalid = dfvalid.fillna(fill_values)

assert not dftrain.isna().any().any(), 'unexpected missing values left in dftrain'
assert not dfvalid.isna().any().any(), 'unexpected missing values left in dfvalid'

# head() previews the first rows; head(-1) would return every row but the last.
print(dftrain.head())
print(dfvalid.head())

### Split Data

In [12]:
# Detach the label column. pop() also removes 'Survived' from dftrain in place,
# so from here on dftrain holds features only.
y = dftrain.pop('Survived')

# 80/20 hold-out split with a fixed seed so the partition is repeatable.
split_parts = train_test_split(
    dftrain,
    y,
    train_size=0.8,
    test_size=0.2,
    random_state=0,
)
X_dftrain_train, X_dftrain_valid, y_train, y_valid = split_parts

print(dftrain.head(-1))

### Define feature columns

In [13]:
# Display the dtype of every column currently in dftrain.
dftrain.dtypes

In [14]:
# Features encoded through a vocabulary of the distinct values seen in training.
CATEGORICAL_COLUMNS = [
    'Sex',
    'SibSp',
    'Parch',
    'Pclass',
]

# Features passed to the model as float32 numbers.
NUMERIC_COLUMNS = [
    'Age',
    'Fare',
]

In [15]:
# One vocabulary-backed categorical column per categorical feature; the
# vocabulary is the set of distinct values present in dftrain for that column.
categorical_features = [
    tf.feature_column.categorical_column_with_vocabulary_list(
        column_name, dftrain[column_name].unique()
    )
    for column_name in CATEGORICAL_COLUMNS
]

# One float32 numeric column per numeric feature.
numeric_features = [
    tf.feature_column.numeric_column(column_name, dtype=tf.float32)
    for column_name in NUMERIC_COLUMNS
]

# Categorical columns first, numeric columns after.
feature_columns = categorical_features + numeric_features

print(feature_columns)

### Input function

In [16]:
def make_input_fn(data_df, label_df, num_epocs=2000, shuffle=True, batch_size=32):
  """Build a zero-argument input function that yields a tf.data.Dataset.

  Args:
    data_df: feature table; converted with dict() into a name -> column mapping.
    label_df: labels paired element-wise with the rows of data_df.
    num_epocs: how many times the batched dataset is repeated.
    shuffle: when true, shuffle with a buffer of 2000 elements before batching.
    batch_size: number of elements per batch.

  Returns:
    A callable taking no arguments that constructs and returns the dataset.
  """
  def input_function():
    features = dict(data_df)
    dataset = tf.data.Dataset.from_tensor_slices((features, label_df))
    if shuffle:
      dataset = dataset.shuffle(2000)
    # Batch first, then repeat, so each pass re-emits the same batching.
    return dataset.batch(batch_size).repeat(num_epocs)

  return input_function
  
# Training feed: defaults apply (shuffled, batches of 32, 2000 repeats).
train_input_fn = make_input_fn(data_df=X_dftrain_train, label_df=y_train)

# Evaluation feed: a single ordered pass over the held-out rows.
eval_input_fn = make_input_fn(
    data_df=X_dftrain_valid,
    label_df=y_valid,
    num_epocs=1,
    shuffle=False,
)

### Creating the linear classifier model

In [17]:
# Build a TensorFlow Estimator linear *classifier* over the feature columns.
# Its predictions are read later through the 'class_ids' key.
linear_est = tf.estimator.LinearClassifier(feature_columns=feature_columns)

### Training the model

In [18]:
# Fit on the training feed, then score on the held-out feed.
linear_est.train(input_fn=train_input_fn)
result = linear_est.evaluate(input_fn=eval_input_fn)

# Full metrics dictionary first, then the accuracy entry on its own line.
print(result, result['accuracy'], sep='\n')

In [19]:
# Input function over the frame loaded from test.csv. No `y` is passed, so it
# feeds features only. num_epochs=1 and shuffle=False emit each row once, in
# frame order, which the later index-based assignment of predictions relies on.
test_input_fn = tf.compat.v1.estimator.inputs.pandas_input_fn(x=dfvalid, num_epochs=1, shuffle=False)

### Make predictions

In [20]:
# Materialise the prediction generator: one dictionary per input row.
pred_dicts = [row_prediction for row_prediction in linear_est.predict(input_fn=test_input_fn)]

# Keep only the predicted class id of each row. Despite its name, `probs`
# holds class ids rather than probabilities.
predicted_class_ids = [row_prediction['class_ids'][0] for row_prediction in pred_dicts]
probs = pd.Series(predicted_class_ids)
print(probs)

### Save results

In [21]:
# Two-column submission frame: passenger ids plus the predicted labels, which
# are aligned to the ids by index.
output = dfvalid[['PassengerId']].copy().assign(Survived=probs)

# Write without the index so the file contains exactly the two columns.
output.to_csv('output_submission.csv', index=False)
output.head()