In [29]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import pickle
import tabpy_client 

In [30]:
# Load the Titanic CSV file into a DataFrame
df = pd.read_csv('data/titanic.csv')
# Features come from the 'Age' and 'Fare' columns; the target is the 'Survived' column
feature_columns = ['Age', 'Fare']
X, y = df[feature_columns], df['Survived']

In [31]:
# Fill missing feature values with each column's mean before training the model.
# The result is assigned back instead of using inplace=True: X was selected from
# df, so an in-place fill triggers pandas' SettingWithCopyWarning, and assigning
# keeps the cell safe to re-run.
X = X.fillna(X.mean())
# Binarize the target: a value equal to 1 stays 1, anything else (including NaN) becomes 0
y = [1 if number == 1 else 0 for number in y]

In [32]:
# Fixed seed so the split (and therefore the reported accuracy) is reproducible
# across "Restart Kernel -> Run All"
RANDOM_STATE = 42
# Train test split your data, stratified on y so both partitions keep the same class balance
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=RANDOM_STATE
)
# Create and train a simple model
model = LogisticRegression()
model.fit(X_train, y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [33]:
# Generate predictions for the held-out test rows
y_test_predicted = model.predict(X_test)
# Print accuracy score. Arguments follow the documented (y_true, y_pred) order;
# accuracy itself is symmetric, but the order matters for metrics such as
# precision or recall, so keep it right in case this line is copied.
print(accuracy_score(y_test, y_test_predicted))

0.659192825112


In [34]:
# Save the model as a pickled file.
# Pickle data is bytes, so the file is opened in binary mode ('wb'); the context
# manager flushes and closes the handle so the file is complete before it is read back.
with open('data/model', 'wb') as model_file:
    pickle.dump(model, model_file)

In [35]:
# Create a TabPy client pointed at the server URL (localhost, port 9004)
tabpy_endpoint = 'http://localhost:9004/'
connection = tabpy_client.Client(tabpy_endpoint)

In [38]:
# Create scoring function (for use with feeding new data into the model in Tableau)
def PredictSurvival(age, fare):
    """Predict survival labels for the given ages and fares.

    Parameters
    ----------
    age, fare : scalar or list
        Values for the 'Age' and 'Fare' features. Positions in the two
        arguments are paired up into rows. Missing entries (None/NaN) are
        allowed and are filled with the mean of the values passed in.

    Returns
    -------
    list
        One predicted label per input row, as produced by the pickled
        model's ``predict``.
    """
    # Pickle data is bytes, so the file must be opened in binary mode; the
    # context manager closes the handle once the model is loaded.
    # NOTE(review): pickle.load can execute arbitrary code from the file, so
    # 'data/model' must only ever come from a trusted location.
    with open('data/model', 'rb') as model_file:
        model = pickle.load(model_file)
    # One row per observation, labelled with the same column names that the
    # training frame used ('Age', 'Fare').
    features = pd.DataFrame([age, fare]).transpose()
    features.columns = ['Age', 'Fare']
    # Fill gaps with the mean of the current batch; a column that is entirely
    # missing has no mean and therefore stays NaN.
    features = features.fillna(features.mean())
    return list(model.predict(features))

In [None]:
# Register the scoring function with the TabPy server so Tableau can call it.
# The first argument is the endpoint name Tableau refers to, the second is the
# Python function defined in this notebook, the third is a free-text description.
# override=True is passed so an existing endpoint with the same name is replaced.
connection.deploy(
    'PredictSurvivalFunction',
    PredictSurvival,
    'Returns a yes/no prediction',
    override=True,
)