# Titanic App

![img](static/images/small_titanic.jpg)

¿Sobreviviría tu pasajero?

**librerías**

In [None]:
import warnings
# Silence library warnings before the heavy imports so cell outputs stay readable.
warnings.simplefilter('ignore')

# pyplot instead of the discouraged `pylab` alias; it exposes the same plotting
# functions and matches the import used by the web-app code in this notebook.
import matplotlib.pyplot as plt

import pandas as pd
import numpy as np

**datos**

[fuente](https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv)

Incluyen características como edad, número de ticket, cabina, etc.

El objetivo es clasificar-predecir la supervivencia.

In [None]:
# Load the passenger table from a local Excel workbook.
# NOTE(review): the "fuente" link in the markdown points to a CSV, while this
# reads a local .xls file — confirm which one is the real source.
df=pd.read_excel('titanic.xls')

# Preview the first rows.
df.head()

**exploración**

In [None]:
# Table dimensions as (rows, columns).
df.shape

In [None]:
# Column labels of the loaded table.
df.columns

In [None]:
# Dtypes and non-null counts; memory_usage='deep' asks pandas to measure the
# real size of object (string) columns instead of estimating it.
df.info(memory_usage='deep')

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# Summary statistics of the table.
df.describe()

**preparando datos**

In [None]:
# Handle missing cabins and keep only the first character of each cabin code.
# `np.NaN` was removed in NumPy 2.0, so the gaps are filled with `fillna`
# rather than `replace(np.NaN, ...)`.

# 'U' is a one-letter placeholder so every value can be cut to its first character.
df.cabin = df.cabin.fillna('U').str[0]

# Spell the placeholder out.
df.cabin = df.cabin.replace('U', 'Unknown')

In [None]:
# Title (Mr./Mrs./...).
# Names are assumed to look like "Surname, Title. Given names". Taking the
# second whitespace token breaks for multi-word surnames (the token is part of
# the surname) and raises on one-word names, so the first token after the
# comma is used instead. Names without a comma yield NaN and end up 'Unknown'.
df['title'] = df.name.str.extract(r',\s*(\S+)', expand=False)

# Keep the frequent titles and group everything else under 'Unknown'.
titulos_validos = ['Mr.', 'Miss.',
                   'Mrs.', 'Master.',
                   'Dr.', 'Rev.']
df['title'] = df['title'].where(df['title'].isin(titulos_validos), 'Unknown')

In [None]:
# Passengers per title after rare titles were grouped under 'Unknown'.
df.title.value_counts()

In [None]:
# Distinct values of pclass (recoded to First/Second/Third further down).
df.pclass.unique()

In [None]:
# Preview the table with the new title column.
df.head()

In [None]:
# Binary flag: 1 for female passengers, 0 for everyone else.
df['isfemale'] = (df.sex == 'female').astype(int)

# Recode pclass as a categorical string: 1 -> 'First', 2 -> 'Second',
# any other value -> 'Third'.
es_primera = df.pclass == 1
es_segunda = df.pclass == 2
df.pclass = np.select([es_primera, es_segunda],
                      ['First', 'Second'],
                      default='Third')

# Remove the columns that are not used as model inputs.
sobrantes = ['sex', 'name', 'boat', 'body', 'ticket', 'home.dest', 'cabin', 'embarked']
df = df.drop(sobrantes, axis=1)

In [None]:
# Preview the table after recoding and dropping columns.
df.head()

In [None]:
# One-hot encode the categorical columns, keeping every level
# (drop_first=False), then discard any row that still has a missing value.
df_dummy = (
    pd.get_dummies(df, columns=['pclass', 'title'], drop_first=False)
    .dropna()
)

df_dummy.head()

In [None]:
# Dtypes and non-null counts of the encoded table.
df_dummy.info()

## Modelo

In [None]:
from sklearn.model_selection import train_test_split as tts

from sklearn.ensemble import RandomForestClassifier as RFC

from sklearn.metrics import f1_score as f1

In [None]:
# Target is the survival label; sibsp and parch are left out of the features.
y = df_dummy['survived']
X = df_dummy.drop(['survived', 'sibsp', 'parch'], axis=1)

# Hold out 20% of the rows for evaluation, with a fixed seed.
X_train, X_test, y_train, y_test = tts(X, y, test_size=0.2, random_state=42)

In [None]:
# Preview the feature matrix that goes into the model.
X.head()

In [None]:
# Baseline random forest. The seed makes the reported scores reproducible on
# a fresh Restart & Run All.
rfc = RFC(n_estimators=180, random_state=42)

rfc.fit(X_train, y_train)

y_pred = rfc.predict(X_test)

# scikit-learn metrics take (y_true, y_pred), in that order.
'Acc:', rfc.score(X_test, y_test), 'F1:', f1(y_test, y_pred)

In [None]:
# One bar per feature, with the importance reported by the fitted forest.
nombres, importancias = X.columns, rfc.feature_importances_
plt.bar(nombres, importancias)
# Vertical labels so the column names do not overlap.
plt.xticks(rotation=90);

### Hiperparámetros

In [None]:
from hyperopt import fmin, hp, tpe, Trials, space_eval, STATUS_OK

from sklearn.metrics import accuracy_score as acc
from sklearn.metrics import roc_auc_score as auc

In [None]:
# Candidate values for the categorical hyperparameters.
# fmin reports hp.choice picks as positions in these lists, so the lists are
# kept as named variables to decode the result later.
criteria = ['gini', 'entropy']
weight = ['balanced', 'balanced_subsample']
# 'auto' is left out: scikit-learn removed it in 1.3, and for classifiers it
# was an alias of 'sqrt'.
features = ['sqrt', 'log2']


# Search space. quniform samples floats on a grid (low, high, step); the
# objective casts them to int before building the model.
space = {
    'n_estimators': hp.quniform('n_estimators', 10, 1000, 20),

    'criterion': hp.choice('criterion', criteria),

    'max_depth': hp.quniform('max_depth', 3, 16, 1),

    'min_samples_split': hp.quniform('min_samples_split', 2, 10, 1),

    'class_weight': hp.choice('class_weight', weight),

    'max_features': hp.choice('max_features', features)
}

In [None]:
def objetivo(space):
    """Hyperopt objective: fit a random forest on one sampled point and score it.

    Parameters
    ----------
    space : dict
        One sampled point of the search space, with the keys 'n_estimators',
        'criterion', 'max_depth', 'min_samples_split', 'class_weight' and
        'max_features'. Numeric entries arrive as floats and are cast to int.

    Returns
    -------
    dict
        ``{'loss': -AUC, 'status': STATUS_OK}``. The AUC is negated because
        fmin minimises the loss.
    """
    modelo = RFC(
        n_estimators=int(space['n_estimators']),
        criterion=space['criterion'],
        max_depth=int(space['max_depth']),
        min_samples_split=int(space['min_samples_split']),
        class_weight=space['class_weight'],
        max_features=space['max_features']
    )

    modelo.fit(X_train, y_train)

    # roc_auc_score expects (y_true, y_score). Use the probability of the
    # second class (column 1) as the score: hard 0/1 predictions collapse the
    # ROC curve to a single point, and swapping the arguments scores the
    # labels against the predictions instead of the other way round.
    prob_positiva = modelo.predict_proba(X_test)[:, 1]
    auc_ = auc(y_test, prob_positiva)

    # NOTE(review): candidates are ranked on X_test, the same split used to
    # report the final scores, so those scores are optimistic. A validation
    # split or cross-validation on X_train would avoid the leak.
    return {'loss': -auc_, 'status': STATUS_OK}

In [None]:
# Run 150 TPE-guided evaluations of the objective over the search space.
# The result maps each hyperparameter label to the best value found.
best = fmin(objetivo, space,
            algo=tpe.suggest,
            max_evals=150,
            trials=Trials())

best

In [None]:
# fmin reports hp.choice picks as list positions; space_eval maps the whole
# result back to real parameter values, so nothing is decoded by hand.
mejores = space_eval(space, best)

rfc = RFC(n_estimators=int(mejores['n_estimators']),
          criterion=mejores['criterion'],
          max_depth=int(mejores['max_depth']),
          min_samples_split=int(mejores['min_samples_split']),
          class_weight=mejores['class_weight'],
          max_features=mejores['max_features'])


rfc.fit(X_train, y_train)

y_pred = rfc.predict(X_test)

# scikit-learn metrics take (y_true, y_pred), in that order.
'Acc:', rfc.score(X_test, y_test), 'F1:', f1(y_test, y_pred)

## Reentreno y guardado

In [None]:
import pickle

In [None]:
# Refit on the complete dataset with the tuned settings before exporting.
# fmin reports hp.choice picks as positions, so those entries are looked up in
# their option lists; the numeric ones are floats and are cast to int.
ajustes = {
    'n_estimators': int(best['n_estimators']),
    'criterion': criteria[best['criterion']],
    'max_depth': int(best['max_depth']),
    'min_samples_split': int(best['min_samples_split']),
    'class_weight': weight[best['class_weight']],
    'max_features': features[best['max_features']],
}
rfc = RFC(**ajustes)

rfc.fit(X, y)

In [None]:
# Persist the fitted forest. The context manager flushes and closes the file
# even if pickling fails, instead of leaving the handle to the garbage collector.
with open('modelo_rfc.pkl', 'wb') as archivo:
    pickle.dump(rfc, archivo)

## Carga y predicción

In [None]:
# Read the model back from disk, closing the file as soon as it is loaded.
# pickle.load can execute code stored in the file: only load pickles you created.
with open('modelo_rfc.pkl', 'rb') as archivo:
    rfc_cargado = pickle.load(archivo)

In [None]:
# Sanity-check the model that was read back from disk (not the in-memory one).
# NOTE: this model was refit on all of X, which contains X_test, so these
# scores only confirm the round trip; they are not a generalisation estimate.
y_pred = rfc_cargado.predict(X_test)

# scikit-learn metrics take (y_true, y_pred), in that order.
'Acc:', rfc_cargado.score(X_test, y_test), 'F1:', f1(y_test, y_pred)

In [None]:
# Show the feature columns again as a reference for building a passenger by hand.
X.head()

In [None]:
# Build a fictional passenger: 13 values paired positionally with X.columns.
# NOTE(review): presumably ordered as age, fare, isfemale, three class flags
# and seven title flags (the layout the web app uses) — confirm against X.columns.
basicos = [25, 200, 1]
clase = [1, 0, 0]
titulo = [0, 0, 1, 0, 0, 0, 0]

pasajero = dict(zip(X.columns, basicos + clase + titulo))

pasajero

In [None]:
# Score the fictional passenger: a single row holding the dict's values in order.
fila = [list(pasajero.values())]

y_pred = rfc_cargado.predict(fila)
y_prob = rfc_cargado.predict_proba(fila)

# Label, both class probabilities, and the probability in column 1,
# each separated by a blank line.
print('Etiqueta:', y_pred, end='\n\n')
print('Probabilidad [0 , 1]:', y_prob[0], end='\n\n')
print('Prob supervivencia:', y_prob[0][1])

## Plot

In [None]:
# Share of survivors in the table, as a percentage.
tasa_media_super = df.survived.mean() * 100
print('Tasa media supervivencia: {:.2f}%'.format(tasa_media_super))

In [None]:
# Compare the fictional passenger with the overall survival rate, xkcd style.
prob_pasajero = y_prob[0][1] * 100
alturas = [tasa_media_super, prob_pasajero]
posiciones = range(2)

with plt.xkcd():
    plt.figure(figsize=(10, 8))

    plt.bar(posiciones, alturas, align='center', color=['y', 'b'], alpha=0.5)
    plt.xticks(posiciones, ['Tasa Supervivencia media', 'Pasajero'])

    # The red line marks the average so the passenger's bar is easy to compare.
    plt.axhline(tasa_media_super, color='r')
    plt.ylim([0, 100])
    plt.ylabel('Probabilidad Supervivencia')
    plt.title('¿Sobrevivirá tu pasajero? \n ' + '¡{:.2f}% de probabilidad!'.format(prob_pasajero));

# Web-App (main.py)

In [None]:
from flask import Flask
from flask import render_template
from flask import request
from flask import Markup

import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

import io
import os
import base64

import numpy as np
import pickle


# Needed on PythonAnywhere (left disabled here): directory containing this file.
#PATH=os.path.dirname(os.path.abspath(__file__))

# Average survival rate in percent, hard-coded as the reference bar of the chart.
# NOTE(review): presumably the value the notebook computes as tasa_media_super — confirm.
tasa_media=38.20


# Model placeholder; stays None until startup() loads the pickle.
modelo=None


# Flask application object.
app=Flask(__name__)



# Load the model once, when the module is imported.
# The `@app.before_first_request` hook used previously was removed in
# Flask 2.3; running the setup at import time works on every Flask version.
def startup():
    """Load the pickled classifier from 'modelo_rfc.pkl' into the global `modelo`."""
    global modelo

    # pickle.load can execute code stored in the file: only ship a pickle
    # produced by the training notebook.
    with open('modelo_rfc.pkl', 'rb') as archivo:
        modelo = pickle.load(archivo)


startup()
    
    
    
def _grafico_base64(prob_pct):
    """Draw the average-vs-passenger bar chart and return it as a base64 PNG string.

    prob_pct is the passenger's survival probability in percent.
    """
    with plt.xkcd():
        fig = plt.figure()
        try:
            plt.bar(range(2), [tasa_media, prob_pct],
                    align='center', color=['y', 'b'], alpha=0.5)

            plt.xticks(range(2), ['Tasa Supervivencia media', 'Pasajero'])
            plt.axhline(tasa_media, color='r')
            plt.ylim([0, 100])
            plt.ylabel('Probabilidad Supervivencia')
            plt.title('¿Sobrevivirá tu pasajero? \n '+'¡{:.2f}% de probabilidad!'.format(prob_pct))

            img = io.BytesIO()
            plt.savefig(img, format='png')
        finally:
            # pyplot keeps every figure alive until it is closed; without this
            # a long-running server accumulates one figure per request.
            plt.close(fig)

    return base64.b64encode(img.getvalue()).decode()


# main app
@app.route("/", methods=['POST', 'GET'])
def main():
    """Serve the form (GET) or score the submitted passenger and show the chart (POST).

    GET renders 'index.html' with default form values. POST reads the form,
    builds one feature row (age, fare, gender flag, one-hot class, one-hot
    title), asks the model for the survival probability and renders the same
    template with the chart embedded as an inline PNG. Non-numeric age/fare or
    an unknown class/title is answered with HTTP 400 instead of a server error.
    """
    if request.method != 'POST':
        # default form values
        return render_template('index.html',
                               model_results='',
                               model_plot='',
                               s_age=30,
                               s_fare=35,
                               s_gender='Female',
                               s_pclass='Second',
                               s_title='Mrs')

    response = {
        's_age': request.form['s_age'],
        's_fare': request.form['s_fare'],
        's_gender': 1 if request.form['s_gender'] == 'Female' else 0,
        's_pclass': request.form['s_pclass'],
        's_title': request.form['s_title']}

    # One-hot layouts; the position of each 1 must match the column order
    # the model was trained with.
    title = {'Dr':     [1, 0, 0, 0, 0, 0, 0],
             'Master': [0, 1, 0, 0, 0, 0, 0],
             'Miss':   [0, 0, 1, 0, 0, 0, 0],
             'Mr':     [0, 0, 0, 1, 0, 0, 0],
             'Mrs':    [0, 0, 0, 0, 1, 0, 0],
             'Rev':    [0, 0, 0, 0, 0, 1, 0],
             'Unknown':[0, 0, 0, 0, 0, 0, 1]}

    p_class = {'First':  [1, 0, 0],
               'Second': [0, 1, 0],
               'Third':  [0, 0, 1]}

    # Age and fare may carry decimals (e.g. "35.5"), which int() rejects.
    try:
        edad = float(response['s_age'])
        tarifa = float(response['s_fare'])
    except ValueError:
        return 'Edad y tarifa deben ser valores numéricos.', 400

    # float() also accepts 'nan' and 'inf'; keep those away from the model.
    if not (np.isfinite(edad) and np.isfinite(tarifa)):
        return 'Edad y tarifa deben ser valores numéricos.', 400

    if response['s_pclass'] not in p_class or response['s_title'] not in title:
        return 'Clase o título no reconocido.', 400

    # passenger row
    pasajero = [[edad,                            # age
                 tarifa,                          # fare
                 response['s_gender']] +          # gender flag
                p_class[response['s_pclass']] +   # class
                title[response['s_title']]]       # title

    # prediction: column 1 holds the survival probability
    y_prob = modelo.predict_proba(pasajero)

    plot_url = _grafico_base64(y_prob[0][1]*100)

    return render_template('index.html',
                           model_results='',
                           model_plot=Markup('<img src="data:image/png;base64,{}">'.format(plot_url)),
                           s_age=response['s_age'],
                           s_fare=response['s_fare'],
                           s_gender=request.form['s_gender'],
                           s_pclass=response['s_pclass'],
                           s_title=response['s_title'])

    

# Start Flask's built-in server only when this code is executed directly
# (local runs), with debug mode off.
if __name__=='__main__':
    app.run(debug=False)
    