In [1]:
import numpy as np
import pandas as pd

import urllib.request
from PIL import Image
import pickle

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline

from sklearn.svm import SVC

from sklearn.ensemble import BaggingClassifier
from sklearn.decomposition import PCA

In [2]:
# Load the training data; the first CSV column becomes the DataFrame index.
df = pd.read_csv(filepath_or_buffer="hospital_train.csv", index_col=0)

In [3]:
class HospitalEncoder:
    """Ordinal encoders for the categorical text columns of the hospital data.

    Every encoder is a static method that maps a single category label to an
    integer code, so it can be handed directly to ``Series.apply``. A label
    that is not in the corresponding table raises ``KeyError``.
    """

    # Admission type -> integer code.
    _ADMISSION_CODES = {'Trauma': 1, 'Urgent': 2, 'Emergency': 3}

    # Severity label -> integer code.
    _SEVERITY_CODES = {'Minor': 1, 'Moderate': 2, 'Extreme': 3}

    # Age bracket -> decade index (tens digit of the bracket's lower bound).
    # '81-90' must be 8: mapping it to 4 made it indistinguishable from '41-50'.
    _AGE_CODES = {
        '0-10': 0, '11-20': 1, '21-30': 2, '31-40': 3, '41-50': 4,
        '51-60': 5, '61-70': 6, '71-80': 7, '81-90': 8, '91-100': 9,
    }

    @staticmethod
    def encode_admission(x):
        """Return the integer code for admission type ``x``."""
        return HospitalEncoder._ADMISSION_CODES[x]

    @staticmethod
    def encode_severity(x):
        """Return the integer code for severity label ``x``."""
        return HospitalEncoder._SEVERITY_CODES[x]

    @staticmethod
    def encode_age(x):
        """Return the decade index (0-9) for age bracket ``x``."""
        return HospitalEncoder._AGE_CODES[x]

In [4]:
# Drop the columns that are not used as predictors
unused_columns = ['1', '3', '4', '7', '8', '9', '10', '11', '14', '16']
filtered = df.drop(columns=unused_columns)

# Replace the text categories with their integer codes
ordinal_encoders = {
    '12': HospitalEncoder.encode_admission,
    '13': HospitalEncoder.encode_severity,
    '15': HospitalEncoder.encode_age,
}
for column, encoder in ordinal_encoders.items():
    filtered[column] = filtered[column].apply(encoder)

# One-hot encode columns '2' and '6' and swap them in for the originals
dummy_columns = ['2', '6']
features = pd.get_dummies(filtered[dummy_columns])
filtered = filtered.drop(columns=dummy_columns)
train = pd.concat([filtered, features], axis=1)

# Build X (every column except the target '17') and y (the target)
X = np.array(train.drop(columns=['17']))
y = np.array(train['17'])

# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=42
)

In [8]:
# How many principal components are kept when PCA is asked for 0.95 of the variance?
p = PCA(n_components=0.95)
X_reduced = p.fit_transform(X)
X_reduced.shape

(100000, 8)

In [5]:
# Base estimator: impute missing values, reduce dimensionality with PCA
# (n_components=0.95), then classify with a support vector machine.
svc = Pipeline(steps=[
    ('inputer', SimpleImputer()),
    ('pca', PCA(0.95)),
    ('svc', SVC())
])

# Bag 10 copies of the pipeline, trained in parallel on all available cores.
# random_state fixes the resampling so that re-running the notebook rebuilds
# the same ensemble (same seed as the train/test split).
bilbo = BaggingClassifier(svc, n_estimators=10, n_jobs=-1, random_state=42)

In [9]:
# Train on the training split only
bilbo.fit(X_train, y_train)

# Accuracy on the held-out split
y_pred = bilbo.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print('Test:', test_accuracy)

# Accuracy on the data the model was trained on, for comparison
y_pred_train = bilbo.predict(X_train)
train_accuracy = accuracy_score(y_train, y_pred_train)
print('Train:', train_accuracy)

[LibSVM]

In [6]:
def chequeator(df_to_submit):
    """Validate a submission against ``sample_submission.csv`` and save it.

    The submission must match the sample file in shape, in column names
    (same order) and in ``id`` values (same row order). When every check
    passes, the frame is written to ``submission.csv`` without its index and
    a celebratory image is downloaded and shown. Otherwise a message naming
    the first failed check is printed and nothing is written.

    If a check fails: read the message and do what it says. If that still
    does not help, step away, come back and read the notebook again.

    Parameters
    ----------
    df_to_submit : pandas.DataFrame
        Candidate submission.

    Returns
    -------
    None
    """
    sample = pd.read_csv("sample_submission.csv")

    if df_to_submit.shape != sample.shape:
        print("Check the number of rows and/or columns and try again")
        print("\nMensaje secreto de Clara: No me puedo creer que después de todo este notebook hayas hecho algún cambio en las filas de `hospital_test.csv`. Lloro.")
        return

    # Compare the labels themselves. `Index.all()` only says whether every
    # label is truthy, so comparing two `.all()` results accepted any names.
    if list(df_to_submit.columns) != list(sample.columns):
        print("Check the names of the columns and try again")
        return

    # Same pitfall for the ids: compare the values position by position.
    if list(df_to_submit["id"]) != list(sample["id"]):
        print("Check the ids and try again")
        return

    print("You're ready to submit!")
    # index=False matters: the index must not be written as an extra column.
    df_to_submit.to_csv("submission.csv", index=False)

    # The image is purely decorative, so a missing network connection or an
    # unreadable download must not fail an already-saved submission.
    try:
        urllib.request.urlretrieve("https://i.kym-cdn.com/photos/images/facebook/000/747/556/27a.jpg", "gfg.png")
        img = Image.open("gfg.png")
        img.show()
    except OSError as exc:
        print("Could not show the celebration image:", exc)

filepath = 'hospital_test.csv'

def prepare_test(model, filepath, feature_columns=None):
    """Predict on the test CSV with ``model`` and build the submission frame.

    The test file receives the same feature engineering as the training
    data: unused columns are dropped, columns '12', '13' and '15' are
    ordinal-encoded and columns '2' and '6' are one-hot encoded.

    The feature matrix is passed to ``model.predict`` as-is. No imputer or
    PCA is fitted here: those transforms have to be the ones learned from the
    training data, so ``model`` is expected to apply them itself (for example
    a pipeline of imputer -> PCA -> classifier). Fitting new ones on the test
    set would feed the model a matrix with a different number of columns and
    a different projection than it was trained on.

    Parameters
    ----------
    model : fitted estimator
        Must expose ``predict`` and accept the engineered, un-imputed
        feature matrix.
    filepath : str
        Path of the test CSV; its first column is read as the index.
    feature_columns : sequence of str, optional
        Training feature column names, in training order. When given, the
        test features are reindexed to exactly these columns: dummy columns
        absent from the test set are added filled with 0 and columns unseen
        during training are dropped. When omitted, the columns are used as
        produced from the test file.

    Returns
    -------
    pandas.DataFrame
        Columns ``id`` (taken from ``sample_submission.csv``) and ``days``
        (the predictions).
    """
    df = pd.read_csv(filepath, index_col=0)

    # Drop the columns that are not used as predictors
    filtered = df.drop(['1', '3', '4', '7', '8', '9', '10', '11', '14', '16'], axis=1)

    # Replace the text categories with their integer codes
    filtered['12'] = filtered['12'].apply(HospitalEncoder.encode_admission)
    filtered['13'] = filtered['13'].apply(HospitalEncoder.encode_severity)
    filtered['15'] = filtered['15'].apply(HospitalEncoder.encode_age)

    # One-hot encode columns '2' and '6' and swap them in for the originals
    features = pd.get_dummies(filtered[['2', '6']])
    test = pd.concat([filtered.drop(['2', '6'], axis=1), features], axis=1)

    # get_dummies only creates columns for categories present in this file,
    # so line the columns up with the training layout when it is known.
    if feature_columns is not None:
        test = test.reindex(columns=list(feature_columns), fill_value=0)

    X = np.array(test)

    # The ids come from the sample submission.
    # NOTE(review): this assumes its rows are in the same order as the test file - confirm.
    sample = pd.read_csv('sample_submission.csv')

    predictions_submit = model.predict(X)
    submission = pd.DataFrame({"id": sample.id, "days": predictions_submit.ravel()})

    return submission

In [7]:
# Refit on every labelled row, then predict the test file and validate the result
bilbo.fit(X, y)
submission = prepare_test(model=bilbo, filepath=filepath)
chequeator(df_to_submit=submission)