In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer

In [2]:
df = pd.read_csv("hospital_train.csv", index_col=0)

In [3]:
class HospitalEncoder:

    @staticmethod
    def encode_admission(x):
        data = {'Urgent': 2, 'Trauma': 1, 'Emergency': 3}
        return data[x]

    @staticmethod
    def encode_severity(x):
        data = {'Moderate': 2, 'Extreme': 3, 'Minor': 1}
        return data[x]

    @staticmethod
    def encode_age(x):
        data = {'21-30': 2, '51-60': 5, '71-80': 7, '11-20': 1, 
        '31-40': 3, '0-10': 0, '61-70': 6, '41-50': 4, '81-90': 4, '91-100': 9}
        return data[x]

In [4]:
def normalize_df(df):
    for col in df.columns:
        df[col] = df[col]/df[col].max()
    return df

In [33]:
# Quitamos las columnas no relevantes
filtered = df.drop(['1', '3', '4', '7', '8','9', '10', '11','14','16'], axis=1)

# Aplicamos los encoders
filtered['12'] = filtered['12'].apply(HospitalEncoder.encode_admission)
filtered['13'] = filtered['13'].apply(HospitalEncoder.encode_severity)
filtered['15'] = filtered['15'].apply(HospitalEncoder.encode_age)

# Get dummies
features = filtered[['2', '6']]
features = pd.get_dummies(features)
filtered.drop(['2', '6'], axis=1, inplace=True)
train = pd.concat([filtered, features], axis=1)

# Creamos X y lo normalizamosC
X = np.array(normalize_df(train.drop(['17'], axis=1)))

# Cambiamos Nans de la columna 9 por la media
simp = SimpleImputer()
X = simp.fit_transform(X)

# Creamos y con encoder
le = LabelEncoder()
y = np.array(normalize_df(pd.DataFrame(le.fit_transform(train['17']))))

# Conjuntos train/test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 42)

In [34]:
X_train.shape

(80000, 16)

In [51]:
model = keras.Sequential([
    layers.InputLayer(input_shape=(16)),
    layers.Dense(64, activation='sigmoid'),
    layers.Dense(32, activation='relu'),
    layers.Dense(11, activation="softmax")
  ])

In [52]:
model.compile(optimizer='adam', 
              loss="sparse_categorical_crossentropy", 
              metrics='accuracy')

In [53]:
model.fit(X_train, y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x1a709476788>

In [54]:
y_pred = model.predict(X_test)

In [55]:
pd.DataFrame(y_pred)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,0.976529,0.023332,0.000013,0.000016,0.000014,0.000018,0.000014,0.000015,0.000015,0.000017,0.000018
1,0.976529,0.023332,0.000013,0.000016,0.000014,0.000018,0.000014,0.000015,0.000015,0.000017,0.000018
2,0.976529,0.023332,0.000013,0.000016,0.000014,0.000018,0.000014,0.000015,0.000015,0.000017,0.000018
3,0.976529,0.023332,0.000013,0.000016,0.000014,0.000018,0.000014,0.000015,0.000015,0.000017,0.000018
4,0.976529,0.023332,0.000013,0.000016,0.000014,0.000018,0.000014,0.000015,0.000015,0.000017,0.000018
...,...,...,...,...,...,...,...,...,...,...,...
19995,0.976529,0.023332,0.000013,0.000016,0.000014,0.000018,0.000014,0.000015,0.000015,0.000017,0.000018
19996,0.976529,0.023332,0.000013,0.000016,0.000014,0.000018,0.000014,0.000015,0.000015,0.000017,0.000018
19997,0.976529,0.023332,0.000013,0.000016,0.000014,0.000018,0.000014,0.000015,0.000015,0.000017,0.000018
19998,0.976529,0.023332,0.000013,0.000016,0.000014,0.000018,0.000014,0.000015,0.000015,0.000017,0.000018


In [117]:
y_pred = le.inverse_transform(y_pred)
y_pred

ValueError: y should be a 1d array, got an array of shape (20000, 11) instead.