In [None]:
import numpy as np
import pandas as pd
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import OneHotEncoder as OHE, LabelEncoder as LE
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
# Load the training CSV from the local data directory into a DataFrame.
data = pd.read_csv('./data/train.csv')

In [None]:
# Preview the first rows of the loaded frame.
data.head()

In [None]:
# Count missing values per column to see which columns need imputation.
data.isna().sum()

In [None]:
# Print column dtypes and non-null counts.
data.info()

In [None]:
# Per-column fill values used for imputation, all computed from `data`:
# the columns listed in `mode_cols` use their most frequent value, every
# other listed column uses its median.
mode_cols = {'Drug', 'Ascites', 'Hepatomegaly', 'Spiders'}
fill_cols = [
    'N_Days', 'Drug', 'Ascites', 'Hepatomegaly', 'Spiders',
    'Cholesterol', 'Copper', 'Alk_Phos', 'SGOT',
    'Tryglicerides', 'Platelets', 'Prothrombin',
]
mapper = {
    col: (data[col].mode().iloc[0] if col in mode_cols else data[col].median())
    for col in fill_cols
}

In [None]:
# Impute missing values column-by-column with the statistics in `mapper`;
# columns that are not keys of `mapper` are left unchanged.
data_cleaned = data.fillna(mapper)

In [None]:
# Columns that get one-hot encoded; everything else except the target
# ('Status') and the row identifier ('id') is standardised.
categorical_cols = ['Drug', 'Hepatomegaly', 'Spiders', 'Edema', 'Sex', 'Ascites']
numeric_frame = data_cleaned.drop(columns=categorical_cols + ['Status', 'id'])

# Keep the fitted scaler in a variable (instead of discarding it) so the
# training mean/std remain available for transforming other data.
scaler = StandardScaler()
X = scaler.fit_transform(numeric_frame)

# One-hot encode the categorical columns; `ohe_obj` stays fitted so the same
# category layout can be applied to the test file later in the notebook.
ohe_obj = OHE()
ohe = ohe_obj.fit_transform(data_cleaned[categorical_cols].values).toarray()

# Encode the string target as consecutive integers; `le` keeps the mapping.
le = LE()
y = le.fit_transform(data_cleaned['Status'])

# Final feature matrix: standardised numeric columns followed by one-hot columns.
X = np.column_stack([X, ohe])

In [None]:
# Stratified 70/30 split. A fixed random_state makes the split (and therefore
# the reported evaluation numbers) reproducible across kernel restarts.
xtrain, xtest, ytrain, ytest = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)

In [None]:
# Sanity-check the shapes of the full arrays and of the training split.
X.shape, y.shape, xtrain.shape, ytrain.shape

In [None]:
import tensorflow as tf
from tensorflow import keras

In [None]:
# Fully connected classifier: an input sized to the feature count, four
# LeakyReLU hidden layers of decreasing width, and a 3-unit softmax output.
layer_stack = [tf.keras.layers.Input((xtrain.shape[1],))]
for width in (128, 64, 32, 16):
    leaky = tf.keras.layers.LeakyReLU(alpha=0.01)
    layer_stack.append(tf.keras.layers.Dense(width, activation=leaky))
layer_stack.append(tf.keras.layers.Dense(3, activation='softmax'))

model = tf.keras.Sequential(layer_stack)

In [None]:
# The targets are integer class ids (produced by the LabelEncoder), so the
# sparse variant of categorical cross-entropy is used; accuracy is tracked.
model.compile(optimizer='adam', loss=tf.keras.losses.sparse_categorical_crossentropy, metrics=['accuracy'])

In [None]:
# Train on the training split for 7 epochs with mini-batches of 25 samples.
model.fit(xtrain, ytrain, epochs=7, batch_size=25)

In [None]:
# Inspect the output tensor of the last (softmax) layer.
model.layers[-1].output

In [None]:
# Report loss and accuracy on the held-out 30% split.
model.evaluate(xtest, ytest)

In [None]:
# Load the CSV that predictions (and the submission file) are generated for.
test=pd.read_csv('./data/test.csv')

In [None]:
# Impute with `mapper`, i.e. the statistics computed from the training frame,
# so the test file is filled with the same values as the training data.
test_cleaned = test.fillna(mapper)

In [None]:
# Build the feature matrix for the test file with exactly the same
# preprocessing the model was trained on.
categorical_cols = ['Drug', 'Hepatomegaly', 'Spiders', 'Edema', 'Sex', 'Ascites']

# Numeric columns (and their order) are taken from the training frame so the
# test matrix lines up column-for-column with the training matrix.
numeric_cols = data_cleaned.drop(columns=categorical_cols + ['Status', 'id']).columns

# Standardise with the TRAINING mean/std. Fitting a new scaler on the test
# data would shift and scale the features differently from what the network
# saw during training and silently distort the predictions.
train_scaler = StandardScaler().fit(data_cleaned[numeric_cols])
X_sub = train_scaler.transform(test_cleaned[numeric_cols])

# Reuse the encoder fitted on the training data so one-hot columns match.
ohe = ohe_obj.transform(test_cleaned[categorical_cols].values).toarray()
X_sub = np.column_stack([X_sub, ohe])

In [None]:
# NOTE: despite the name, these are class probabilities, not raw logits --
# the model's final layer applies a softmax activation.
logits = model.predict(X_sub)

In [None]:
# Display the predicted probability matrix (one row per test sample).
logits

In [None]:
# Map each encoded integer back to its original 'Status' label.
# LabelEncoder already stores the original labels in `classes_`, indexed by
# their encoded value, so no pass over every training row is needed.
label_mappers = dict(enumerate(le.classes_))

In [None]:
# Show the encoded-id -> original-label mapping.
label_mappers

In [None]:
# Preview only the first few encoded labels; dumping the whole array floods
# the cell output without adding information.
list(y[:10])

In [None]:
# Preview the first few original labels (same rows as the encoded preview)
# instead of printing every row of the column.
list(data['Status'].head(10))

In [None]:
# Encoded value of each Status label (LabelEncoder sorts the classes):
# 'D': 2
# 'C': 0
# 'CL': 1

In [None]:
# Assemble the submission frame: the id column followed by one probability
# column per class. Output unit k of the model corresponds to encoded label k,
# so the header is taken from `le.classes_` rather than hard-coded; this keeps
# the column names tied to the actual encoding order.
# NOTE(review): confirm that the consumer of submission.csv expects the bare
# class names as column headers.
sub = pd.DataFrame(
    data=np.column_stack([test_cleaned['id'], logits]),
    columns=['id', *le.classes_],
)

In [None]:
# np.column_stack produced a single float array, which turned the ids into
# floats; overwrite the column with the original id values.
sub['id'] = test_cleaned['id']

In [None]:
# Write the submission without the DataFrame index; by default to_csv adds
# the index as an extra unnamed first column, which is not part of the data.
sub.to_csv('./submission.csv', index=False)

In [None]:
# Shape of the full training feature matrix (rows, features).
X.shape