In [143]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import tensorflow as tf
from sklearn.utils import shuffle


In [144]:
mean_data = 0

def get_data(filename):
    df = pd.read_csv(filename)
    mean_data = df.mean()
    df = df.fillna(mean_data)
    X = tf.convert_to_tensor(df.drop(["Id", "Week", "target"], axis=1).to_numpy())
    shape = X.shape
    print(shape)
    print((shape[0] // 4, 4, shape[1]))
    X = tf.reshape(X, (shape[0] // 4, 4, shape[1]))
    
    y = tf.one_hot(tf.convert_to_tensor(df["target"].to_numpy(), dtype=tf.int32)[::4], 2)
    
    return X, y

In [145]:
def get_test_data(filename):
    df = pd.read_csv(filename)
    df = df.fillna(mean_data)
    
    X = tf.convert_to_tensor(df.drop(["Id", "Week"], axis=1).to_numpy())
    shape = X.shape
    print(shape)
    X = tf.reshape(X, (shape[0] // 4, 4, shape[1]))
    
    ids = tf.convert_to_tensor(df["Id"].to_numpy(), dtype=tf.int64)[::4]
    
    return X, ids

In [146]:
df = pd.read_csv("train.csv")
df = df.fillna(df.mean())

In [148]:
X, y = get_data("train.csv")
# amount = int(len(X) * 0.2)
amount = 1
X_train, X_eval = X[:len(X) - amount], X[len(X) - amount:]
y_train, y_eval = y[:len(X) - amount], y[len(X) - amount:]

(30860, 49)
(7715, 4, 49)


In [149]:
model = tf.keras.Sequential([
    tf.keras.layers.Normalization(axis=1),
    tf.keras.layers.InputLayer(49*4),
    tf.keras.layers.Dense(100, activation='sigmoid'),
    tf.keras.layers.Dense(100, activation='sigmoid'),
    tf.keras.layers.Dense(2, activation='softmax'),
])

In [150]:
model = tf.keras.Sequential([
    tf.keras.layers.Normalization(axis=2),
    tf.keras.layers.InputLayer((4, 49)),
    tf.keras.layers.SimpleRNN(50, activation='sigmoid', return_sequences=True),
    tf.keras.layers.Reshape((-1,)),
    tf.keras.layers.Dense(50, activation='sigmoid'),
    tf.keras.layers.Dense(2, activation='softmax'),
])

In [151]:
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    loss='binary_crossentropy',
    metrics=[tf.keras.metrics.Precision(),tf.keras.metrics.AUC(), tf.keras.metrics.Recall()]
)

In [152]:
model.fit(X_train, y_train, epochs=20, batch_size=50)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x26387436610>

In [153]:
tf.keras.utils.plot_model(model, show_shapes=True)

('You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) ', 'for plot_model/model_to_dot to work.')


In [154]:
model.evaluate(X_eval, y_eval)



[0.27901315689086914, 1.0, 1.0, 1.0]

In [155]:
X_test, ids = get_test_data("test.csv")
X_test

(13228, 49)


<tf.Tensor: shape=(3307, 4, 49), dtype=float64, numpy=
array([[[0.00000000e+00, 0.00000000e+00, 1.48809524e-02, ...,
         9.02439024e-01, 4.32100000e+03, 6.42857143e-01],
        [0.00000000e+00, 0.00000000e+00, 1.48809524e-02, ...,
         9.04761905e-01, 6.26500000e+03, 7.14285714e-01],
        [0.00000000e+00, 0.00000000e+00, 1.48809524e-02, ...,
         9.00000000e-01, 2.00300000e+03, 8.21428571e-01],
        [0.00000000e+00, 0.00000000e+00, 1.48809524e-02, ...,
         9.37500000e-01, 1.06900000e+03, 6.56250000e-01]],

       [[0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         1.90476190e-01, 2.41600000e+03, 9.52380952e-02],
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         1.81818182e-01, 1.53800000e+03, 3.03030303e-02],
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         1.31578947e-01, 2.19800000e+03, 1.31578947e-01],
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         1.76470588e-01, 2.81100000e+03, 

In [156]:
y_predict = model.predict(X_test)

In [157]:
y_predict

array([[0.9967225 , 0.00327747],
       [0.98934513, 0.0106548 ],
       [0.14755596, 0.85244405],
       ...,
       [0.9983474 , 0.00165261],
       [0.99819297, 0.00180706],
       [0.6296453 , 0.3703547 ]], dtype=float32)

In [158]:
output = pd.DataFrame({"Id": ids, "Predicted": tf.cast(tf.argmax(y_predict, axis=1), dtype=tf.float32)})
output = output.set_index("Id")

In [159]:
output.to_csv('submission.csv')

In [160]:
output[output.Predicted != 0]

Unnamed: 0_level_0,Predicted
Id,Unnamed: 1_level_1
-2649749947427248381,1.0
2491230068529122787,1.0
3072806373507695662,1.0
7746549699970521640,1.0
1826411158033066922,1.0
...,...
7198661691036867853,1.0
-1455587686421958496,1.0
-8577792393317536620,1.0
8234138404209643668,1.0
