In [71]:
# Install the Antigranular package
!pip install antigranular &> /dev/null

In [72]:
import os

import antigranular as ag

# Read the client credentials from the environment instead of pasting them
# into the notebook, so they never end up in a saved or shared copy.
# Set AG_CLIENT_ID and AG_CLIENT_SECRET before starting the kernel; a missing
# variable raises KeyError here rather than failing later inside login().
session = ag.login(
    os.environ["AG_CLIENT_ID"],
    os.environ["AG_CLIENT_SECRET"],
    competition="Heart Disease Prediction Hackathon",
)

Dataset "Heart Disease Prediction Hackathon Dataset" loaded to the kernel as heart_disease_prediction_hackathon_dataset
Key Name                       Value Type     
---------------------------------------------
train_y                        PrivateDataFrame
train_x                        PrivateDataFrame
test_x                         DataFrame      

Connected to Antigranular server session id: 9251731f-837b-4138-a311-dc5792be9941, the session will time out if idle for 25 minutes
Cell magic '%%ag' registered successfully, use `%%ag` in a notebook cell to execute your python code on Antigranular private python server
🚀 Everything's set up and ready to roll!


In [73]:
%%ag
# Pull the three competition tables out of the dataset container that the
# login step loaded into the remote kernel.
dataset = heart_disease_prediction_hackathon_dataset
x_train, y_train = dataset["train_x"], dataset["train_y"]  # private frames
x_test = dataset["test_x"]  # public frame

In [74]:
%%ag
# Show the schema of the private training features: column names, dtypes and
# the value bounds attached to each column.
x_train.info()

+----+----------+-------------+---------------+---------+------------+
|    | Column   | numerical   | categorical   | dtype   | bounds     |
|----+----------+-------------+---------------+---------+------------|
|  0 | age      | True        | False         | int64   | (21, 86)   |
|  1 | sex      | True        | False         | int64   | (0, 1)     |
|  2 | bp       | True        | False         | int64   | (80, 215)  |
|  3 | ch       | True        | False         | int64   | (102, 597) |
|  4 | bs       | True        | False         | int64   | (67, 157)  |
|  5 | phr      | True        | False         | int64   | (62, 222)  |
+----+----------+-------------+---------------+---------+------------+



In [75]:
%%ag
# Show the schema of the private training labels (a single `condition` column).
y_train.info()

+----+-----------+-------------+---------------+---------+----------+
|    | Column    | numerical   | categorical   | dtype   | bounds   |
|----+-----------+-------------+---------------+---------+----------|
|  0 | condition | True        | False         | int64   | (0, 1)   |
+----+-----------+-------------+---------------+---------+----------+



In [76]:
%%ag
# Explore the private training features through a summary that takes an
# epsilon; each call uses eps=0.1, so use it sparingly.
# The reported statistics are noised, so individual values can look
# inconsistent (e.g. percentiles that are not in increasing order).
describe = x_train.describe(eps=0.1)
ag_print(describe)

               age          sex  ...           bs          phr
count  7416.000000  8362.000000  ...  8001.000000  8143.000000
mean     51.807030     0.662846  ...   100.369160   157.922115
std      15.127039     0.377032  ...    23.479141    36.508083
min      21.000000     0.000000  ...    67.000000    62.000000
25%      46.294875     0.211043  ...    91.325619   164.564286
50%      64.359468     0.928256  ...   104.355429   142.679817
75%      64.592915     0.991600  ...   138.982408   140.699427
max      68.189380     0.984142  ...   131.515120   182.134267

[8 rows x 6 columns]



In [77]:
%%ag
# Same noised summary for the training labels, using another eps=0.1.
# NOTE: this rebinds `describe`, replacing the feature summary computed in the
# previous cell.
describe = y_train.describe(eps=0.1)
ag_print(describe)

         condition
count  8549.000000
mean      0.520351
std       0.489844
min       0.000000
25%       0.031003
50%       0.932072
75%       0.925081
max       0.904621



In [78]:
%%ag
# x_test is a public (non-private) test set, so it can be printed directly
# without spending any epsilon.
ag_print(x_test)

      age  sex   bp   ch   bs  phr
0      71    1  128  326   95  117
1      61    1  153  270   98  123
2      59    1  113  236  106  181
3      69    0  109  151  109  108
4      55    0  137  235  101  150
...   ...  ...  ...  ...  ...  ...
1995   60    1  128  261  112  143
1996   50    1  143  216   94  100
1997   64    1  120  172   87  142
1998   56    1  158  294   82  144
1999   69    0  117  559  112  157

[2000 rows x 6 columns]



In [90]:
%%ag
import tensorflow as tf
from op_pandas import standard_scaler, PrivateDataFrame
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from op_tensorflow import PrivateKerasModel, PrivateDataLoader


# Plain (non-private) Keras network: two hidden ReLU layers, each followed by
# dropout, and a single sigmoid unit for the binary label.
seqM = Sequential()
seqM.add(Dense(32, activation='relu', input_shape=(6,)))  # 6 feature columns
seqM.add(Dropout(0.3))
seqM.add(Dense(16, activation='relu'))
seqM.add(Dropout(0.3))
seqM.add(Dense(1, activation='sigmoid'))  # Binary classification

# Wrap the network in the DP Keras model.
dp_model = PrivateKerasModel(model=seqM, l2_norm_clip=1, noise_multiplier=1)

# An ordinary (non-DP) optimizer taken straight from Keras.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)

# PrivateKerasModel.compile() is called the same way as the standard Keras one.
dp_model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=["accuracy"])

  if (distutils.version.LooseVersion(tf.__version__) <
  distutils.version.LooseVersion(required_tensorflow_version)):



In [91]:
%%ag
# Standardise the private training features with op_pandas' standard_scaler
# (called with eps=.1), then inspect the bounds of the scaled columns.
x_train_scaled = standard_scaler(x_train, eps=.1)
x_train_scaled.info()

+----+----------+-------------+---------------+---------+-----------------------------------------+
|    | Column   | numerical   | categorical   | dtype   | bounds                                  |
|----+----------+-------------+---------------+---------+-----------------------------------------|
|  0 | age      | True        | False         | float64 | (-2.6299713225201486,                   |
|    |          |             |               |         | 2.2685366049445967)                     |
|  1 | sex      | True        | False         | float64 | (-1.7519738492551058,                   |
|    |          |             |               |         | 0.9516293508455532)                     |
|  2 | bp       | True        | False         | float64 | (-2.8426144496375385, 4.63314904236739) |
|  3 | ch       | True        | False         | float64 | (-1.2909434225837932,                   |
|    |          |             |               |         | 3.2296519043050655)                     |


In [92]:
%%ag
# Pair the scaled features with the labels in mini-batches of 4 rows.
data_loader = PrivateDataLoader(
    feature_df=x_train_scaled,
    label_df=y_train,
    batch_size=4,
)

In [94]:
%%ag
# Train for 5 epochs on the private data loader.
# NOTE(review): target_delta is presumably the delta of the (eps, delta)-DP
# guarantee — confirm against the op_tensorflow documentation.
dp_model.fit(
    x=data_loader,
    epochs=5,
    target_delta=1e-5,
)

Epoch 1/5

2000/2000 - 28s - loss: 0.4877 - accuracy: 0.7409 - 28s/epoch - 14ms/step

Epoch 2/5

2000/2000 - 29s - loss: 0.4824 - accuracy: 0.7478 - 29s/epoch - 14ms/step

Epoch 3/5

2000/2000 - 28s - loss: 0.4716 - accuracy: 0.7560 - 28s/epoch - 14ms/step

Epoch 4/5

2000/2000 - 28s - loss: 0.4783 - accuracy: 0.7586 - 28s/epoch - 14ms/step

Epoch 5/5

2000/2000 - 28s - loss: 0.4740 - accuracy: 0.7593 - 28s/epoch - 14ms/step



In [95]:
%%ag
# Wrap the public test features so they can go through the same
# standard_scaler helper that was applied to the training features.
# NOTE(review): this appears to compute a separate, eps-noised scaling for the
# test set instead of reusing the training-set statistics — confirm that this
# is intended, since train and test may end up on slightly different scales.
x_test_private = PrivateDataFrame(x_test)
x_test_scaler = standard_scaler(x_test_private, eps=.1)

# Run the trained model on the scaled test set; the predictions are returned
# under the "output" column label.
y_pred = dp_model.predict(x_test_scaler, label_columns=["output"])

 1/63 [..............................] - ETA: 18s
14/63 [=====>........................] - ETA: 0s 



In [96]:
%%ag
# The predictions are float scores, one per row; turn each score into a hard
# 0/1 class label by thresholding at 0.5.
def f(x: float) -> float:
  """Return 1 when the score ``x`` is strictly greater than 0.5, otherwise 0."""
  return 1 if x > 0.5 else 0

# Overwrite the raw scores in the "output" column with their thresholded 0/1
# labels. output_bounds=(0, 1) presumably declares the value range of the
# mapped column — confirm against the op_pandas documentation.
y_pred["output"] = y_pred["output"].map(f, output_bounds=(0, 1))

In [97]:
%%ag
# Submit the thresholded predictions for scoring and keep the returned result.
result = submit_predictions(y_pred)

score: {'leaderboard': 0.6909053730275597, 'logs': {'BIN_ACC': 0.703297753130009, 'LIN_EPS': -0.012392380102449224}}

