In [None]:
!pip install antigranular &> /dev/null

In [None]:
import os
from getpass import getpass

import antigranular as ag

# Log in to the competition without storing credentials in the notebook:
# each value is taken from an environment variable when it is set, otherwise
# the user is prompted for it (getpass does not echo the typed value).
# The credentials are passed straight to ag.login so they are not left behind
# in kernel variables.
session = ag.login(
    os.environ.get("AG_CLIENT_ID") or getpass("Antigranular client id: "),
    os.environ.get("AG_CLIENT_SECRET") or getpass("Antigranular client secret: "),
    competition="Fraud Detection Hackathon with Privacy Village #DPD25FEST",
)

Dataset "Transaction Fraud Hackathon Dataset" loaded to the kernel as transaction_fraud_hackathon_dataset
Key Name                       Value Type     
---------------------------------------------
test_x                         DataFrame      
train_x                        PrivateDataFrame
train_y                        PrivateDataFrame

Connected to Antigranular server session id: 2925d3bc-1afa-4052-9e6d-3d382980ec09, the session will time out if idle for 25 minutes
Cell magic '%%ag' registered successfully, use `%%ag` in a notebook cell to execute your python code on Antigranular private python server
🚀 Everything's set up and ready to roll!


In [None]:
%%ag
# Pull the three tables out of the dataset object that the login step loaded
# into the remote kernel. Per the login output, train_x and train_y are
# PrivateDataFrame objects while test_x is a plain DataFrame.
x_train = transaction_fraud_hackathon_dataset["train_x"]
y_train = transaction_fraud_hackathon_dataset["train_y"]
x_test = transaction_fraud_hackathon_dataset["test_x"]

In [None]:
%%ag
# Import necessary libraries
import tensorflow as tf
from op_pandas import standard_scaler, PrivateDataFrame
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from op_tensorflow import PrivateKerasModel, PrivateDataLoader

# Feed-forward binary classifier, assembled one layer at a time.
# Hidden widths taper 256 -> 128 -> 64 -> 32. Every hidden Dense layer carries
# an L2 kernel penalty of 0.001 and is followed by batch normalization
# (momentum 0.9) and a ReLU; the first three hidden stages end with
# Dropout(0.3) to curb overfitting.
# NOTE(review): batch normalization computes statistics across the batch, while
# differentially private training typically clips gradients per example --
# confirm that PrivateKerasModel supports this layer combination.
seqM = Sequential()

# Hidden stage 1 (256 units); also declares the 9-feature input.
seqM.add(Dense(256, input_shape=(9,), kernel_regularizer=tf.keras.regularizers.l2(0.001)))
seqM.add(BatchNormalization(momentum=0.9))
seqM.add(tf.keras.layers.ReLU())
seqM.add(Dropout(0.3))

# Hidden stage 2 (128 units).
seqM.add(Dense(128, kernel_regularizer=tf.keras.regularizers.l2(0.001)))
seqM.add(BatchNormalization(momentum=0.9))
seqM.add(tf.keras.layers.ReLU())
seqM.add(Dropout(0.3))

# Hidden stage 3 (64 units).
seqM.add(Dense(64, kernel_regularizer=tf.keras.regularizers.l2(0.001)))
seqM.add(BatchNormalization(momentum=0.9))
seqM.add(tf.keras.layers.ReLU())
seqM.add(Dropout(0.3))

# Hidden stage 4 (32 units); no dropout before the output layer.
seqM.add(Dense(32, kernel_regularizer=tf.keras.regularizers.l2(0.001)))
seqM.add(BatchNormalization(momentum=0.9))
seqM.add(tf.keras.layers.ReLU())

# Single sigmoid unit: the output is a value in (0, 1) for the binary target.
seqM.add(Dense(1, activation='sigmoid'))

# Wrap the Keras network in a differentially private model.
#   l2_norm_clip     -- bound used when clipping gradient norms
#   noise_multiplier -- scale of the noise added to the gradients to protect
#                       individual records
dp_model = PrivateKerasModel(model=seqM, l2_norm_clip=1, noise_multiplier=1)

# Adam optimizer, AMSGrad variant, with a small step size for stable convergence.
#   learning_rate -- 1e-4
#   beta_1/beta_2 -- exponential decay rates of the first/second moment estimates
#   epsilon       -- small constant that guards against division by zero
optimizer = tf.keras.optimizers.Adam(
    learning_rate=1e-4, beta_1=0.9, beta_2=0.999, epsilon=1e-7, amsgrad=True
)

# Compile the private model: binary cross-entropy is the loss for the two-class
# target, and accuracy is tracked as the training metric.
dp_model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

  import distutils as _distutils

2025-03-03 22:48:02.922341: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-03-03 22:48:02.925536: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2025-03-03 22:48:03.041360: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-03-03 22:48:03.041396: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-03-03 22:48:03.042322: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to regi

In [None]:
%%ag
# Pair the private features with their labels for training.
# A batch size of 256 was chosen by the author for better generalization.
data_loader = PrivateDataLoader(
    feature_df=x_train,
    label_df=y_train,
    batch_size=256,
)

In [None]:
%%ag
# Train the differentially private model.
# The author's best score came from executing this training step three times
# in a row. The repetition is spelled out as a loop so that a single
# top-to-bottom run of the notebook reproduces it, rather than depending on
# the cell being re-executed by hand.
# NOTE(review): each round continues training the same dp_model and presumably
# spends additional privacy budget -- lower FIT_ROUNDS to train less.
FIT_ROUNDS = 3
EPOCHS_PER_ROUND = 50

for fit_round in range(FIT_ROUNDS):
    dp_model.fit(x=data_loader, epochs=EPOCHS_PER_ROUND, shuffle=True)

In [None]:
%%ag
# Make predictions on the test set.
# x_test is wrapped in a PrivateDataFrame and copied before it is handed to
# predict; the predictions are returned in a column named "output".
test = PrivateDataFrame(x_test).copy()
y_pred = dp_model.predict(test, label_columns=["output"])

In [None]:
%%ag
# Prepare predictions. These datasets are usually imbalanced, so I tried several thresholds; the default (0.5) was the best on the public leaderboard, though on the private leaderboard my best score may have been with another threshold.
def f(x: float) -> float:
    """Turn a predicted score into a hard class label.

    Returns 1 when ``x`` is strictly greater than 0.5 and 0 otherwise, so a
    score of exactly 0.5 maps to 0.
    """
    # NOTE(review): the labels are int literals although the annotation says
    # float -- confirm whether the caller relies on the annotation.
    if x > 0.5:
        return 1
    return 0

# Overwrite the "output" column with the result of mapping f over it, which
# turns the predicted scores into 0/1 labels.
# NOTE(review): output_bounds=(0, 1) presumably declares the value range of the
# mapped column for the private data frame -- confirm against the op_pandas docs.
y_pred["output"] = y_pred["output"].map(f, output_bounds=(0, 1))

In [None]:
%%ag
# Submit the thresholded predictions and keep whatever submit_predictions
# returns in `result`.
# NOTE(review): submit_predictions is not defined in this notebook; presumably
# it is provided by the Antigranular remote session -- confirm.
result = submit_predictions(y_pred)