# Installs

In [1]:
pip install "jax[tpu]>=0.4.6" -f https://storage.googleapis.com/jax-releases/libtpu_releases.html

Looking in links: https://storage.googleapis.com/jax-releases/libtpu_releases.html


In [2]:
!pip install pennylane
!pip install jax jaxlib
!pip install optax
!pip install pennylane-catalyst

Collecting pennylane
  Downloading PennyLane-0.39.0-py3-none-any.whl.metadata (9.2 kB)
Collecting rustworkx>=0.14.0 (from pennylane)
  Downloading rustworkx-0.15.1-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.9 kB)
Collecting autograd (from pennylane)
  Downloading autograd-1.7.0-py3-none-any.whl.metadata (7.5 kB)
Collecting appdirs (from pennylane)
  Downloading appdirs-1.4.4-py2.py3-none-any.whl.metadata (9.0 kB)
Collecting autoray>=0.6.11 (from pennylane)
  Downloading autoray-0.7.0-py3-none-any.whl.metadata (5.8 kB)
Collecting pennylane-lightning>=0.39 (from pennylane)
  Downloading PennyLane_Lightning-0.39.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (26 kB)
Downloading PennyLane-0.39.0-py3-none-any.whl (1.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m20.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading autoray-0.7.0-py3-none-any.whl (930 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m930.0/93

In [3]:
import pennylane as qml
from jax import numpy as jnp
import optax
import catalyst

In [4]:
import os

In [5]:
import os
os.environ['JAX_PLATFORMS'] = ''


## Data Install and PreProcess

### Function

In [6]:
pip install imblearn

Collecting imblearn
  Downloading imblearn-0.0-py2.py3-none-any.whl.metadata (355 bytes)
Collecting imbalanced-learn (from imblearn)
  Downloading imbalanced_learn-0.12.4-py3-none-any.whl.metadata (8.3 kB)
Downloading imblearn-0.0-py2.py3-none-any.whl (1.9 kB)
Downloading imbalanced_learn-0.12.4-py3-none-any.whl (258 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m258.3/258.3 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: imbalanced-learn, imblearn
Successfully installed imbalanced-learn-0.12.4 imblearn-0.0


In [7]:
from imblearn.over_sampling import SMOTE
import pandas as pd

from imblearn.over_sampling import SMOTE
import pandas as pd

def balance_with_smote(df, target_col='isFraud', smote_ratio=0.2, random_state=42):
    """Apply SMOTE to increase the minority class size by a specified ratio."""
    X = df.drop(columns=[target_col])
    y = df[target_col]

    # Apply SMOTE with a limited ratio
    smote = SMOTE(sampling_strategy=smote_ratio, random_state=random_state)
    X_resampled, y_resampled = smote.fit_resample(X, y)

    balanced_df = pd.DataFrame(X_resampled, columns=X.columns)
    balanced_df[target_col] = y_resampled
    return balanced_df

In [8]:
def count_fraud_percentage(df, fraud_col='isFraud'):
    """
    Calculate the count and percentage of fraud cases in a DataFrame.

    """

    fraud_count = df[fraud_col].sum()

    total_count = len(df)
    fraud_percentage = (fraud_count / total_count) * 100

    return fraud_count, fraud_percentage

### Loading - Processing

In [9]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [10]:
# Install the Kaggle library
!pip install kaggle

# Upload kaggle.json file
from google.colab import files
files.upload()  # This will prompt you to upload kaggle.json

# Make a directory for Kaggle and move the file there
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# Now you can download the dataset
!kaggle datasets download -d ealaxi/paysim1 -p /content/datasets --unzip

print("Dataset downloaded to: /content/datasets")



Saving kaggle.json to kaggle.json
Dataset URL: https://www.kaggle.com/datasets/ealaxi/paysim1
License(s): CC-BY-SA-4.0
Downloading paysim1.zip to /content/datasets
 97% 173M/178M [00:02<00:00, 140MB/s]
100% 178M/178M [00:02<00:00, 90.9MB/s]
Dataset downloaded to: /content/datasets


In [11]:
df = pd.read_csv("/content/datasets/PS_20174392719_1491204439457_log.csv",delimiter=',')

In [12]:
# Sort by 'step' to ensure time ordering
finance_df_sorted = df.sort_values(by='step')

# Calculate the interval for sampling evenly across time
interval = len(finance_df_sorted) // 10000

# Sample the dataset
sampled_df = finance_df_sorted.iloc[::interval][:10000]

# Display or save the sampled dataset
sampled_df.reset_index(drop=True, inplace=True)
print(sampled_df)

      step      type     amount     nameOrig  oldbalanceOrg  newbalanceOrig  \
0        1   CASH_IN   42012.45  C1137667747      350501.24       350501.24   
1        1  CASH_OUT  361980.67  C1290698656           0.00            0.00   
2        1   PAYMENT    3056.61  C1800592689           0.00            0.00   
3        1   CASH_IN   92971.79  C2117642238     6588417.88      6681389.66   
4        1   PAYMENT    4518.42  C1504321715        8790.00         4271.58   
...    ...       ...        ...          ...            ...             ...   
9995   710  CASH_OUT  182572.59  C2110473217        2884.00            0.00   
9996   710  CASH_OUT   94011.34   C488292913       82660.00            0.00   
9997   714   PAYMENT    1609.45  C2059265367       63205.00        61595.55   
9998   714  CASH_OUT   59889.00  C1547853635       14703.00            0.00   
9999   715  CASH_OUT  270711.74  C1261501464        8236.00            0.00   

         nameDest  oldbalanceDest  newbalanceDest  

In [13]:
finance_df=sampled_df

In [14]:
df_cleaned = finance_df.loc[(finance_df['type'].isin(['CASH_OUT', 'TRANSFER'])),:]
df_cleaned.drop(columns=['nameOrig','nameDest','isFlaggedFraud'],inplace=True)
df_cleaned['type'] = finance_df['type'].map({'CASH_OUT': 0, 'TRANSFER': 1})
df_cleaned

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cleaned.drop(columns=['nameOrig','nameDest','isFlaggedFraud'],inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cleaned['type'] = finance_df['type'].map({'CASH_OUT': 0, 'TRANSFER': 1})


Unnamed: 0,step,type,amount,oldbalanceOrg,newbalanceOrig,oldbalanceDest,newbalanceDest,isFraud
1,1,0.0,361980.67,0.0,0.0,6057072.17,6453430.91,0
6,3,1.0,18873.10,0.0,0.0,54985.05,18021.11,0
10,6,1.0,308882.21,21996.0,0.0,186.00,1906158.59,0
12,7,1.0,77911.42,73140.0,0.0,93689.50,8748005.25,0
21,7,0.0,119162.63,0.0,0.0,4226811.09,14247328.87,0
...,...,...,...,...,...,...,...,...
9993,709,0.0,224498.31,20953.0,0.0,0.00,224498.31,0
9995,710,0.0,182572.59,2884.0,0.0,200185.23,382757.83,0
9996,710,0.0,94011.34,82660.0,0.0,179367.89,273379.23,0
9998,714,0.0,59889.00,14703.0,0.0,355460.57,415349.57,0


In [15]:
df_smote_balanced = balance_with_smote(df_cleaned, target_col='isFraud', smote_ratio=0.2, random_state=42)
fraud_count_smote, fraud_percentage_smote = count_fraud_percentage(df_smote_balanced)
print(f"After SMOTE - Fraudulent: {fraud_count_smote}, Fraud Percentage: {fraud_percentage_smote:.2f}%")
final_df=df_smote_balanced

After SMOTE - Fraudulent: 875, Fraud Percentage: 16.66%


In [16]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
final_df[final_df.columns] = scaler.fit_transform(final_df[final_df.columns])
# final_df
final_df = final_df.sort_values(by='step').astype(float)

In [17]:
print(type(final_df))

<class 'pandas.core.frame.DataFrame'>


In [18]:
# final_df.drop(columns=['step'],inplace=True)
final_df

Unnamed: 0,step,type,amount,oldbalanceOrg,newbalanceOrig,oldbalanceDest,newbalanceDest,isFraud
0,0.000000,0.0,0.010448,0.000000,0.000000,0.060697,0.064573,0.0
1,0.002801,1.0,0.000542,0.000000,0.000000,0.000551,0.000180,0.0
2,0.007003,1.0,0.008915,0.000884,0.000000,0.000002,0.019073,0.0
4,0.008403,0.0,0.003438,0.000000,0.000000,0.042356,0.142558,0.0
5,0.008403,0.0,0.002173,0.006886,0.006449,0.001001,0.000454,0.0
...,...,...,...,...,...,...,...,...
4379,0.991597,0.0,0.006479,0.000842,0.000000,0.000000,0.002246,0.0
4381,0.992997,0.0,0.002711,0.003321,0.000000,0.001797,0.002735,0.0
4380,0.992997,0.0,0.005268,0.000116,0.000000,0.002006,0.003830,0.0
4382,0.998599,0.0,0.001726,0.000591,0.000000,0.003562,0.004156,0.0


# Model

In [19]:
import pandas as pd
import numpy as np
from jax import numpy as jnp

In [21]:
features = final_df[['amount', 'oldbalanceOrg', 'newbalanceOrig', 'oldbalanceDest', 'newbalanceDest']]
target = final_df['isFraud']  # Binary target

n_wires = 5

# Step 2: Normalize the features
# Scaling the features to the range [0, 1] for compatibility with quantum circuits
features_normalized = features / features.max()

# Step 3: Convert to JAX arrays
data = jnp.array(features_normalized).transpose()  # Features (inputs)
target = jnp.array(target)  # Binary target (outputs)

# Print the shapes for verification
print("Data shape:", data.shape)  # Should be (n_samples, n_features)
print("Target shape:", target.shape)  # Should be (n_samples,)

RuntimeError: Unable to initialize backend 'tpu': INVALID_ARGUMENT: Mismatched PJRT plugin PJRT API version (0.54) and framework PJRT API version 0.51). (set JAX_PLATFORMS='' to automatically choose an available backend)

In [None]:
data

In [None]:
dev = qml.device("lightning.qubit", wires=n_wires)
import jax
@qml.qnode(dev)
def circuit(data, weights):
    """Quantum circuit ansatz"""
    # jax.debug.print("Data array: {}", data)
    # jax.debug.print("Data[0]: {}", data[0])

    @qml.for_loop(0, n_wires, 1)
    def data_embedding(i):
        qml.RY(data[i], wires=i)

    data_embedding()

    @qml.for_loop(0, n_wires, 1)
    def ansatz(i):
        qml.RX(weights[i, 0], wires=i)
        qml.RY(weights[i, 1], wires=i)
        qml.RX(weights[i, 2], wires=i)
        qml.CNOT(wires=[i, (i + 1) % n_wires])

    ansatz()

    result = qml.expval(qml.sum(*[qml.PauliZ(i) for i in range(n_wires)]))
    # print("Circuit output (expectation value):", result)
    return result

In [None]:
circuit = qml.qjit(catalyst.vmap(circuit, in_axes=(1, None)))

In [None]:
def my_model(data, weights, bias):
    return circuit(data, weights) + bias

@qml.qjit
def loss_fn(params, data, targets):
    # print("Here is the: ",data)
    predictions = my_model(data, params["weights"], params["bias"])
    loss = jnp.sum((targets - predictions) ** 2 / len(data))
    return loss

In [None]:
weights = jnp.ones([n_wires, 3])
bias = jnp.array(0.)
params = {"weights": weights, "bias": bias}

In [None]:
loss_fn(params, data, target)

print(qml.qjit(catalyst.grad(loss_fn, method="fd"))(params, data, target))

In [None]:
opt = optax.adam(learning_rate=0.3)

@qml.qjit
def update_step(i, args):
    params, opt_state, data, targets = args

    grads = catalyst.grad(loss_fn, method="fd")(params, data, targets)
    updates, opt_state = opt.update(grads, opt_state)
    params = optax.apply_updates(params, updates)

    return (params, opt_state, data, targets)

loss_history = []

opt_state = opt.init(params)

for i in range(20):
    params, opt_state, _, _ = update_step(i, (params, opt_state, data, target))
    loss_val = loss_fn(params, data, target)

    # if i % 5 == 0:
    print(f"Step: {i} Loss: {loss_val}")

    loss_history.append(loss_val)