<a href="https://colab.research.google.com/github/Pranavgit6167/Suspicious-Transaction-Detection/blob/main/LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import load_model
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.layers import Input

In [2]:
# Load the transaction table from the notebook's Colab working directory.
df = pd.read_csv(filepath_or_buffer="/content/BitcoinHeistData.csv")

In [3]:
# Quick sanity checks on the freshly loaded frame.

# Number of rows and columns.
shape = df.shape
print("Dataset Shape:", shape)

# Count of null entries in each column.
missing_per_column = df.isnull().sum()
print("Missing Values:\n", missing_per_column)

# How many rows carry each label value (class balance).
label_counts = df["label"].value_counts()
print("Label Counts:\n", label_counts)

Dataset Shape: (310439, 10)
Missing Values:
 address      0
year         1
day          1
length       1
weight       1
count        1
looped       1
neighbors    1
income       1
label        1
dtype: int64
Label Counts:
 label
white                          269025
paduaCryptoWall                 12390
montrealCryptoLocker             9315
princetonCerber                  9223
princetonLocky                   6625
montrealCryptXXX                 2419
montrealNoobCrypt                 483
montrealDMALockerv3               354
montrealDMALocker                 251
montrealSamSam                     62
montrealCryptoTorLocker2015        55
montrealGlobeImposter              55
montrealGlobev3                    34
montrealGlobe                      32
montrealWannaCry                   28
montrealRazy                       13
montrealAPT                        11
paduaKeRanger                      10
montrealFlyper                      9
montrealXTPLocker                   8
montrealXLo

In [4]:

# Discard every row that contains at least one missing value
# (the null summary printed earlier reports one null in each column except `address`).
df = df.dropna(axis=0, how="any")

In [25]:
# Preview the first five rows.
# NOTE(review): this cell's saved output carries execution count 25 and shows
# integer labels and fractional feature values, i.e. the frame as it looks
# *after* the label-encoding and scaling cells below, not right after dropna.
df.head(n=5)

Unnamed: 0,address,year,day,length,weight,count,looped,neighbors,income,label
0,111K8kZAEnJg245r2cM6y9zgJGHZtJPy6,2017.0,11.0,0.125,1.672391e-05,0.0,0.0,0.001908,2e-06,26
1,1123pJv8jzeFQaCV4w644pzQJzVWay2zcA,2016.0,132.0,0.305556,4.899584e-07,0.0,0.0,0.0,2e-06,27
2,112536im7hy6wtKbpH1qYDWtTyMRAcA2p7,2016.0,246.0,0.0,0.002006869,0.0,0.0,0.001908,4e-06,26
3,1126eDRw2wqSkWosjTCre8cjjQW8sSeWH7,2016.0,322.0,0.5,7.839334e-06,0.0,0.0,0.001908,1e-06,26
4,1129TSjKtx65E35GiUo4AYVeyo48twbrGX,2016.0,238.0,1.0,0.0001461972,0.033801,0.0,0.0,4e-06,27


In [5]:
# Name of the column holding the class to predict.
target = "label"

# Columns used as model inputs (kept as a list so `df[features]` selects columns).
features = [
    "length",
    "weight",
    "count",
    "looped",
    "neighbors",
    "income",
]

In [6]:
# Replace the label column with integer class ids. The fitted encoder is kept
# so predictions can later be mapped back with `inverse_transform`.
# Note: this overwrites `df[target]` in place, so re-running the cell would
# re-encode the already-encoded integers.
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(df[target])
df[target] = encoded_labels

In [7]:
# Min-max scale the feature columns in place; the fitted scaler is kept for reuse.
# NOTE(review): the scaler is fitted on the whole frame before the train/test
# split further down, so the test rows' min/max influence the scaling.
scaler = MinMaxScaler()
scaled_features = scaler.fit_transform(df[features])
df[features] = scaled_features

In [8]:
# Pull the model inputs and targets out of the frame as plain NumPy arrays.
X = df[features].to_numpy()   # 2-D: one row per sample, one column per feature
y = df[target].to_numpy()     # 1-D: encoded class id per sample

In [9]:
# Insert a length-1 middle axis so X becomes (samples, 1, features),
# the 3-D layout the LSTM layer below is built for.
n_samples, n_features = X.shape
X = X.reshape((n_samples, 1, n_features))

In [10]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [11]:
# Number of distinct encoded labels in the full target vector;
# used as the width of the softmax output layer.
num_classes = np.unique(y).size

In [12]:
# LSTM classifier over (timesteps, features) inputs with a softmax head.
# The input shape is declared with an explicit `Input` layer instead of an
# `input_shape=` keyword on the LSTM: passing `input_shape` to a layer inside
# a Sequential model is what produces the warning seen in this cell's output.
model = Sequential([
    Input(shape=(X.shape[1], X.shape[2])),  # (timesteps=1, n_features)
    LSTM(64, activation='tanh'),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dropout(0.2),
    Dense(num_classes, activation='softmax')  # Multiclass classification
])

  super().__init__(**kwargs)


In [13]:
# Targets are integer class ids (from the label encoder), hence the *sparse*
# categorical cross-entropy loss; accuracy is tracked during training.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [14]:
# Train for 20 epochs in mini-batches of 32, reporting progress each epoch.
# NOTE(review): the held-out test split is also used as validation data here,
# so the later test metrics are not from data unseen during training monitoring.
history = model.fit(
    X_train,
    y_train,
    epochs=20,
    batch_size=32,
    validation_data=(X_test, y_test),
    verbose=1,
)

Epoch 1/20
[1m7761/7761[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 4ms/step - accuracy: 0.8710 - loss: 0.7169 - val_accuracy: 0.8797 - val_loss: 0.5402
Epoch 2/20
[1m7761/7761[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 4ms/step - accuracy: 0.8807 - loss: 0.5391 - val_accuracy: 0.8799 - val_loss: 0.5353
Epoch 3/20
[1m7761/7761[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 4ms/step - accuracy: 0.8792 - loss: 0.5403 - val_accuracy: 0.8803 - val_loss: 0.5318
Epoch 4/20
[1m7761/7761[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 3ms/step - accuracy: 0.8808 - loss: 0.5289 - val_accuracy: 0.8800 - val_loss: 0.5258
Epoch 5/20
[1m7761/7761[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 3ms/step - accuracy: 0.8807 - loss: 0.5213 - val_accuracy: 0.8806 - val_loss: 0.5187
Epoch 6/20
[1m7761/7761[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 3ms/step - accuracy: 0.8809 - loss: 0.5126 - val_accuracy: 0.8803 - val_loss: 0.5058
Epoch 7/20

In [24]:
 # Save the model
# Destination file for the trained model; the same name is reused by the loading cell.
model_path = "/content/saved_rnn_model.keras"

# Persist the model to disk and confirm where it went.
model.save(filepath=model_path)
print(f"Model saved to {model_path}")

Model saved to /content/saved_rnn_model.keras


In [None]:
#Run only for loading model
# This cell is meant to be usable without running the training/saving cells,
# so it defines the model path itself instead of relying on the save cell
# having executed. `load_model` comes from `tensorflow.keras.models`
# (imported in the notebook's import cell).
model_path = "/content/saved_rnn_model.keras"
print("Saved model found. Loading the model...")
model = load_model(model_path)

In [15]:
# Score the model on the held-out split; with the compile settings above,
# `evaluate` yields the loss followed by the accuracy metric.
test_metrics = model.evaluate(X_test, y_test)
loss, accuracy = test_metrics

print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")

[1m1941/1941[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.8840 - loss: 0.4491
Test Loss: 0.4463
Test Accuracy: 0.8844


In [17]:
# Model outputs for every test sample: one score per class along axis 1.
y_pred = model.predict(X_test)

# Collapse each row to the index of its highest-scoring class.
y_pred_classes = y_pred.argmax(axis=1)

[1m1941/1941[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step


In [18]:
# Support-weighted precision / recall / F1 over all classes.
# Some classes are never predicted, which leaves their per-class precision
# undefined. `zero_division=0` states explicitly that such classes score 0
# (the value scikit-learn already falls back to) and avoids the
# undefined-metric warnings that otherwise clutter the output.
precision = precision_score(y_test, y_pred_classes, average='weighted', zero_division=0)
recall = recall_score(y_test, y_pred_classes, average='weighted', zero_division=0)
f1 = f1_score(y_test, y_pred_classes, average='weighted', zero_division=0)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [19]:
# Headline (support-weighted) metrics computed in the previous cell.
print("\nClassification Metrics:")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

# Per-class breakdown. Classes that are never predicted have undefined
# precision; `zero_division=0` reports them as 0.00 without emitting a
# warning for each affected metric.
print("\nClassification Report:\n", classification_report(y_test, y_pred_classes, zero_division=0))


Classification Metrics:
Precision: 0.8492
Recall: 0.8844
F1-Score: 0.8450

Classification Report:
               precision    recall  f1-score   support

           2       0.00      0.00      0.00         2
           3       0.00      0.00      0.00       459
           4       0.60      0.03      0.05      1913
           5       0.00      0.00      0.00        14
           6       0.00      0.00      0.00        48
           7       0.00      0.00      0.00        74
           8       0.00      0.00      0.00         2
           9       0.00      0.00      0.00         2
          10       0.00      0.00      0.00         7
          11       0.00      0.00      0.00        12
          12       0.00      0.00      0.00         4
          14       0.00      0.00      0.00       103
          15       0.00      0.00      0.00         5
          17       0.00      0.00      0.00        13
          18       0.00      0.00      0.00         3
          19       0.00      0.00  

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [26]:
# Predict a single input
# These six values (length, weight, count, looped, neighbors, income) are
# copied from row 1 of the frame preview, which was displayed *after* min-max
# scaling. They are therefore already in the scaler's output range and must
# not be passed through `scaler.transform` a second time. Raw, unscaled
# feature values would need `scaler.transform` before being reshaped.
X_pred = np.array([[0.305556, 4.899584e-07, 0.0, 0.0, 0.0, 0.000002]])

# Add the length-1 timestep axis the model expects: (1 sample, 1 step, n_features).
X_pred = X_pred.reshape(1, 1, X_pred.shape[1])



In [27]:
# Run the model on the single prepared sample.
y_pred_single = model.predict(X_pred)

# Index of the highest-scoring class (a length-1 array, since there is one sample).
predicted_label = np.argmax(y_pred_single, axis=1)

# Translate the class index back to its original label string for display.
decoded_labels = label_encoder.inverse_transform(predicted_label)
print(f"Predicted Label for Single Input: {decoded_labels[0]}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 122ms/step
Predicted Label for Single Input: white


In [None]:
from sklearn.utils.class_weight import compute_class_weight

# Compute class weights ("balanced": inversely proportional to class frequency)
# for exactly the classes that occur in the training targets.
train_classes = np.unique(y_train)
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=train_classes,
    y=y_train
)

# Convert to a dictionary keyed by the actual class id.
# `compute_class_weight` returns weights in the order of `train_classes`, so
# pairing by position (enumerate) would attach weights to the wrong classes
# whenever a rare class is absent from `y_train` and the ids are not 0..k-1.
class_weight_dict = {
    int(class_id): float(weight)
    for class_id, weight in zip(train_classes, class_weights)
}

# Pass class weights to model.fit()
# NOTE(review): this continues training the existing `model` object and
# overwrites `history`; rebuild the model first if a from-scratch weighted
# run is intended.
history = model.fit(
    X_train, y_train,
    epochs=20,
    batch_size=32,
    validation_data=(X_test, y_test),
    class_weight=class_weight_dict,
    verbose=1
)


Epoch 1/20
[1m7761/7761[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 4ms/step - accuracy: 0.8748 - loss: 7.0569 - val_accuracy: 0.8725 - val_loss: 0.7206
Epoch 2/20
[1m7761/7761[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 4ms/step - accuracy: 0.8707 - loss: 5.4854 - val_accuracy: 0.8734 - val_loss: 0.8209
Epoch 3/20
[1m7761/7761[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 4ms/step - accuracy: 0.8707 - loss: 5.1585 - val_accuracy: 0.8724 - val_loss: 0.9051
Epoch 4/20
[1m7761/7761[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 4ms/step - accuracy: 0.8686 - loss: 4.7107 - val_accuracy: 0.8695 - val_loss: 0.9934
Epoch 5/20
[1m7761/7761[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 4ms/step - accuracy: 0.8710 - loss: 4.1837 - val_accuracy: 0.8673 - val_loss: 1.0736
Epoch 6/20
[1m7761/7761[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 4ms/step - accuracy: 0.8681 - loss: 5.6538 - val_accuracy: 0.8687 - val_loss: 0.9347
Epoch 7/20