In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
import joblib


In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
data_path = '/content/drive/MyDrive/SEM3/Project/Dataset/creditcard.csv'  # Path to the CSV file
credit_data = pd.read_csv(data_path)


In [None]:
credit_data = credit_data.drop(columns=['Time'])

In [None]:
credit_data.head()

Unnamed: 0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,0.090794,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,-0.166974,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,0.207643,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,-0.054952,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,0.753074,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [None]:
credit_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 284807 entries, 0 to 284806
Data columns (total 30 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   V1      284807 non-null  float64
 1   V2      284807 non-null  float64
 2   V3      284807 non-null  float64
 3   V4      284807 non-null  float64
 4   V5      284807 non-null  float64
 5   V6      284807 non-null  float64
 6   V7      284807 non-null  float64
 7   V8      284807 non-null  float64
 8   V9      284807 non-null  float64
 9   V10     284807 non-null  float64
 10  V11     284807 non-null  float64
 11  V12     284807 non-null  float64
 12  V13     284807 non-null  float64
 13  V14     284807 non-null  float64
 14  V15     284807 non-null  float64
 15  V16     284807 non-null  float64
 16  V17     284807 non-null  float64
 17  V18     284807 non-null  float64
 18  V19     284807 non-null  float64
 19  V20     284807 non-null  float64
 20  V21     284807 non-null  float64
 21  V22     28

In [None]:
# Split the data into features and target
X = credit_data.drop(['Class'], axis=1)
y = credit_data['Class']


In [None]:
# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
# Save the fitted scaler to a file
joblib.dump(scaler, 'scaler.joblib')
print("Scaler saved as scaler.joblib")


Scaler saved as scaler.joblib


In [None]:
import json

# Save the scaler parameters
scaler_params = {
    "mean": scaler.mean_.tolist(),
    "scale": scaler.scale_.tolist()
}
with open("scaler_params.json", "w") as f:
    json.dump(scaler_params, f)
print("Scaler parameters saved as scaler_params.json")


Scaler parameters saved as scaler_params.json


In [None]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Use only normal transactions (Class 0) for Autoencoder training
X_train_auto = X_train[y_train == 0]

# Build the Autoencoder model
input_dim = X_train_auto.shape[1]
autoencoder = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=(input_dim,)),
    tf.keras.layers.Dense(14, activation="relu"),
    tf.keras.layers.Dense(7, activation="relu"),
    tf.keras.layers.Dense(14, activation="relu"),
    tf.keras.layers.Dense(input_dim, activation="sigmoid")
])

# Compile the Autoencoder
autoencoder.compile(optimizer="adam", loss="mse")

# Train the Autoencoder
autoencoder.fit(X_train_auto, X_train_auto, epochs=50, batch_size=256, validation_split=0.1, verbose=0)




<keras.src.callbacks.history.History at 0x7cb07a910400>

In [None]:
# Set a reconstruction error threshold based on training data
reconstruction = autoencoder.predict(X_train_auto)
reconstruction_error = np.mean(np.power(X_train_auto - reconstruction, 2), axis=1)
threshold = np.percentile(reconstruction_error, 95)


[1m7108/7108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 1ms/step


In [None]:
print(threshold)

1.5759429200795725


In [None]:
# Predict on the test set
reconstruction_test = autoencoder.predict(X_test)
test_error = np.mean(np.power(X_test - reconstruction_test, 2), axis=1)
y_pred_ae = [1 if e > threshold else 0 for e in test_error]

# Evaluate the Autoencoder model
print("\nAutoencoder Anomaly Detection Results:")
print(confusion_matrix(y_test, y_pred_ae))
print(classification_report(y_test, y_pred_ae))

[1m1781/1781[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step

Autoencoder Anomaly Detection Results:
[[54031  2833]
 [   10    88]]
              precision    recall  f1-score   support

           0       1.00      0.95      0.97     56864
           1       0.03      0.90      0.06        98

    accuracy                           0.95     56962
   macro avg       0.51      0.92      0.52     56962
weighted avg       1.00      0.95      0.97     56962



In [None]:
autoencoder.save('autoencoder_model.keras')
print("Model saved as autoencoder_model.keras")



Model saved as autoencoder_model.keras


In [None]:
import tensorflow as tf
# Load the saved model in .keras format
autoencoder = tf.keras.models.load_model('/content/autoencoder_model.keras')

# Convert the model to TensorFlow Lite format
converter = tf.lite.TFLiteConverter.from_keras_model(autoencoder)
tflite_model = converter.convert()

# Save the converted model
with open('autoencoder_model.tflite', 'wb') as f:
    f.write(tflite_model)

print("Model converted and saved as autoencoder_model.tflite")


Saved artifact at '/tmp/tmpxxyfo4_5'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 29), dtype=tf.float32, name='input_layer')
Output Type:
  TensorSpec(shape=(None, 29), dtype=tf.float32, name=None)
Captures:
  137097038595168: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137097038599920: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137097038605728: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137097038610128: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137097038603088: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137097038609776: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137096922810672: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137096922813840: TensorSpec(shape=(), dtype=tf.resource, name=None)
Model converted and saved as autoencoder_model.tflite


In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
import joblib

# Load the saved model and scaler
autoencoder = tf.keras.models.load_model('/content/drive/MyDrive/SEM3/Project/Model created using Nerual network/autoencoder_model.keras')
scaler = joblib.load('/content/drive/MyDrive/SEM3/Project/Model created using Nerual network/scaler.joblib')

# Load the new data (assuming new_data is a DataFrame with the same features as the training data)
new_data_path = '/content/drive/MyDrive/SEM3/Project/Test data/new_data.csv'  # Path to the new CSV file
new_data = pd.read_csv(new_data_path)

# Ensure the new data has the same columns as the original training data
X_new = new_data.drop(['Class'], axis=1, errors='ignore')  # Drop 'Class' if it's not part of the new data

# Scale the new data using the saved scaler
X_new_scaled = scaler.transform(X_new)

# Predict the reconstruction error on the new data
reconstruction_new = autoencoder.predict(X_new_scaled)
new_error = np.mean(np.power(X_new_scaled - reconstruction_new, 2), axis=1)

# Apply the threshold (assumed threshold is the same as the one used during training)
threshold = 0.5  # Replace with the actual threshold from your previous training

# Predict anomalies (1 for anomalous, 0 for normal)
y_pred_new = [1 if e > threshold else 0 for e in new_error]
print(new_error)

# Optionally, you can create a DataFrame with the results
new_data['Anomaly_Prediction'] = y_pred_new

# Print the prediction results
print(new_data[['Anomaly_Prediction']])  # Show only the prediction column


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[0.27270899 0.16557389 0.17481582 0.54150001 0.16025411]
   Anomaly_Prediction
0                   0
1                   0
2                   0
3                   1
4                   0


In [None]:

import tensorflow as tf
import numpy as np
import pandas as pd
import joblib

# Load the TFLite model and allocate tensors.
interpreter = tf.lite.Interpreter(model_path="/content/drive/MyDrive/SEM3/Project/Model created using Nerual network/autoencoder_model.tflite")
interpreter.allocate_tensors()

# Get input and output tensors.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Load the scaler
scaler = joblib.load('/content/drive/MyDrive/SEM3/Project/Model created using Nerual network/scaler.joblib')

# Load new data (replace with your actual new data loading)
new_data_path = '/content/drive/MyDrive/SEM3/Project/Test data/new_data.csv'  # Path to the new CSV file
new_data = pd.read_csv(new_data_path)
X_new = new_data.drop(['Class'], axis=1, errors='ignore')
X_new_scaled = scaler.transform(X_new)


# Make predictions with the TFLite model.
predictions = []
for i in range(len(X_new_scaled)):
    input_data = np.array([X_new_scaled[i]], dtype=np.float32)
    interpreter.set_tensor(input_details[0]['index'], input_data)
    interpreter.invoke()
    output_data = interpreter.get_tensor(output_details[0]['index'])
    predictions.append(output_data[0])

reconstruction_new = np.array(predictions)
new_error = np.mean(np.power(X_new_scaled - reconstruction_new, 2), axis=1)
threshold = 0.5 # Replace with your actual threshold
y_pred_new = [1 if e > threshold else 0 for e in new_error]
print(new_error)
new_data['Anomaly_Prediction'] = y_pred_new
print(new_data[['Anomaly_Prediction']])

[0.27270898 0.16557385 0.17481588 0.54150007 0.16025412]
   Anomaly_Prediction
0                   0
1                   0
2                   0
3                   1
4                   0
