<a href="https://colab.research.google.com/github/DevaYadhala-04/Credit-Card-Fraud/blob/main/CreditCardFraud.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from keras.models import Model
from keras.layers import Input, Dense, LSTM, Conv1D, concatenate, Reshape, Dropout
from keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, confusion_matrix, classification_report
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

# Function to load dataset
def load_dataset(file_path):
    """Read a CSV source into a pandas DataFrame.

    Args:
        file_path: Path (or any other source ``pandas.read_csv`` accepts as
            its first argument) of the CSV data to read.

    Returns:
        The DataFrame produced by ``pandas.read_csv`` with default options.
    """
    frame = pd.read_csv(file_path)
    return frame

# Function to preprocess data
def preprocess_data(df):
    """Separate the feature columns from the ``Class`` label column.

    Rows whose ``Class`` value is missing are discarded first. The input
    frame itself is not modified.

    Args:
        df: DataFrame containing a ``Class`` column.

    Returns:
        A ``(X, y)`` tuple: ``X`` holds every column except ``Class`` and
        ``y`` is the ``Class`` column, both restricted to the labelled rows.
    """
    labelled = df.loc[df['Class'].notna()]
    target = labelled['Class']
    features = labelled.drop(columns=['Class'])
    return features, target

# Function to handle class imbalance
def handle_class_imbalance(X, y):
    """Oversample the data with SMOTE using a fixed random seed.

    ``imblearn`` is imported lazily so the dependency is only required when
    this function is actually called.

    Args:
        X: Feature matrix handed to ``SMOTE.fit_resample``.
        y: Class labels aligned with ``X``.

    Returns:
        A ``(X_resampled, y_resampled)`` tuple as produced by
        ``SMOTE(random_state=42).fit_resample``.
    """
    from imblearn.over_sampling import SMOTE

    oversampler = SMOTE(random_state=42)
    balanced = oversampler.fit_resample(X, y)
    features, labels = balanced
    return features, labels

# Function to scale data
def scale_data(X):
    """Standardize ``X`` with a freshly fitted ``StandardScaler``.

    The scaler is fitted on ``X`` itself and is not returned, so its fitted
    parameters cannot be reused on other data.

    Args:
        X: Feature matrix to standardize.

    Returns:
        The result of ``StandardScaler().fit_transform(X)``.
    """
    standardized = StandardScaler().fit_transform(X)
    return standardized

# Function to split data into training and testing sets
def split_data(X, y, test_size):
    """Partition features and labels into train and test subsets.

    The split uses a fixed ``random_state`` of 42 so repeated runs produce
    the same partition.

    Args:
        X: Feature matrix.
        y: Labels aligned with ``X``.
        test_size: Forwarded unchanged to ``train_test_split``.

    Returns:
        The sequence returned by ``train_test_split(X, y, ...)``; callers in
        this file unpack it as ``X_train, X_test, y_train, y_test``.
    """
    partitions = train_test_split(X, y, random_state=42, test_size=test_size)
    return partitions

# Function to define autoencoder model
def define_autoencoder_model(input_dim, encoding_dim, output_activation='linear'):
    """Build and compile a single-hidden-layer dense autoencoder.

    The network is ``input -> Dense(encoding_dim, relu) -> Dense(input_dim)``
    and is compiled with the Adam optimizer and mean-squared-error loss.

    Args:
        input_dim: Number of features in each input vector.
        encoding_dim: Width of the bottleneck (encoded) layer.
        output_activation: Activation of the reconstruction layer. Defaults
            to ``'linear'`` because the inputs used in this file are
            standardized (zero mean, unbounded range); a ``'sigmoid'``
            output is confined to (0, 1) and cannot reconstruct negative or
            large values. Pass ``'sigmoid'`` only for inputs scaled to [0, 1].

    Returns:
        The compiled Keras ``Model``.
    """
    input_layer = Input(shape=(input_dim,))
    encoder = Dense(encoding_dim, activation='relu')(input_layer)
    # The decoder's output range must cover the range of the data it reconstructs.
    decoder = Dense(input_dim, activation=output_activation)(encoder)
    autoencoder = Model(inputs=input_layer, outputs=decoder)
    autoencoder.compile(optimizer='adam', loss='mean_squared_error')
    return autoencoder

# Function to train autoencoder model
def train_autoencoder_model(autoencoder, X_train, epochs, batch_size, validation_split=0.2):
    """Train the autoencoder to reconstruct its input, checkpointing the best weights.

    A shuffled hold-out portion of ``X_train`` is used for validation.
    Validating on the very data being fitted makes ``val_loss`` a training
    metric, so early stopping and best-weight selection could never react to
    overfitting.

    Args:
        autoencoder: Compiled Keras model mapping inputs to reconstructions.
        X_train: Training feature matrix.
        epochs: Maximum number of training epochs.
        batch_size: Mini-batch size.
        validation_split: Fraction of ``X_train`` (strictly between 0 and 1)
            held out for validation.

    Returns:
        Path of the weights file holding the best (lowest ``val_loss``) weights.
    """
    checkpoint_path = "autoencoder_weights.best.weights.h5"

    # Shuffle before splitting so the hold-out set does not depend on row order.
    X_fit, X_val = train_test_split(
        np.asarray(X_train), test_size=validation_split, random_state=42
    )

    checkpoint = ModelCheckpoint(
        checkpoint_path,
        monitor='val_loss',
        save_best_only=True,
        save_weights_only=True,
        mode='min',
        verbose=1,
    )
    autoencoder.fit(
        X_fit,
        X_fit,
        epochs=epochs,
        batch_size=batch_size,
        shuffle=True,
        validation_data=(X_val, X_val),
        callbacks=[EarlyStopping(patience=10), checkpoint],
    )
    return checkpoint_path

# Function to define hybrid model
def define_hybrid_model(input_dim):
    """Build and compile the two-branch Conv1D + LSTM binary classifier.

    The flat feature vector is reshaped to ``(input_dim, 1)`` and fed to two
    parallel branches: two stacked ``Conv1D`` layers (32 filters, kernel 3,
    ``same`` padding) and two stacked ``LSTM`` layers (32 units). The
    flattened convolution output and the final LSTM state are concatenated,
    passed through ``Dense(64, relu)`` and ``Dropout(0.2)``, and reduced to a
    single sigmoid unit.

    Args:
        input_dim: Number of features in each input vector.

    Returns:
        The Keras ``Model`` compiled with Adam, binary cross-entropy loss and
        an accuracy metric.
    """
    features_in = Input(shape=(input_dim,))
    sequence = Reshape((input_dim, 1))(features_in)

    # Convolutional branch.
    conv_branch = Conv1D(32, kernel_size=3, activation='relu', padding='same')(sequence)
    conv_branch = Conv1D(32, kernel_size=3, activation='relu', padding='same')(conv_branch)

    # Recurrent branch; only the last layer collapses the sequence dimension.
    lstm_branch = LSTM(32, return_sequences=True)(sequence)
    lstm_branch = LSTM(32, return_sequences=False)(lstm_branch)

    # Flatten the (steps, channels) convolution output into one vector.
    conv_steps = conv_branch.shape[1]
    conv_channels = conv_branch.shape[2]
    conv_flat = Reshape((conv_steps * conv_channels,))(conv_branch)

    merged = concatenate([conv_flat, lstm_branch])
    hidden = Dense(64, activation='relu')(merged)
    hidden = Dropout(0.2)(hidden)
    probability = Dense(1, activation='sigmoid')(hidden)

    classifier = Model(inputs=features_in, outputs=probability)
    classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return classifier

# Function to train hybrid model
def train_hybrid_model(hybrid_model, X_train, y_train, epochs, batch_size, validation_split=0.2):
    """Train the hybrid classifier, checkpointing the best weights.

    A shuffled hold-out portion of the training data is used for validation.
    Validating on the very data being fitted makes ``val_loss`` and
    ``val_accuracy`` training metrics, so early stopping and best-weight
    selection could never react to overfitting.

    Args:
        hybrid_model: Compiled Keras binary classifier.
        X_train: Training feature matrix.
        y_train: Binary labels aligned with ``X_train``.
        epochs: Maximum number of training epochs.
        batch_size: Mini-batch size.
        validation_split: Fraction of the training data (strictly between 0
            and 1) held out for validation.

    Returns:
        A ``(checkpoint_path, history)`` tuple: the path of the weights file
        holding the best (highest ``val_accuracy``) weights, and the Keras
        ``History`` object returned by ``fit``.
    """
    checkpoint_path = "hybrid_model_weights.best.weights.h5"

    # Shuffle before splitting so the hold-out set does not depend on row
    # order (e.g. oversamplers may append synthetic rows at the end).
    X_fit, X_val, y_fit, y_val = train_test_split(
        np.asarray(X_train),
        np.asarray(y_train),
        test_size=validation_split,
        random_state=42,
    )

    checkpoint = ModelCheckpoint(
        checkpoint_path,
        monitor='val_accuracy',
        save_best_only=True,
        save_weights_only=True,
        mode='max',
        verbose=1,
    )
    history = hybrid_model.fit(
        X_fit,
        y_fit,
        epochs=epochs,
        batch_size=batch_size,
        shuffle=True,
        validation_data=(X_val, y_val),
        callbacks=[EarlyStopping(patience=10), checkpoint],
    )
    return checkpoint_path, history

# Function to evaluate hybrid model and visualize
def _plot_training_history(history):
    """Plot per-epoch training/validation loss and accuracy side by side."""
    plt.figure(figsize=(12, 4))
    panels = (
        ('loss', 'val_loss', 'Loss'),
        ('accuracy', 'val_accuracy', 'Accuracy'),
    )
    for position, (train_key, val_key, label) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(history.history[train_key], label=f'Training {label}')
        plt.plot(history.history[val_key], label=f'Validation {label}')
        plt.xlabel('Epoch')
        plt.ylabel(label)
        plt.legend()
        plt.title(f'Training and Validation {label}')
    plt.show()


def _plot_confusion_matrix(conf_matrix):
    """Render a 2x2 confusion matrix as an annotated heat map."""
    plt.figure(figsize=(6, 6))
    plt.imshow(conf_matrix, cmap=plt.cm.Blues)
    plt.title('Confusion Matrix')
    plt.colorbar()
    plt.xticks([0, 1], ['Non-Fraud', 'Fraud'])
    plt.yticks([0, 1], ['Non-Fraud', 'Fraud'])
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    for (row, col), count in np.ndenumerate(conf_matrix):
        plt.text(col, row, count, ha='center', va='center', color='red')
    plt.show()


def _plot_class_distribution(labels):
    """Bar-plot how many samples carry each class label."""
    plt.figure(figsize=(6, 4))
    # value_counts() orders by frequency; sort by label so that the fixed
    # tick names below line up with the bars (assumes labels are 0 and 1).
    counts = pd.Series(np.asarray(labels).ravel()).value_counts().sort_index()
    counts.plot(kind='bar')
    plt.title('Class Distribution')
    plt.xlabel('Class')
    plt.ylabel('Count')
    plt.xticks([0, 1], ['Non-Fraud', 'Fraud'])
    plt.show()


def _plot_tsne_projection(features, labels, sample_size=1000, seed=42):
    """Scatter a PCA + 3-D t-SNE embedding of a random sample, coloured by label."""
    features = np.asarray(features)
    labels = np.asarray(labels).ravel()
    n_rows = features.shape[0]
    if n_rows < 3:
        print('Skipping t-SNE visualization: not enough samples.')
        return

    # Never ask for more rows than exist (sampling is without replacement).
    take = min(sample_size, n_rows)
    # A private generator keeps the draw reproducible without reseeding
    # NumPy's global random state.
    picked = np.random.RandomState(seed).choice(n_rows, take, replace=False)
    X_sample = features[picked]
    y_sample = labels[picked]

    # PCA cannot produce more components than min(n_samples, n_features).
    n_components = min(30, X_sample.shape[0], X_sample.shape[1])
    X_pca = PCA(n_components=n_components).fit_transform(X_sample)

    # t-SNE requires perplexity to be smaller than the number of samples.
    perplexity = min(30, take - 1)
    tsne = TSNE(n_components=3, random_state=42, perplexity=perplexity)
    X_tsne = tsne.fit_transform(X_pca)

    fig = plt.figure(figsize=(10, 7))
    ax = fig.add_subplot(111, projection='3d')
    for class_value, colour, name in ((0, 'blue', 'Non-Fraud'), (1, 'red', 'Fraud')):
        members = y_sample == class_value
        ax.scatter(X_tsne[members, 0], X_tsne[members, 1], X_tsne[members, 2],
                   c=colour, label=name, alpha=0.5)

    ax.set_title('3D t-SNE Visualization of Fraudulent Transactions')
    ax.set_xlabel('t-SNE Feature 1')
    ax.set_ylabel('t-SNE Feature 2')
    ax.set_zlabel('t-SNE Feature 3')
    ax.legend()
    plt.show()


# Function to evaluate hybrid model and visualize
def evaluate_hybrid_model(hybrid_model, X_test, y_test, history, X_train, X_resampled, y_resampled, y_train=None):
    """Report test metrics for the hybrid model and draw diagnostic plots.

    In order, this plots the training history, prints test accuracy and ROC
    AUC, prints predicted class counts, plots the confusion matrix, prints
    the classification report and per-class prediction tables, plots the
    class distribution of ``y_resampled`` and finally shows a 3-D t-SNE
    embedding of a random sample.

    Args:
        hybrid_model: Trained Keras binary classifier.
        X_test: Test feature matrix.
        y_test: Binary labels aligned with ``X_test``.
        history: Keras ``History`` with ``loss``, ``val_loss``, ``accuracy``
            and ``val_accuracy`` entries.
        X_train: Training feature matrix; only used for the t-SNE plot, and
            only when ``y_train`` is supplied.
        X_resampled: Unused; kept for backward compatibility.
        y_resampled: Labels whose class distribution is plotted.
        y_train: Optional labels aligned row-for-row with ``X_train``. When
            given, the t-SNE plot covers train and test data together. When
            omitted, only the test set is embedded, because ``y_resampled``
            is not guaranteed to share ``X_train``'s length or row order and
            would colour the points with the wrong labels.

    Raises:
        ValueError: If ``y_train`` is given but its length differs from
            ``X_train``'s.
    """
    _plot_training_history(history)

    y_true = np.asarray(y_test).ravel()

    # Evaluate test accuracy
    _, accuracy = hybrid_model.evaluate(X_test, y_test, verbose=0)
    print(f'Test Accuracy: {accuracy:.2f}')

    y_pred_prob = np.asarray(hybrid_model.predict(X_test)).ravel()

    auc = roc_auc_score(y_true, y_pred_prob)
    print(f'AUC: {auc:.2f}')

    # Threshold the probabilities at 0.5 to get hard predictions.
    y_pred = (y_pred_prob > 0.5).astype(int)

    fraud_count = np.sum(y_pred)
    non_fraud_count = len(y_pred) - fraud_count
    print(f'Predicted Non-Fraud Transactions: {non_fraud_count}')
    print(f'Predicted Fraud Transactions: {fraud_count}')

    # Fix the label set so the matrix is always 2x2, even if one class is
    # absent from both the truth and the predictions.
    conf_matrix = confusion_matrix(y_true, y_pred, labels=[0, 1])
    _plot_confusion_matrix(conf_matrix)

    # Print classification report
    print("Classification Report:")
    print(classification_report(y_true, y_pred))

    # Tabulate predictions by predicted class
    results = pd.DataFrame({'Actual': y_true, 'Predicted': y_pred})
    fraud_transactions = results[results['Predicted'] == 1]
    non_fraud_transactions = results[results['Predicted'] == 0]

    print("Fraud Transactions:\n", fraud_transactions)
    print("Non-Fraud Transactions:\n", non_fraud_transactions)

    # Visualize the balance of the dataset
    _plot_class_distribution(y_resampled)

    # PCA + t-SNE for visualization; features and labels must stay aligned.
    if y_train is not None:
        train_labels = np.asarray(y_train).ravel()
        if len(train_labels) != len(X_train):
            raise ValueError(
                f'y_train has {len(train_labels)} labels but X_train has {len(X_train)} rows'
            )
        X_combined = np.concatenate((np.asarray(X_train), np.asarray(X_test)), axis=0)
        y_combined = np.concatenate((train_labels, y_true), axis=0)
    else:
        X_combined = np.asarray(X_test)
        y_combined = y_true

    _plot_tsne_projection(X_combined, y_combined)

# Main function
def main():
    """Run the interactive fraud-detection pipeline end to end.

    Prompts for the dataset path and hyper-parameters, trains the
    autoencoder and the hybrid classifier, then evaluates the classifier on
    the held-out test set.

    The data is split into train and test sets BEFORE oversampling and
    scaling. Oversampling or fitting the scaler on the full dataset would
    leak information from the test rows into training and would put
    synthetic samples into the test set, inflating the reported metrics.

    Raises:
        ValueError: If a numeric prompt is answered with a non-numeric
            value, or the test size is not strictly between 0 and 1.
    """
    file_path = input("Enter the file path of the dataset: ")
    df = load_dataset(file_path)
    X, y = preprocess_data(df)

    test_size = float(input("Enter the test size (between 0 and 1): "))
    if not 0 < test_size < 1:
        raise ValueError(f"test size must be strictly between 0 and 1, got {test_size}")

    # Hold out the untouched test set first.
    X_train_raw, X_test_raw, y_train_raw, y_test = split_data(X, y, test_size)

    # Balance only the training portion; the test set keeps its real class mix.
    X_resampled, y_resampled = handle_class_imbalance(X_train_raw, y_train_raw)

    # Fit the scaler on training data only and reuse it for the test set.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_resampled)
    X_test = scaler.transform(X_test_raw)
    y_train = y_resampled

    encoding_dim = int(input("Enter the encoding dimension for the autoencoder: "))
    autoencoder = define_autoencoder_model(X_train.shape[1], encoding_dim)
    autoencoder_epochs = int(input("Enter the number of epochs to train the autoencoder: "))
    autoencoder_batch_size = int(input("Enter the batch size to train the autoencoder: "))
    train_autoencoder_model(autoencoder, X_train, autoencoder_epochs, autoencoder_batch_size)

    hybrid_model = define_hybrid_model(X_train.shape[1])
    hybrid_epochs = int(input("Enter the number of epochs to train the hybrid model: "))
    hybrid_batch_size = int(input("Enter the batch size to train the hybrid model: "))
    hybrid_checkpoint_path, history = train_hybrid_model(hybrid_model, X_train, y_train, hybrid_epochs, hybrid_batch_size)

    # Evaluate with the best checkpointed weights rather than the last epoch's.
    hybrid_model.load_weights(hybrid_checkpoint_path)
    evaluate_hybrid_model(hybrid_model, X_test, y_test, history, X_train, X_resampled, y_resampled)

# Run the pipeline only when executed as a script or notebook cell. The
# interpreter sets __name__ to "__main__" in that case, never to "main".
if __name__ == "__main__":
    main()