In [1]:
#Importing the necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, WeightedRandomSampler, TensorDataset
import pandas as pd
import numpy as np
import time
from tabulate import tabulate
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score
from sklearn.model_selection import GridSearchCV
from skorch import NeuralNetClassifier
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cpu


In [2]:
df = pd.read_csv('creditcard.csv')
#print count of column Class
print(df['Class'].value_counts())

X = df.drop('Class', axis=1)
y = df['Class']
X = X.values
y = y.values

#Splitting the data into training and testing data
# Utiliser train_Test_split de scikit-learn
train_size = int(0.8 * len(X))
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]
batch_size = 2**11


Class
0    284315
1       492
Name: count, dtype: int64


# Création d'un dataset

In [3]:

class CustomDataSet(Dataset):
    def __init__(self, data, labels):
        self.data = data
        self.labels = labels
        
    def __len__(self): 
        return len(self.data)
    
    def __getitem__(self, idx):
        x = self.data[idx]
        y = self.labels[idx]
        return x, y      
    
#Converting the data into tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

X_train = CustomDataSet(X_train, y_train)
X_test = CustomDataSet(X_test, y_test) 

train_loader = DataLoader(X_train, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(X_test, batch_size=batch_size)


# Création d'un data équilibré

In [11]:
# Calcul des poids de classe pour gérer un dataset déséquilibré
class_counts = np.bincount(y_train)
class_weights = 1. / class_counts
sample_weights = class_weights[y_train]
sampler = WeightedRandomSampler(weights=torch.tensor(sample_weights, dtype=torch.float32), 
                                num_samples=len(sample_weights), 
                                replacement=True)

# Création des datasets d'entraînement et de test avec conversion explicite en float32
train_dataset = TensorDataset(torch.tensor(X_train, dtype=torch.float32), 
                              torch.tensor(y_train, dtype=torch.float32))
test_dataset = TensorDataset(torch.tensor(X_test, dtype=torch.float32), 
                             torch.tensor(y_test, dtype=torch.float32))

# Création des DataLoaders
train_loader = DataLoader(train_dataset, batch_size=1000, sampler=sampler)
test_loader = DataLoader(test_dataset, batch_size=1000)


IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

In [4]:
def confusion_matrix_function(classes, confusion_matrix):
    print("Confusion Matrix")
    # Créer£ les en-têtes de la table avec les classes prédites
    headers = [""] + [f"Predicted: {cls}" for cls in classes.index]

    # Créer les lignes de la table avec les valeurs de la matrice de confusion
    rows = [
        [f"Class: {classes.index[i]} - {classes[i]}"] + list(confusion_matrix[i])
        for i in range(len(classes))
    ]

    # Afficher la table
    print(tabulate(rows, headers=headers, tablefmt="grid"))

In [5]:
#Creation du model
class MyFlexibleModel(nn.Module):
    def __init__(self, in_features, hidden_layers, num_unit, out_features, dropout_rate=0.5):
        super(MyFlexibleModel, self).__init__()
        
        # Create a list to store the layers
        self.layers = nn.ModuleList()
        self.layers.append(nn.Linear(in_features, num_unit))
        
        # Add the hidden layers
        for i in range(1, hidden_layers):
            self.layers.append(nn.Linear(num_unit, num_unit))
            self.layers.append(nn.BatchNorm1d(num_unit))
            self.layers.append(nn.ReLU())  
        
        # Ajouter la couche de sortie
        self.layers.append(nn.Linear(num_unit, out_features))
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        for i in range(len(self.layers) - 1):
            x = self.layers[i](x)
            # x = self.dropout(x)
        
        x = self.layers[-1](x)
        x = torch.sigmoid(x)
        return x




In [11]:
def train_model(model, train_loader, criterion, optimizer, epochs=10):
    
    train_loss = []
    test_loss = []

    start_time = time.time()
    acc_train = []
    acc_test = []

    # avoir les attributs du model
    print(model)
    for epoch in range(epochs):
        model.train()
        losses_train = 0
        correct_predictions, total_predictions = 0, 0 
        
        y_pred_train = []
        y_true_train = []
        
    
        for inputs, labels in train_loader:
            
            optimizer.zero_grad()  # Reset gradients to zero
            outputs_train = model(inputs)  # Pass data through the model
            loss = criterion(outputs_train.squeeze(), labels)  # Calculate loss
            loss.backward()  # Backpropagation
            optimizer.step()  # Update weights

            # Cumulate loss from each bach
            losses_train += loss.item()
            
            # Calculate the accuracy
            #Apply sigmoid to the outputs cause outputs are logits
            predictions = (outputs_train.squeeze()  >= 0.5).float()  # convert probabilities to binary predictions
            correct_predictions += (predictions == labels).sum().item()
            total_predictions += labels.size(0) 
            
            # Append the predictions to the list
            y_pred_train.extend(predictions.cpu().numpy())
            y_true_train.extend(labels.cpu().numpy())
        
        losses = losses_train / len(train_loader)
        train_loss.append(losses)
        
        train_acc = correct_predictions / total_predictions
        #Evaluation Mode
        model.eval() 
        
        y_pred_test, y_true_test = [], []
        
        with torch.no_grad():
            #loss and accuracy for the validation set
            losses_test = 0
            test_acc = 0
            
            correct_predictions_label = 0
            total_predictions = 0
            
            for inputs_test, labels_test in test_loader:
                outputs_labels = model(inputs_test)
                loss = criterion(outputs_labels.squeeze(), labels_test)
                losses_test += loss.item()
                
                prediction_test = (outputs_labels.squeeze() >= 0.5).float()

                correct_predictions_label += (prediction_test == labels_test).sum().item()
                total_predictions += labels_test.size(0)
                
                y_pred_test.extend(prediction_test.numpy())
                y_true_test.extend(labels_test.numpy())
            test_acc = correct_predictions_label / total_predictions
            
            losses_test = losses_test / len(test_loader)
            test_loss.append(losses_test)
            
            acc_train.append(train_acc)
            acc_test.append(test_acc)

        print('='*100)   
        print(f'Epoch: {epoch+1} - Loss Train : {losses:4f} - Loss Val : {losses_test:.4f} - Acc Train : {train_acc:.4f} - Acc Val : {test_acc:.4f}')
            
    end_time = time.time() 

    print(f"Training time: {end_time - start_time}s")
    # Graphe pour les pertes (Loss)
    return train_loss, test_loss, acc_train, acc_test


def show_plt(train_loss, test_loss, acc_train, acc_test):
    fig1, ax1 = plt.subplots()
    ax1.plot(train_loss, label='Train Loss', color='blue')
    ax1.plot(test_loss, label='Test Loss', color='red')
    ax1.set_xlabel('Epochs')
    ax1.set_ylabel('Loss')
    ax1.legend()
    ax1.set_title('Training and Validation Loss')
    
    fig2, ax2 = plt.subplots()
    ax2.plot(acc_train, label='Train Accuracy', color='blue')
    ax2.plot(acc_test, label='Test Accuracy', color='red')
    ax2.set_xlabel('Epochs')
    ax2.set_ylabel('Accuracy')
    ax2.legend()
    ax2.set_title('Training and Validation Accuracy')

    plt.show()




In [12]:
params = [
    {
        "hidden_layers": 10,
        "num_unit": 20,
        "lr": 0.0001
    },
    {
        "hidden_layers": 10,
        "num_unit": 40,
        "lr": 0.0001
        
    },
    {
        "hidden_layers": 10,
        "num_unit": 60,
        "lr": 0.0001
    },
    {
        "hidden_layers": 10,
        "num_unit": 80,
        "lr": 0.0001
        
    },
    {
        "hidden_layers": 10,
        "num_unit": 100,
        "lr": 0.0001
        
    },
]

for param in params: 
    model = MyFlexibleModel(X.shape[1], param["hidden_layers"], param["num_unit"], 1)
    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
    train_loss, test_loss, acc_train, acc_test = train_model(model, train_loader, criterion, optimizer, 30)
    show_plt(train_loss, test_loss, acc_train, acc_test)

MyFlexibleModel(
  (layers): ModuleList(
    (0): Linear(in_features=30, out_features=20, bias=True)
    (1): Linear(in_features=20, out_features=20, bias=True)
    (2): BatchNorm1d(20, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): ReLU()
    (4): Linear(in_features=20, out_features=20, bias=True)
    (5): BatchNorm1d(20, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): Linear(in_features=20, out_features=20, bias=True)
    (8): BatchNorm1d(20, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU()
    (10): Linear(in_features=20, out_features=20, bias=True)
    (11): BatchNorm1d(20, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU()
    (13): Linear(in_features=20, out_features=20, bias=True)
    (14): BatchNorm1d(20, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (15): ReLU()
    (16): Linear(in_features=20, out_features=20, bias=True)
    (17

Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x11143f2b0>>
Traceback (most recent call last):
  File "/opt/anaconda3/envs/cardfraud/lib/python3.9/site-packages/ipykernel/ipkernel.py", line 775, in _clean_thread_parent_frames
    def _clean_thread_parent_frames(
KeyboardInterrupt: 


Epoch: 6 - Loss Train : 0.418345 - Loss Val : 0.3498 - Acc Train : 0.9980 - Acc Val : 0.9986
Epoch: 7 - Loss Train : 0.383336 - Loss Val : 0.3170 - Acc Train : 0.9982 - Acc Val : 0.9987
Epoch: 8 - Loss Train : 0.351501 - Loss Val : 0.2509 - Acc Train : 0.9982 - Acc Val : 0.9987
Epoch: 9 - Loss Train : 0.322103 - Loss Val : 0.2244 - Acc Train : 0.9982 - Acc Val : 0.9987
Epoch: 10 - Loss Train : 0.294737 - Loss Val : 0.2436 - Acc Train : 0.9982 - Acc Val : 0.9987
Epoch: 11 - Loss Train : 0.268628 - Loss Val : 0.1450 - Acc Train : 0.9982 - Acc Val : 0.9987
Epoch: 12 - Loss Train : 0.244564 - Loss Val : 0.1749 - Acc Train : 0.9982 - Acc Val : 0.9987


KeyboardInterrupt: 