In [11]:
#Importing the necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, WeightedRandomSampler, TensorDataset
import pandas as pd
import numpy as np
from tabulate import tabulate
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score , precision_score, confusion_matrix
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cpu


In [47]:
df = pd.read_csv('creditcard.csv')
#print count of column Class
print(df['Class'].value_counts())

X = df.drop('Class', axis=1)
y = df['Class']
X = X.values
y = y.values

#Splitting the data into training and testing data
train_size = int(0.8 * len(X))
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]


Class
0    284315
1       492
Name: count, dtype: int64


In [41]:
class CustomDataSet(Dataset):
    def __init__(self, data, labels):
        self.data = data
        self.labels = labels
        
    def __len__(self): 
        return len(self.data)
    
    def __getitem__(self, idx):
        x = self.data[idx]
        y = self.labels[idx]
        return x, y      
    
# #Converting the data into tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)


X_train = CustomDataSet(X_train, y_train)
X_test = CustomDataSet(X_test, y_test) 

train_loader = DataLoader(X_train, batch_size=1000, shuffle=True)
test_loader = DataLoader(X_test, batch_size=1000)

In [52]:
# #Same X_Train but width weights
# #Handle unbalenced dataset
# class_counts = np.bincount(y_train)
# class_weights = 1. / class_counts
# sample_weights = class_weights[y_train]
# sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True)

# train_dataset = TensorDataset(torch.tensor(X_train), torch.tensor(y_train) )
# test_dataset = TensorDataset(torch.tensor(X_test), torch.tensor(y_test))
# #DataLoader: Cette classe est utilisée pour charger les données en lots (batches) pendant l'entraînement du modèle.
# train_loader = DataLoader(train_dataset, batch_size=1000, sampler=sampler)
# test_loader = DataLoader(test_dataset, batch_size=1000)


In [53]:
for inputs, label in train_loader:
    if inputs.type() != 'torch.DoubleTensor':
        print(inputs.type())
    

In [6]:
class MyModel(nn.Module):
    def __init__(self, in_features, h1, h2, out_feature):
        super(MyModel, self).__init__()
        self.fc1 = nn.Linear(in_features ,h1)
        self.fc2 = nn.Linear(h1, h2)
        self.fc3 = nn.Linear(h2, out_feature)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        
    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        return x


In [25]:
def confusion_matrix_function(classes, confusion_matrix):
    print("Confusion Matrix")
    # Créer les en-têtes de la table avec les classes prédites
    headers = [""] + [f"Predicted: {cls}" for cls in classes.index]

    # Créer les lignes de la table avec les valeurs de la matrice de confusion
    rows = [
        [f"Class: {classes.index[i]} - {classes[i]}"] + list(confusion_matrix[i])
        for i in range(len(classes))
    ]

    # Afficher la table
    print(tabulate(rows, headers=headers, tablefmt="grid"))

In [54]:
model = MyModel(X.shape[1], 10, 10, 1)
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

epochs = 10  # Number of epochs
train_loss = []

for epoch in range(epochs):
    model.train()
    losses_train = 0
    correct_predictions, total_predictions = 0, 0 
    
    y_pred_train = []
    y_true_train = []
    for inputs, labels in train_loader:
        
        optimizer.zero_grad()  # Reset gradients to zero
        outputs_train = model(inputs)  # Pass data through the model
        loss = criterion(outputs_train.squeeze(), labels)  # Calculate loss
        loss.backward()  # Backpropagation
        optimizer.step()  # Update weights

        # Cumulate loss from each bach
        losses_train += loss.item()
        
        # Calculate the accuracy
        sigmoid_output = torch.sigmoid(outputs_train.squeeze())  #Apply sigmoid to the outputs cause outputs are logits
        predictions = (sigmoid_output >= 0.5).float()  # convert probabilities to binary predictions
        correct_predictions += (predictions == labels).sum().item()
        total_predictions += labels.size(0) 
        
        # Append the predictions to the list
        y_pred_train.extend(predictions.cpu().numpy())
        y_true_train.extend(labels.cpu().numpy())
    
    losses = losses_train / len(train_loader)
    train_loss.append(losses)
    
    train_acc = correct_predictions / total_predictions
    #Evaluation Mode
    model.eval() 
    
    y_pred_test, y_true_test = [], []
    
    with torch.no_grad():
        #loss and accuracy for the validation set
        losses_test = 0
        test_acc = 0
        
        correct_predictions_label = 0
        total_predictions = 0
        
        for inputs_test, labels_test in test_loader:
            outputs_labels = model(inputs_test)
            loss = criterion(outputs_labels.squeeze(), labels_test)
            losses_test += loss.item()
            
            acc = torch.sigmoid(outputs_labels.squeeze())
            prediction_test = (acc >= 0.5).float()
            correct_predictions_label += (prediction_test == labels_test).sum().item()
            total_predictions += labels_test.size(0)
            
            y_pred_test.extend(prediction_test.cpu().numpy())
            y_true_test.extend(labels_test.cpu().numpy())
        test_acc = correct_predictions_label / total_predictions
        
        losses_test = losses_test / len(test_loader)

    print('='*100)   
    print(f'Epoch: {epoch+1} - Loss Train : {losses:4f} - Loss Val : {losses_test:.4f} - Acc Train : {train_acc:.4f} - Acc Val : {test_acc:.4f}')
    confusion_matrix_function(df['Class'].value_counts(), confusion_matrix(y_true_test, y_pred_test))
    
    #UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior. _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
    #This warning is due to the fact that the model is not predicting any positive (1) class in the test set.

fig, ax = plt.subplots()
ax.plot(train_loss, label='Train')
fig.show()

RuntimeError: mat1 and mat2 must have the same dtype, but got Double and Float