In [None]:
import torch
import torch.nn as nn
import pickle
import pandas as pd
from torch import nn, einsum
import numpy as np
from sklearn.model_selection import train_test_split
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt
from imblearn.combine import SMOTETomek,SMOTEENN
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, roc_curve, roc_auc_score, f1_score, matthews_corrcoef,auc
import matplotlib.pyplot as plt
from collections import Counter
import seaborn as sns


### Load the training and test text files into pandas DataFrames

In [None]:
# Read the semicolon-delimited training and test files.
df_train = pd.read_table("/content/traininingdata.txt", sep=';')
df_test = pd.read_table("/content/testdata.txt", sep=';')
print(df_train.shape)

# Columns that hold categorical values.
categorical_columns = [
    'job', 'marital', 'education', 'default', 'housing',
    'loan', 'contact', 'month', 'poutcome',
]

# Number of distinct values per categorical column, in the order listed above.
distinct_value_counts = []
for column in categorical_columns:
    distinct_value_counts.append(df_train[column].nunique())

print(distinct_value_counts)
print(df_train.shape)
# Class balance of the target column.
print(Counter(df_train['y']))

Performing the Exploratory Data Analysis


In [None]:
# First and last five rows of the training set.
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the two slices instead.
pd.concat([df_train.head(), df_train.tail()])

In [None]:
# Summary statistics of the training set.
df_train.describe()

In [None]:
# Print a concise structural summary of the training DataFrame.
df_train.info()

In [None]:
# First and last five rows of the test set.
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the two slices instead.
pd.concat([df_test.head(), df_test.tail()])

In [None]:
# Summary statistics of the test set.
df_test.describe()

In [None]:
# Print a concise structural summary of the test DataFrame.
df_test.info()

### There are no null values in the table

In [None]:
# Number of null entries per column in the training set.
df_train.isnull().sum()

In [None]:
# Number of null entries per column in the test set.
df_test.isnull().sum()

In [None]:
# Heatmap of the null mask of the training set (colour bar disabled).
sns.heatmap(df_train.isnull(),cbar=False,cmap='viridis')

In [None]:
# Heatmap of the null mask of the test set (colour bar disabled).
sns.heatmap(df_test.isnull(),cbar=False,cmap='viridis')


In [None]:
# Annotated correlation heatmap of the training set.
plt.figure(figsize=(10,10))
# Only numeric columns can be correlated: pandas >= 2.0 raises on the string
# columns instead of silently dropping them, so they are excluded explicitly.
sns.heatmap(df_train.select_dtypes(include='number').corr(),cbar=True,annot=True,cmap='Blues')

* Next we can analyze strongly correlated columns such as pdays and previous:
* How the two variables affect each other.
* Whether the Cartesian product of the variables has an effect on the label.
* Whether there is a possibility of crossover between the two variables.

In [None]:
# Scatter plot of `pdays` against `previous` for the training set.
plt.figure(figsize=(10,10))
plt.scatter(x='pdays',y='previous',data=df_train)
plt.xlabel('pdays')
plt.ylabel('previous')

In [None]:
# Bar chart of how many training rows carry each value of the target `y`.
sns.countplot(x='y',data=df_train,palette='hls')
plt.show()

In [None]:
# Distribution of `age`. sns.distplot is deprecated and scheduled for removal;
# histplot on a density scale with a KDE overlay is its documented replacement.
sns.histplot(df_train['age'], kde=True, stat='density', kde_kws=dict(cut=3))


In [None]:
# Distribution of `balance`. sns.distplot is deprecated and scheduled for removal;
# histplot on a density scale with a KDE overlay is its documented replacement.
sns.histplot(df_train['balance'], kde=True, stat='density', kde_kws=dict(cut=3))

In [None]:
# Distribution of `day`. sns.distplot is deprecated and scheduled for removal;
# histplot on a density scale with a KDE overlay is its documented replacement.
sns.histplot(df_train['day'], kde=True, stat='density', kde_kws=dict(cut=3))

In [None]:
# Distribution of `duration`. sns.distplot is deprecated and scheduled for removal;
# histplot on a density scale with a KDE overlay is its documented replacement.
sns.histplot(df_train['duration'], kde=True, stat='density', kde_kws=dict(cut=3))

In [None]:
# Distribution of `campaign`. sns.distplot is deprecated and scheduled for removal;
# histplot on a density scale with a KDE overlay is its documented replacement.
sns.histplot(df_train['campaign'], kde=True, stat='density', kde_kws=dict(cut=3))

* Construct the FT-Transformer model for tabular data

In [None]:
!pip install einops

from einops import rearrange, repeat

# feedforward and attention

class GEGLU(nn.Module):
    """GELU-gated linear unit.

    The last dimension of the input is split into two equal halves; the first
    half is multiplied element-wise by the GELU of the second half, so the
    output's last dimension is half the input's.
    """

    def forward(self, x):
        """Return ``value * gelu(gates)`` where ``value, gates = x.chunk(2, dim=-1)``."""
        x, gates = x.chunk(2, dim = -1)
        # Call through `nn.functional` (already in scope via `torch.nn`) so this
        # class does not depend on an `F` alias being imported somewhere else.
        return x * nn.functional.gelu(gates)

def FeedForward(dim, mult = 4, dropout = 0.):
    """Build the pre-norm feed-forward block used inside each transformer layer.

    The block normalizes its input, projects it to ``2 * dim * mult`` features,
    gates them with ``GEGLU`` (which halves the width back to ``dim * mult``),
    applies dropout and projects back to ``dim``.
    """
    hidden = dim * mult
    stages = [
        nn.LayerNorm(dim),
        # twice the hidden width: GEGLU consumes one half as gates
        nn.Linear(dim, hidden * 2),
        GEGLU(),
        nn.Dropout(dropout),
        nn.Linear(hidden, dim),
    ]
    return nn.Sequential(*stages)

class Attention(nn.Module):
    """Pre-norm multi-head self-attention that also exposes its attention map.

    Args:
        dim: feature size of the incoming tokens.
        heads: number of attention heads.
        dim_head: feature size of each head.
        dropout: dropout probability applied to the attention weights.
    """

    def __init__(self, dim, heads = 8, dim_head = 64, dropout = 0.):
        super().__init__()
        self.heads = heads
        self.scale = dim_head ** -0.5
        inner_dim = dim_head * heads

        self.norm = nn.LayerNorm(dim)

        # a single projection produces queries, keys and values side by side
        self.to_qkv = nn.Linear(dim, inner_dim * 3, bias = False)
        self.to_out = nn.Linear(inner_dim, dim, bias = False)

        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Return ``(out, attn)``: the projected attention output and the
        post-softmax attention weights (taken before dropout)."""
        num_heads = self.heads
        normed = self.norm(x)

        # split the joint projection and move the head axis next to the batch axis
        q, k, v = [
            rearrange(part, 'b n (h d) -> b h n d', h = num_heads)
            for part in self.to_qkv(normed).chunk(3, dim = -1)
        ]
        q = q * self.scale

        scores = einsum('b h i d, b h j d -> b h i j', q, k)
        attn = scores.softmax(dim = -1)

        # dropout only affects the weights used for mixing, not the returned map
        mixed = einsum('b h i j, b h j d -> b h i d', self.dropout(attn), v)
        merged = rearrange(mixed, 'b h n d -> b n (h d)', h = num_heads)

        return self.to_out(merged), attn

# transformer

class Transformer(nn.Module):
    """Stack of ``depth`` residual (attention, feed-forward) layer pairs."""

    def __init__(self, dim, depth, heads, dim_head, attn_dropout, ff_dropout):
        super().__init__()
        self.layers = nn.ModuleList([])

        layer_index = 0
        while layer_index < depth:
            attention = Attention(dim, heads = heads, dim_head = dim_head, dropout = attn_dropout)
            feed_forward = FeedForward(dim, dropout = ff_dropout)
            self.layers.append(nn.ModuleList([attention, feed_forward]))
            layer_index += 1

    def forward(self, x, return_attn = False):
        """Run ``x`` through every layer.

        Returns ``x`` alone, or ``(x, attention_maps)`` when ``return_attn`` is
        true, where the maps of all layers are stacked along a new first axis.
        """
        collected = []

        for attend, feed_forward in self.layers:
            attended, weights = attend(x)
            collected.append(weights)

            # residual connections around both sub-blocks
            x = attended + x
            x = feed_forward(x) + x

        if return_attn:
            return x, torch.stack(collected)

        return x

# numerical embedder

class NumericalEmbedder(nn.Module):
    """Turn each numerical feature into a ``dim``-sized token.

    Every feature has its own learned weight vector and bias vector; the token
    is ``value * weight + bias``.
    """

    def __init__(self, dim, num_numerical_types):
        super().__init__()
        table_shape = (num_numerical_types, dim)
        self.weights = nn.Parameter(torch.randn(table_shape))
        self.biases = nn.Parameter(torch.randn(table_shape))

    def forward(self, x):
        """Map a ``(batch, features)`` input to ``(batch, features, dim)`` tokens."""
        # trailing singleton axis lets the per-feature tables broadcast
        values = rearrange(x, 'b n -> b n 1')
        scaled = values * self.weights
        return scaled + self.biases

# main class

class FTTransformer(nn.Module):
    """Transformer over per-feature tokens for tabular data.

    Every categorical column is embedded through one shared embedding table
    (columns are kept apart with per-column offsets), every continuous column is
    embedded by ``NumericalEmbedder``, a learned CLS token is prepended, the
    token sequence is processed by ``Transformer`` and the CLS position is
    mapped to ``dim_out`` logits.

    Args:
        categories: number of distinct values of each categorical column.
        num_continuous: number of continuous columns.
        dim: token / model width.
        depth: number of transformer layers.
        heads: number of attention heads.
        dim_head: width of each attention head.
        dim_out: number of output logits.
        num_special_tokens: embedding rows reserved before the first category.
        attn_dropout: dropout probability on the attention weights.
        ff_dropout: dropout probability inside the feed-forward blocks.
    """

    def __init__(
        self,
        *,
        categories,
        num_continuous,
        dim,
        depth,
        heads,
        dim_head = 16,
        dim_out = 1,
        num_special_tokens = 2,
        attn_dropout = 0.,
        ff_dropout = 0.
    ):
        super().__init__()
        assert all(map(lambda n: n > 0, categories)), 'number of each category must be positive'
        assert len(categories) + num_continuous > 0, 'input shape must not be null'

        # categories related calculations

        self.num_categories = len(categories)
        self.num_unique_categories = sum(categories)

        # create category embeddings table

        self.num_special_tokens = num_special_tokens
        total_tokens = self.num_unique_categories + num_special_tokens

        # for automatically offsetting unique category ids to the correct position in the categories embedding table

        if self.num_unique_categories > 0:
            # Offsets are the running sum of the category counts, starting after
            # the special tokens. `nn.functional` is used directly so the class
            # does not depend on an `F` alias being imported somewhere else.
            categories_offset = nn.functional.pad(torch.tensor(list(categories)), (1, 0), value = num_special_tokens)
            categories_offset = categories_offset.cumsum(dim = -1)[:-1]
            self.register_buffer('categories_offset', categories_offset)

            # categorical embedding

            self.categorical_embeds = nn.Embedding(total_tokens, dim)

        # continuous

        self.num_continuous = num_continuous

        if self.num_continuous > 0:
            self.numerical_embedder = NumericalEmbedder(dim, self.num_continuous)

        # cls token

        self.cls_token = nn.Parameter(torch.randn(1, 1, dim))

        # transformer

        self.transformer = Transformer(
            dim = dim,
            depth = depth,
            heads = heads,
            dim_head = dim_head,
            attn_dropout = attn_dropout,
            ff_dropout = ff_dropout
        )

        # to logits

        self.to_logits = nn.Sequential(
            nn.LayerNorm(dim),
            nn.ReLU(),
            nn.Linear(dim, dim_out)
        )

    def forward(self, x_categ, x_numer, return_attn = False):
        """Compute logits for a batch.

        Args:
            x_categ: integer category ids; its last dimension must equal the
                number of categorical columns. Ids are per-column (starting at
                0); the column offsets are added here.
            x_numer: continuous values, one column per continuous feature.
            return_attn: when true, also return the stacked attention maps.

        Returns:
            ``logits`` or ``(logits, attns)``.
        """
        assert x_categ.shape[-1] == self.num_categories, f'you must pass in {self.num_categories} values for your categories input'

        xs = []
        if self.num_unique_categories > 0:
            # shift each column's ids into its own slice of the shared table
            x_categ = x_categ + self.categories_offset

            x_categ = self.categorical_embeds(x_categ)

            xs.append(x_categ)

        # add numerically embedded tokens
        if self.num_continuous > 0:
            x_numer = self.numerical_embedder(x_numer)

            xs.append(x_numer)

        # concat categorical and numerical

        x = torch.cat(xs, dim = 1)

        # append cls tokens
        b = x.shape[0]
        cls_tokens = repeat(self.cls_token, '1 1 d -> b 1 d', b = b)
        x = torch.cat((cls_tokens, x), dim = 1)

        # attend

        x, attns = self.transformer(x, return_attn = True)

        # get cls token

        x = x[:, 0]

        # out in the paper is linear(relu(ln(cls)))

        logits = self.to_logits(x)

        if not return_attn:
            return logits

        return logits, attns

* Use SMOTE to oversample the training data and increase the number of samples with label '1' (`yes`).
* SMOTE creates synthetic samples by interpolating between nearby samples of the same class.

In [None]:
from sklearn.preprocessing import LabelEncoder
from imblearn.combine import SMOTETomek,SMOTEENN
from imblearn.over_sampling import KMeansSMOTE
from collections import Counter
from imblearn.over_sampling import SMOTE

# Maps each categorical column to the class labels learned from the training data.
d = {}
categorical_columns = ['job','marital','education','default','housing','loan','contact','month','poutcome']
# Fit one encoder per column on the training data and reuse it for the test
# data. Re-fitting on the test set could assign different integer codes to the
# same category whenever the two splits do not contain the same set of values.
for col in categorical_columns:
    le = LabelEncoder()
    df_train[col] = le.fit_transform(df_train[col])
    df_test[col] = le.transform(df_test[col])
    d[col] = le.classes_
print('---')
# NOTE(review): plain SMOTE interpolates the integer-encoded categorical columns
# as if they were continuous; consider imblearn's SMOTENC for mixed data - TODO confirm.
# NOTE(review): the target counts are hard-coded; presumably 31937 is the number
# of 'no' rows in the training set - verify against Counter(df_train['y']).
smo = SMOTE(sampling_strategy={'yes': 31937, 'no': 31937},random_state=2023)

label_mapping = {"no": 0, "yes": 1}

print(df_train)
# Features are every column but the last; the last column is used as the target.
X_smo, y_smo = smo.fit_resample(df_train.iloc[:,:-1], df_train.iloc[:,-1])
print(Counter(y_smo))
col_x = [i for i in df_train.columns if i not in ['y']]
x_train = X_smo
y_train = y_smo
# The test set is left untouched by the oversampling.
x_test, y_test = df_test[col_x], df_test.y
print(y_train)
print(y_test)
# Convert the string targets to 0/1.
y_test = y_test.map(label_mapping)
y_train = y_train.map(label_mapping)
print(x_train.shape)
print(x_test.shape)
print(y_train.shape)
print(df_train.shape)


In [None]:
# Annotated correlation heatmap of the training set after label encoding.
plt.figure(figsize=(16,10))
# The target column `y` still holds strings at this point; pandas >= 2.0 raises
# on non-numeric columns in corr(), so only numeric columns are correlated.
sns.heatmap(df_train.select_dtypes(include='number').corr(),cbar=True,annot=True,cmap='Blues')

In [None]:

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

* Prepare the training and test data
* Place the categorical (discrete) columns first and the continuous columns second

In [None]:

# Continuous columns: everything that is not listed as categorical.
res = [name for name in x_train.columns if name not in categorical_columns]

# Categorical columns first, continuous columns after them, for both splits.
x_train_reordered = x_train[categorical_columns + res]
x_test_reordered = x_test[categorical_columns + res]
print(x_train_reordered)

# Training tensors and loader (shuffled).
x_train_tensor = torch.FloatTensor(x_train_reordered.values)
print(x_train_tensor.shape)
y_train_tensor = torch.LongTensor(y_train.values)
train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
train_dataloader = DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
    pin_memory=True,
)

# Test tensors and loader (order preserved so predictions line up with y_test).
x_test_tensor = torch.FloatTensor(x_test_reordered.values)
y_test_tensor = torch.LongTensor(y_test.values)
test_dataset = TensorDataset(x_test_tensor, y_test_tensor)
test_dataloader = DataLoader(
    test_dataset,
    batch_size=64,
    shuffle=False,
    pin_memory=True,
)







In [None]:
import torch.nn.functional as F
# Use the GPU when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# One token per continuous column.
num_continuous = len(res)

model = FTTransformer(
    categories = distinct_value_counts,   # number of unique values within each categorical column
    num_continuous = num_continuous,      # number of continuous columns
    dim = x_train.shape[1],               # model width is set to the number of input columns
    dim_out = 1,                          # single logit for the binary target
    depth = 6,                            # number of transformer layers
    heads = 8,                            # number of attention heads
    attn_dropout = 0.1,                   # dropout on the attention weights
    ff_dropout = 0.1                      # dropout inside the feed-forward blocks
)
model = model.to(device)

* Weight the loss according to the number of samples in each class

In [None]:
class WeightedBCEWithLogitsLoss(nn.Module):
    """Binary cross-entropy on logits with one weight per class.

    Args:
        weight: indexable by class id (``weight[0]`` for negatives,
            ``weight[1]`` for positives), e.g. a 1-D tensor. ``None`` gives the
            unweighted loss.
    """

    def __init__(self, weight=None):
        super(WeightedBCEWithLogitsLoss, self).__init__()
        self.weight = weight

    def forward(self, inputs, targets):
        """Return the mean weighted BCE-with-logits loss.

        Args:
            inputs: raw logits.
            targets: 0/1 targets with the same shape as ``inputs``.
        """
        if self.weight is None:
            return nn.functional.binary_cross_entropy_with_logits(inputs, targets)
        # Use the weights given to the constructor (not a module-level global)
        # and place them on the same device / dtype as the logits.
        class_weight = torch.as_tensor(self.weight, dtype=inputs.dtype, device=inputs.device)
        # Vectorized lookup of each element's class weight; replaces the
        # per-element Python loop.
        weights = class_weight[targets.long()]
        loss = nn.functional.binary_cross_entropy_with_logits(inputs, targets, weight=weights)
        return loss
# Create category weights: positives (class 1) count four times as much as negatives.
class_weights = torch.tensor([1,4])  #



In [None]:
# Define the loss function and optimizer
criterion = WeightedBCEWithLogitsLoss(weight=class_weights)
optimizer = optim.Adam(model.parameters(), lr=0.001)

# training model
epochs = 10
for epoch in range(epochs):
    model.train()
    for i, (inputs, labels) in enumerate(train_dataloader):
        # The first len(categorical_columns) columns are category ids, the rest are continuous.
        inputs_categorical = inputs[:, :len(categorical_columns)].long().to(device)
        inputs_continuous = inputs[:, len(categorical_columns):].to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs_categorical, inputs_continuous)
        # The model emits one logit per row, so the targets get a matching trailing axis.
        loss = criterion(outputs, labels.float().unsqueeze(1))

        loss.backward()
        optimizer.step()

        # Log the loss every 250 iterations.
        if i % 250 == 0:
            print("epoch: {}, iteration: {}, loss: {}".format(epoch, i, loss.item()))

    # Save a checkpoint of the weights every 10 epochs
    if (epoch + 1) % 10 == 0:
        torch.save(model.state_dict(), f"model_{epoch+1}.pth")

# save model file
filename = 'best_model_TF_continue3.pkl'
# using pickle
with open(filename, 'wb') as file:
    pickle.dump(model, file)

# Predictions on the test set
model.eval()
with torch.no_grad():
    predictions = []
    for inputs, _ in test_dataloader:
        inputs_categorical = inputs[:, :len(categorical_columns)].long().to(device)
        inputs_continuous = inputs[:, len(categorical_columns):].to(device)
        outputs = model(inputs_categorical, inputs_continuous)
        # Drop only the trailing logit axis: a bare squeeze() would also drop
        # the batch axis of a final batch of size 1, and extend() would then
        # fail on the resulting scalar.
        predicted = torch.round(torch.sigmoid(outputs.squeeze(-1)))
        predictions.extend(predicted.tolist())

print("Predictions:", predictions)

In [None]:
# Pickle the whole model object (not just its state dict) under a second file name.
filename = 'best_model_TF_with_weight_8000_data_120_epoch_.pkl'
#
with open(filename, 'wb') as file:
    pickle.dump(model, file)

In [None]:
# load model file
# NOTE: unpickling executes arbitrary code - only load model files you created yourself.
# filename = '/content/best_model_TF_with_weight_8000_data_120_epoch_.pkl'
# # model = None
# with open(filename, 'rb') as file:
#     model = pickle.load(file)


# Collect hard 0/1 predictions and the underlying probabilities for the test set.
model.eval()
with torch.no_grad():
    predictions = []
    pred_probability = []
    for inputs, _ in test_dataloader:
        # The first len(categorical_columns) columns are category ids, the rest are continuous.
        inputs_categorical = inputs[:, :len(categorical_columns)].long().to(device)
        inputs_continuous = inputs[:, len(categorical_columns):].to(device)
        outputs = model(inputs_categorical, inputs_continuous)
        # Drop only the trailing logit axis: a bare squeeze() would also drop
        # the batch axis of a final batch of size 1, and extend() would then
        # fail on the resulting scalar.
        predicted_probability = torch.sigmoid(outputs.squeeze(-1))
        # Threshold by rounding the probability.
        predicted = torch.round(predicted_probability)
        predictions.extend(predicted.tolist())
        pred_probability.extend(predicted_probability.tolist())

print("Predictions:", predictions)
# `outputs` below refers to the logits of the last batch only.
print(outputs)
print(outputs.shape)
print(type(predictions))
print(len(predictions))
print(type(outputs))
print(outputs.shape)
y_pred = predictions
print(len(pred_probability))
print(pred_probability)

In [None]:
from scipy.special import expit
# Hard 0/1 predictions collected from the test set.
y_pred = predictions

# ROC curve and area under it, computed from the predicted probabilities.
fpr, tpr, thresholds = roc_curve(y_test, pred_probability)
auc_value = auc(fpr, tpr)

plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (AUC = {auc_value:.2f})')
# Diagonal reference line.
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc='lower right')
plt.show()

print("AUC Score:", auc_value)

# Threshold-based scores, computed up front and reported below.
f1 = f1_score(y_test, y_pred)
macro_f1 = f1_score(y_test, y_pred, average='macro')
micro_f1 = f1_score(y_test, y_pred, average='micro')
mcc = matthews_corrcoef(y_test, y_pred)

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("\nClassification Report:")
print(classification_report(y_test, y_pred))

print("\nAccuracy Score:", accuracy_score(y_test, y_pred))
print("Macro-average F-measure:", macro_f1)
print("Micro-average F-measure:", micro_f1)
print("Matthews Correlation Coefficient (MCC):", mcc)
print("F1 Score:", f1)

# %%

In [None]:
# Persist the predicted labels to y_pred.npy.
np.save('y_pred.npy',y_pred)

In [None]:
# Persist the true test labels to y_test.npy.
np.save('y_test.npy',y_test)


In [None]:


# Text form of the classification report (not used further in this cell).
report = classification_report(y_test, y_pred)

# Dictionary form, used for plotting here and by the bar-chart cell.
report_dict = classification_report(y_test, y_pred, output_dict=True)

# Drop the last row of the frame, then transpose so each report entry becomes a heatmap row.
sns.heatmap(pd.DataFrame(report_dict).iloc[:-1, :].T, annot=True, cmap="Blues")

plt.xlabel("Metrics")
plt.ylabel("Target Labels")
plt.title("Classification Report")

plt.show()

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

# Confusion matrix of the test predictions, drawn as an annotated heatmap.
cm = confusion_matrix(y_test, y_pred)

plt.figure(figsize=(3, 2.25))
# fmt='d' prints the cell counts as integers.
sns.heatmap(cm, annot=True, cmap='Reds', fmt='d', cbar=False)
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()


In [None]:
# Grouped bar chart of precision / recall / F1 for each class in the report.
# Only the per-class entries are plotted; the aggregate rows are skipped.
labels = [label for label in report_dict.keys() if label not in ('accuracy', 'macro avg', 'weighted avg')]
precision = [report_dict[label]['precision'] for label in labels]
recall = [report_dict[label]['recall'] for label in labels]
# Named `f1_scores` (plural) so the list does not overwrite the imported
# sklearn `f1_score` function, which the metrics cell calls when re-run.
f1_scores = [report_dict[label]['f1-score'] for label in labels]

x = range(len(labels))
width = 0.185

# One bar group per metric, each shifted by a multiple of the bar width.
plt.bar(x, precision, width, label='Precision')
plt.bar([i + width for i in x], recall, width, label='Recall')
plt.bar([i + 2 * width for i in x], f1_scores, width, label='F1-Score')
# Write the rounded value on top of every bar.
for i, val in enumerate(precision):
    plt.text(i, val, round(val, 2), ha='center', va='bottom')
for i, val in enumerate(recall):
    plt.text(i+width, val, round(val, 2), ha='center', va='bottom')
for i, val in enumerate(f1_scores):
    plt.text(i+2*width, val, round(val, 2), ha='center', va='bottom')
plt.xlabel('Labels')
plt.ylabel('Score')
plt.title('Classification Report')

# Centre the tick labels under the middle bar of each group.
plt.xticks([i + width for i in x], labels)
plt.legend(loc='upper center')

plt.show()
