In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder
from sklearn.pipeline import Pipeline
from sklearn.base import BaseEstimator, TransformerMixin, clone
from sklearn.metrics import accuracy_score

from tqdm.auto import tqdm
from timeit import default_timer as timer

In [2]:
def print_train_time(start, end):
    print(f"\ntrain time: {end - start}")

In [3]:
data = pd.read_csv("drug_consumption.csv")

In [4]:
df = data.copy()

In [5]:
df = df.drop(columns=['ID'])

In [6]:
df.columns

Index(['Age', 'Gender', 'Education', 'Country', 'Ethnicity', 'Nscore',
       'Escore', 'Oscore', 'Ascore', 'Cscore', 'Impulsive', 'SS', 'Alcohol',
       'Amphet', 'Amyl', 'Benzos', 'Caff', 'Cannabis', 'Choc', 'Coke', 'Crack',
       'Ecstasy', 'Heroin', 'Ketamine', 'Legalh', 'LSD', 'Meth', 'Mushrooms',
       'Nicotine', 'Semer', 'VSA'],
      dtype='object')

In [7]:
features = df.select_dtypes(include=['number']).columns
labels = df.select_dtypes(include=['object']).columns

In [8]:
#{i:len(df[i].value_counts()) for i in num_cols}
cat_cols = [i for i in features if len(df[i].value_counts()) < 10]
cat_cols

['Age', 'Gender', 'Education', 'Country', 'Ethnicity']

### Age

In [9]:
start = 24
temp = [[18 if i == 0 else start + 10*(i-1) + 1, start + 10*i] for i in range(5)]
age_groups = [f"{i[0]} - {i[1]}" for i in temp]
age_groups.append("65+")
Age_map = {j:age_groups[i] for i, j in enumerate(sorted(df[cat_cols[0]].unique()))}
Age_map

{-0.95197: '18 - 24',
 -0.07854: '25 - 34',
 0.49788: '35 - 44',
 1.09449: '45 - 54',
 1.82213: '55 - 64',
 2.59171: '65+'}

### Gender

In [10]:
Gender_map = dict(zip(df[cat_cols[1]].unique(), ["Female", "Male"]))
Gender_map

{0.48246: 'Female', -0.48246: 'Male'}

### Education

In [11]:
education_groups = ["Left School Before 16 years",
"Left School at 16 years",
"Left School at 17 years",
"Left School at 18 years",
"Some College,No Certificate Or Degree",
"Professional Certificate/ Diploma",
"University Degree",
"Masters Degree",
"Doctorate Degree"]
Education_map = dict(zip(sorted(df[cat_cols[2]].unique()), education_groups))
Education_map

{-2.43591: 'Left School Before 16 years',
 -1.7379: 'Left School at 16 years',
 -1.43719: 'Left School at 17 years',
 -1.22751: 'Left School at 18 years',
 -0.61113: 'Some College,No Certificate Or Degree',
 -0.05921: 'Professional Certificate/ Diploma',
 0.45468: 'University Degree',
 1.16365: 'Masters Degree',
 1.98437: 'Doctorate Degree'}

### Country

In [12]:
Country_map = {-0.09765 : "Australia",
0.24923 : "Canada",
-0.46841 : "New Zealan",
-0.28519 : "Other",
0.21128 : "Republic of Ireland",
0.96082 : "UK",
-0.57009 : "USA"}
Country_map

{-0.09765: 'Australia',
 0.24923: 'Canada',
 -0.46841: 'New Zealan',
 -0.28519: 'Other',
 0.21128: 'Republic of Ireland',
 0.96082: 'UK',
 -0.57009: 'USA'}

### Ethnicity

In [13]:
Ethnicity_map  = {-0.50212 : "Asian",
-1.10702 : "Black",
1.90725 : "Mixed-Black/Asian",
0.12600 : "Mixed-White/Asian",
-0.22166 : "Mixed-White/Black",
0.11440 : "Other",
-0.31685 : "White"}
Ethnicity_map

{-0.50212: 'Asian',
 -1.10702: 'Black',
 1.90725: 'Mixed-Black/Asian',
 0.126: 'Mixed-White/Asian',
 -0.22166: 'Mixed-White/Black',
 0.1144: 'Other',
 -0.31685: 'White'}

In [14]:
Label_map = {"CL0" : "Never Used",
"CL1" : "Used over a Decade Ago",
"CL2" : "Used in Last Decade",
"CL3" : "Used in Last Year",
"CL4" : "Used in Last Month",
"CL5" : "Used in Last Week",
"CL6" : "Used in Last Day"}
Label_map

{'CL0': 'Never Used',
 'CL1': 'Used over a Decade Ago',
 'CL2': 'Used in Last Decade',
 'CL3': 'Used in Last Year',
 'CL4': 'Used in Last Month',
 'CL5': 'Used in Last Week',
 'CL6': 'Used in Last Day'}

In [15]:
maps = [Age_map, Gender_map, Education_map, Country_map, Ethnicity_map]

for i, j in enumerate(maps):
    df[cat_cols[i]] = df[cat_cols[i]].map(j)

In [16]:
df = df.drop(["Semer", "Ethnicity"], axis=1)

In [17]:
np.unique(df["Age"])

array(['18 - 24', '25 - 34', '35 - 44', '45 - 54', '55 - 64', '65+'],
      dtype=object)

In [18]:
df["Age"] = df["Age"].replace({'65+' : '55 - 64'})
df["Age"] = df["Age"].replace({'55 - 64' : '55+'})

In [19]:
df.loc[(df['Country']=='Australia')|(df['Country']=='Canada')|(df['Country']=='New Zealand')|(df['Country']=='Other')|(df['Country']=='Republic of Ireland'),'Country']='Other_countries'

In [20]:
stimulants = ["Choc","Caff","Amphet", "Coke", "Crack", "Meth", "Nicotine"]
depressants = ["Alcohol", "Benzos", "Heroin", "Legalh", "Amyl", "VSA"]
hallucinogens = ["LSD", "Mushrooms", "Ketamine", "Cannabis", "Ecstasy"]

In [21]:
df.head()

Unnamed: 0,Age,Gender,Education,Country,Nscore,Escore,Oscore,Ascore,Cscore,Impulsive,...,Crack,Ecstasy,Heroin,Ketamine,Legalh,LSD,Meth,Mushrooms,Nicotine,VSA
0,35 - 44,Female,Professional Certificate/ Diploma,UK,0.31287,-0.57545,-0.58331,-0.91699,-0.00665,-0.21712,...,CL0,CL0,CL0,CL0,CL0,CL0,CL0,CL0,CL2,CL0
1,25 - 34,Male,Doctorate Degree,UK,-0.67825,1.93886,1.43533,0.76096,-0.14277,-0.71126,...,CL0,CL4,CL0,CL2,CL0,CL2,CL3,CL0,CL4,CL0
2,35 - 44,Male,Professional Certificate/ Diploma,UK,-0.46725,0.80523,-0.84732,-1.6209,-1.0145,-1.37983,...,CL0,CL0,CL0,CL0,CL0,CL0,CL0,CL1,CL0,CL0
3,18 - 24,Female,Masters Degree,UK,-0.14882,-0.80615,-0.01928,0.59042,0.58489,-1.37983,...,CL0,CL0,CL0,CL2,CL0,CL0,CL0,CL0,CL2,CL0
4,35 - 44,Female,Doctorate Degree,UK,0.73545,-1.6334,-0.45174,-0.30172,1.30612,-0.21712,...,CL0,CL1,CL0,CL0,CL1,CL0,CL0,CL2,CL2,CL0


In [22]:
## df.to_csv("drug.csv")

In [23]:
score_cols = df.iloc[:, 4:(4+7)].columns
demo_cols = df.iloc[:, :4].columns
drug_cols = df.iloc[:, 11:].columns

In [24]:
np.unique(df.Alcohol)

array(['CL0', 'CL1', 'CL2', 'CL3', 'CL4', 'CL5', 'CL6'], dtype=object)

In [25]:
class OneHot(BaseEstimator, TransformerMixin):
    def __init__(self, columns=None):
        self.columns = columns
        self.encoder = OneHotEncoder(sparse_output=False, drop='first')

    def fit(self, X, y=None):
        if (self.columns is not None):
            self.encoder.fit(X[self.columns])
        return self

    def transform(self, X, y=None):
        if (self.columns is not None):
            onehot_encoded = self.encoder.transform(X[self.columns])
            feature_names = self.encoder.get_feature_names_out()
            result_df = pd.DataFrame(onehot_encoded, columns=feature_names, index=X.index)
            return pd.concat([X.drop(columns=self.columns), result_df], axis=1)
        else:
            return X

In [26]:
class Ordinal(BaseEstimator, TransformerMixin):
    def __init__(self, columns=None, custom_categories=None):
        """
        columns: list of column names to be encoded.
        custom_categories: list of lists specifying the order of categories for each column.
        """
        self.columns = columns
        self.custom_categories = custom_categories
        self.encoder = OrdinalEncoder(categories=custom_categories)

    def fit(self, X, y=None):
        if self.columns is not None:
            self.encoder.fit(X[self.columns])
        return self

    def transform(self, X, y=None):
        if self.columns is not None:
            ordinal_encoded = self.encoder.transform(X[self.columns])
            result_df = pd.DataFrame(ordinal_encoded, columns=self.columns, index=X.index)
            return pd.concat([X.drop(columns=self.columns), result_df], axis=1)
        else:
            return X

In [27]:
oneHot = OneHot(demo_cols[demo_cols != "Education"])
X = oneHot.fit_transform(pd.concat([df[demo_cols],  df[score_cols]], axis=1))
ordinal = Ordinal(["Education"], [education_groups])
X = ordinal.fit_transform(X)

In [28]:
y = df[drug_cols]
usage_map = dict(zip(np.unique(y), np.array([0, 0, 0, 1, 1, 1, 1])))
y = y.replace(usage_map)

  y = y.replace(usage_map)


## MLSMOTE

In [29]:
from sklearn.neighbors import NearestNeighbors
import random

In [30]:
y.shape

(1885, 18)

In [31]:
np.unique(y, axis=0).shape

(654, 18)

In [32]:
(y.value_counts()).sum()

1885

In [33]:
def get_tail_label(df: pd.DataFrame, ql=[0.05, 1.]) -> list:
    """
    Find the underrepresented targets.
    Underrepresented targets are those which are observed less than the median occurance.
    Targets beyond a quantile limit are filtered.
    """
    irlbl = df.sum(axis=0)
    irlbl = irlbl[(irlbl > irlbl.quantile(ql[0])) & ((irlbl < irlbl.quantile(ql[1])))]  # Filtering
    irlbl = irlbl.max() / irlbl
    threshold_irlbl = irlbl.median()
    tail_label = irlbl[irlbl > threshold_irlbl].index.tolist()
    return tail_label

def get_minority_samples(X: pd.DataFrame, y: pd.DataFrame, ql=[0.05, 1.]):
    """
    return
    X_sub: pandas.DataFrame, the feature vector minority dataframe
    y_sub: pandas.DataFrame, the target vector minority dataframe
    """
    tail_labels = get_tail_label(y, ql=ql)
    index = y[y[tail_labels].apply(lambda x: (x == 1).any(), axis=1)].index.tolist()
    
    X_sub = X[X.index.isin(index)].reset_index(drop = True)
    y_sub = y[y.index.isin(index)].reset_index(drop = True)
    return X_sub, y_sub

def nearest_neighbour(X: pd.DataFrame, neigh) -> list:
    """
    Give index of 10 nearest neighbor of all the instance
    
    args
    X: np.array, array whose nearest neighbor has to find
    
    return
    indices: list of list, index of 5 NN of each element in X
    """
    nbs = NearestNeighbors(n_neighbors=neigh, metric='euclidean', algorithm='kd_tree').fit(X)
    euclidean, indices = nbs.kneighbors(X)
    return indices

def MLSMOTE(X, y, n_sample, neigh=5):
    """
    Give the augmented data using MLSMOTE algorithm
    
    args
    X: pandas.DataFrame, input vector DataFrame
    y: pandas.DataFrame, feature vector dataframe
    n_sample: int, number of newly generated sample
    
    return
    new_X: pandas.DataFrame, augmented feature vector data
    target: pandas.DataFrame, augmented target vector data
    """
    indices2 = nearest_neighbour(X, neigh=5)
    n = len(indices2)
    new_X = np.zeros((n_sample, X.shape[1]))
    target = np.zeros((n_sample, y.shape[1]))
    for i in range(n_sample):
        reference = random.randint(0, n-1)
        neighbor = random.choice(indices2[reference, 1:])
        all_point = indices2[reference]
        nn_df = y[y.index.isin(all_point)]
        ser = nn_df.sum(axis = 0, skipna = True)
        target[i] = np.array([1 if val > 0 else 0 for val in ser])
        ratio = random.random()
        gap = X.loc[reference,:] - X.loc[neighbor,:]
        new_X[i] = np.array(X.loc[reference,:] + ratio * gap)
    new_X = pd.DataFrame(new_X, columns=X.columns)
    target = pd.DataFrame(target, columns=y.columns)
    return new_X, target

In [34]:
get_tail_label(y)

['Amyl', 'Coke', 'Heroin', 'Ketamine', 'LSD', 'Meth', 'Mushrooms', 'VSA']

In [35]:
X_sub, y_sub = get_minority_samples(X, y)

In [36]:
X_res, y_res = MLSMOTE(X_sub, y_sub, 500, 5)

In [37]:
X_new = pd.concat([X, X_res], axis=0)
y_new = pd.concat([y, y_res], axis=0)

In [38]:
X_new.shape, y_new.shape

((2385, 16), (2385, 18))

In [39]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
#X_train, X_test, y_train, y_test = train_test_split(X_new, y_new, test_size=0.2, random_state=42)

In [40]:
X_train.shape, X_test.shape

((1508, 16), (377, 16))

## OneVsRest

In [94]:
from sklearn.multioutput import MultiOutputClassifier
from sklearn.multiclass import OneVsRestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, hamming_loss

In [95]:
#logistic_clf = MultiOutputClassifier(LogisticRegression())
logistic_clf = OneVsRestClassifier(LogisticRegression())

In [96]:
logistic_clf.fit(X_train, y_train)

In [97]:
preds = logistic_clf.predict(X_test)

In [98]:
accuracy_score(y_test, preds)

0.20689655172413793

In [99]:
y_test.shape

(377, 18)

In [100]:
hamming_loss(y_test, preds)

0.14913056292366636

In [101]:
loss = 0
for i in range(y_test.shape[0]):
    h_loss =hamming_loss(y_test.to_numpy()[i], preds[i])
    loss += h_loss
print(loss / y_test.shape[0])

0.14913056292366658


In [102]:
print(y_test.iloc[4:5, :].to_numpy())
print(preds[4:5, :])

[[1 1 0 1 1 1 1 1 0 1 1 1 1 1 1 1 1 0]]
[[1 1 0 1 1 1 1 1 0 1 0 1 1 1 0 1 1 0]]


In [108]:
# Initialize the base classifier
base_rf = RandomForestClassifier(n_estimators=100, random_state=42, max_depth=5)

# Initialize the MultiOutputClassifier with the base classifier
multi_rf = MultiOutputClassifier(base_rf, n_jobs=-1)

# Fit the model
multi_rf.fit(X_train, y_train)

# Predict on the test set
y_pred = multi_rf.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
hamming = hamming_loss(y_test, y_pred)

print(f'Accuracy: {accuracy:.4f}')
print(f'Hamming Loss: {hamming:.4f}')

y_pred = multi_rf.predict(X_train)
accuracy = accuracy_score(y_train, y_pred)
hamming = hamming_loss(y_train, y_pred)

print(f'Accuracy: {accuracy:.4f}')
print(f'Hamming Loss: {hamming:.4f}')

Accuracy: 0.2334
Hamming Loss: 0.1468
Accuracy: 0.2805
Hamming Loss: 0.1223


## MLKNN

In [50]:
from skmultilearn.problem_transform import BinaryRelevance
from sklearn.neighbors import KNeighborsClassifier
from skmultilearn.adapt import MLkNN

In [67]:
# Initialize KNN classifier
knn = KNeighborsClassifier(n_neighbors=5)

# Wrap KNN with Binary Relevance for multilabel classification
#classifier = BinaryRelevance(knn)
classifier = knn

# Fit the model
classifier.fit(X_train.to_numpy(), y_train)

# Predict on the test set
y_pred = classifier.predict(X_test.to_numpy())

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.4f}')

hamming = hamming_loss(y_test, y_pred)
print(f'Hamming Loss: {hamming:.4f}')

Accuracy: 0.2042
Hamming Loss: 0.1581


In [68]:
# Initialize MLkNN classifier
classifier = MLkNN(k=5)

# Fit the model
classifier.fit(np.array(X_train), np.array(y_train))

# Predict on the test set
y_pred = classifier.predict(X_test.to_numpy())

# Evaluate the model using Hamming loss
hamming = hamming_loss(y_test, y_pred)
print(f'Hamming Loss: {hamming:.4f}')

accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.4f}')

Hamming Loss: 0.1578
Accuracy: 0.2016


## ChianClassifer

In [69]:
from sklearn.multioutput import ClassifierChain

## 1. Understanding Label Dependencies:

    Substance Use: Drugs like "Coke" (Cocaine) and "Crack" (Crack Cocaine) might be related.
    Party Drugs: "Ecstasy", "LSD", "Cannabis", "Mushrooms" might correlate.
    Common Use: "Alcohol", "Nicotine", and "Caffeine" are legal and commonly used, possibly indicating less correlation with illegal drugs.
    Stimulants: "Meth", "Coke", "Amphet" (Amphetamines) might be grouped.
    Psychedelics: "LSD", "Mushrooms", "Ketamine" could be placed near each other.

## 2. Proposed Order:

> Based on these correlations, a possible order could be:

    Caff (Caffeine): Commonly used stimulant, often independent of illicit drug use.
    Nicotine: Another common substance, often related to legal use.
    Alcohol: Widely used, potentially correlated with social/party drugs.
    Cannabis: Often used in combination with both legal substances and party drugs.
    Ecstasy: Party drug, possibly correlated with other recreational drugs.
    LSD: Psychedelic, often used in similar contexts as Ecstasy.
    Mushrooms: Psychedelic, related to LSD.
    Ketamine: Used recreationally, might follow other party drugs.
    Amphet: Stimulant, related to Meth and other uppers.
    Meth: Potent stimulant, likely correlated with other stimulants.
    Coke: Another stimulant, similar to Meth and Crack.
    Crack: Often related to Coke, might follow it.
    Benzos: Depressant, might be used with stimulants or to counteract them.
    Heroin: Potent depressant, often correlated with high-risk substance use.
    Legalh (Legal Highs): Synthetic drugs, might follow illicit ones.
    VSA (Volatile Substance Abuse): Inhalants, possibly correlated with high-risk behaviors.
    Choc (Chocolate): Less commonly related to other substances.
    Amyl: Nitrites, sometimes used recreationally but less common.

In [130]:
# Initialize a base classifier
#base_classifier = LogisticRegression(solver='lbfgs', max_iter=1000)
base_classifier = KNeighborsClassifier(n_neighbors=5)

index_list = [4, 16, 0, 5, 9, 13, 15, 11, 1, 14, 7, 8, 3, 10, 12, 17, 6, 2]

# Initialize the Classifier Chain
chain_classifier = ClassifierChain(base_classifier, order=index_list, random_state=42)

# Fit the model
chain_classifier.fit(X_train, y_train)

# Predict on the test set
y_pred = chain_classifier.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
hamming = hamming_loss(y_test, y_pred)

print(f'Accuracy: {accuracy:.4f}')
print(f'Hamming Loss: {hamming:.4f}')

Accuracy: 0.1963
Hamming Loss: 0.1615


## MLP

In [110]:
class MLP(nn.Module):
    def __init__(self, input_size, output_size, hidden_units):
        super().__init__()
        self.layer1 = nn.Linear(input_size, hidden_units)
        self.layer2 = nn.Linear(hidden_units, hidden_units)
        self.output = nn.Linear(hidden_units, output_size)

    def forward(self, x):
        x = self.layer1(x)
        x = torch.relu(x)
        x = self.layer2(x)
        x = torch.relu(x)
        x = self.output(x)
        x = torch.sigmoid(x)
        return x
    

In [111]:
X_train_tensor = torch.tensor(X_train.to_numpy(), dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.to_numpy(), dtype=torch.float32)

X_test_tensor = torch.tensor(X_test.to_numpy(), dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.to_numpy(), dtype=torch.float32)

In [112]:
X_train_tensor.dtype, y_train_tensor.dtype

(torch.float32, torch.float32)

In [113]:
y_test_tensor

tensor([[1., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 1., 1., 0.],
        ...,
        [1., 1., 1.,  ..., 1., 1., 0.],
        [1., 1., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 1., 0.]])

In [114]:
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=32)

In [115]:
next(iter(train_dataset))[0].dtype

torch.float32

In [116]:
next(iter(train_dataloader))[0].dtype, next(iter(train_dataloader))[1].dtype

(torch.float32, torch.float32)

In [117]:
input_shape = X_train.shape[1]
output_shape = y_train.shape[1]

In [118]:
model = MLP(input_shape, output_shape, 30)

In [119]:
model

MLP(
  (layer1): Linear(in_features=16, out_features=30, bias=True)
  (layer2): Linear(in_features=30, out_features=30, bias=True)
  (output): Linear(in_features=30, out_features=18, bias=True)
)

In [120]:
loss_fn = nn.BCELoss()
optimizer = optim.Adam(model.parameters())

In [121]:
def accuracy(y_true, y_pred):
    y_pred = y_pred.round()
    correct = (y_pred == y_true).float()  # Get a tensor of 1s and 0s
    acc = correct.sum() / correct.numel()  # Mean of correct predictions
    return acc

In [122]:
torch.manual_seed(42)
start = timer()

epochs = 100

for epoch in tqdm(range(epochs)):
    print(f"Epoch: {epoch} \n ---------")

    train_loss, train_acc = 0, 0

    for batch, (X, y) in enumerate(train_dataloader):
        y_pred = model(X)

        loss = loss_fn(y_pred, y)

        train_acc += accuracy(y, y_pred)

        train_loss += loss

        optimizer.zero_grad()

        loss.backward()

        optimizer.step()

        if batch % 10 == 0:
            print(f"looked at {batch * len(X)} / {len(train_dataloader.dataset)} samples")

    train_loss /= len(train_dataloader)
    train_acc /= len(train_dataloader)

    test_loss, test_acc = 0, 0

    model.eval()

    with torch.inference_mode():
        for X, y, in test_dataloader:

            test_pred = model(X)

            test_acc += accuracy(y, test_pred)
            test_loss += loss_fn(test_pred, y)

        test_loss /= len(test_dataloader)
        test_acc /= len(test_dataloader)

    print(f"\ntrain loss: {train_loss} | train acc: {train_acc} | val loss: {test_loss} | val acc: {test_acc}\n")

            
end = timer()
print_train_time(start, end)

  0%|          | 0/100 [00:00<?, ?it/s]

Epoch: 0 
 ---------
looked at 0 / 1508 samples
looked at 320 / 1508 samples
looked at 640 / 1508 samples
looked at 960 / 1508 samples
looked at 1280 / 1508 samples

train loss: 0.5967720150947571 | train acc: 0.7168330550193787 | val loss: 0.47205042839050293 | val acc: 0.8201272487640381

Epoch: 1 
 ---------
looked at 0 / 1508 samples
looked at 320 / 1508 samples
looked at 640 / 1508 samples
looked at 960 / 1508 samples
looked at 1280 / 1508 samples

train loss: 0.4106966257095337 | train acc: 0.831163227558136 | val loss: 0.3853749930858612 | val acc: 0.8307870030403137

Epoch: 2 
 ---------
looked at 0 / 1508 samples
looked at 320 / 1508 samples
looked at 640 / 1508 samples
looked at 960 / 1508 samples
looked at 1280 / 1508 samples

train loss: 0.3716700077056885 | train acc: 0.8414351344108582 | val loss: 0.36188456416130066 | val acc: 0.8364872932434082

Epoch: 3 
 ---------
looked at 0 / 1508 samples
looked at 320 / 1508 samples
looked at 640 / 1508 samples
looked at 960 / 1508

In [124]:
model.eval()

with torch.inference_mode():

    total_h_loss = 0
    total_acc = 0
    for X, y in test_dataloader:
        y_pred = model(X)
        acc = accuracy(y, y_pred)
        h_loss = hamming_loss(y, y_pred.round())
        total_h_loss += h_loss
        total_acc += acc
    print(total_h_loss/ len(test_dataloader))
    print(total_acc / len(test_dataloader))

    total_h_loss = 0
    total_acc = 0
    for X, y in train_dataloader:
        y_pred = model(X)
        acc = accuracy(y, y_pred)
        h_loss = hamming_loss(y, y_pred.round())
        total_h_loss += h_loss
        total_acc += acc
    print(total_h_loss/ len(train_dataloader))
    print(total_acc / len(train_dataloader))

0.15020833333333333
tensor(0.8498)
0.12532552083333337
tensor(0.8747)
