## Downloads & Imports

In [None]:
%%capture
!pip install -U --no-cache-dir gdown --pre
!gdown --id 1b6Vu_yh1eAbJinvp63P67VQ6Sn7pcrIy&export=download
!gdown --id 1TzCcl3-V_pkk7kktHmwELcu4Cmfv17Pn&export=download
!gdown --id 1f4xhqYUxMdRKVzxGpYL0j_l5ELGdj9Wq&export=download
!gdown --id 1qomdb9MzBQqKrKZiDuLD1UsUBwGTwL7H&export=download
!gdown --id 1ovjoOMZmQ2GV9dVxRB4pHA63xCeSgQNG&export=download
!gdown --id 17SKe_98cC3yk-W3Hi-jVV1RVqlmqMeaB&export=download
!pip install py-readability-metrics
!pip install detoxify
!pip install -U sentence-transformers

In [None]:
import pandas as pd
import numpy as np
import nltk
nltk.download('punkt')
from nltk.tokenize import sent_tokenize, word_tokenize
from readability import Readability
from textblob import TextBlob
import xml.etree.ElementTree as ET
from detoxify import Detoxify
from sentence_transformers import SentenceTransformer
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


Moving 0 files to the new cache system


0it [00:00, ?it/s]

## Feature Extraction

In [None]:
example = "It depends on what people want. If you personally NEED personal contact and proximity, then yes, obviously long distance does not work for you. But if both parties don't need that, then long distance might be the factor that actually makes the relationship *possible.* Not everyone is the same. Things that are absolutely necessary for you are not even desired by other people. If you need sexual contact in a relationship, then going without is a big deal. But for people who's fetishes can be done online (masturbation, voyeurism, talking dirty, phone sex) the distance is not a problem. Some relationships don't even want or need sex of any kind. They might simply NOT have a sex drive, but still crave companionship. TL;DR: People are very different and have different needs. Just because many people can't make long distance relationships work, does not mean that all people can't make long distance relationships work."

Statistics Features

In [None]:
def extract_statistics_features(text):
  """Compute simple length statistics for a piece of text.

  Parameters
  ----------
  text : str
      Raw text to analyse.

  Returns
  -------
  list
      ``[num_sentences, num_words, num_unique_words, mean_words, std_words]``.
      The last two are the mean and standard deviation of the number of
      tokens per sentence; they are ``0.0`` when the text has no sentences.
  """
  sentences = sent_tokenize(text)
  words = word_tokenize(text)

  words_per_sentence = np.array([len(word_tokenize(sentence)) for sentence in sentences])

  # np.mean / np.std of an empty array return NaN (and emit a RuntimeWarning),
  # which would leak NaN into the feature matrix for empty texts.
  if words_per_sentence.size:
    mean_words = np.mean(words_per_sentence)
    std_words = np.std(words_per_sentence)
  else:
    mean_words = std_words = 0.0

  return [len(sentences), len(words), len(set(words)), mean_words, std_words]

statistics_features = ['num_sentences', 'num_words', 'num_unique_words', 'mean_words', 'std_words']
assert len(extract_statistics_features(example)) == len(statistics_features)

Special Characters Count

In [None]:
# Tokens whose raw occurrence counts are used as features.
# ":(" and the token ";)*" are deliberately not part of the set.
special_chars = ["?", "!", ":)", '"', "#", "TL;DR", "~~"]

def get_features_special_chars(text):
  """Return the number of occurrences in `text` of each entry of `special_chars`, in list order."""
  return list(map(text.count, special_chars))

assert len(get_features_special_chars(example)) == len(special_chars)

Sentiment Analysis (textblob)

In [None]:
def get_stantiment_analysis(text):
  """Return ``[polarity, subjectivity]`` of `text` as reported by TextBlob's sentiment."""
  sentiment = TextBlob(text).sentiment
  return [sentiment.polarity, sentiment.subjectivity]

santiment_features = ["polarity", "subjectivity"]
assert len(get_stantiment_analysis(example)) == len(santiment_features)

Discourse Markers (DiMLex-Eng)

In [None]:
dimlex_xml = r"/content/DiMLex-Eng.xml"

tree = ET.parse(dimlex_xml)
root = tree.getroot()

# Entries look like <entry id="1" word="once">; keep the `word` attribute of each one.
word_list_dimlex = [entry.get("word") for entry in root.findall('entry')]

# print(word_list_dimlex)
# print(f"# of words: {len(word_list_dimlex)}")

def get_features_from_dimlex(text):
  """Return one 0/1 flag per entry of `word_list_dimlex`.

  A flag is 1 when the entry occurs anywhere in the lower-cased text. This is a
  plain substring test, so an entry also matches inside longer words.
  """
  haystack = text.lower()
  return [int(marker in haystack) for marker in word_list_dimlex]

discourse_features = [f"dm{x} - {word_list_dimlex[x]}" for x in range(len(word_list_dimlex))]
assert len(get_features_from_dimlex(example)) == len(discourse_features)

Detoxify

In [None]:
detoxcifier = Detoxify('original')

def get_detoxify_features(text):
  """Return the values of the Detoxify prediction dict for `text`, in the dict's own order."""
  return [*detoxcifier.predict(text).values()]

ditoxify_features = ["toxicity", "severe_toxicity", "obscene", "threat", "insult", "identity_attack"]
assert len(get_detoxify_features(example)) == len(ditoxify_features)

Downloading: "https://github.com/unitaryai/detoxify/releases/download/v0.1-alpha/toxic_original-c1212f89.ckpt" to /root/.cache/torch/hub/checkpoints/toxic_original-c1212f89.ckpt


  0%|          | 0.00/418M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Sentence Transformer

In [None]:
SentenceTransformer_model = SentenceTransformer('all-MiniLM-L6-v2')  # 384 dimensional, Max Sequence Length:	256
EMBD_SIZE = 384

def get_sentence_embedding(text):
  """Encode `text` with `SentenceTransformer_model` and return the embedding as a plain list."""
  return SentenceTransformer_model.encode(text).tolist()

embedding_features = [f"embd{i}" for i in range(EMBD_SIZE)]
assert len(get_sentence_embedding(example)) == len(embedding_features)

Downloading:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/612 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/350 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/13.2k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/349 [00:00<?, ?B/s]

Readability Metrics

In [None]:
def get_readability_metrics(text):
  """Compute readability scores for `text`, using -1 for any metric that fails.

  Parameters
  ----------
  text : str
      Text handed to `Readability`.

  Returns
  -------
  list
      Always 10 values, in this order: Flesch score, Flesch ease (mapped to
      1..7 through `ease_dict`), Flesch-Kincaid, Dale-Chall, Gunning Fog, ARI,
      Coleman-Liau, Linsear Write, SMOG, Spache. A metric that raises (the
      original notes say most need at least 100 words, SMOG also 30
      sentences) contributes -1.
  """
  r = Readability(text)
  ease_dict = {
      "very_easy" : 1,
      "easy" : 2,
      "fairly_easy" : 3,
      "standard" : 4,
      "fairly_difficult" : 5,
      "difficult" : 6,
      "very_confusing" : 7
  }

  def _score(metric_name):
    """Return the `.score` of ``r.<metric_name>()``, or -1 when it cannot be computed."""
    try:
      return getattr(r, metric_name)().score
    except Exception:  # not a bare except: KeyboardInterrupt must still propagate
      return -1

  # Flesch contributes two features. Resolve both before storing either, so a
  # failure on the ease lookup cannot leave a stray score in the output and
  # shift every later feature by one position.
  try:
    f = r.flesch()
    flesch_features = [f.score, ease_dict[f.ease]]
  except Exception:
    flesch_features = [-1, -1]

  single_score_metrics = [
      "flesch_kincaid",
      "dale_chall",
      "gunning_fog",
      "ari",
      "coleman_liau",
      "linsear_write",
      "smog",
      "spache",
  ]
  return flesch_features + [_score(name) for name in single_score_metrics]

readability_features = ["fk", "fk ease", "fk grade", "dc", "gf", "ari", "cl", "linsear_write", "smog", "spache"]
assert len(get_readability_metrics(example)) == len(readability_features)

Get All Features

In [None]:
def extract_features_from_text(text):
  """Concatenate every per-text feature group into one flat list.

  Group order: statistics, special characters, discourse markers, sentiment,
  Detoxify scores, sentence embedding. Readability metrics are left out.
  """
  extractors = (
      extract_statistics_features,
      get_features_special_chars,
      get_features_from_dimlex,
      get_stantiment_analysis,
      get_detoxify_features,
      get_sentence_embedding,
  )
  all_features = []
  for extractor in extractors:
    all_features.extend(extractor(text))
  return all_features

## Create Feature Based Data

In [None]:
# apply the min-max scaling in Pandas using the .min() and .max() methods
# copied from - https://towardsdatascience.com/data-normalization-with-pandas-and-scikit-learn-7c1cc6ed6475#:~:text=The%20min%2Dmax%20approach%20(often,max()%20methods.
def min_max_scaling(df, cols_to_norm):
    """Return a copy of `df` with the given columns min-max scaled to [0, 1].

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is not modified.
    cols_to_norm : iterable of str
        Names of the columns to rescale. Other columns are copied unchanged.

    Returns
    -------
    pandas.DataFrame
        Copy of `df` where each listed column is ``(x - min) / (max - min)``.
        A constant column (``max == min``) becomes all zeros instead of NaN.
    """
    df_norm = df.copy()
    for column in cols_to_norm:
        col_min = df_norm[column].min()
        col_range = df_norm[column].max() - col_min
        shifted = df_norm[column] - col_min
        if col_range == 0:
            # Dividing would compute 0/0 and turn the whole column into NaN
            # (e.g. a token that never occurs). Multiplying by 0.0 gives zeros,
            # keeps a float dtype, and leaves existing NaN entries as NaN.
            df_norm[column] = shifted * 0.0
        else:
            df_norm[column] = shifted / col_range

    return df_norm

In [None]:
# Column order of the feature matrix: has_quote first, then the feature groups
# in the order extract_features_from_text() concatenates them.
# readability_features are not included.
all_features_names = [
    "has_quote",
    *statistics_features,
    *special_chars,
    *discourse_features,
    *santiment_features,
    *ditoxify_features,
    *embedding_features,
]

N_FEATURES = len(all_features_names)

# Names of the label columns; they are selected from the dataframes as the prediction targets.
labels = ['Aggressive', 'AgreeBut', 'AgreeToDisagree', 'Alternative', 'Answer',
       'AttackValidity', 'BAD', 'Clarification', 'Complaint', 'Convergence',
       'CounterArgument', 'CriticalQuestion', 'DirectNo', 'DoubleVoicing',
       'Extension', 'Irrelevance', 'Moderation', 'NegTransformation',
       'Nitpicking', 'NoReasonDisagreement', 'Personal', 'Positive',
       'Repetition', 'RephraseAttack', 'RequestClarification', 'Ridicule',
       'Sarcasm', 'Softening', 'Sources', 'ViableTransformation',
       'WQualifiers']

# Number of label columns; used by the column-count assertions.
N_LABELS = len(labels)

def extract_features_labels_from_df(df):
  """Build the combined features-plus-labels frame from a raw dataframe.

  Reads the `clean_quote`, `clean_text` and label columns of `df`. The result
  has the N_FEATURES columns of `all_features_names` followed by the `labels`
  columns. Statistics and special-character counts are min-max scaled using
  the min/max of `df` itself.
  """
  # 1 when the row has a quote, 0 when `clean_quote` is missing
  quote_flag = df["clean_quote"].notna().apply(lambda present: 1 if present else 0)

  # one feature vector per text, expanded to one column per feature
  feature_frame = df["clean_text"].apply(extract_features_from_text).apply(pd.Series)
  feature_frame.columns = all_features_names[1:]

  # scale the count-like groups
  feature_frame = min_max_scaling(feature_frame, statistics_features + special_chars)

  # features (x): has_quote goes first
  feature_frame.insert(0, "has_quote", quote_flag)
  assert len(feature_frame.columns) == N_FEATURES

  # labels (y)
  label_frame = df[labels]
  assert len(label_frame.columns) == N_LABELS

  # append the labels to the right of the features
  feature_frame[labels] = label_frame
  return feature_frame

In [None]:
# train_df = pd.read_csv('/content/train_df.csv')
# features_train = extract_features_labels_from_df(train_df)
# features_train.to_csv('/content/train_features_df.csv', index=False)

In [None]:
# test_df = pd.read_csv('/content/test_df.csv')
# features_test = extract_features_labels_from_df(test_df)
# features_test.to_csv('/content/test_features_df.csv', index=False)

## Load Datasets

In [None]:
def split_x_y(df):
  """Split a combined frame into ``(features, labels)`` using `all_features_names` and `labels`."""
  targets = df[labels]
  assert len(targets.columns) == N_LABELS

  inputs = df[all_features_names]
  assert len(inputs.columns) == N_FEATURES

  return inputs, targets

In [None]:
!gdown --id 1_3ySVG0XZmDlXPbEyakcVa7uqhZ99X6h&export=download
!gdown --id 1gWVjPP_a2itfqBlTepNMsD2P5hkJCtAT&export=download

Downloading...
From: https://drive.google.com/uc?id=1_3ySVG0XZmDlXPbEyakcVa7uqhZ99X6h
To: /content/train_features_df.csv
100% 76.4M/76.4M [00:01<00:00, 51.1MB/s]
Downloading...
From: https://drive.google.com/uc?id=1gWVjPP_a2itfqBlTepNMsD2P5hkJCtAT
To: /content/test_features_df.csv
100% 14.3M/14.3M [00:00<00:00, 42.0MB/s]


In [None]:
from sklearn.model_selection import train_test_split

# Columns stored in the downloaded feature CSVs that are not part of
# all_features_names. errors='ignore' keeps this cell working when the CSVs were
# regenerated with the current special_chars list and no longer contain them.
LEGACY_COLUMNS = [';)*', ':(']

train_set = pd.read_csv('/content/train_features_df.csv')
train_set = train_set.drop(columns=LEGACY_COLUMNS, errors='ignore')
X_train, y_train = split_x_y(train_set)
# Hold out 20% of the training data for validation (fixed seed for repeatability).
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

test_set = pd.read_csv('/content/test_features_df.csv')
test_set = test_set.drop(columns=LEGACY_COLUMNS, errors='ignore')
X_test, y_test = split_x_y(test_set)

# sklearn

In [None]:
from sklearn.base import BaseEstimator
from sklearn.multioutput import MultiOutputClassifier
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC
from sklearn.pipeline import Pipeline
from sklearn.model_selection import ParameterGrid
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from tqdm import tqdm
from sklearn.metrics import f1_score, recall_score, classification_report, precision_score
RS=121

In [None]:
from sklearn.metrics import precision_recall_fscore_support

def score(y_true, y_pred, index):
    """Return a one-row DataFrame (row label `index`) with weighted precision, recall and f1."""
    precision, recall, f1 = precision_recall_fscore_support(y_true, y_pred, average='weighted')[:3]
    return pd.DataFrame({'precision': precision, 'recall': recall, 'f1': f1}, index=[index])

In [None]:
class ClfSwitcher(BaseEstimator):
    """Thin estimator wrapper whose underlying classifier can be swapped.

    Every method delegates to `self.estimator`, so a pipeline step can be pointed
    at a different classifier through ``set_params(clf__estimator=...)``.
    """

    def __init__(self, estimator=None):
        """
        Parameters
        ----------
        estimator: sklearn object, the classifier. When omitted, a fresh
            ``RandomForestClassifier()`` is created for this instance.
        """
        # A `RandomForestClassifier()` default in the signature is built once,
        # at class-definition time, and would be shared (and fitted in place)
        # by every ClfSwitcher created without an explicit estimator.
        self.estimator = RandomForestClassifier() if estimator is None else estimator

    def fit(self, X, y):
        """Fit the wrapped estimator and return `self`."""
        self.estimator.fit(X, y)
        return self

    def predict(self, X):
        """Return the wrapped estimator's predictions for `X`."""
        return self.estimator.predict(X)

    def predict_proba(self, X):
        """Return the wrapped estimator's `predict_proba` output for `X`."""
        return self.estimator.predict_proba(X)

    def score(self, X, y):
        """Return the wrapped estimator's `score` on `(X, y)`."""
        return self.estimator.score(X, y)

In [None]:
# Single-step pipeline; the 'clf' step's estimator is replaced per candidate via set_params.
pipeline = Pipeline(steps=[('clf', ClfSwitcher())])

In [None]:
# Candidate classifiers keyed by the short name used as the row label in `scores`.
# Most are wrapped in MultiOutputClassifier; KNN and the random forest are used
# directly. XGBClassifier(random_state=RS, n_jobs=-1) ('xgb1') is left out.
candidate_estimators = {
    'logreg1': MultiOutputClassifier(LogisticRegression(class_weight='balanced', random_state=RS), n_jobs=-1),
    'sgd1': MultiOutputClassifier(SGDClassifier(class_weight='balanced', random_state=RS, loss='modified_huber'), n_jobs=-1),
    'svm1': MultiOutputClassifier(LinearSVC(class_weight='balanced', random_state=RS), n_jobs=-1),
    'knn1': KNeighborsClassifier(n_jobs=-1),
    'rf1': RandomForestClassifier(class_weight='balanced', random_state=RS, n_jobs=-1),
    'lgbm1': MultiOutputClassifier(LGBMClassifier(is_unbalance=True, random_state=RS), n_jobs=-1),
}

# `models` and `grid` are derived from the same dict, so they stay aligned.
models = list(candidate_estimators)
grid = ParameterGrid({'clf__estimator': list(candidate_estimators.values())})

In [None]:
# One row of weighted precision / recall / f1 per candidate model, measured on the validation split.
scores = pd.DataFrame()

# `models` and `grid` are walked in lockstep, so their orders must match.
for model, params in tqdm(zip(models, grid), total=len(models)):
    # Point the pipeline's 'clf' step at this candidate, then fit and evaluate it.
    pipeline.set_params(**params)
    pipeline.fit(X_train, y_train)
    y_pred = pipeline.predict(X_val)
    performance = score(y_val, y_pred, model)
    # Appending inside the loop keeps partial results if the cell is interrupted.
    scores = pd.concat([scores, performance])

    
scores

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
 83%|████████▎ | 5/6 [06:18<01:15, 75.68s/it]


KeyboardInterrupt: ignored

In [None]:
# Row names for classification_report must line up with the columns of y_test,
# which are selected with `labels`; reuse that list instead of keeping a second
# hand-written copy that could drift out of sync.
target_names = list(labels)



In [None]:
# Class-weighted logistic regression wrapped in MultiOutputClassifier:
# fit on the training split, report per-label metrics on the test split.
logreg = MultiOutputClassifier(LogisticRegression(class_weight='balanced', random_state=RS), n_jobs=-1)
logreg.fit(X_train, y_train)
preds = logreg.predict(X_test)
print(classification_report(y_test, preds, target_names=target_names))

                      precision    recall  f1-score   support

          Aggressive       0.06      0.36      0.10        44
            AgreeBut       0.20      0.60      0.30       109
     AgreeToDisagree       0.06      0.35      0.10        20
         Alternative       0.04      0.50      0.07        26
              Answer       0.04      0.44      0.07        16
      AttackValidity       0.09      0.40      0.15        35
                 BAD       0.01      0.13      0.02        15
       Clarification       0.11      0.50      0.19       145
           Complaint       0.16      0.59      0.25        75
         Convergence       0.13      0.47      0.20        34
     CounterArgument       0.69      0.71      0.70       953
    CriticalQuestion       0.26      0.95      0.41       155
            DirectNo       0.13      0.49      0.20        97
       DoubleVoicing       0.05      0.33      0.09        40
           Extension       0.07      0.36      0.11        28
       

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# LightGBM wrapped in MultiOutputClassifier; the report is kept as a dict (output_dict=True).
# Note: this overwrites `preds` from the logistic-regression cell.
lgbm = MultiOutputClassifier(LGBMClassifier(is_unbalance=True, random_state=RS), n_jobs=-1)
lgbm.fit(X_train, y_train)
preds = lgbm.predict(X_test)
cr = classification_report(y_test, preds, target_names=target_names,output_dict=True)
print(cr)


KeyboardInterrupt: ignored

In [None]:
# Save the per-label report as CSV (one row per label / average).
# NOTE(review): `preds` is whichever model's predictions were assigned last
# (logreg or lgbm cell) — confirm which one this export is meant to capture.
cr = classification_report(y_test, preds, target_names=target_names,output_dict=True)
pd.DataFrame(cr).T.to_csv('test.csv')

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# Class-weighted random forest wrapped in MultiOutputClassifier.
rfc = MultiOutputClassifier(RandomForestClassifier(class_weight='balanced', random_state=121, n_jobs=-1))
# Unwrapped alternative that was tried: RandomForestClassifier(class_weight='balanced', random_state=121, n_jobs=-1)
rfc.fit(X_train, y_train)

In [None]:
# Class-weighted linear SVM wrapped in MultiOutputClassifier.
SVM = MultiOutputClassifier(LinearSVC(class_weight='balanced', random_state=121), n_jobs=-1)
SVM.fit(X_train, y_train)

In [None]:
# Test-set predictions of the fitted random forest (`rfc`) and linear SVM (`SVM`).
rfc_pred = rfc.predict(X_test)
svm_pred = SVM.predict(X_test)

In [None]:
print(classification_report(y_test, rfc_pred, target_names=target_names))

In [None]:
print(classification_report(y_test, svm_pred, target_names=target_names))

# pytorch

## Build Model

In [None]:
# Use the first GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Sizes of the four feature groups, in the column order of all_features_names.
# CustomDataset uses them to split each feature row into four tensors.
n1 = len(statistics_features) +1 # statistics + has quote
n2 = len(special_chars + discourse_features) # chars & discourse markers
n3 = len(santiment_features + ditoxify_features) # sentiment & detoxify
n4 = len(embedding_features) # embedding

class CustomDataset(Dataset):
    """Row-wise dataset over a features DataFrame and a labels DataFrame.

    Each item is ``((g1, g2, g3, g4), labels)``, where the feature row is split
    into four tensors of sizes `n1`, `n2`, `n3` and `n4` (module-level values).
    """

    def __init__(self, features_df, labels_df):
        # Reset the indices so positional access matches the row order, whatever
        # index the frames carried before (e.g. after train_test_split).
        self.labels = labels_df.reset_index(drop=True)
        self.features = features_df.reset_index(drop=True)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        # Convert each row to a NumPy array explicitly rather than handing a
        # pandas Series to torch.tensor and relying on torch treating it as a
        # generic sequence.
        labels = torch.tensor(self.labels.iloc[idx].to_numpy())
        features = torch.tensor(self.features.iloc[idx].to_numpy())

        return torch.split(features, [n1, n2, n3, n4]), labels

# Wrap each split in a dataset and a DataLoader serving shuffled batches of 128.
train_dataset = CustomDataset(X_train, y_train)
val_dataset = CustomDataset(X_val, y_val)
test_dataset = CustomDataset(X_test, y_test)

# NOTE(review): validation and test batches are shuffled too, so row order is not preserved.
train_dataloader = DataLoader(train_dataset, batch_size=128, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=128, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=128, shuffle=True)

# data, labels = next(iter(train_dataloader))

In [None]:
class Model(nn.Module):
    """Multi-branch feed-forward network over the four feature groups.

    Each group (sizes `n1`..`n4`) goes through its own linear branch. The branch
    outputs are concatenated and passed through three linear layers that
    produce one raw logit per label (`N_LABELS` outputs, no sigmoid applied).
    """

    def __init__(self):
        super().__init__()
        self.drop = nn.Dropout()

        # statistics + has quote features
        self.statistics_layer = nn.Linear(n1, (n1//2)+1)

        # chars & discourse markers features
        self.disc_layer = nn.Linear(n2, 64)

        # sentiment & detoxify features
        self.sentiment_detoxify_layer = nn.Linear(n3, n3//2)

        # sentence_embeddings
        self.sent_emb_layer_1 = nn.Linear(n4, 256)
        self.sent_emb_layer_2 = nn.Linear(256, 128)

        # width of the concatenated branch outputs
        s = ((n1//2)+1)+64+(n3//2)+128
        self.all_1 = nn.Linear(s, 128)
        self.all_2 = nn.Linear(128, 64)
        # One output per label; tied to N_LABELS instead of a hard-coded 31 so
        # the head follows the label list.
        self.all_3 = nn.Linear(64, N_LABELS)

    def forward(self, x):
        """Return logits of shape (batch, N_LABELS).

        `x` is a sequence whose first four items are the per-group feature
        tensors (batch-first); each is cast to float32 before use.
        """
        x1, x2, x3, x4 = (x[i].to(torch.float32) for i in range(4))

        x1 = self.drop(F.relu(self.statistics_layer(x1)))
        x2 = self.drop(F.relu(self.disc_layer(x2)))
        x3 = self.drop(F.relu(self.sentiment_detoxify_layer(x3)))

        # The embedding branch applies no non-linearity: two linear layers,
        # each followed by dropout.
        x4 = self.drop(self.sent_emb_layer_1(x4))
        x4 = self.drop(self.sent_emb_layer_2(x4))

        x = torch.cat((x1, x2, x3, x4), dim=1)
        x = self.drop(F.relu(self.all_1(x)))
        x = self.drop(F.relu(self.all_2(x)))
        # Raw logits are returned.
        x = self.all_3(x)

        return x

In [None]:
class AsymmetricLoss(nn.Module):
    """Asymmetric loss for multi-label classification.

    Positive and negative targets get different focusing exponents
    (`gamma_pos`, `gamma_neg`), and the negative-class probability is shifted
    by `clip` before the log. The loss is summed over all elements.
    """

    def __init__(self, gamma_neg=4, gamma_pos=1, clip=0.05, eps=1e-8, disable_torch_grad_focal_loss=True):
        super(AsymmetricLoss, self).__init__()

        self.gamma_neg = gamma_neg
        self.gamma_pos = gamma_pos
        self.clip = clip
        self.disable_torch_grad_focal_loss = disable_torch_grad_focal_loss
        self.eps = eps

    def forward(self, x, y):
        """
        Parameters
        ----------
        x: input logits
        y: targets (multi-label binarized vector)

        Returns
        -------
        Scalar tensor: the negated sum of the weighted log-likelihood terms.
        """

        # Calculating Probabilities
        x_sigmoid = torch.sigmoid(x)
        xs_pos = x_sigmoid
        xs_neg = 1 - x_sigmoid

        # Asymmetric Clipping
        if self.clip is not None and self.clip > 0:
            xs_neg = (xs_neg + self.clip).clamp(max=1)

        # Basic CE calculation
        los_pos = y * torch.log(xs_pos.clamp(min=self.eps))
        los_neg = (1 - y) * torch.log(xs_neg.clamp(min=self.eps))
        loss = los_pos + los_neg

        # Asymmetric Focusing
        if self.gamma_neg > 0 or self.gamma_pos > 0:
            # Optionally compute the focusing weight without tracking gradients.
            # A context manager restores the caller's grad mode on exit; forcing
            # `set_grad_enabled(True)` afterwards would switch autograd back on
            # inside a surrounding `torch.no_grad()` block.
            track_grad = torch.is_grad_enabled() and not self.disable_torch_grad_focal_loss
            with torch.set_grad_enabled(track_grad):
                pt0 = xs_pos * y
                pt1 = xs_neg * (1 - y)  # pt = p if t > 0 else 1-p
                pt = pt0 + pt1
                one_sided_gamma = self.gamma_pos * y + self.gamma_neg * (1 - y)
                one_sided_w = torch.pow(1 - pt, one_sided_gamma)
            loss *= one_sided_w

        return -loss.sum()

In [None]:
# Instantiate the network on the selected device, with the asymmetric loss and SGD with momentum.
model = Model()
model = model.to(device)
# Alternative loss that was tried: torch.nn.MultiLabelSoftMarginLoss()
loss_fn = AsymmetricLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# sanity check
# inputs, labels = next(iter(train_dataloader))
# inputs, labels = [input.to(device) for input in inputs], labels.to(device)
# torch.any(torch.isnan(data[3]))
# outputs = model(inputs)
# outputs
# torch.any(torch.isnan(outputs))

## Run Model

In [None]:
from tqdm import tqdm
def train_one_epoch(epoch_index):
    """Run one optimisation pass over `train_dataloader`.

    Uses the module-level `model`, `optimizer`, `loss_fn` and `device`.

    Parameters
    ----------
    epoch_index : int
        Zero-based index of the current epoch; only used to label the progress bar.

    Returns
    -------
    float
        Mean per-batch training loss, or NaN when the loader yields no batches.
    """
    running_loss = 0.
    n_batches = 0

    # Wrap the loader itself (not enumerate(...)) so tqdm can read its length
    # and show a real progress bar.
    for inputs, labels in tqdm(train_dataloader, desc='epoch {}'.format(epoch_index + 1)):
        # Every batch is an (inputs, labels) pair; inputs is a sequence of tensors
        inputs, labels = [input.to(device) for input in inputs], labels.to(device)

        # Zero the gradients for every batch
        optimizer.zero_grad()

        # Forward pass, loss and backward pass
        outputs = model(inputs)
        loss = loss_fn(outputs, labels)
        loss.backward()

        # Adjust learning weights
        optimizer.step()

        running_loss += loss.item()
        n_batches += 1

    # Counting batches explicitly avoids a NameError on the loop index when the
    # loader is empty.
    if n_batches == 0:
        return float('nan')
    return running_loss / n_batches

In [None]:
# Initializing in a separate cell so we can easily add more epochs to the same run

epoch_number = 0

EPOCHS = 150

best_vloss = 1_000_000.

# Per-epoch average losses (plain floats), used for the loss plot.
loss_train = []
loss_val = []

for epoch in range(EPOCHS):
    print('EPOCH {}:'.format(epoch_number + 1))

    # Training pass: train mode, gradients tracked
    model.train(True)
    avg_loss = train_one_epoch(epoch_number)
    loss_train.append(avg_loss)

    # Validation pass: eval mode, and no_grad so no autograd graph is built
    # or kept alive while the losses are accumulated.
    model.train(False)

    running_vloss = 0.0
    n_val_batches = 0
    with torch.no_grad():
        for vinputs, vlabels in val_dataloader:
            vinputs, vlabels = [vinput.to(device) for vinput in vinputs], vlabels.to(device)
            voutputs = model(vinputs)
            # .item() stores a float instead of a graph-attached tensor
            running_vloss += loss_fn(voutputs, vlabels).item()
            n_val_batches += 1

    avg_vloss = running_vloss / n_val_batches
    loss_val.append(avg_vloss)
    print('LOSS train {} valid {}'.format(avg_loss, avg_vloss))

    # Track best performance, and save the model's state.
    # `model_path` therefore always names the best checkpoint written so far.
    if avg_vloss < best_vloss:
        best_vloss = avg_vloss
        model_path = 'model_{}'.format(epoch_number)
        torch.save(model.state_dict(), model_path)

    epoch_number += 1

In [None]:
# download best model
from google.colab import files
files.download(model_path) 

In [None]:
import matplotlib.pyplot as plt

# Derive the x-axis from the recorded history: a fixed range(1, 76) does not match
# the number of recorded epochs (EPOCHS = 150) and makes plt.plot fail on the
# length mismatch.
epochs = range(1, len(loss_train) + 1)
plt.plot(epochs, loss_train, 'g', label='Training loss')
plt.plot(epochs, loss_val, 'b', label='validation loss')
plt.title('Training and Validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

Evaluation

In [None]:
# Load the best checkpoint written by the training loop (`model_path`). A file named
# model_149 exists only if the final epoch happened to be the best one, so the
# hard-coded path is kept purely as a fallback for sessions where training did not run.
PATH = model_path if 'model_path' in globals() else '/content/model_149'
eval_model = Model()
# map_location lets a checkpoint saved on GPU load on a CPU-only runtime.
eval_model.load_state_dict(torch.load(PATH, map_location=device))
eval_model = eval_model.to(device)
eval_model.eval()
eval_model

In [None]:
# Collect the model's raw outputs (logits; Model.forward applies no sigmoid)
# together with the true labels for every test row.
out = []
out_labels = []

# Inference only: no_grad avoids building autograd graphs for every batch.
with torch.no_grad():
    for tinputs, tlabels in test_dataloader:
        out_labels += tlabels.tolist()
        tinputs = [tinput.to(device) for tinput in tinputs]
        out += eval_model(tinputs).tolist()

# One row per test sample: list of logits and list of true labels.
df = pd.DataFrame({'outputs': out, 'labels': out_labels})
df

In [None]:
df.to_csv('outputs.csv')