In [None]:
! pip install transformers simpletransformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


## Loading and testing original model

In [None]:
import copy
import logging

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.metrics import classification_report
from simpletransformers.classification import ClassificationModel, ClassificationArgs
from transformers import DistilBertForSequenceClassification


# Cap the `transformers` logger at WARNING; the root logger is configured
# separately to report from INFO upwards.
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)
logging.basicConfig(level=logging.INFO)

In [None]:
# load original model
# NOTE(review): the path below lives under /content/drive, so Google Drive
# presumably has to be mounted before this cell runs — confirm.

# Optional model configuration
model_args = ClassificationArgs(num_train_epochs=1)

# Create a ClassificationModel; request CUDA only when a GPU is actually
# available so the notebook also runs on CPU-only runtimes
model = ClassificationModel(
    "distilbert",
    "/content/drive/MyDrive/Big Data Analytics/Project/models/st_model",
    args=model_args,
    use_cuda=torch.cuda.is_available(),
)

In [None]:
# Read the validation split and rename its columns to text / labels
eval_df = pd.read_csv(
    "/content/drive/MyDrive/Big Data Analytics/Project/splits/val.csv"
).set_axis(["text", "labels"], axis=1)

# Run the original model over the validation split
result, model_outputs, wrong_predictions = model.eval_model(eval_df)

# Predicted class = index of the largest model output in each row
preds = np.asarray(model_outputs).argmax(axis=1)
print(classification_report(eval_df["labels"], preds))

  0%|          | 0/39427 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/4929 [00:00<?, ?it/s]

In [None]:
# Read the test split and rename its columns to text / labels
test_df = pd.read_csv(
    "/content/drive/MyDrive/Big Data Analytics/Project/splits/test.csv"
).set_axis(["text", "labels"], axis=1)

# Run the original model over the test split
result, model_outputs, wrong_predictions = model.eval_model(test_df)

# Predicted class = index of the largest model output in each row
preds = np.asarray(model_outputs).argmax(axis=1)
print(classification_report(test_df["labels"], preds))

## MapReduce routines for averaging models

In [None]:
def _is_averageable(tensor):
  # Only floating-point / complex tensors can be scaled and summed without
  # loss; integer or bool entries would be truncated when written back.
  return tensor.is_floating_point() or tensor.is_complex()


def mapp(model, n):
  """Divide operation: scale the weights of `model` by 1/n, IN PLACE.

  Every floating-point (or complex) tensor in ``model.state_dict()`` is
  divided by `n`. Non-floating entries (e.g. integer index or counter
  buffers) are left untouched, because dividing them and writing the result
  back into an integer tensor would silently truncate the values.

  Args:
    model: a ``torch.nn.Module``; it is modified in place. Pass a copy if
      the original weights must be preserved.
    n: non-zero divisor, e.g. the number of models being averaged.

  Returns:
    The same `model` object that was passed in.

  Raises:
    ValueError: if `n` is zero.
  """
  if n == 0:
    raise ValueError("n must be non-zero")
  with torch.no_grad():
    # state_dict() tensors share storage with the live parameters/buffers,
    # so an in-place division updates the model itself.
    for tensor in model.state_dict().values():
      if _is_averageable(tensor):
        tensor.div_(n)
  return model

def reduce(model1, model2):
  """Add operation: build a new model holding the element-wise sum of weights.

  A fresh ``DistilBertForSequenceClassification`` is created from
  ``model1.config`` and every entry of its state dict is overwritten.
  Floating-point (or complex) entries receive ``model1 + model2``; any other
  entry (e.g. integer buffers) is copied unchanged from `model1`, since
  summing such entries is not meaningful. Neither input is modified.

  Args:
    model1: first model; also provides the config and the set of keys.
    model2: second model; must contain every state-dict key of `model1`.

  Returns:
    A new ``DistilBertForSequenceClassification`` with the summed weights.
  """
  with torch.no_grad():
    added = DistilBertForSequenceClassification(config=model1.config)
    # Build each state dict once instead of once per layer access.
    target_state = added.state_dict()
    other_state = model2.state_dict()
    for name, tensor in model1.state_dict().items():
      if _is_averageable(tensor):
        tensor = tensor + other_state[name]
      target_state[name].copy_(tensor)
  return added

## Testing averaging functions: Add then Divide

In [None]:
# start from a clean slate: remove the `temp` model directory written by the
# cells below, plus `outputs` and `cache_dir` (presumably simpletransformers'
# default output/cache folders — TODO confirm)
! rm -rf temp outputs cache_dir

In [None]:
# averaging the same model should give no difference in results
averaged_model = reduce(model.model, model.model) # add halved weights twice
averaged_model = mapp(averaged_model, 2) # divide weights by 2

# make temp directory
! mkdir temp
# save model to temp directory
model.save_model_args("temp") # using old args
model.tokenizer.save_pretrained("temp") # using old tokenizer
averaged_model.save_pretrained("temp") # saving averaged model

In [None]:
# wrapping averaged model in simpletransformers
model_args = ClassificationArgs()

# Create a ClassificationModel from the files saved in `temp`; request CUDA
# only when a GPU is actually available so this also runs on CPU-only runtimes
averaged_model = ClassificationModel(
    "distilbert",
    "temp",
    args=model_args,
    use_cuda=torch.cuda.is_available(),
)

In [None]:
# Read the validation split and rename its columns to text / labels
eval_df = pd.read_csv(
    "/content/drive/MyDrive/Big Data Analytics/Project/splits/val.csv"
).set_axis(["text", "labels"], axis=1)

# Run the averaged model over the validation split
result, model_outputs, wrong_predictions = averaged_model.eval_model(eval_df)

# Predicted class = index of the largest model output in each row
preds = np.asarray(model_outputs).argmax(axis=1)
print(classification_report(eval_df["labels"], preds))

  0%|          | 0/39427 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/4929 [00:00<?, ?it/s]

              precision    recall  f1-score   support

           0       0.94      0.97      0.96     31975
           1       0.84      0.75      0.80      7452

    accuracy                           0.93     39427
   macro avg       0.89      0.86      0.88     39427
weighted avg       0.92      0.93      0.93     39427



In [None]:
# Read the test split and rename its columns to text / labels
test_df = pd.read_csv(
    "/content/drive/MyDrive/Big Data Analytics/Project/splits/test.csv"
).set_axis(["text", "labels"], axis=1)

# Run the averaged model over the test split
result, model_outputs, wrong_predictions = averaged_model.eval_model(test_df)

# Predicted class = index of the largest model output in each row
preds = np.asarray(model_outputs).argmax(axis=1)
print(classification_report(test_df["labels"], preds))

## Testing averaging functions: Divide then Add

In [None]:
# remove the `temp` model directory from the previous experiment, plus
# `outputs` and `cache_dir` (presumably simpletransformers' default
# output/cache folders — TODO confirm), before the next one writes to `temp`
! rm -rf temp outputs cache_dir

In [None]:
# averaging the same model should give no difference in results
averaged_model = mapp(model.model, 2) # divide weights by 2
averaged_model = reduce(averaged_model, averaged_model) # add halved weights twice

# make temp directory
! mkdir temp
# save model to temp directory
model.save_model_args("temp") # using old args
model.tokenizer.save_pretrained("temp") # using old tokenizer
averaged_model.save_pretrained("temp") # saving averaged model

In [None]:
# wrapping averaged model in simpletransformers
model_args = ClassificationArgs()

# Create a ClassificationModel from the files saved in `temp`; request CUDA
# only when a GPU is actually available so this also runs on CPU-only runtimes
averaged_model = ClassificationModel(
    "distilbert",
    "temp",
    args=model_args,
    use_cuda=torch.cuda.is_available(),
)

In [None]:
# Read the validation split and rename its columns to text / labels
eval_df = pd.read_csv(
    "/content/drive/MyDrive/Big Data Analytics/Project/splits/val.csv"
).set_axis(["text", "labels"], axis=1)

# Run the averaged model over the validation split
result, model_outputs, wrong_predictions = averaged_model.eval_model(eval_df)

# Predicted class = index of the largest model output in each row
preds = np.asarray(model_outputs).argmax(axis=1)
print(classification_report(eval_df["labels"], preds))

  0%|          | 0/39427 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/4929 [00:00<?, ?it/s]

              precision    recall  f1-score   support

           0       0.94      0.97      0.96     31975
           1       0.84      0.75      0.80      7452

    accuracy                           0.93     39427
   macro avg       0.89      0.86      0.88     39427
weighted avg       0.92      0.93      0.93     39427



In [None]:
# Read the test split and rename its columns to text / labels
test_df = pd.read_csv(
    "/content/drive/MyDrive/Big Data Analytics/Project/splits/test.csv"
).set_axis(["text", "labels"], axis=1)

# Run the averaged model over the test split
result, model_outputs, wrong_predictions = averaged_model.eval_model(test_df)

# Predicted class = index of the largest model output in each row
preds = np.asarray(model_outputs).argmax(axis=1)
print(classification_report(test_df["labels"], preds))

In [None]:
# final cleanup: delete the `temp` model directory along with `outputs` and
# `cache_dir` (presumably simpletransformers' default output/cache folders — TODO confirm)
! rm -rf temp outputs cache_dir