# Mahalanobis - Softmax - Trusted for IMDB, Movie Review, MNLI, SST2 and RTE Datasets

In [None]:
import numpy as np
import torch

In [None]:
# Mount Google Drive in the Colab runtime at /content/gdrive; the data and
# model artefacts loaded later in this notebook are read from under that path.
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
# Install the project package directly from GitHub, then clone the repository
# into the current working directory and install the packages listed in its
# requirements.txt.
!pip install git+https://github.com/Julien2048/NLP_Project_OOD.git --quiet
!git clone https://github.com/Julien2048/NLP_Project_OOD
!pip install -r NLP_Project_OOD/requirements.txt --quiet

In [None]:
# Replace three files of the installed `transformers` package with the copies
# stored on Drive: distilbert/modeling_distilbert.py, distilbert/__init__.py
# and the package-level __init__.py.
# NOTE(review): the destination is hard-coded to the Python 3.8 dist-packages
# directory - confirm it matches the Python version of the runtime.
# NOTE(review): the first %cd is relative, so this cell depends on the current
# working directory when it is run - presumably /content; confirm.
%cd gdrive/MyDrive/NLP_Project_OOD/distilbert
!rm /usr/local/lib/python3.8/dist-packages/transformers/models/distilbert/modeling_distilbert.py
!cp modeling_distilbert.py /usr/local/lib/python3.8/dist-packages/transformers/models/distilbert/

!rm /usr/local/lib/python3.8/dist-packages/transformers/models/distilbert/__init__.py
!cp __init__.py /usr/local/lib/python3.8/dist-packages/transformers/models/distilbert/
%cd ..

%cd transformers
!rm /usr/local/lib/python3.8/dist-packages/transformers/__init__.py
!cp __init__.py /usr/local/lib/python3.8/dist-packages/transformers/
# Climb back up four levels (transformers -> NLP_Project_OOD -> MyDrive ->
# gdrive -> the directory this cell started from).
%cd ..
%cd ..
%cd ..
%cd ..

In [None]:
from nlp_project.data_loader import IMDBDataset
from nlp_project.OOD_detector import Mahalanobis
from nlp_project.metrics import ResultsOOD
from nlp_project.BERT_adaptation import DistilBertClassifier

## Load Classifier

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Both classifier wrappers are built from the same `path_pretrained_model`;
# they differ only in the `prelogits` flag.
# NOTE(review): this path is absolute from the filesystem root, whereas the
# repository was cloned into the working directory - confirm how
# DistilBertClassifier resolves it.
pretrained_model_path = "/NLP_Project_OOD/model_transformers/models_trained/model_imdb_distilbert"

model_prelogits = DistilBertClassifier(
    device=device,
    prelogits=True,
    training=False,
    path_pretrained_model=pretrained_model_path,
)
model_hidden_state = DistilBertClassifier(
    device=device,
    prelogits=False,
    training=False,
    path_pretrained_model=pretrained_model_path,
)

In [None]:
# Directory passed as `path` to every load_prelogit_logit call in this cell.
prelogits_dir = "/content/gdrive/MyDrive/NLP_Project_OOD/logits-prelogits/imdb-mr-mnli/"

# IMDB: train and test splits.
imdb_prelogits_train, imdb_logits_train = model_prelogits.load_prelogit_logit(
    "imdb", "train", path=prelogits_dir
)
imdb_prelogits_test, imdb_logits_test = model_prelogits.load_prelogit_logit(
    "imdb", "test", path=prelogits_dir
)

# Other datasets: test split only.
movie_review_prelogits_test, movie_review_logits_test = model_prelogits.load_prelogit_logit(
    "movie_review", "test", path=prelogits_dir
)
mnli_prelogits_test, mnli_logits_test = model_prelogits.load_prelogit_logit(
    "mnli", "test", path=prelogits_dir
)
sst2_prelogits_test, sst2_logits_test = model_prelogits.load_prelogit_logit(
    "sst2", "test", path=prelogits_dir
)
rte_prelogits_test, rte_logits_test = model_prelogits.load_prelogit_logit(
    "rte", "test", path=prelogits_dir
)

In [None]:
# Directory passed as `path` to every load_hidden_layer call in this cell.
hidden_layer_dir = "/content/gdrive/MyDrive/NLP_Project_OOD/hidden_layer/imdb-mr-mnli/"

# IMDB: train and test splits.
imdb_hidden_state_train = model_hidden_state.load_hidden_layer(
    "imdb", "train", path=hidden_layer_dir
)
imdb_hidden_state_test = model_hidden_state.load_hidden_layer(
    "imdb", "test", path=hidden_layer_dir
)

# Other datasets: test split only.
movie_review_hidden_state_test = model_hidden_state.load_hidden_layer(
    "movie_review", "test", path=hidden_layer_dir
)
mnli_hidden_state_test = model_hidden_state.load_hidden_layer(
    "mnli", "test", path=hidden_layer_dir
)
sst2_hidden_state_test = model_hidden_state.load_hidden_layer(
    "sst2", "test", path=hidden_layer_dir
)
rte_hidden_state_test = model_hidden_state.load_hidden_layer(
    "rte", "test", path=hidden_layer_dir
)

## Computing OOD

### Mahalanobis Score

#### IMDB - Movie Review

In [None]:
# Load the IMDB train / test labels from Drive.
get_imdb = IMDBDataset()
imdb_train_labels, imdb_test_labels = get_imdb.load_labels(
    "/content/gdrive/MyDrive/NLP_Project_OOD/data/imdb/"
)

# Keyword options forwarded unchanged to the Mahalanobis detector.
maha_options = {
    "substract_mean": False,
    "normalize_to_unity": False,
    "substract_train_distance": False,
    "norm_name": "L1",
}

# IMDB train / test pre-logits against the Movie Review test pre-logits.
# NOTE(review): only the first 10000 train labels are passed - presumably to
# match the number of rows in imdb_prelogits_train; confirm.
maha_imdb_mr = Mahalanobis(
    imdb_prelogits_train,
    imdb_prelogits_test,
    movie_review_prelogits_test,
    imdb_train_labels[:10000],
    **maha_options,
)
onehots_imdb_mr, scores_imdb_mr = maha_imdb_mr()

results_imdb_mr = ResultsOOD(onehots_imdb_mr, scores_imdb_mr)
results_imdb_mr()

#### IMDB - MNLI

In [None]:
# Keyword options forwarded unchanged to the Mahalanobis detector.
maha_options = {
    "substract_mean": False,
    "normalize_to_unity": False,
    "substract_train_distance": False,
    "norm_name": "L1",
}

# IMDB train / test pre-logits against the MNLI test pre-logits.
# NOTE(review): only the first 10000 train labels are passed - presumably to
# match the number of rows in imdb_prelogits_train; confirm.
maha_imdb_mnli = Mahalanobis(
    imdb_prelogits_train,
    imdb_prelogits_test,
    mnli_prelogits_test,
    imdb_train_labels[:10000],
    **maha_options,
)
onehots_imdb_mnli, scores_imdb_mnli = maha_imdb_mnli()

results_imdb_mnli = ResultsOOD(onehots_imdb_mnli, scores_imdb_mnli)
results_imdb_mnli()

#### IMDB - SST2

In [None]:
# Keyword options forwarded unchanged to the Mahalanobis detector.
maha_options = {
    "substract_mean": False,
    "normalize_to_unity": False,
    "substract_train_distance": False,
    "norm_name": "L1",
}

# IMDB train / test pre-logits against the SST2 test pre-logits.
# NOTE(review): only the first 10000 train labels are passed - presumably to
# match the number of rows in imdb_prelogits_train; confirm.
maha_imdb_sst2 = Mahalanobis(
    imdb_prelogits_train,
    imdb_prelogits_test,
    sst2_prelogits_test,
    imdb_train_labels[:10000],
    **maha_options,
)
onehots_imdb_sst2, scores_imdb_sst2 = maha_imdb_sst2()

results_imdb_sst2 = ResultsOOD(onehots_imdb_sst2, scores_imdb_sst2)
results_imdb_sst2()

#### IMDB - RTE

In [None]:
# Keyword options forwarded unchanged to the Mahalanobis detector.
maha_options = {
    "substract_mean": False,
    "normalize_to_unity": False,
    "substract_train_distance": False,
    "norm_name": "L1",
}

# IMDB train / test pre-logits against the RTE test pre-logits.
# NOTE(review): only the first 10000 train labels are passed - presumably to
# match the number of rows in imdb_prelogits_train; confirm.
maha_imdb_rte = Mahalanobis(
    imdb_prelogits_train,
    imdb_prelogits_test,
    rte_prelogits_test,
    imdb_train_labels[:10000],
    **maha_options,
)
onehots_imdb_rte, scores_imdb_rte = maha_imdb_rte()

results_imdb_rte = ResultsOOD(onehots_imdb_rte, scores_imdb_rte)
results_imdb_rte()

### Maximum Softmax

In [None]:
def np_softmax(zs):
    """Return the softmax of ``zs`` taken along its last axis.

    The maximum of each slice along the last axis is subtracted before
    exponentiating. Softmax is invariant to such a per-slice shift, and
    shifting by the slice's own maximum guarantees that at least one
    exponential equals 1, so the normalising sum can never underflow to 0.
    (Shifting every slice by the single global maximum, as was done before,
    turns slices lying far below that maximum into 0 / 0 = NaN.)

    Args:
        zs: Array-like of logits; the softmax is computed over the last axis.

    Returns:
        numpy.ndarray of the same shape as ``zs`` whose entries along the
        last axis are non-negative and sum to 1.
    """
    zs = np.asarray(zs)
    # Per-slice shift for numerical stability (see docstring).
    shifted = zs - np.max(zs, axis=-1, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=-1, keepdims=True)

#### IMDB - Movie Review

In [None]:
# Maximum softmax probability of every sample: IMDB test samples first,
# followed by the Movie Review test samples.
scores = np.array(
    np.concatenate(
        [
            np.max(np_softmax(logits), axis=-1)
            for logits in (imdb_logits_test, movie_review_logits_test)
        ],
        axis=0,
    )
)

# Matching labels, in the same order: 1 for each IMDB sample, 0 for each
# Movie Review sample.
n_imdb = len(imdb_logits_test)
n_other = len(movie_review_logits_test)
onehots = np.array([1] * n_imdb + [0] * n_other)

results = ResultsOOD(onehots, scores)
results()

#### IMDB - MNLI

In [None]:
# Maximum softmax probability of every sample: IMDB test samples first,
# followed by the MNLI test samples.
scores = np.array(
    np.concatenate(
        [
            np.max(np_softmax(logits), axis=-1)
            for logits in (imdb_logits_test, mnli_logits_test)
        ],
        axis=0,
    )
)

# Matching labels, in the same order: 1 for each IMDB sample, 0 for each
# MNLI sample.
n_imdb = len(imdb_logits_test)
n_other = len(mnli_logits_test)
onehots = np.array([1] * n_imdb + [0] * n_other)

results = ResultsOOD(onehots, scores)
results()

#### IMDB - SST2

In [None]:
# Maximum softmax probability of every sample: IMDB test samples first,
# followed by the SST2 test samples.
scores = np.array(
    np.concatenate(
        [
            np.max(np_softmax(logits), axis=-1)
            for logits in (imdb_logits_test, sst2_logits_test)
        ],
        axis=0,
    )
)

# Matching labels, in the same order: 1 for each IMDB sample, 0 for each
# SST2 sample.
n_imdb = len(imdb_logits_test)
n_other = len(sst2_logits_test)
onehots = np.array([1] * n_imdb + [0] * n_other)

results = ResultsOOD(onehots, scores)
results()

#### IMDB - RTE

In [None]:
# Maximum softmax probability of every sample: IMDB test samples first,
# followed by the RTE test samples.
scores = np.array(
    np.concatenate(
        [
            np.max(np_softmax(logits), axis=-1)
            for logits in (imdb_logits_test, rte_logits_test)
        ],
        axis=0,
    )
)

# Matching labels, in the same order: 1 for each IMDB sample, 0 for each
# RTE sample.
n_imdb = len(imdb_logits_test)
n_other = len(rte_logits_test)
onehots = np.array([1] * n_imdb + [0] * n_other)

results = ResultsOOD(onehots, scores)
results()