In [1]:
import bentoml

import pickle
from sklearn.base import BaseEstimator

import transformers
# Limit the transformers library's own log output to error-level messages.
transformers.logging.set_verbosity_error()
from torch.utils.data import DataLoader 

from transformers import AutoTokenizer
from transformers import AutoModel
from transformers import TFDistilBertModel, DistilBertTokenizerFast

from sklearn.base import BaseEstimator, TransformerMixin
import torch
import numpy as np

from scipy.sparse import csr_matrix
from tqdm import tqdm

from sklearn.linear_model import LogisticRegression

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
_device_kind = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(_device_kind)
del _device_kind
print("using device:", device)

# Notebook re-run guard: the tokenizer and model are only loaded when the
# model name is not already bound, so re-executing the cell is cheap.
if "disilbert_model" not in locals():
    disilbert_tokenizer = AutoTokenizer.from_pretrained(
        "distilbert-base-uncased"
    )
    disilbert_model = AutoModel.from_pretrained(
        "distilbert-base-uncased"
    )
    # Move the model's weights onto the selected device.
    handle = disilbert_model.to(device)


class BERTEmbeddings(BaseEstimator, TransformerMixin):
    """Scikit-learn style transformer that embeds texts with DistilBERT.

    Each input text is represented by the last-layer hidden state of its
    first token, produced by the module-level ``disilbert_model`` and
    ``disilbert_tokenizer``.
    """

    def __init__(self):
        self.tokenizer = disilbert_tokenizer
        # Fixed: this attribute previously pointed at the tokenizer.
        self.model = disilbert_model
        self.max_length = 256
        # NOTE(review): despite its name this holds the model object, not a
        # string; left unchanged so existing users of the attribute still work.
        self.model_name = disilbert_model

    def fit(self, X=None, y=None):
        """Do nothing and return ``self``.

        Returning ``self`` (instead of ``None``) is what allows
        ``fit_transform`` and scikit-learn pipelines to chain
        ``fit(...).transform(...)``.
        """
        return self

    def encode(self, txt):
        """Tokenize ``txt`` into padded, truncated PyTorch tensors.

        Sequences are truncated to ``self.max_length`` tokens.
        """
        return self.tokenizer(txt, max_length=self.max_length,
                              truncation=True, padding=True, return_tensors="pt")

    def transform(self, X=None):
        """Embed every text in ``X``.

        Args:
            X: indexable collection of strings; it is iterated in batches of 4
                in its original order.

        Returns:
            A ``scipy.sparse.csr_matrix`` with one row per input text.
        """
        dataloader = DataLoader(X, batch_size=4, shuffle=False)
        allembeds = []
        # Inference only: skip autograd bookkeeping so no graph is retained.
        with torch.no_grad():
            for batch in tqdm(dataloader):
                batchenc = self.encode(batch)
                input_ids = batchenc['input_ids'].to(device)
                attention_mask = batchenc['attention_mask'].to(device)
                batchout = self.model(input_ids, attention_mask=attention_mask,
                                      output_hidden_states=True)
                # Output index 1 holds the per-layer hidden states (requested
                # via output_hidden_states=True); [-1] selects the last layer.
                last_layer = batchout[1][-1]
                # Keep only the first token's vector for each text in the batch.
                first_token = last_layer[:, 0].cpu().numpy()
                allembeds.extend(first_token)
        return csr_matrix(allembeds)


class BigPappy(BaseEstimator):
    """Two-stage predictor: a domain classifier routes each sample to a
    per-domain model.

    Raw texts are embedded with ``BERTEmbeddings``; the domain classifier's
    prediction for a sample is used as an index into the list of per-domain
    models, and the selected model produces the final prediction.

    Args:
        domain_path: path of the pickled domain classifier.
        model_path_template: ``str.format`` template for the per-domain model
            pickles; it is formatted with the domain index.
        n_domains: number of per-domain models to load (indices
            ``0 .. n_domains - 1``).
    """

    def __init__(self, domain_path='domain_cls.pickle',
                 model_path_template='model_{}.pickle', n_domains=6):
        # Stored under the parameter names so BaseEstimator.get_params works.
        self.domain_path = domain_path
        self.model_path_template = model_path_template
        self.n_domains = n_domains

        self.bert = BERTEmbeddings()

        # SECURITY: pickle.load can execute arbitrary code while loading.
        # Only point these paths at files from a trusted source.
        with open(self.domain_path, 'rb') as handle:
            self.pipe_domain = pickle.load(handle)

        self.domain_model_pipe = []
        for i in range(self.n_domains):
            with open(self.model_path_template.format(i), 'rb') as handle:
                self.domain_model_pipe.append(pickle.load(handle))

    def predictw(self, X_test):
        """Predict from already-embedded features.

        Args:
            X_test: feature rows accepted by the loaded classifiers; it must
                support row slicing (``X_test[i:i+1]``).

        Returns:
            ``(ypred, domain_pred)``: a list with one prediction per row and
            the domain classifier's output for all rows.
        """
        domain_pred = self.pipe_domain.predict(X_test)
        ypred = []
        for i, dpred in enumerate(domain_pred):
            # The predicted domain is used directly as a list index.
            model = self.domain_model_pipe[dpred]
            # Slice (not index) so the model still receives a 2-D single-row input.
            ypred.append(model.predict(X_test[i:i+1])[0])
        return ypred, domain_pred

    def predict(self, X_raw):
        """Embed raw texts and predict.

        Args:
            X_raw: sized collection of strings.

        Returns:
            ``(ypred, domain_pred)`` as described in ``predictw``. For empty
            input an empty list and an empty integer array are returned
            without invoking the models.
        """
        if len(X_raw) == 0:
            # Nothing to embed or classify; avoid feeding a degenerate matrix
            # to the downstream classifiers.
            return [], np.empty(0, dtype=int)
        X_test = self.bert.transform(X_raw).toarray()
        return self.predictw(X_test)

using device: cuda


In [3]:
# Build the predictor (loads the pickled classifiers from the working directory).
bp = BigPappy()
# Smoke test on one review text; the result is the pair (predictions, predicted domains).
bp.predict(["A hard story to tell. by EeffingT I loved this film. It is heartbreaking without a doubt, but anyone who has ever loved someone whose life fizzled out slowly like Alice’s will understand. It’s such a hard thing to see someone you love essentially die before they actually leave this earth, because the person you loved no longer exists. It’s so honest and real, and portrays how real families are changed when tragedy strikes. I loved how they ended with the line from Angels in America. So fitting especially given that the co-creator of this movie passed away before his time due to ALS. Julianna Moore really did deserve the Oscar for this film. Kristen Stewart should have won an award as well. ugh by serrata so, souls are going to form a net with hands grasping ankles and heal the ozone layer? and that’s “love”? ugh. tragic story by littleRedFiat Most of the characters in this movie, other than Julianne Moore and Kristen Stewart, were like placeholders. Not the fault of the actors but of the overall story, or lack thereof, I felt. The scenes between Julianne Moore and Kristen Stewart were wrenching, because it felt like there was some kind of real relationship there—the rest was just sketched in. The sketchiness and lack of depth that the rest of the story and characters had detracted from the power of the story of the main character, which is too bad, because I think the movie serves to bring the topic of Alzheimer’s disease to a larger audience. But I think as a movie, overall it didn’t quite succeed. Viewers Also Bought"])

100%|█████████████████████████████████████████████| 1/1 [00:00<00:00, 65.85it/s]


([0], array([5]))