In [3]:
import pandas as pd
from datasets import load_dataset
tomatoes = load_dataset("rotten_tomatoes")

#pandas for easier control
train_df = pd.DataFrame(tomatoes["train"])
eval_df = pd.DataFrame(tomatoes["test"])

In [29]:
# zero shot using NIL
from transformers import pipeline
from sklearn.metrics import classification_report
pipe = pipeline("text-classification", model="facebook/bart-large-mnli")
candidate_labels_dict = {"negative": 0, "positive": 1}
candidate_labels = ['negative', 'positive']
predictions = pipe(eval_df.text.values.tolist(), candidate_labels)
y_pred = [candidate_labels_dict[prediction["labels"][0]] for prediction in predictions]
print(classification_report(eval_df.label, y_pred))

Ignoring args : (['negative', 'positive'],)


KeyboardInterrupt: 

In [15]:
# zero-shot, embed the label's decription
from sentence_transformers import SentenceTransformer, util
model = SentenceTransformer('all-mpnet-base-v2')
# no train embeddings, we just compare the simliarity between evaluation and label
eval_embeddings = model.encode(eval_df.text)
# encode the labels
label_embeddings = model.encode(["A negative review", "A positive review"])

In [16]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
# calculate the sim
sim_matrix = cosine_similarity(eval_embeddings, label_embeddings)
# predict
y_pred = np.argmax(sim_matrix, axis=1)

In [18]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
# create another label embeddings
label_embeddings = model.encode(["A very negative movie review", "A very positive movie review"])
# calculate the sim
sim_matrix = cosine_similarity(eval_embeddings, label_embeddings)
# predict
y_pred = np.argmax(sim_matrix, axis=1)

In [19]:
from sklearn.metrics import classification_report
print(classification_report(eval_df.label, y_pred))

              precision    recall  f1-score   support

           0       0.86      0.73      0.79       533
           1       0.76      0.88      0.82       533

    accuracy                           0.80      1066
   macro avg       0.81      0.80      0.80      1066
weighted avg       0.81      0.80      0.80      1066



In [4]:
# pre trained embedding
from sentence_transformers import SentenceTransformer, util
from sklearn.linear_model import LogisticRegression
model = SentenceTransformer('all-mpnet-base-v2')
# encode the data
train_embeddings = model.encode(train_df.text)
eval_embeddings = model.encode(eval_df.text)
# using logistic regression to classify
clf = LogisticRegression(random_state=42).fit(train_embeddings, train_df.label)

In [5]:
from sklearn.metrics import classification_report
y_pred = clf.predict(eval_embeddings)
print (classification_report(eval_df.label, y_pred))

              precision    recall  f1-score   support

           0       0.85      0.86      0.85       533
           1       0.86      0.85      0.85       533

    accuracy                           0.85      1066
   macro avg       0.85      0.85      0.85      1066
weighted avg       0.85      0.85      0.85      1066



In [6]:
# import simple transformers
from simpletransformers.classification import ClassificationModel, ClassificationArgs
# train only the classifier layers' parameters
model_args = ClassificationArgs() #
model_args.train_custom_parameters_only = True
model_args.custom_parameter_groups = [
    {
        "params" : ["classifier.weight"],
        "lr" : 1e-3,#learning rate
    },
        {
            "params": ["classifier.bias"], "lr": 1e-3, "weight_decay": 0.0,
        },
]
# Initializing the pre-trained BERT model for classification
model = ClassificationModel("bert", "bert-base-cased", num_labels=2, args=model_args)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [8]:
from sklearn.metrics import f1_score
import numpy as np
# Train the model
model.train_model(train_df)
# predict and Evaluation
result, model_outputs, wrong_predictions = model.eval_model(eval_df, f1 =f1_score)
# get prediction
y_pred = np.argmax(model_outputs, axis = 1)




  0%|          | 0/8530 [00:00<?, ?it/s]

Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Running Epoch 0 of 1:   0%|          | 0/1067 [00:00<?, ?it/s]



  0%|          | 0/1066 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/134 [00:00<?, ?it/s]

In [4]:
#Test if GPU is available
import torch

if torch.cuda.is_available():
    print("GPU is available.")
    print(f"GPU Name: {torch.cuda.get_device_name(0)}")
else:
    print("GPU is not available, using CPU.")

GPU is available.
GPU Name: NVIDIA GeForce RTX 2060 with Max-Q Design


In [9]:
from sklearn.metrics import classification_report
# use report to calculate the result, eval.df and y_pred are provided above
report = classification_report(eval_df.label, y_pred) 
print(report)

              precision    recall  f1-score   support

           0       0.58      0.89      0.70       533
           1       0.77      0.35      0.48       533

    accuracy                           0.62      1066
   macro avg       0.67      0.62      0.59      1066
weighted avg       0.67      0.62      0.59      1066



In [10]:
# print the parameters in model: use state_dict()
model_parameters = model.model.state_dict()

for parameter_name in model_parameters:
    print(parameter_name)


bert.embeddings.word_embeddings.weight
bert.embeddings.position_embeddings.weight
bert.embeddings.token_type_embeddings.weight
bert.embeddings.LayerNorm.weight
bert.embeddings.LayerNorm.bias
bert.encoder.layer.0.attention.self.query.weight
bert.encoder.layer.0.attention.self.query.bias
bert.encoder.layer.0.attention.self.key.weight
bert.encoder.layer.0.attention.self.key.bias
bert.encoder.layer.0.attention.self.value.weight
bert.encoder.layer.0.attention.self.value.bias
bert.encoder.layer.0.attention.output.dense.weight
bert.encoder.layer.0.attention.output.dense.bias
bert.encoder.layer.0.attention.output.LayerNorm.weight
bert.encoder.layer.0.attention.output.LayerNorm.bias
bert.encoder.layer.0.intermediate.dense.weight
bert.encoder.layer.0.intermediate.dense.bias
bert.encoder.layer.0.output.dense.weight
bert.encoder.layer.0.output.dense.bias
bert.encoder.layer.0.output.LayerNorm.weight
bert.encoder.layer.0.output.LayerNorm.bias
bert.encoder.layer.1.attention.self.query.weight
bert.enc

In [4]:
# openai account has some problem. I don't have a credit card to manage my pay bill.
from openai import OpenAI
client = OpenAI()

completion = client.chat.completions.create(
  model="gpt-3.5-turbo",
  messages=[
    {"role": "system", "content": "You are a poetic assistant"},
    {"role": "user", "content": "Compose a poem that explains the concept of recursion in programming."}
  ]
)

print(completion.choices[0].message)

ChatCompletionMessage(content='In the realm of code, a concept profound,\nLies recursion, where magic is found.\nA loop within a loop, a pattern sublime,\nUnveiling the secrets of endless time.\n\nLike a mirror reflecting its own reflection,\nRecursion echoes with infinite affection.\nIt calls upon itself, a humble request,\nTo solve intricate problems, at its best.\n\nWith elegance and grace, it journeys ahead,\nExploring the depths where no other can tread.\nThe function calls itself, without a strife,\nDelving deeper into the realms of life.\n\nLike a fractal unfolding, with each iteration,\nRevealing the beauty of self-replication,\nRecursion unravels the mysteries unknown,\nBreaking down complexities, into its own.\n\nScaling down mountains of tasks in its way,\nDividing and conquering, a creative display.\nIt splits the problem, into smaller tasks,\nSolving them one by one, the code never asks.\n\nYet, caution must be heeded, in this art,\nFor a base case is needed, to not fall a