### SimCLS: A Simple Framework for Contrastive Learning of Abstractive Summarization - Anuraaga Nath

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
%cd /content/drive/MyDrive/Suvidha_Foundation_Internship

/content/drive/MyDrive/Suvidha_Foundation_Internship


In [3]:
import os
os.environ['KAGGLE_CONFIG_DIR'] = '/content/drive/MyDrive/'

In [4]:
! kaggle datasets download -d gowrishankarp/newspaper-text-summarization-cnn-dailymail

newspaper-text-summarization-cnn-dailymail.zip: Skipping, found more recently modified local copy (use --force to force download)


In [5]:
! unzip newspaper-text-summarization-cnn-dailymail.zip

Archive:  newspaper-text-summarization-cnn-dailymail.zip
  inflating: cnn_dailymail/test.csv  
  inflating: cnn_dailymail/train.csv  
  inflating: cnn_dailymail/validation.csv  


In [6]:
# Reading the dataset

import pandas as pd

train_data= pd.read_csv('cnn_dailymail/train.csv')
validation_data = pd.read_csv('cnn_dailymail/validation.csv')


In [8]:

train_data= train_data.drop('id', axis=1).reset_index(drop=True)
validation_data = validation_data.drop('id', axis=1).reset_index(drop=True)

# Text Preprocessing

In [9]:
# defining contractions
contractions = {
"ain't": "am not",
"aren't": "are not",
"can't": "cannot",
"can't've": "cannot have",
"'cause": "because",
"could've": "could have",
"couldn't": "could not",
"couldn't've": "could not have",
"didn't": "did not",
"doesn't": "does not",
"don't": "do not",
"hadn't": "had not",
"hadn't've": "had not have",
"hasn't": "has not",
"haven't": "have not",
"he'd": "he would",
"he'd've": "he would have",
"he'll": "he will",
"he's": "he is",
"how'd": "how did",
"how'll": "how will",
"how's": "how is",
"i'd": "i would",
"i'll": "i will",
"i'm": "i am",
"i've": "i have",
"isn't": "is not",
"it'd": "it would",
"it'll": "it will",
"it's": "it is",
"let's": "let us",
"ma'am": "madam",
"mayn't": "may not",
"might've": "might have",
"mightn't": "might not",
"must've": "must have",
"mustn't": "must not",
"needn't": "need not",
"oughtn't": "ought not",
"shan't": "shall not",
"sha'n't": "shall not",
"she'd": "she would",
"she'll": "she will",
"she's": "she is",
"should've": "should have",
"shouldn't": "should not",
"that'd": "that would",
"that's": "that is",
"there'd": "there had",
"there's": "there is",
"they'd": "they would",
"they'll": "they will",
"they're": "they are",
"they've": "they have",
"wasn't": "was not",
"we'd": "we would",
"we'll": "we will",
"we're": "we are",
"we've": "we have",
"weren't": "were not",
"what'll": "what will",
"what're": "what are",
"what's": "what is",
"what've": "what have",
"where'd": "where did",
"where's": "where is",
"who'll": "who will",
"who's": "who is",
"won't": "will not",
"wouldn't": "would not",
"you'd": "you would",
"you'll": "you will",
"you're": "you are"
}



In [10]:
# Cleaning data
import re

import nltk

nltk.download('stopwords')
from nltk.corpus import stopwords

def clean_text(text, remove_stopwords=True):
    text = text.lower() # lowercasing text
    text = text.split() # splitting text
    tmp = []
    for word in text:
        if word in contractions:
            tmp.append(contractions[word])
        else:
            tmp.append(word)
    text = ' '.join(tmp)
    # removing special characters
    text = re.sub(r'https?:\/\/.*[\r\n]*', '', text, flags=re.MULTILINE)
    text = re.sub(r'\<a href', ' ', text)
    text = re.sub(r'&amp;', '', text)
    text = re.sub(r'[_"\-;%()|+&=*%.,!?:#$@\[\]/]', ' ', text)
    text = re.sub(r'<br />', ' ', text)
    text = re.sub(r'\'', ' ', text)

    if remove_stopwords:
        text = text.split()
        stops = set(stopwords.words('english'))
        text = [w for w in text if w not in stops]
        text = ' '.join(text)

    return text


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [11]:
# Creating clean text and summarization

clean_summaries1 = []
for summary in train_data.highlights:
    clean_summaries1.append(clean_text(summary, remove_stopwords=False))
print('Cleaning Summaries Complete')

clean_texts1 = []
for text in train_data.article:
    clean_texts1.append(clean_text(text))
print('Cleaning Texts Complete')


del train_data

Cleaning Summaries Complete
Cleaning Texts Complete


In [12]:
# Creating clean text and summarization

clean_summaries2 = []
for summary in validation_data.highlights:
    clean_summaries2.append(clean_text(summary, remove_stopwords=False))
print('Cleaning Summaries Complete')

clean_texts2 = []
for text in validation_data.article:
    clean_texts2.append(clean_text(text))
print('Cleaning Texts Complete')


del validation_data

Cleaning Summaries Complete
Cleaning Texts Complete


In [13]:
# Adding clean texts and summarizations to a new dataframe
import numpy as np
clean_df1 = pd.DataFrame()
clean_df1['text'] = clean_texts1
clean_df1['summary'] = clean_summaries1
clean_df1['summary'].replace('', np.nan, inplace=True)
clean_df1.dropna(axis=0, inplace=True)


del clean_texts1
del clean_summaries1

In [14]:
clean_df1.to_csv('cleaned_data/clean_train.csv')

In [15]:
# Adding clean texts and summarizations to a new dataframe
import numpy as np
clean_df2 = pd.DataFrame()
clean_df2['text'] = clean_texts2
clean_df2['summary'] = clean_summaries2
clean_df2['summary'].replace('', np.nan, inplace=True)
clean_df2.dropna(axis=0, inplace=True)


del clean_texts2
del clean_summaries2

In [16]:
clean_df2.to_csv('cleaned_data/clean_val.csv')

In [17]:
import pandas as pd
train_data = pd.read_csv('cleaned_data/clean_train.csv')
val_data = pd.read_csv('cleaned_data/clean_val.csv')

In [18]:
train_data.head()

Unnamed: 0.1,Unnamed: 0,text,summary
0,0,associated press published 14 11 est 25 octobe...,bishop john folda of north dakota is taking ...
1,1,cnn ralph mata internal affairs lieutenant mia...,criminal complaint cop used his role to help ...
2,2,drunk driver killed young woman head crash che...,craig eccleston todd 27 had drunk at least t...
3,3,cnn breezy sweep pen president vladimir putin ...,nina dos santos says europe must be ready to a...
4,4,fleetwood team still 100 record sky bet league...,fleetwood top of league one after 2 0 win at s...


In [19]:
val_data.head()

Unnamed: 0.1,Unnamed: 0,text,summary
0,0,sally forrest actress dancer graced silver scr...,sally forrest an actress dancer who graced th...
1,1,middle school teacher china inked hundreds ske...,works include pictures of presidential palace ...
2,2,man convicted killing father sister former gir...,iftekhar murtaza 29 was convicted a year ago...
3,3,avid rugby fan prince harry could barely watch...,prince harry in attendance for england s crunc...
4,4,triple radio producer inundated messages prosp...,nick slater s colleagues uploaded a picture to...


# FINE TUNING BART MODEL AND SAVING IT TO ./new_model



In [20]:
!pip install git+https://github.com/huggingface/accelerate
!pip install -U accelerate
!pip install --upgrade transformers

Collecting git+https://github.com/huggingface/accelerate
  Cloning https://github.com/huggingface/accelerate to /tmp/pip-req-build-tcthf_xx
  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/accelerate /tmp/pip-req-build-tcthf_xx
  Resolved https://github.com/huggingface/accelerate to commit 5b3f3b99d6aa1d71eaa3380af87a6ca6126505fd
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting huggingface-hub (from accelerate==0.25.0.dev0)
  Downloading huggingface_hub-0.18.0-py3-none-any.whl (301 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.0/302.0 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
Building wheels for collected packages: accelerate
  Building wheel for accelerate (pyproject.toml) ... [?25l[?25hdone
  Created wheel for accelerate: filename=accelerate-0.25.0.dev0-py3-none-any.whl size=262682 sha

In [21]:
import pandas as pd
import torch
from transformers import BartTokenizer, BartForConditionalGeneration, DataCollatorForSeq2Seq, Seq2SeqTrainingArguments, Seq2SeqTrainer

# Load the pre-trained model and tokenizer
model_name = 'facebook/bart-large'
model = BartForConditionalGeneration.from_pretrained(model_name)
tokenizer = BartTokenizer.from_pretrained(model_name)


Downloading (…)lve/main/config.json:   0%|          | 0.00/1.63k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.02G [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [42]:

# Load your training data (you need to prepare it first)
train_df = pd.read_csv('cleaned_data/clean_train.csv')
eval_df = pd.read_csv('cleaned_data/clean_val.csv')


In [43]:
train_df.shape

(287112, 3)

In [24]:

# Define training arguments
training_args = Seq2SeqTrainingArguments(
    output_dir='./output',             # Directory where model checkpoints and logs will be saved
    num_train_epochs=6,               # Total number of training epochs
    per_device_train_batch_size=2,    # Batch size per device (change based on your GPU memory)
    save_steps=500,                   # Save checkpoint every N updates steps
    save_total_limit=5,               # Maximum number of checkpoints to keep
    logging_dir='./logs',             # Directory for storing logs
    evaluation_strategy="steps",      # Evaluation strategy: "steps" or "epoch"
    eval_steps=500,                   # Evaluate every N updates steps
    learning_rate=1e-4,               # Learning rate
    warmup_steps=500,                 # Number of warmup steps
    weight_decay=0.01,                # Weight decay
    gradient_accumulation_steps=2,    # Number of gradient accumulation steps (useful for large batch sizes)
    report_to="tensorboard",          # Report metrics to TensorBoard
    load_best_model_at_end=True,       # Load the best model at the end of training
    fp16=True
)


In [25]:
# Tokenize your data and create a dataset
def tokenize_data(row):
    inputs = tokenizer(row['text'], max_length=1024, truncation=True, padding="max_length", return_tensors="pt")
    targets = tokenizer(row['summary'], max_length=128, truncation=True, padding="max_length", return_tensors="pt")
    return {
        "input_ids": inputs.input_ids[0].numpy().tolist(),  # Convert to list
        "attention_mask": inputs.attention_mask[0].numpy().tolist(),  # Convert to list
        "labels": targets.input_ids[0].numpy().tolist(),  # Convert to list
    }

In [26]:
!pip install datasets

Collecting datasets
  Downloading datasets-2.14.6-py3-none-any.whl (493 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m493.7/493.7 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.8,>=0.3.0 (from datasets)
  Downloading dill-0.3.7-py3-none-any.whl (115 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.15-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m14.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: dill, multiprocess, datasets
Successfully installed datasets-2.14.6 dill-0.3.7 multiprocess-0.70.15


In [44]:
train_df.iloc[:10000, :].shape

(10000, 3)

In [45]:
train_df = train_df.iloc[:10000, :]

In [52]:
eval_df = eval_df.iloc[:10000, :]

In [47]:
from datasets import Dataset
# Apply the tokenize_data function to the DataFrame
train_df['input_ids'] = train_df.apply(lambda row: tokenize_data(row)["input_ids"], axis=1)
train_df['attention_mask'] = train_df.apply(lambda row: tokenize_data(row)["attention_mask"], axis=1)
train_df['labels'] = train_df.apply(lambda row: tokenize_data(row)["labels"], axis=1)

# Create a custom Dataset using the Hugging Face Dataset class
train_data = Dataset.from_pandas(train_df[['input_ids', 'attention_mask', 'labels']])


In [None]:
from datasets import Dataset
# Apply the tokenize_data function to the DataFrame
eval_df['input_ids'] = eval_df.apply(lambda row: tokenize_data(row)["input_ids"], axis=1)
eval_df['attention_mask'] = eval_df.apply(lambda row: tokenize_data(row)["attention_mask"], axis=1)
eval_df['labels'] = eval_df.apply(lambda row: tokenize_data(row)["labels"], axis=1)

# Create a custom Dataset using the Hugging Face Dataset class
eval_data = Dataset.from_pandas(eval_df[['input_ids', 'attention_mask', 'labels']])


In [46]:
train_df.shape

(10000, 3)

In [48]:
train_data.shape

(10000, 3)

In [None]:
eval_data.shape

In [33]:

# Create a data collator for sequence-to-sequence tasks
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

# Create a Trainer instance
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train_data,
    eval_dataset = eval_data
)


In [34]:
torch.cuda.empty_cache()

# train

In [35]:

# Fine-tune the model
trainer.train()


Step,Training Loss,Validation Loss
500,2.8306,1.343908
1000,0.8626,1.40213
1500,0.3372,1.672092


TrainOutput(global_step=1500, training_loss=1.3434392191569011, metrics={'train_runtime': 2137.9802, 'train_samples_per_second': 2.806, 'train_steps_per_second': 0.702, 'total_flos': 1.3002627612672e+16, 'train_loss': 1.3434392191569011, 'epoch': 6.0})

# saving model


In [41]:

# # Save the trained model
tokenizer.save_pretrained('./fit_model1k')
model.save_pretrained('./fit_model1k')

In [None]:
!pwd

/content/drive/MyDrive/Suvidha_Foundation_Internship


# Testing Model using Test data

In [None]:
import pandas as pd
df = pd.read_csv('cnn_dailymail/test.csv')
df.head()

Unnamed: 0,id,article,highlights
0,92c514c913c0bdfe25341af9fd72b29db544099b,Ever noticed how plane seats appear to be gett...,Experts question if packed out planes are put...
1,2003841c7dc0e7c5b1a248f9cd536d727f27a45a,A drunk teenage boy had to be rescued by secur...,Drunk teenage boy climbed into lion enclosure ...
2,91b7d2311527f5c2b63a65ca98d21d9c92485149,Dougie Freedman is on the verge of agreeing a ...,Nottingham Forest are close to extending Dougi...
3,caabf9cbdf96eb1410295a673e953d304391bfbb,Liverpool target Neto is also wanted by PSG an...,Fiorentina goalkeeper Neto has been linked wit...
4,3da746a7d9afcaa659088c8366ef6347fe6b53ea,Bruce Jenner will break his silence in a two-h...,"Tell-all interview with the reality TV star, 6..."


In [None]:
sample_text=df.article[1]

In [None]:
sample_summary=df.highlights[1]

In [None]:
from transformers import pipeline, AutoTokenizer

# Set the path to the local folder containing the pre-trained model and tokenizer
model_path = './new_model'

# Load the pre-trained model and tokenizer from the local folder
tokenizer = AutoTokenizer.from_pretrained(model_path)

# beam sampling
gen_kwargs = {"length_penalty": 0.7,
               "num_beams": 8,
               "diversity_penalty": 0.5,
               "max_length": 100,
              "num_beam_groups":2,
              "early_stopping": True}

pipe = pipeline("summarization", model=model_path, tokenizer=tokenizer)



In [None]:
sample_text

"A drunk teenage boy had to be rescued by security after jumping into a lions' enclosure at a zoo in western India. Rahul Kumar, 17, clambered over the enclosure fence at the\xa0Kamla Nehru Zoological Park in Ahmedabad, and began running towards the animals, shouting he would 'kill them'. Mr Kumar explained afterwards that he was drunk and 'thought I'd stand a good chance' against the predators. Next level drunk: Intoxicated Rahul Kumar, 17, climbed into the lions' enclosure at a zoo in Ahmedabad and began running towards the animals shouting 'Today I kill a lion!' Mr Kumar had been sitting near the enclosure when he suddenly made a dash for the lions, surprising zoo security. The intoxicated teenager ran towards the lions, shouting: 'Today I kill a lion or a lion kills me!' A zoo spokesman said: 'Guards had earlier spotted him close to the enclosure but had no idea he was planing to enter it. 'Fortunately, there are eight moats to cross before getting to where the lions usually are an

In [None]:
print(pipe(sample_text, **gen_kwargs)[0]['summary_text'])

rapy Rahul Kumar  17  climbed over the enclosure fence at the Kamla Nehru Zoological Park in Ahmedabad  and began running towards the lions   he was drunk at the time and was shouting   today  i kill a lion or a lion kills me  


# Using wikipedia texts as input

In [None]:
text_wiki = '''
Subhas Bose was born into wealth and privilege in a large Bengali family in Orissa during the British Raj. The early recipient of an Anglocentric education, he was sent after college to England to take the Indian Civil Service examination. He succeeded with distinction in the vital first exam but demurred at taking the routine final exam, citing nationalism as a higher calling. Returning to India in 1921, Bose joined the nationalist movement led by Mahatma Gandhi and the Indian National Congress. He followed Jawaharlal Nehru to leadership in a group within the Congress which was less keen on constitutional reform and more open to socialism.[i] Bose became Congress president in 1938. After reelection in 1939, differences arose between him and the Congress leaders, including Gandhi, over the future federation of British India and princely states, but also because discomfort had grown among the Congress leadership over Bose's negotiable attitude to non-violence, and his plans for greater powers for himself.[15] After the large majority of the Congress Working Committee members resigned in protest,[16] Bose resigned as president and was eventually ousted from the party.[17][18]

In April 1941 Bose arrived in Nazi Germany, where the leadership offered unexpected but equivocal sympathy for India's independence.[19][20] German funds were employed to open a Free India Centre in Berlin. A 3,000-strong Free India Legion was recruited from among Indian POWs captured by Erwin Rommel's Afrika Korps to serve under Bose.[21][j] Although peripheral to their main goals, the Germans inconclusively considered a land invasion of India throughout 1941. By the spring of 1942, the German army was mired in Russia and Bose became keen to move to southeast Asia, where Japan had just won quick victories.[23] Adolf Hitler during his only meeting with Bose in late May 1942 offered to arrange a submarine.[24] During this time, Bose became a father; his wife,[6][k] or companion,[25][l] Emilie Schenkl, gave birth to a baby girl.[6][m][19] Identifying strongly with the Axis powers, Bose boarded a German submarine in February 1943.[26][27] Off Madagascar, he was transferred to a Japanese submarine from which he disembarked in Japanese-held Sumatra in May 1943.[26]
'''

In [None]:
from transformers import AutoTokenizer, pipeline

model_path = './fit_model'
tokenizer = AutoTokenizer.from_pretrained(model_path)

pipe = pipeline('summarization', model=model_path, tokenizer=tokenizer)

In [None]:
gen_kwargs = {"length_penalty": 0.2,
               "num_beams": 8,
               "diversity_penalty": 0.5,
               "max_length": 150,
              "num_beam_groups":4}

# gen_kwargs = {"do_sample": True, "top_k": 50, "max_length": 150, "early_stopping":True}

print('Summarized text:', pipe(text_wiki, **gen_kwargs)[0]['summary_text'])

Summarized text: suspect subhas bose was born in a large Bengali family in Orissa during the British Raj   he joined the nationalist movement led by Mahatma Gandhi and the Indian National Congress  


# Evaluating Rouge score and similarity score

In [None]:
import pandas as pd
test_data = pd.read_csv('cnn_dailymail/test.csv')

In [None]:
from transformers import AutoTokenizer, pipeline

model_path = './fit_model'
tokenizer = AutoTokenizer.from_pretrained(model_path)

pipe = pipeline('summarization', model=model_path, tokenizer=tokenizer)

In [None]:
!pip install rouge_score

Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge_score
  Building wheel for rouge_score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24932 sha256=013e1f3980ec6bd6464b2a0b38bbd274dff52bdafd23728f019566214d963da8
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge_score
Installing collected packages: rouge_score
Successfully installed rouge_score-0.1.2


In [None]:
reference_text = test_data['article'][1]
reference_summary = test_data['highlights'][1]

# beam sampling
gen_kwargs = {"length_penalty": 0.8,
               "num_beams": 8,
               "diversity_penalty": 0.5,
               "max_length": 150,
              "num_beam_groups":4}

generated_summary = pipe(reference_text)[0]['summary_text']

In [None]:
from rouge_score import rouge_scorer

scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)

scores = scorer.score(generated_summary, reference_summary)

In [None]:
scores = pd.DataFrame(scores)
scores

Unnamed: 0,rouge1,rouge2,rougeL
0,0.472222,0.2,0.333333
1,0.515152,0.21875,0.363636
2,0.492754,0.208955,0.347826


In [None]:
print(f'Reference text \n {reference_text}')
print(f'Reference summary \n{reference_summary}')
print(f'Generated summary \n{generated_summary}')

Reference text 
 A drunk teenage boy had to be rescued by security after jumping into a lions' enclosure at a zoo in western India. Rahul Kumar, 17, clambered over the enclosure fence at the Kamla Nehru Zoological Park in Ahmedabad, and began running towards the animals, shouting he would 'kill them'. Mr Kumar explained afterwards that he was drunk and 'thought I'd stand a good chance' against the predators. Next level drunk: Intoxicated Rahul Kumar, 17, climbed into the lions' enclosure at a zoo in Ahmedabad and began running towards the animals shouting 'Today I kill a lion!' Mr Kumar had been sitting near the enclosure when he suddenly made a dash for the lions, surprising zoo security. The intoxicated teenager ran towards the lions, shouting: 'Today I kill a lion or a lion kills me!' A zoo spokesman said: 'Guards had earlier spotted him close to the enclosure but had no idea he was planing to enter it. 'Fortunately, there are eight moats to cross before getting to where the lions u

In [None]:
# Cosine Similarity
import torch
from sklearn.feature_extraction.text import CountVectorizer

vectorizer = CountVectorizer()
count_matrix = vectorizer.fit_transform([generated_summary, reference_summary])

count_tensor = torch.tensor(count_matrix.toarray(), dtype=torch.float)

cos_sim = torch.nn.functional.cosine_similarity(count_tensor[0], count_tensor[1], dim=0)
print(f'cosine similarity: {cos_sim}')


cosine similarity: 0.4267045855522156


In [None]:
import platform
import accelerate
import transformers
import datasets
import torch

# Get the versions of the packages
accelerate_version = accelerate.__version__
transformers_version = transformers.__version__
datasets_version = datasets.__version__
python_version = platform.python_version()
pytorch_version = torch.__version__

print(f"accelerate version: {accelerate_version}")
print(f"transformers version: {transformers_version}")
print(f"datasets version: {datasets_version}")
print(f"Python version: {python_version}")
print(f"PyTorch version: {pytorch_version}")



accelerate version: 0.24.0.dev0
transformers version: 4.33.2
datasets version: 2.14.5
Python version: 3.10.12
PyTorch version: 2.0.1+cu118
