In [None]:
pip install transformers

In [None]:
pip install torch

In [None]:
pip install sentencepiece

In [1]:
# Sanity check for the torch install: print the version, then whether CUDA is usable.
import torch

torch_build = torch.__version__
print(torch_build)

# False here means torch sees no usable CUDA device.
cuda_ready = torch.cuda.is_available()
print(cuda_ready)


2.4.0
False


In [None]:
# Smoke test: confirm transformers can build and run a sentiment-analysis pipeline.
from transformers import pipeline

classifier = pipeline(task="sentiment-analysis")
sample_sentence = "I love using transformers!"
result = classifier(sample_sentence)
print(result)

In [None]:
# Second transformers check: classify several sentences with an explicitly named model.
from transformers import pipeline

# Name the checkpoint instead of relying on the task default, so results do not
# change if the library's default changes.
# NOTE(review): this is believed to be the current default checkpoint for
# "sentiment-analysis" -- confirm against the installed transformers version.
classifier = pipeline(
    "sentiment-analysis",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
)

# Example texts
texts = [
    "I love studying Computer Science!",
    "I am not happy with the current situation.",
    "The movie was fantastic!",
    "I'm feeling a bit sad today."
]

# Classify the whole list in one call; the pipeline returns one result dict per text,
# in the same order as the inputs.
for text, result in zip(texts, classifier(texts)):
    print(f"Text: {text}")
    print(f"Sentiment: {result['label']}, Score: {result['score']:.4f}")
    print("-" * 40)


In [None]:
# Trying paraphrasing with a pretrained model from transformers
from transformers import PegasusForConditionalGeneration, PegasusTokenizer

# Pull the paraphrase-tuned Pegasus checkpoint: tokenizer and model come from the same hub id.
model_name = "tuner007/pegasus_paraphrase"
tokenizer = PegasusTokenizer.from_pretrained(model_name)
model = PegasusForConditionalGeneration.from_pretrained(model_name)

def paraphrase(text, max_length=50, num_beams=10, temperature=1.5):
    """Paraphrase ``text`` with the module-level Pegasus ``model`` and ``tokenizer``.

    Args:
        text: Input handed to the tokenizer (truncated and padded to the longest item).
        max_length: Forwarded to ``model.generate`` as ``max_length``.
        num_beams: Forwarded to ``model.generate`` as ``num_beams``.
        temperature: Forwarded to ``model.generate`` as ``temperature``.

    Returns:
        The first generated sequence, decoded with special tokens skipped.
    """
    encoded = tokenizer(text, return_tensors="pt", padding="longest", truncation=True)

    # Sampling is enabled, so repeated calls on the same text may give different output.
    generation_options = {
        "max_length": max_length,
        "num_beams": num_beams,
        "temperature": temperature,
        "early_stopping": True,
        "do_sample": True,
    }
    candidates = model.generate(**encoded, **generation_options)

    return tokenizer.decode(candidates[0], skip_special_tokens=True)

# Main loop: read sentences from the user and print a paraphrase for each one.
while True:
    # Strip surrounding whitespace so "exit " still quits and a whitespace-only line
    # is treated as empty.
    user_input = input("Enter a sentence to paraphrase (or type 'exit' to quit): ").strip()

    # Exit condition (case-insensitive)
    if user_input.lower() == 'exit':
        break

    # Nothing to paraphrase: ask again rather than sending an empty string to the model.
    if not user_input:
        continue

    # Paraphrase the user input
    paraphrased_output = paraphrase(user_input, max_length=60, num_beams=10, temperature=1.5)

    # Display the original and paraphrased sentences
    print(f"Original: {user_input}")
    print(f"Paraphrased: {paraphrased_output}\n")


In [None]:
pip install transformers datasets torch

In [None]:
pip install accelerate -U

In [None]:
# Paraphrasing: fine-tune the model on sentence pairs from the CSV file
import torch
from transformers import PegasusForConditionalGeneration, PegasusTokenizer, Trainer, TrainingArguments
from datasets import load_dataset, Dataset
import pandas as pd

# Load and prepare the dataset
df = pd.read_csv("Dataset.csv")

# Fail early with a clear message if the columns used for tokenization are absent.
required_columns = ['original_sentence', 'paraphrased_sentence']
missing_columns = [column for column in required_columns if column not in df.columns]
if missing_columns:
    raise ValueError(f"Dataset.csv is missing required column(s): {missing_columns}")

# Rows with an empty cell in either text column have no usable sentence pair, so drop
# them before tokenization. Resetting the index keeps it contiguous for the conversion.
df = df.dropna(subset=required_columns).reset_index(drop=True)
dataset = Dataset.from_pandas(df)

# Tokenize the dataset
model_name = "tuner007/pegasus_paraphrase"
tokenizer = PegasusTokenizer.from_pretrained(model_name)

def tokenize_function(examples, max_length=60):
    """Tokenize a batch of sentence pairs into fixed-length seq2seq training features.

    Args:
        examples: Batch mapping with 'original_sentence' and 'paraphrased_sentence'
            lists, as passed by ``dataset.map(..., batched=True)``.
        max_length: Length every sequence is truncated/padded to. 60 matches the
            generation length used elsewhere in this notebook.
            NOTE(review): confirm it fits the model's maximum position embeddings.

    Returns:
        Dict with 'input_ids', 'attention_mask' and 'labels', each a list of
        per-example lists of length ``max_length``.
    """
    def _encode(sentences):
        # Pad to a fixed length so every row has the same shape regardless of which
        # map batch produced it; per-batch "longest" padding gives ragged rows.
        return tokenizer(
            sentences,
            truncation=True,
            padding="max_length",
            max_length=max_length,
        )

    # Plain Python lists are returned on purpose: squeezing tensors would collapse the
    # batch axis whenever a map batch holds a single row.
    source = _encode(examples['original_sentence'])
    target = _encode(examples['paraphrased_sentence'])

    # Mask padding in the labels with -100 so the loss ignores those positions.
    pad_id = tokenizer.pad_token_id
    labels = [
        [-100 if token_id == pad_id else token_id for token_id in row]
        for row in target['input_ids']
    ]

    return {
        'input_ids': source['input_ids'],
        'attention_mask': source['attention_mask'],
        'labels': labels,
    }

# Apply tokenization to the dataset
tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Hold out part of the data for evaluation. Evaluating on the rows used for training
# would make the per-epoch eval loss (and so load_best_model_at_end) track training
# fit rather than generalisation.
if len(tokenized_datasets) > 1:
    split_datasets = tokenized_datasets.train_test_split(test_size=0.1, seed=42)
    train_split = split_datasets["train"]
    eval_split = split_datasets["test"]
else:
    # Too few rows to split; fall back to evaluating on the training rows.
    train_split = eval_split = tokenized_datasets

# training arguments
training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",  # Use "epoch" for both strategies
    save_strategy="epoch",  # Match this with eval_strategy
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    save_total_limit=2,
    load_best_model_at_end=True,
    metric_for_best_model="loss"
)

# Initialize the Trainer
model = PegasusForConditionalGeneration.from_pretrained(model_name)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_split,
    eval_dataset=eval_split,
)

# Train the model
trainer.train()

# Save the fine-tuned model with safe serialization disabled
# NOTE(review): the reason safetensors output is turned off is not recorded here -- confirm.
model.save_pretrained("./fine_tuned_pegasus_v2", safe_serialization=False)
tokenizer.save_pretrained("./fine_tuned_pegasus_v2")

# Evaluation setup: reload the fine-tuned model and tokenizer from their save directory.
fine_tuned_dir = "./fine_tuned_pegasus_v2"
fine_tuned_model = PegasusForConditionalGeneration.from_pretrained(fine_tuned_dir)
fine_tuned_tokenizer = PegasusTokenizer.from_pretrained(fine_tuned_dir)

def paraphrase(text, model=fine_tuned_model, tokenizer=fine_tuned_tokenizer, max_length=50, num_beams=10, temperature=1.5):
    """Generate a paraphrase of ``text`` with the given model and tokenizer.

    Args:
        text: Input handed to ``tokenizer``.
        model: Model whose ``generate`` method is called; defaults to the fine-tuned model.
        tokenizer: Tokenizer used to encode and decode; defaults to the fine-tuned tokenizer.
        max_length: Passed through to ``model.generate``.
        num_beams: Passed through to ``model.generate``.
        temperature: Passed through to ``model.generate``.

    Returns:
        The first generated sequence decoded to text, with special tokens skipped.
    """
    batch = tokenizer(text, truncation=True, padding="longest", return_tensors="pt")
    generated = model.generate(
        max_length=max_length,
        num_beams=num_beams,
        temperature=temperature,
        early_stopping=True,
        do_sample=True,
        **batch,
    )
    first_sequence = generated[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)

# Interactive session: keep paraphrasing user input until the user types 'exit'.
prompt = "Enter a sentence to paraphrase (or 'exit' to quit): "
user_input = input(prompt)
while user_input.lower() != 'exit':
    paraphrased_output = paraphrase(user_input)
    print("Paraphrased Output:", paraphrased_output)
    user_input = input(prompt)


In [None]:
# using the trained model from above
import torch
from transformers import PegasusForConditionalGeneration, PegasusTokenizer

# Restore the fine-tuned model first, then its tokenizer, from the same saved directory.
fine_tuned_model, fine_tuned_tokenizer = (
    loader.from_pretrained("./fine_tuned_pegasus_v2")
    for loader in (PegasusForConditionalGeneration, PegasusTokenizer)
)

#paraphrasing function
def paraphrase(text, model=fine_tuned_model, tokenizer=fine_tuned_tokenizer, max_length=50, num_beams=10, temperature=1.5):
    """Return a paraphrase of ``text`` produced by ``model``.

    ``text`` is tokenized with ``tokenizer``. ``model.generate`` is then called with
    ``max_length``, ``num_beams`` and ``temperature``, with early stopping and
    sampling switched on. The first returned sequence is decoded with special
    tokens skipped and returned.
    """
    tokens = tokenizer(text, truncation=True, padding="longest", return_tensors="pt")
    sampling_config = dict(
        max_length=max_length,
        num_beams=num_beams,
        temperature=temperature,
        early_stopping=True,
        do_sample=True,
    )
    return tokenizer.decode(
        model.generate(**tokens, **sampling_config)[0],
        skip_special_tokens=True,
    )

# Prompt repeatedly; each sentence is paraphrased and printed until 'exit' is entered.
while (sentence := input("Enter a sentence to paraphrase (or 'exit' to quit): ")).lower() != 'exit':
    paraphrased_output = paraphrase(sentence)
    print("Paraphrased sentence:", paraphrased_output)
