In [11]:
import random
import torch
import transformers

# Load the T5 paraphrasing checkpoint and its matching tokenizer.
# The checkpoint id is named once and passed to both loaders so the tokenizer
# and the model can never drift apart if the id is changed later.
model_name = 'ramsrigouthamg/t5_paraphraser'
tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
model = transformers.AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Paraphrase categories, keyed by display name. Each value is a prefix string
# of the form 'paraphrase <lowercased name>: '. Insertion order is the order
# in which the categories are listed below.
categories = {
    style: f'paraphrase {style.lower()}: '
    for style in ('Formal', 'Informal', 'Creative', 'Expanded', 'Shortened')
}

# Candidate template questions for each category (three per category).
# Every template contains the literal phrase 'data science'.
prompts = dict(
    Formal=[
        'What is the best course for data science?',
        'Which is the most suitable course to take to learn data science?',
        'What is the optimal course to enroll in to start learning data science?',
    ],
    Informal=[
        'What is the best way to learn data science?',
        'What is the easiest course to take for data science?',
        'What is the most fun way to start learning data science?',
    ],
    Creative=[
        'What is the best way to become a data science ninja?',
        'What is the most efficient way to become a data science wizard?',
        'What is the quickest way to master data science?',
    ],
    Expanded=[
        'What is the best course for data science that I should take to get started?',
        'Which course for data science would be the most helpful for me to enroll in?',
        'What is the top-rated course for data science that I should consider taking?',
    ],
    Shortened=[
        'What is the best course to learn data science?',
        'Which course is the best for data science?',
        'What is the top course for learning data science?',
    ],
)

# Function to generate one paraphrase per category for a given input sentence
def generate_paraphrases(input_sentence):
    """Return one generated text per paraphrase category.

    For every key of the module-level ``categories`` dict, one template is
    picked at random from ``prompts[key]``, each occurrence of the phrase
    'data science' in it is replaced by the stripped ``input_sentence``, and
    the resulting text is run through the module-level ``tokenizer`` and
    ``model`` (beam search with 5 beams, output capped at ``max_length=50``).

    Because the template is chosen with ``random.choice``, the model input can
    differ between calls for the same sentence.

    Args:
        input_sentence: Text to insert into the chosen template; surrounding
            whitespace is removed first.

    Returns:
        A dict mapping each category name to the decoded first generated
        sequence, with special tokens removed.
    """
    # NOTE(review): the whole sentence is substituted for the phrase
    # 'data science' inside a canned question, and the prefix strings stored
    # in ``categories`` are never sent to the model -- confirm this is the
    # intended model input.
    subject = input_sentence.strip()
    results = {}
    for label in categories:
        template = random.choice(prompts[label])
        encoded = tokenizer.encode(
            template.replace('data science', subject),
            return_tensors='pt',
        )
        generated = model.generate(
            encoded,
            max_length=50,
            num_beams=5,
            early_stopping=True,
        )
        # Keep only the first returned sequence.
        results[label] = tokenizer.decode(generated[0], skip_special_tokens=True)
    return results

# Example usage: run one sample question through every category, then print
# the original sentence followed by one labelled line per category.
input_sentence = "Which course should I take to get started in data science?"
paraphrases = generate_paraphrases(input_sentence)
print("Input sentence:", input_sentence)
# The dict is printed in its own iteration order, one category per line.
for category, paraphrase in paraphrases.items():
    print(f"{category} paraphrase:", paraphrase)


Input sentence: Which course should I take to get started in data science?
Formal paraphrase: What is the best course to start learning data science?
Informal paraphrase: What is the easiest course to take for data science?
Creative paraphrase: What is the best way to get started in data science?
Expanded paraphrase: What is the best course for data science?
Shortened paraphrase: Which course should I take to get started in data science?
