In [None]:
import pandas as pd
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel, TextDataset, DataCollatorForLanguageModeling, Trainer, TrainingArguments

# Fine-tune GPT-2 on (review, response) pairs and generate a reply for a sample review.

# Load the review-response dataset. Rows missing either field are dropped
# (the tokenizer only accepts strings) and the rows are shuffled with a fixed
# seed so the held-out evaluation slice below is not order-dependent.
df = (
    pd.read_csv('reviews.csv')
    .dropna(subset=['review', 'response'])
    .sample(frac=1, random_state=42)
)
reviews = df['review'].astype(str).tolist()
responses = df['response'].astype(str).tolist()

# Initialize the GPT-2 tokenizer and language model
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')

# GPT-2 ships without a padding token, so padded batches would fail.
# Register one and grow the embedding matrix so its id is valid.
tokenizer.add_special_tokens({'pad_token': '[PAD]'})
model.resize_token_embeddings(len(tokenizer))

# A causal LM learns "review -> response" from ONE sequence, so each pair is
# joined into a single text; EOS marks where the response ends.
RESPONSE_MARKER = '\nResponse:'
texts = [
    f'Review: {review}{RESPONSE_MARKER} {response}{tokenizer.eos_token}'
    for review, response in zip(reviews, responses)
]

# Tokenize without padding: the collator pads each batch dynamically and
# derives the labels from input_ids (padding positions are ignored by the loss).
inputs = tokenizer(texts, truncation=True, max_length=512)
examples = [
    {'input_ids': ids, 'attention_mask': mask}
    for ids, mask in zip(inputs['input_ids'], inputs['attention_mask'])
]

# evaluation_strategy='epoch' needs an evaluation set: hold out ~10% (at least one example).
if len(examples) < 2:
    raise ValueError('Need at least two review/response pairs to build train and eval splits.')
eval_size = max(1, len(examples) // 10)
eval_dataset = examples[:eval_size]
dataset = examples[eval_size:]

# Create a data collator for causal (non-masked) language modeling
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Define the training arguments.
# load_best_model_at_end requires the save strategy to match the evaluation strategy.
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=32,
    learning_rate=5e-5,
    evaluation_strategy='epoch',
    save_strategy='epoch',
    save_total_limit=5,
    load_best_model_at_end=True,
    metric_for_best_model='eval_loss',
    greater_is_better=False
)

# Define the trainer object
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    eval_dataset=eval_dataset,
    data_collator=data_collator
)

# Fine-tune the language model
trainer.train()

# Save the fine-tuned model, plus the tokenizer that carries the added pad token
trainer.save_model('./fine-tuned-gpt3')
tokenizer.save_pretrained('./fine-tuned-gpt3')

# Generate email responses using the fine-tuned model
model = GPT2LMHeadModel.from_pretrained('./fine-tuned-gpt3')
model.eval()

review = "I loved the product. It exceeded my expectations."
# The prompt must use the same layout as the training texts.
encoded_prompt = tokenizer(f'Review: {review}{RESPONSE_MARKER}', return_tensors='pt')
input_ids = encoded_prompt['input_ids']
with torch.no_grad():
    output = model.generate(
        input_ids=input_ids,
        attention_mask=encoded_prompt['attention_mask'],
        max_length=512,
        num_beams=5,
        no_repeat_ngram_size=2,
        early_stopping=True,
        pad_token_id=tokenizer.pad_token_id
    )
# generate() returns prompt + continuation; decode only the newly generated tokens.
response = tokenizer.decode(output[0][input_ids.shape[1]:], skip_special_tokens=True).strip()

print(response)

In [None]:
import pandas as pd
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel, TextDataset, DataCollatorForLanguageModeling, Trainer, TrainingArguments


In [None]:
# Read the sentiment/response pairs from the Excel workbook and pull each
# column out as a plain Python list.
df = pd.read_excel('result.xlsx')
sentiments, responses = df['sentiment'].tolist(), df['response'].tolist()

In [None]:
# Initialize the GPT-2 tokenizer and language model from the pretrained
# 'gpt2' checkpoint (these are GPT-2 classes, not GPT-3).
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')

In [None]:
# Encode the sentiments and responses using the tokenizer.
# GPT-2 has no padding token, so register one; add_special_tokens also sets
# tokenizer.pad_token, so no separate assignment is needed.
tokenizer.add_special_tokens({'pad_token': '[PAD]'})
# The new token's id lies beyond the pretrained embedding table; resize the
# model so feeding padded batches to it does not index out of range.
model.resize_token_embeddings(len(tokenizer))

inputs = tokenizer(sentiments, padding=True, truncation=True, max_length=512, return_tensors='pt')
labels = tokenizer(responses, padding=True, truncation=True, max_length=512, return_tensors='pt')

In [None]:
# Display the tokenizer output built for the sentiments in the previous cell.
inputs

In [None]:
# Decode the encoded batches back to text and write one example per line.
# The two files are read back line-by-line as parallel lists, so each example
# must occupy exactly one line, and the encoding must match the UTF-8 reader.

def _to_single_line(text):
    """Replace line breaks with spaces so one example never spans several lines."""
    return text.replace('\r', ' ').replace('\n', ' ')

with open('inputs.txt', 'w', encoding='utf-8') as f:
    for input_ids in inputs['input_ids'].tolist():
        input_text = tokenizer.decode(input_ids, skip_special_tokens=True)
        f.write(_to_single_line(input_text) + '\n')

with open('labels.txt', 'w', encoding='utf-8') as f:
    for label_ids in labels['input_ids'].tolist():
        label_text = tokenizer.decode(label_ids, skip_special_tokens=True)
        f.write(_to_single_line(label_text) + '\n')


In [None]:
class LabeledTextDataset(torch.utils.data.Dataset):
    """Dataset pairing line ``i`` of an input file with line ``i`` of a label file.

    Args:
        tokenizer: Callable tokenizer returning ``input_ids`` and
            ``attention_mask`` tensors of shape ``(1, max_length)``.
        input_file: Path to a UTF-8 text file with one input per line.
        label_file: Path to a UTF-8 text file with one label per line.
        max_length: Length every sequence is padded / truncated to.

    Raises:
        ValueError: If the two files do not contain the same number of lines.
    """

    def __init__(self, tokenizer, input_file, label_file, max_length):
        self.tokenizer = tokenizer
        self.max_length = max_length

        # Read the input and label files (surrounding whitespace is stripped)
        with open(input_file, 'r', encoding='utf-8') as f:
            self.inputs = [line.strip() for line in f]
        with open(label_file, 'r', encoding='utf-8') as f:
            self.labels = [line.strip() for line in f]

        # The files are consumed in parallel; a mismatch would silently
        # mis-pair examples or fail later with an IndexError.
        if len(self.inputs) != len(self.labels):
            raise ValueError(
                f'{input_file} has {len(self.inputs)} lines but '
                f'{label_file} has {len(self.labels)}; they must match.'
            )

    def __len__(self):
        return len(self.inputs)

    def _encode(self, text):
        """Tokenize one string, padded/truncated to ``max_length``."""
        return self.tokenizer(
            text,
            padding='max_length',
            truncation=True,
            max_length=self.max_length,
            return_tensors='pt'
        )

    def __getitem__(self, idx):
        """Return ``input_ids``, ``attention_mask`` and ``labels`` for example ``idx``.

        Each value is a 1-D tensor of length ``max_length``. Padding positions
        in ``labels`` are set to -100 so the loss ignores them.
        """
        input_tokens = self._encode(self.inputs[idx])
        label_tokens = self._encode(self.labels[idx])

        # squeeze(0) drops only the batch axis (a bare squeeze() would also
        # collapse the sequence axis when max_length == 1).
        label_ids = label_tokens['input_ids'].squeeze(0).clone()
        label_ids[label_tokens['attention_mask'].squeeze(0) == 0] = -100

        return {
            'input_ids': input_tokens['input_ids'].squeeze(0),
            'attention_mask': input_tokens['attention_mask'].squeeze(0),
            'labels': label_ids,
        }


In [None]:
# Build the paired dataset from the two text files written earlier
dataset = LabeledTextDataset(
    tokenizer=tokenizer,
    input_file='inputs.txt',
    label_file='labels.txt',
    max_length=512,
)

# Collator for causal (non-masked) language modeling.
# NOTE(review): with mlm=False this collator presumably rebuilds `labels` from
# `input_ids`, which would override the labels the dataset returns — confirm.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)


In [None]:
# Display the collator created above to inspect its configuration.
data_collator

In [None]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel, TextDataset, DataCollatorForLanguageModeling, Trainer, TrainingArguments

# Fine-tune GPT-2 on the contents of inputs.txt.

# Load the pre-trained GPT-2 tokenizer
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

# Load the pre-trained GPT-2 model
model = GPT2LMHeadModel.from_pretrained('gpt2')

# Load the text dataset
dataset = TextDataset(
    tokenizer=tokenizer,
    file_path='inputs.txt',
    block_size=512
)

# evaluation_strategy='epoch' needs an evaluation set, so hold out ~10% of the
# examples (at least one) with a fixed seed for reproducibility.
if len(dataset) < 2:
    raise ValueError(
        f'inputs.txt produced only {len(dataset)} example(s) of 512 tokens; '
        'at least 2 are needed for a train/eval split.'
    )
eval_size = max(1, len(dataset) // 10)
train_dataset, eval_dataset = torch.utils.data.random_split(
    dataset,
    [len(dataset) - eval_size, eval_size],
    generator=torch.Generator().manual_seed(42)
)

# Create a data collator for causal language modeling.
# (The collator has no `num_samples` option; passing one raises TypeError.)
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Define the training arguments.
# load_best_model_at_end requires the save and evaluation strategies to match.
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=32,
    learning_rate=5e-5,
    evaluation_strategy='epoch',
    save_strategy='epoch',
    save_total_limit=5,
    load_best_model_at_end=True,
    metric_for_best_model='eval_loss',
    greater_is_better=False
)

# Define the trainer object
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    data_collator=data_collator
)

# Fine-tune the language model
trainer.train()

# Save the fine-tuned model (save_model writes a directory, despite the .pt suffix)
trainer.save_model('./best_model3.pt')


In [None]:
# Re-train with a smaller batch size, save the model, and generate a response
# for a sample sentiment. Uses `model`, `tokenizer`, `dataset` and
# `data_collator` from the preceding cells.

# Define the training arguments.
# load_best_model_at_end requires the save and evaluation strategies to match.
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=16,
    learning_rate=5e-5,
    evaluation_strategy='epoch',
    save_strategy='epoch',
    save_total_limit=5,
    load_best_model_at_end=True,
    metric_for_best_model='eval_loss',
    greater_is_better=False
)

# Per-epoch evaluation needs an evaluation set: hold out ~10% of the examples
# (at least one) with a fixed seed.
if len(dataset) < 2:
    raise ValueError(
        f'The dataset holds only {len(dataset)} example(s); '
        'at least 2 are needed for a train/eval split.'
    )
eval_size = max(1, len(dataset) // 10)
train_dataset, eval_dataset = torch.utils.data.random_split(
    dataset,
    [len(dataset) - eval_size, eval_size],
    generator=torch.Generator().manual_seed(42)
)

# Define the trainer object
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    data_collator=data_collator
)

# Fine-tune the language model
trainer.train()

# Save the fine-tuned model
trainer.save_model('./fine-tuned-gpt3')

# Generate email responses using the fine-tuned model
model = GPT2LMHeadModel.from_pretrained('./fine-tuned-gpt3')
model.eval()

sentiment = "Positive"
input_ids = tokenizer.encode(sentiment, return_tensors='pt')
with torch.no_grad():
    output = model.generate(
        input_ids=input_ids,
        max_length=512,
        num_beams=5,
        no_repeat_ngram_size=2,
        early_stopping=True,
        # GPT-2 defines no pad token; EOS is always a valid id for padding finished beams.
        pad_token_id=tokenizer.eos_token_id
    )
response = tokenizer.decode(output[0], skip_special_tokens=True)

# Show the generated text (it was previously computed but never displayed)
print(response)



In [None]:
pip install rouge_score

In [None]:
import nltk
from nltk.translate.bleu_score import sentence_bleu
from rouge_score import rouge_scorer
from transformers import pipeline, set_seed

# def compute_perplexity(model, text):
#     tokenizer = model.tokenizer
#     input_ids = tokenizer.encode(text, return_tensors='pt')
#     loss = model(input_ids, labels=input_ids)[0]
#     perplexity = loss.exp().item()
#     return perplexity

def compute_bleu(reference, generated):
    """Return the sentence-level BLEU score of `generated` against `reference`.

    Both strings are split on whitespace. The single reference is wrapped in a
    list, and 1- to 4-gram precisions are weighted equally.
    """
    uniform_weights = (0.25,) * 4
    return sentence_bleu([reference.split()], generated.split(), uniform_weights)

def compute_rouge(reference, generated):
    """Return the (ROUGE-1, ROUGE-2, ROUGE-L) F-measures of `generated` vs `reference`.

    Stemming is enabled; `reference` is scored as the target and `generated`
    as the prediction.
    """
    metric_names = ('rouge1', 'rouge2', 'rougeL')
    scorer = rouge_scorer.RougeScorer(list(metric_names), use_stemmer=True)
    results = scorer.score(reference, generated)
    return tuple(results[name].fmeasure for name in metric_names)

# Example usage: continue a prompt with GPT-2 and score the continuation.
text = "The quick brown fox jumps over the lazy dog"
# Generation may sample; fix the seed so the scores below are reproducible.
set_seed(42)
model = pipeline('text-generation', model='gpt2')

generated_text = model(text, max_length=50)[0]['generated_text']

# Perplexity of the generated text under the same GPT-2 weights: exp of the
# mean next-token loss. Computed inline via the pipeline's own tokenizer and
# model, because no compute_perplexity function is defined in this notebook.
encoded = model.tokenizer(generated_text, return_tensors='pt').to(model.model.device)
lm_loss = model.model(**encoded, labels=encoded['input_ids'])[0]
perplexity = lm_loss.detach().exp().item()

bleu_score = compute_bleu(text, generated_text)
rouge1, rouge2, rougeL = compute_rouge(text, generated_text)

print("Perplexity: ", perplexity)
print("BLEU score: ", bleu_score)
print("ROUGE-1: ", rouge1)
print("ROUGE-2: ", rouge2)
print("ROUGE-L: ", rougeL)



In [None]:
# Display the text returned by the generation pipeline.
generated_text

In [None]:
import nltk
from nltk.translate.bleu_score import sentence_bleu
from rouge_score import rouge_scorer
from transformers import pipeline, set_seed

# def compute_perplexity(model, text):
#     tokenizer = model.tokenizer
#     input_ids = tokenizer.encode(text, return_tensors='pt')
#     loss = model(input_ids, labels=input_ids)[0]
#     perplexity = loss.exp().item()
#     return perplexity

def compute_bleu(reference, generated):
    """Return the sentence-level BLEU score of `generated` against `reference`.

    Both strings are split on whitespace. The single reference is wrapped in a
    list, and 1- to 4-gram precisions are weighted equally.
    """
    uniform_weights = (0.25,) * 4
    return sentence_bleu([reference.split()], generated.split(), uniform_weights)

def compute_rouge(reference, generated):
    """Return the (ROUGE-1, ROUGE-2, ROUGE-L) F-measures of `generated` vs `reference`.

    Stemming is enabled; `reference` is scored as the target and `generated`
    as the prediction.
    """
    metric_names = ('rouge1', 'rouge2', 'rougeL')
    scorer = rouge_scorer.RougeScorer(list(metric_names), use_stemmer=True)
    results = scorer.score(reference, generated)
    return tuple(results[name].fmeasure for name in metric_names)

# Demo: let GPT-2 continue a prompt, then score the continuation against the prompt.
text = "The quick brown fox jumps over the lazy dog"
model = pipeline(task='text-generation', model='gpt2')

# The pipeline returns a list of dicts; keep the text of the first candidate.
generated_text = model(text, max_length=50)[0]['generated_text']

# Perplexity is not computed in this cell (no compute_perplexity is defined).
bleu_score = compute_bleu(reference=text, generated=generated_text)
rouge1, rouge2, rougeL = compute_rouge(reference=text, generated=generated_text)

print("BLEU score: ", bleu_score)
print("ROUGE-1: ", rouge1)
print("ROUGE-2: ", rouge2)
print("ROUGE-L: ", rougeL)



In [None]:
def compute_bleu(reference, generated):
    """Return the sentence-level BLEU score of `generated` against `reference`.

    Both strings are split on whitespace. The single reference is wrapped in a
    list, and 1- to 4-gram precisions are weighted equally.
    """
    uniform_weights = (0.25,) * 4
    return sentence_bleu([reference.split()], generated.split(), uniform_weights)

def compute_rouge(reference, generated):
    """Return the (ROUGE-1, ROUGE-2, ROUGE-L) F-measures of `generated` vs `reference`.

    Stemming is enabled; `reference` is scored as the target and `generated`
    as the prediction.
    """
    metric_names = ('rouge1', 'rouge2', 'rougeL')
    scorer = rouge_scorer.RougeScorer(list(metric_names), use_stemmer=True)
    results = scorer.score(reference, generated)
    return tuple(results[name].fmeasure for name in metric_names)

# Score a hand-written reply against a customer review using BLEU and ROUGE.
text = "Food was good and service was excellent Thank you for such nice food"
generated_text = "Are you planning a special event or celebration? Our catering service offers a wide range of delicious dishes that are perfect for any occasion. From finger foods to full-course meals, we have everything you need to make your event a success. Contact us now and let us help you create a memorable culinary experience! Best regards, Food town"

# The review is the reference; the reply is the candidate being scored.
bleu_score = compute_bleu(reference=text, generated=generated_text)
rouge1, rouge2, rougeL = compute_rouge(reference=text, generated=generated_text)

print("BLEU score: ", bleu_score)
print("ROUGE-1: ", rouge1)
print("ROUGE-2: ", rouge2)
print("ROUGE-L: ", rougeL)

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelWithLMHead

# Compute GPT-2 perplexity for a review and for a candidate response.

# Initialize the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained('gpt2')
model = AutoModelWithLMHead.from_pretrained('gpt2')
model.eval()  # disable dropout so the scores are deterministic

# Define the review and generated response
review = 'This product is terrible, it broke after just one use.'
response = 'We apologize for the inconvenience and will provide a replacement.'

# Tokenize the review and response
review_tokens = tokenizer(review, return_tensors='pt')['input_ids']
response_tokens = tokenizer(response, return_tensors='pt')['input_ids']

# Generate the perplexity scores.
# Perplexity is exp(mean next-token cross-entropy). Passing labels makes the
# model return that loss as its first output; without labels the first output
# is the raw logits, and exp(logits).mean() is not a perplexity.
with torch.no_grad():
    review_score = torch.exp(model(review_tokens, labels=review_tokens)[0]).item()
    response_score = torch.exp(model(response_tokens, labels=response_tokens)[0]).item()

# Print the results
print(f'Review perplexity: {review_score:.8f}')
print(f'Response perplexity: {response_score:.2f}')


In [None]:
pip install scikit-learn

In [None]:
pip install torch

In [None]:
from transformers import AutoTokenizer, AutoModel
import torch
from sklearn.metrics.pairwise import cosine_similarity

review = 'This product is terrible, it broke after just one use.'
response = 'We apologize for the inconvenience and will provide a replacement.'

# Load the BERT model and tokenizer
model_name = 'bert-base-uncased'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
model.eval()  # disable dropout so the embeddings are deterministic


# Define a function to calculate the similarity score between the review and response
def calculate_similarity_score(response, review):
    """Return the cosine similarity between BERT embeddings of `response` and `review`.

    The two texts are encoded as one padded batch (row 0 = response,
    row 1 = review). Each sentence embedding is the mean of that row's token
    vectors, with padding positions excluded via the attention mask.

    Args:
        response: The reply text.
        review: The review text.

    Returns:
        Cosine similarity of the two sentence embeddings.
    """
    # Tokenize the review and response
    inputs = tokenizer([response, review], padding=True, truncation=True, max_length=512, return_tensors='pt')
    # Pass the inputs through the model to get the output embeddings
    with torch.no_grad():
        outputs = model(**inputs)
    token_vectors = outputs.last_hidden_state  # (2, seq_len, hidden)

    # Masked mean pooling: padded positions must not contribute to the average.
    mask = inputs['attention_mask'].unsqueeze(-1).to(token_vectors.dtype)
    sentence_vectors = (token_vectors * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1)

    # Row 0 is the response, row 1 the review. (Indexing [0][-1] and [0][-2]
    # would compare two tokens of the response and never use the review.)
    response_embedding, review_embedding = sentence_vectors.numpy()

    # Calculate the cosine similarity between the embeddings
    similarity_score = cosine_similarity([response_embedding], [review_embedding])[0][0]
    return similarity_score


r = calculate_similarity_score(response, review)


In [None]:
# Display the similarity score computed above.
r

In [None]:
pip install tenserflow_hub

In [None]:
import tensorflow_hub as hub
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Fetch the Universal Sentence Encoder (v4) from TensorFlow Hub
use_model = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")


# Similarity between a response and a review, using the encoder loaded above
def calculate_similarity_score(response, review):
    """Return the cosine similarity of the encoder's vectors for `response` and `review`.

    Each text is encoded on its own as a one-element batch, and the first
    (only) row of each result is compared.
    """
    response_vec = use_model([response])[0]
    review_vec = use_model([review])[0]
    return cosine_similarity([response_vec], [review_vec])[0][0]
