In [2]:
!pip install transformers torch nltk rouge
!pip install rouge-score


Collecting rouge
  Downloading rouge-1.0.1-py3-none-any.whl.metadata (4.1 kB)
Downloading rouge-1.0.1-py3-none-any.whl (13 kB)
Installing collected packages: rouge
Successfully installed rouge-1.0.1
Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge-score
  Building wheel for rouge-score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24935 sha256=2b5493ee9ac380c06cb0bc2343f27d9546104a4d9759fa91ffb4cc54f1c34b17
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge-score
Installing collected packages: rouge-score
Successfully installed rouge-score-0.1.2


In [11]:
import pandas as pd
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import torch
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from rouge import Rouge

# Load the dataset
data_path = '/content/amazon.csv.zip'
df = pd.read_csv(data_path)

# Initialize the GPT-2 model and tokenizer
model_name = "gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# Function to generate text with controlled sampling
def generate_text(prompt, max_length=150):
    inputs = tokenizer.encode(prompt, return_tensors="pt").to(device)
    input_length = inputs.shape[1]
    max_input_length = 1024

    # If input is too long, truncate
    if input_length > max_input_length:
        inputs = inputs[:, -max_input_length:]

    # Create an attention mask
    attention_mask = torch.ones(inputs.shape, device=device)

    # Generate text with controlled randomness (temperature, top_k, top_p)
    outputs = model.generate(
        inputs,
        attention_mask=attention_mask,
        max_new_tokens=max_length,
        num_return_sequences=1,
        do_sample=True,
        temperature=0.7,  # Experiment with higher temperature
        top_k=50,  # Adjust for more diversity
        top_p=0.95,  # Nucleus sampling
        pad_token_id=tokenizer.eos_token_id
    )

    # Decode the generated output
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Reward function using BLEU and ROUGE for automatic scoring
def reward_function(generated_text, reference_text):
    smoothing_function = SmoothingFunction()

    # Calculate BLEU score
    bleu_score = sentence_bleu([reference_text.split()], generated_text.split(),
                               smoothing_function=smoothing_function.method1)

    # Calculate ROUGE score
    rouge = Rouge()
    rouge_scores = rouge.get_scores(generated_text, reference_text)
    rouge_l_score = rouge_scores[0]['rouge-l']['f']  # Use F1 score of ROUGE-L

    # Combine BLEU and ROUGE-L scores
    combined_score = 0.5 * bleu_score + 0.5 * rouge_l_score
    return bleu_score, rouge_l_score, combined_score

# Iterative feedback loop to improve text generation
def iterative_feedback(product_name, description, reference_text, iterations=5):
    results = []  # List to store results for the comparison table

    for i in range(iterations):
        # Revised prompt
        prompt = (
            "Example of a good product description:\n"
            "'The XYZ USB Cable offers fast charging and data transfer capabilities, compatible with various devices. Its durable design ensures longevity, while customer support is always available.'\n\n"
            f"Generate a concise product description for:\n"
            f"Product Name: {product_name}\n"
            f"Key Features: {description}\n"
            "Focus on compatibility, charging speed, durability, security, warranty, and a catchy ending."
        )

        # Generate the text
        generated_text = generate_text(prompt)

        # Display the generated text
        print(f"\nIteration {i + 1}:")
        print("Generated Text:")
        print("--------------------------------------------------")
        print(generated_text.strip())

        # Compute automatic reward score
        bleu_score, rouge_l_score, reward = reward_function(generated_text, reference_text)
        print(f"Automatic Reward (BLEU + ROUGE-L): {reward:.4f}")

        # Get manual feedback
        manual_score = float(input("Rate the generated text on a scale of 1 to 10 (higher is better): "))
        print(f"Manual Feedback Score: {manual_score:.4f}\n")

        # Store results in the list
        results.append({
            'Iteration': i + 1,
            'BLEU Score': bleu_score,
            'ROUGE-L Score': rouge_l_score,
            'Manual Score': manual_score,
            'Combined Reward': reward
        })

        # Combine manual and automatic reward
        combined_reward = (0.7 * manual_score / 10) + (0.3 * reward)
        print(f"Combined Reward (Manual + Automatic): {combined_reward:.4f}\n")

        # Placeholder for adjusting model based on feedback (in actual use, you would fine-tune the model)
        print("Adjusting model based on feedback (placeholder)\n")
        print("-" * 50)

    # Create a DataFrame from results and display it
    results_df = pd.DataFrame(results)
    print("\nComparison Table of Scores:")
    print(results_df)

# Example usage: Processing all products
for index, row in df.iterrows():
    product_name = row['product_name']
    description = row['about_product']
    reference_text = row['review_content']  # Using review as a pseudo-reference for evaluation

    print(f"\nProcessing product: {product_name}\n")
    iterative_feedback(product_name, description, reference_text)

    # Uncomment this line to process all products
    break  # Remove this break to process all rows in the dataset





Processing product: Wayona Nylon Braided USB to Lightning Fast Charging and Data Sync Cable Compatible for iPhone 13, 12,11, X, 8, 7, 6, 5, iPad Air, Pro, Mini (3 FT Pack of 1, Grey)


Iteration 1:
Generated Text:
--------------------------------------------------
Example of a good product description:
'The XYZ USB Cable offers fast charging and data transfer capabilities, compatible with various devices. Its durable design ensures longevity, while customer support is always available.'

Generate a concise product description for:
Product Name: Wayona Nylon Braided USB to Lightning Fast Charging and Data Sync Cable Compatible for iPhone 13, 12,11, X, 8, 7, 6, 5, iPad Air, Pro, Mini (3 FT Pack of 1, Grey)
Key Features: High Compatibility : Compatible With iPhone 12, 11, X/XsMax/Xr,iPhone 8/8 Plus,iPhone 7/7 Plus,iPhone 6s/6s Plus,iPhone 6/6 Plus,iPhone 5/5s/5c/se,iPad Pro,iPad Air 1/2,iPad mini 1/2/3,iPod nano7,iPod touch and more apple devices.|Fast Charge&Data Sync : It can charge an