In [1]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer , AutoModelForSeq2SeqLM , GenerationConfig , pipeline
from peft import PeftModel
import torch
from gtts import gTTS
import os
from datasets import load_dataset
from gtts import gTTS

In [2]:
id2label = {0: "Negative", 1: "Positive"}
label2id = {"Negative":0, "Positive":1}

In [3]:
dataset = load_dataset("yelp_review_full")

In [4]:
# Load the base model
base_model_checkpoint = "distilbert-base-uncased"
base_model = AutoModelForSequenceClassification.from_pretrained(base_model_checkpoint)
tokenizer = AutoTokenizer.from_pretrained("Model")

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [5]:
summarization_model_name = "sshleifer/distilbart-cnn-12-6"
summarization_model = AutoModelForSeq2SeqLM.from_pretrained(summarization_model_name).to('cuda')
summarization_tokenizer = AutoTokenizer.from_pretrained(summarization_model_name)

In [6]:
# Load the PEFT model with the saved LoRA layers
model = PeftModel.from_pretrained(base_model, "Model")

# Move the model to the GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

PeftModelForSequenceClassification(
  (base_model): LoraModel(
    (model): DistilBertForSequenceClassification(
      (distilbert): DistilBertModel(
        (embeddings): Embeddings(
          (word_embeddings): Embedding(30522, 768, padding_idx=0)
          (position_embeddings): Embedding(512, 768)
          (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (transformer): Transformer(
          (layer): ModuleList(
            (0-5): 6 x TransformerBlock(
              (attention): MultiHeadSelfAttention(
                (dropout): Dropout(p=0.1, inplace=False)
                (q_lin): lora.Linear(
                  (base_layer): Linear(in_features=768, out_features=768, bias=True)
                  (lora_dropout): ModuleDict(
                    (default): Dropout(p=0.01, inplace=False)
                  )
                  (lora_A): ModuleDict(
                    (default): Linear(in_features=76

In [7]:
generation_config = GenerationConfig(max_new_tokens=250, do_sample=True, temperature=0.74)

In [None]:
text_list = [
    "That was a good movie.", 
    "i liked that movie.",
    "The camera quality is disappointing, with grainy pictures, slow autofocus, and washed-out colors.",
    "The product arrived late with damaged packaging, leading to a disappointing experience.",
    "it has some software issues and the memory card was crashed",
]

positives = []
negatives = []

# Step 1: Perform Sentiment Analysis and gather negative comments
for index, text in enumerate(text_list):
    text = text.strip()  # Remove any leading/trailing whitespace
    if not text:  # Skip empty lines
        continue

    inputs = tokenizer.encode(text, return_tensors="pt").to(device)
    logits = model(inputs).logits
    predictions = torch.max(logits, 1).indices
    sentiment = "Negative" if predictions.tolist()[0] == 0 else "Positive"

    if sentiment == "Positive":
        positives.append(text)
    else:
        negatives.append(f"Customer {len(negatives) + 1}: {text}")

In [None]:
# Combine all negative comments into a single block of text
if negatives:
    negative_comments = "\n".join(negatives)
else:
    negative_comments = "No negative comments."

# Human-crafted summary
human_summary = "Customers are frustrated with the poor battery life, subpar camera quality, and issues with shipping and packaging, it has software issues and memory size is small , leading to an overall disappointing experience with the product."

# Prompt creation for summarization
prompt = f"""
Summarize the following conversation in a more abstract way, focusing on the overall sentiment and key points.

{negative_comments}

Summary:
{human_summary}
"""

In [None]:
# Tokenize input for summarization
inputs = summarization_tokenizer(prompt, return_tensors='pt').to(device)

# Generate output from the summarization model with adjusted temperature and top-p sampling
output = summarization_tokenizer.decode(
    summarization_model.generate(
        inputs["input_ids"],
        generation_config=generation_config     
    )[0], 
    skip_special_tokens=True
)

num_positive = len(positives)
num_negative = len(negatives)


tts_text = f"The model-generated summary is: {output}. There are {num_positive} positive and {num_negative} negative comments."

In [17]:
# Generate the speech using gTTS
#tts = gTTS(tts_text, lang='en')

# Save the speech to a file
#tts.save('model_summary_audio.mp3')

# Optionally, play the speech directly
#os.system("mpg321 model_summary_audio.mp3")

dash_line = '-' * 100
print(dash_line)
print(f'INPUT PROMPT:\n{prompt}')
print(dash_line)
print(f'BASELINE HUMAN SUMMARY:\n{human_summary}\n')
print(dash_line)
print(f'MODEL GENERATION - ZERO SHOT:\n{output}')
print(dash_line)

----------------------------------------------------------------------------------------------------
INPUT PROMPT:

Summarize the following conversation in a more abstract way, focusing on the overall sentiment and key points.

Customer 1: The camera quality is disappointing, with grainy pictures, slow autofocus, and washed-out colors.
Customer 2: The product arrived late with damaged packaging, leading to a disappointing experience.
Customer 3: it has some software issues and the memory card was crashed

Summary:
Customers are frustrated with the poor battery life, subpar camera quality, and issues with shipping and packaging, it has software issues and memory size is small , leading to an overall disappointing experience with the product.

----------------------------------------------------------------------------------------------------
BASELINE HUMAN SUMMARY:
Customers are frustrated with the poor battery life, subpar camera quality, and issues with shipping and packaging, it has 

In [18]:
# Assuming the model and tokenizer are already loaded and moved to GPU
model.to('cuda')

positives_comments = []
negatives_comments = []
all_comments = []

In [19]:
# Define the list of sentences for sentiment analysis
t_list = [
    "That was a good movie.", 
    "i liked that movie.",
    "The camera quality is disappointing, with grainy pictures, slow autofocus, and washed-out colors.",
    "The product arrived late with damaged packaging, leading to a disappointing experience.",
    "it has some software issues and the memory card was crashed",
]

print("Trained model predictions:")
print("----------------------------")

# Prepare the text that will be converted to speech
tts_text = ""

Trained model predictions:
----------------------------


In [20]:
# Perform sentiment analysis on each sentence
for text in t_list:
    # Tokenize the sentence and move it to GPU
    inputs = tokenizer.encode(text, return_tensors="pt").to("cuda")

    # Get the model's output logits
    logits = model(inputs).logits

    # Get the index of the highest logit (the predicted sentiment class)
    predictions = torch.max(logits, 1).indices

    # Convert the prediction index to the corresponding label
    sentiment_label = id2label[predictions.tolist()[0]]
    
    
    # Print the sentence along with its predicted sentiment
    result_text = f"{text} - {sentiment_label}"
    print(result_text)

    all_comments.append(result_text)
    
    if sentiment_label == "Positive":
        positives_comments.append(sentiment_label)
    else:
        negatives_comments.append(sentiment_label)

    # Append the result to the TTS text
    tts_text += result_text + ". "

That was a good movie. - Positive
i liked that movie. - Positive
The camera quality is disappointing, with grainy pictures, slow autofocus, and washed-out colors. - Negative
The product arrived late with damaged packaging, leading to a disappointing experience. - Negative
it has some software issues and the memory card was crashed - Negative


In [None]:
rate = (len(positives_comments) / len(all_comments) * 100)
rate_text = f"{int(rate)}% of the users liked this product"
tts_text += rate_text

# Convert the text to speech using gTTS
#tts = gTTS(tts_text, lang='en')

# Save the speech to an MP3 file
#tts.save('sentiment_analysis_results.mp3')

# Optionally, play the speech directly (this command works on Unix-based systems)
#os.system("mpg321 sentiment_analysis_results.mp3")

# If you are on Windows, you can play the file with the default media player
# os.startfile('sentiment_analysis_results.mp3')

In [20]:
generation_config = GenerationConfig(max_new_tokens=200, do_sample=True, temperature=0.76)

In [22]:
# Self-made dialogue and summary
self_made_dialogue = """
Person A: Hey, have you finished the report for our project?
Person B: Not yet, I'm still working on it. I should have it done by tomorrow.
Person A: Great! Do you need any help with it?
Person B: I think I'm good, but I'll let you know if I run into any issues. Thanks!.
"""

self_made_summary = "Person B is still working on the project report and expects to finish it by tomorrow. Person A offers help, but Person B doesn't need it at the moment."

# Prompt creation
prompt = f"""
Summarize the following conversation.

{self_made_dialogue}

Summary:
"""
#{self_made_summary}

# Tokenize input
inputs = summarization_tokenizer(prompt, return_tensors='pt').to("cuda")

# Generate output from the model
output = summarization_tokenizer.decode(
    summarization_model.generate(
        inputs["input_ids"], 
        generation_config=generation_config
    )[0], 
    skip_special_tokens=True
)

# Print results
dash_line = '-' * 100
print(dash_line)
print(f'INPUT PROMPT:\n{prompt}')
print(dash_line)
print(f'BASELINE HUMAN SUMMARY:\n{self_made_summary}\n')
print(dash_line)
print(f'MODEL GENERATION - ZERO SHOT:\n{output}')

----------------------------------------------------------------------------------------------------
INPUT PROMPT:

Summarize the following conversation.


Person A: Hey, have you finished the report for our project?
Person B: Not yet, I'm still working on it. I should have it done by tomorrow.
Person A: Great! Do you need any help with it?
Person B: I think I'm good, but I'll let you know if I run into any issues. Thanks!.


Summary:

----------------------------------------------------------------------------------------------------
BASELINE HUMAN SUMMARY:
Person B is still working on the project report and expects to finish it by tomorrow. Person A offers help, but Person B doesn't need it at the moment.

----------------------------------------------------------------------------------------------------
MODEL GENERATION - ZERO SHOT:
 The conversation on a project report is called "a project report" Human constructions may be critical of the project's co-workers. The team members di