In [2]:
from transformers import pipeline

# Initialize sentiment-analysis pipeline using a pre-trained BERT model
sentiment_analyzer = pipeline("sentiment-analysis")

# Function to calculate importance score based on sentiment analysis
def analyze_query_importance(query):
    # 1. Sentiment Analysis using BERT
    sentiment = sentiment_analyzer(query)[0]
    
    label = sentiment['label']
    score = sentiment['score']

    # 2. Calculate the importance score based on sentiment
    if label == 'NEGATIVE':
        base_score = 7  # Higher base score for negative sentiment (shows dissatisfaction)
    elif label == 'POSITIVE':
        base_score = 3  # Lower score for positive sentiment
    else:
        base_score = 5  # Neutral sentiment base score
    
    # 3. Use the sentiment score confidence to adjust the importance score (e.g., increase for higher confidence)
    importance_score = base_score + (score * 3)  # Scale the score
    
    # Ensure the score stays within bounds (e.g., 0-10)
    importance_score = max(0, min(importance_score, 10))

    return round(importance_score, 2)

# Test the function with different user queries
queries = [
    "I need this report urgently, it's really important!",
    "This response is completely unacceptable. I'm very upset!",
    "Could you send me the file when you get the chance?",
    "This is a critical priority for us.",
    'Nice Job'
]

for query in queries:
    score = analyze_query_importance(query)
    print(f"Query: \"{query}\" -> Importance Score: {score}")

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


Query: "I need this report urgently, it's really important!" -> Importance Score: 5.99
Query: "This response is completely unacceptable. I'm very upset!" -> Importance Score: 10.0
Query: "Could you send me the file when you get the chance?" -> Importance Score: 10.0
Query: "This is a critical priority for us." -> Importance Score: 5.99
Query: "Nice Job" -> Importance Score: 6.0


In [7]:
import torch
from transformers import BertTokenizer, BertModel
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Load pre-trained BERT model and tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')

# Function to extract BERT embeddings for a given query
def get_bert_embedding(query):
    inputs = tokenizer(query, return_tensors='pt', truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    
    # The hidden state of the last layer [batch_size, sequence_length, hidden_size]
    # Extract the embedding of the [CLS] token for sentence-level embedding
    cls_embedding = outputs.last_hidden_state[:, 0, :].squeeze()
    
    return cls_embedding

# Function to classify sentiment and importance based on BERT embeddings
def analyze_query_importance(query, sentiment_reference, importance_reference):
    # 1. Get BERT embeddings for the input query
    query_embedding = get_bert_embedding(query)
    
    # 2. Compute similarity with reference sentiment and importance embeddings
    sentiment_sim = cosine_similarity([query_embedding], sentiment_reference)
    importance_sim = cosine_similarity([query_embedding], importance_reference)
    
    # 3. Classify sentiment based on similarity scores
    sentiment_label = 'POSITIVE' if sentiment_sim[0][1] > sentiment_sim[0][0] else 'NEGATIVE'
    sentiment_score = max(sentiment_sim[0])  # Use the max similarity as the score

    # 4. Classify importance based on similarity scores
    importance_label = 'IMPORTANT' if importance_sim[0][1] > importance_sim[0][0] else 'NOT IMPORTANT'
    importance_score = max(importance_sim[0])  # Use the max similarity as the score
    
    # 5. Calculate final importance score (combining sentiment and importance)
    base_score = 7 if sentiment_label == 'NEGATIVE' else 3
    if importance_label == 'IMPORTANT':
        base_score += 2  # Boost score for important queries
    
    # Adjust the score based on sentiment/importance confidence
    importance_score_final = base_score + (sentiment_score * 3)
    importance_score_final = max(0, min(importance_score_final, 10))  # Ensure score is between 0 and 10
    
    return round(importance_score_final, 2)

# Prepare reference embeddings for sentiment and importance classification
# These could be example phrases you know are positive/negative, important/not important
positive_query = "I am happy with the results."
negative_query = "I am very unhappy with the service."
important_query = "This is a critical priority task."
not_important_query = "No rush, take your time."

# Get BERT embeddings for reference queries
sentiment_reference = [
    get_bert_embedding(negative_query),
    get_bert_embedding(positive_query)
]

importance_reference = [
    get_bert_embedding(not_important_query),
    get_bert_embedding(important_query)
]

# Test the function with different user queries
queries = [
    "I need this report urgently, it's really important!",
    "This response is completely unacceptable. I'm very upset!",
    "Could you send me the file when you get the chance?",
    "This is a critical priority for us.",
]

for query in queries:
    score = analyze_query_importance(query, sentiment_reference, importance_reference)
    print(f"Query: \"{query}\" -> Importance Score: {score}")




Query: "I need this report urgently, it's really important!" -> Importance Score: 7.63
Query: "This response is completely unacceptable. I'm very upset!" -> Importance Score: 10
Query: "Could you send me the file when you get the chance?" -> Importance Score: 7.7
Query: "This is a critical priority for us." -> Importance Score: 7.75


In [11]:
!pip install summarizer

Collecting summarizer
  Downloading summarizer-0.0.7.tar.gz (280 kB)
     ---------------------------------------- 0.0/280.1 kB ? eta -:--:--
     -- ------------------------------------- 20.5/280.1 kB ? eta -:--:--
     ------------- ------------------------ 102.4/280.1 kB 1.5 MB/s eta 0:00:01
     ------------------------------ ------- 225.3/280.1 kB 1.7 MB/s eta 0:00:01
     -------------------------------------- 280.1/280.1 kB 1.6 MB/s eta 0:00:00
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting nltk
  Using cached nltk-3.9.1-py3-none-any.whl (1.5 MB)
Installing collected packages: nltk, summarizer
  Running setup.py install for summarizer: started
  Running setup.py install for summarizer: finished with status 'done'
Successfully installed nltk-3.9.1 summarizer-0.0.7


  DEPRECATION: summarizer is being installed using the legacy 'setup.py install' method, because it does not have a 'pyproject.toml' and the 'wheel' package is not installed. pip 23.1 will enforce this behaviour change. A possible replacement is to enable the '--use-pep517' option. Discussion can be found at https://github.com/pypa/pip/issues/8559

[notice] A new release of pip is available: 23.0.1 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [13]:
# Install summarizer if not installed
# pip install summarizer transformers

from summarizer import Summarizer

# The text you want to summarize
body = '''
       Scientists say they have discovered a new species of orangutans on Indonesia’s island of Sumatra.
       The population differs in several ways from the two existing orangutan species found in Sumatra and the neighboring island of Borneo.
       The orangutans were found inside North Sumatra’s Batang Toru forest, the science publication Current Biology reported.
       Researchers named the new species the Tapanuli orangutan. They say the animals are considered a new species because of genetic, skeletal and tooth differences.
       Michael Kruetzen is a geneticist with the University of Zurich who has studied the orangutans for several years. He said he was excited to be part of the unusual discovery of a new great ape in the present day. He noted that most great apes are currently considered endangered or severely endangered.
       Gorillas, chimpanzees and bonobos also belong to the great ape species.
       Orangutan – which means person of the forest in the Indonesian and Malay languages - is the world’s biggest tree-living mammal. The orange-haired animals can move easily among the trees because their arms are longer than their legs. They live more lonely lives than other great apes, spending a lot of time sleeping and eating fruit in the forest.
       The new study said fewer than 800 of the newly-described orangutans exist. Their low numbers make the group the most endangered of all the great ape species.
       They live within an area covering about 1,000 square kilometers. The population is considered highly vulnerable. That is because the environment which they depend on is greatly threatened by development.
       Researchers say if steps are not taken quickly to reduce the current and future threats, the new species could become extinct “within our lifetime.”
       Research into the new species began in 2013, when an orangutan protection group in Sumatra found an injured orangutan in an area far away from the other species. The adult male orangutan had been beaten by local villagers and died of his injuries. The complete skull was examined by researchers.
       Among the physical differences of the new species are a notably smaller head and frizzier hair. The Tapanuli orangutans also have a different diet and are found only in higher forest areas.
       There is no unified international system for recognizing new species. But to be considered, discovery claims at least require publication in a major scientific publication.
       Russell Mittermeier is head of the primate specialist group at the International Union for the Conservation of Nature. He called the finding a “remarkable discovery.” He said it puts responsibility on the Indonesian government to help the species survive.
       Matthew Nowak is one of the writers of the study. He told the Associated Press that there are three groups of the Tapanuli orangutans that are separated by non-protected land. He said forest land needs to connect the separated groups.
       In addition, the writers of the study are recommending that plans for a hydropower center in the area be stopped by the government.
       It also recommended that remaining forest in the Sumatran area where the orangutans live be protected.
       I’m Bryan Lynn.
'''

# Initialize the BERT summarizer model
bert_model = Summarizer()

# Generate the summary with a minimum length of 60 characters
bert_summary = ''.join(bert_model(body, min_length=60))

# Print the generated summary
print("BERT Summary:")
print(bert_summary)


TypeError: 'Summarizer' object is not callable