In [2]:
#!pip install nltk
#Tokenization and Sentence Splitting:
import nltk
nltk.download('punkt')
from nltk.tokenize import word_tokenize, sent_tokenize

# Sample responses
responses = [
    "The flight was on time. However, the food was terrible.",
    "It was crowded and hot, and the humidity was awful. The gate area didn't have enough seats, and the flight was delayed, making customers very uncomfortable.",
    "The food was awful. It looked awful and tasted awful. I will be sending a letter and will attach pictures.",
    "Crepes and pasta. I love United, but this trip I was disappointed.",
    "On-time departure and arrival, effective boarding. Negative: one restroom was out of order."
]

# Tokenize and split sentences for each response
tokenized_responses = []
sentences_per_response = []

for response in responses:
    # Tokenize into words
    words = word_tokenize(response)
    tokenized_responses.append(words)

    # Split into sentences
    sentences = sent_tokenize(response)
    sentences_per_response.append(sentences)

# Print tokenized words and split sentences for each response
for i, response in enumerate(responses):
    print("Response:", response)
    print("Tokenized words:", tokenized_responses[i])
    print("Split into sentences:", sentences_per_response[i])
    print()




[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


Response: The flight was on time. However, the food was terrible.
Tokenized words: ['The', 'flight', 'was', 'on', 'time', '.', 'However', ',', 'the', 'food', 'was', 'terrible', '.']
Split into sentences: ['The flight was on time.', 'However, the food was terrible.']

Response: It was crowded and hot, and the humidity was awful. The gate area didn't have enough seats, and the flight was delayed, making customers very uncomfortable.
Tokenized words: ['It', 'was', 'crowded', 'and', 'hot', ',', 'and', 'the', 'humidity', 'was', 'awful', '.', 'The', 'gate', 'area', 'did', "n't", 'have', 'enough', 'seats', ',', 'and', 'the', 'flight', 'was', 'delayed', ',', 'making', 'customers', 'very', 'uncomfortable', '.']
Split into sentences: ['It was crowded and hot, and the humidity was awful.', "The gate area didn't have enough seats, and the flight was delayed, making customers very uncomfortable."]

Response: The food was awful. It looked awful and tasted awful. I will be sending a letter and will a

In [3]:
# Define positive and negative keywords
#Keyword Matching:
positive_keywords = ["comfortable", "on time", "friendly", "good service"]
negative_keywords = ["delayed", "crowded", "awful", "disappointed"]

# Function to calculate sentiment score based on keyword matching
def calculate_sentiment_score(sentence):
    sentiment_score = 0
    # Check for positive keywords
    for keyword in positive_keywords:
        if keyword in sentence:
            sentiment_score += 1  # Increment sentiment score for each positive keyword found
    # Check for negative keywords
    for keyword in negative_keywords:
        if keyword in sentence:
            sentiment_score -= 1  # Decrement sentiment score for each negative keyword found
    return sentiment_score

# Sentiment analysis using keyword matching for each response
sentiment_scores = []

for response in responses:
    # Split response into sentences
    sentences = sent_tokenize(response)
    total_score = 0
    # Calculate sentiment score for each sentence
    for sentence in sentences:
        score = calculate_sentiment_score(sentence.lower())  # Convert to lowercase for case-insensitive matching
        total_score += score
    sentiment_scores.append(total_score)

# Print sentiment scores for each response
for i, response in enumerate(responses):
    print("Response:", response)
    print("Sentiment Score:", sentiment_scores[i])
    print()
#Positive and negative keywords are defined.
#A function calculate_sentiment_score() is created to compute the sentiment score for a given sentence based on the presence of positive and negative keywords.
#The sentiment score for each sentence is calculated and aggregated for each response.
#Finally, the sentiment scores for each response are printed. Positive sentiment scores indicate a positive sentiment, while negative scores indicate a negative sentiment.


Response: The flight was on time. However, the food was terrible.
Sentiment Score: 1

Response: It was crowded and hot, and the humidity was awful. The gate area didn't have enough seats, and the flight was delayed, making customers very uncomfortable.
Sentiment Score: -2

Response: The food was awful. It looked awful and tasted awful. I will be sending a letter and will attach pictures.
Sentiment Score: -2

Response: Crepes and pasta. I love United, but this trip I was disappointed.
Sentiment Score: -1

Response: On-time departure and arrival, effective boarding. Negative: one restroom was out of order.
Sentiment Score: 0



In [5]:
#Tokenization and Sentiment Analysis:
import nltk
nltk.download('vader_lexicon')
from nltk.tokenize import sent_tokenize
from nltk.sentiment import SentimentIntensityAnalyzer

# Sample responses
responses = [
    "The flight was on time. However, the food was terrible.",
    "It was crowded and hot, and the humidity was awful. The gate area didn't have enough seats, and the flight was delayed, making customers very uncomfortable.",
    "The food was awful. It looked awful and tasted awful. I will be sending a letter and will attach pictures.",
    "Crepes and pasta. I love United, but this trip I was disappointed.",
    "On-time departure and arrival, effective boarding. Negative: one restroom was out of order."
]

# Initialize VADER sentiment analyzer
sid = SentimentIntensityAnalyzer()

# Function to calculate sentiment score using VADER
def calculate_sentiment_score(sentence):
    # Get sentiment scores for the sentence
    sentiment_scores = sid.polarity_scores(sentence)
    # Return compound score
    return sentiment_scores['compound']

# Sentiment analysis using tokenization and VADER for each response
sentiment_scores = []

for response in responses:
    # Split response into sentences
    sentences = sent_tokenize(response)
    total_score = 0
    # Calculate sentiment score for each sentence
    for sentence in sentences:
        score = calculate_sentiment_score(sentence)
        total_score += score
    sentiment_scores.append(total_score)

# Print sentiment scores for each response
for i, response in enumerate(responses):
    print("Response:", response)
    print("Sentiment Score:", sentiment_scores[i])
    print()


Response: The flight was on time. However, the food was terrible.
Sentiment Score: -0.4767

Response: It was crowded and hot, and the humidity was awful. The gate area didn't have enough seats, and the flight was delayed, making customers very uncomfortable.
Sentiment Score: -1.0436999999999999

Response: The food was awful. It looked awful and tasted awful. I will be sending a letter and will attach pictures.
Sentiment Score: -1.1772

Response: Crepes and pasta. I love United, but this trip I was disappointed.
Sentiment Score: -0.1655

Response: On-time departure and arrival, effective boarding. Negative: one restroom was out of order.
Sentiment Score: -0.09519999999999995



[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


In [6]:
# Define positive and negative phrases or patterns
#Rule-Based Approach
positive_patterns = ["on time", "friendly staff", "comfortable flight"]
negative_patterns = ["delayed", "awful food", "uncomfortable seats"]

# Function to calculate sentiment score based on rule-based approach
def calculate_sentiment_score(sentence):
    sentiment_score = 0
    # Check for positive patterns
    for pattern in positive_patterns:
        if pattern in sentence:
            sentiment_score += 1  # Increment sentiment score for each positive pattern found
    # Check for negative patterns
    for pattern in negative_patterns:
        if pattern in sentence:
            sentiment_score -= 1  # Decrement sentiment score for each negative pattern found
    return sentiment_score

# Sentiment analysis using rule-based approach for each response
sentiment_scores = []

for response in responses:
    # Split response into sentences
    sentences = sent_tokenize(response)
    total_score = 0
    # Calculate sentiment score for each sentence
    for sentence in sentences:
        score = calculate_sentiment_score(sentence.lower())  # Convert to lowercase for case-insensitive matching
        total_score += score
    sentiment_scores.append(total_score)

# Print sentiment scores for each response
for i, response in enumerate(responses):
    print("Response:", response)
    print("Sentiment Score:", sentiment_scores[i])
    print()


Response: The flight was on time. However, the food was terrible.
Sentiment Score: 1

Response: It was crowded and hot, and the humidity was awful. The gate area didn't have enough seats, and the flight was delayed, making customers very uncomfortable.
Sentiment Score: -1

Response: The food was awful. It looked awful and tasted awful. I will be sending a letter and will attach pictures.
Sentiment Score: 0

Response: Crepes and pasta. I love United, but this trip I was disappointed.
Sentiment Score: 0

Response: On-time departure and arrival, effective boarding. Negative: one restroom was out of order.
Sentiment Score: 0



In [7]:
#MachineLearning Models
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from nltk.tokenize import sent_tokenize

# Sample responses
responses = [
    "The flight was on time. However, the food was terrible.",
    "It was crowded and hot, and the humidity was awful. The gate area didn't have enough seats, and the flight was delayed, making customers very uncomfortable.",
    "The food was awful. It looked awful and tasted awful. I will be sending a letter and will attach pictures.",
    "Crepes and pasta. I love United, but this trip I was disappointed.",
    "On-time departure and arrival, effective boarding. Negative: one restroom was out of order."
]

# Annotate each sentence with a sentiment score
annotated_data = []
for response in responses:
    sentences = sent_tokenize(response)
    # Assign a sentiment score based on the presence of positive and negative words
    for sentence in sentences:
        if "on time" in sentence or "friendly" in sentence or "comfortable" in sentence:
            sentiment_score = 1.0
        elif "delayed" in sentence or "terrible" in sentence or "awful" in sentence:
            sentiment_score = -1.0
        else:
            sentiment_score = 0.0
        annotated_data.append((sentence, sentiment_score))

# Separate sentences and sentiment scores
sentences, sentiment_scores = zip(*annotated_data)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(sentences, sentiment_scores, test_size=0.2, random_state=42)

# Vectorize the sentences using TF-IDF
vectorizer = TfidfVectorizer()
X_train_vectorized = vectorizer.fit_transform(X_train)
X_test_vectorized = vectorizer.transform(X_test)

# Train a linear regression model
model = LinearRegression()
model.fit(X_train_vectorized, y_train)

# Predict sentiment scores for test data
y_pred = model.predict(X_test_vectorized)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)
#We annotate each sentence in the provided responses with a sentiment score based on the presence of positive and negative words.
#We split the annotated data into training and testing sets.
#We vectorize the sentences using TF-IDF.
#We train a linear regression model using the training data.
#We predict sentiment scores for the test data using the trained model.
#Finally, we evaluate the model's performance using mean squared error (MSE) as the evaluation metric.


Mean Squared Error: 0.40502996779082995


In [8]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from nltk.tokenize import sent_tokenize

# Sample responses
responses = [
    "The flight was on time. However, the food was terrible.",
    "It was crowded and hot, and the humidity was awful. The gate area didn't have enough seats, and the flight was delayed, making customers very uncomfortable.",
    "The food was awful. It looked awful and tasted awful. I will be sending a letter and will attach pictures.",
    "Crepes and pasta. I love United, but this trip I was disappointed.",
    "On-time departure and arrival, effective boarding. Negative: one restroom was out of order."
]

# Annotate each sentence with aspect-specific sentiment scores
annotated_data = []
for response in responses:
    sentences = sent_tokenize(response)
    for sentence in sentences:
        # Assign sentiment scores based on specific aspects mentioned in each sentence
        aspect_sentiment_scores = {}
        if "on time" in sentence:
            aspect_sentiment_scores["punctuality"] = 1.0
        else:
            aspect_sentiment_scores["punctuality"] = 0.0
        if "comfortable" in sentence:
            aspect_sentiment_scores["comfort"] = 1.0
        else:
            aspect_sentiment_scores["comfort"] = 0.0
        if "food" in sentence or "terrible" in sentence or "awful" in sentence:
            aspect_sentiment_scores["food_quality"] = -1.0
        else:
            aspect_sentiment_scores["food_quality"] = 0.0
        annotated_data.append((sentence, aspect_sentiment_scores))

# Separate sentences and aspect-specific sentiment scores
sentences, aspect_sentiment_scores = zip(*annotated_data)

# Convert aspect-specific sentiment scores to a list of dictionaries
aspect_sentiment_scores_list = list(aspect_sentiment_scores)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(sentences, aspect_sentiment_scores_list, test_size=0.2, random_state=42)

# Vectorize the sentences using TF-IDF
vectorizer = TfidfVectorizer()
X_train_vectorized = vectorizer.fit_transform(X_train)
X_test_vectorized = vectorizer.transform(X_test)

# Train a linear regression model for each aspect
models = {}
for aspect in ["punctuality", "comfort", "food_quality"]:
    # Extract aspect-specific sentiment scores for training data
    y_train_aspect = [scores[aspect] for scores in y_train]
    # Train a linear regression model
    model = LinearRegression()
    model.fit(X_train_vectorized, y_train_aspect)
    models[aspect] = model

# Predict aspect-specific sentiment scores for test data
y_pred = {}
for aspect, model in models.items():
    y_pred[aspect] = model.predict(X_test_vectorized)

# Evaluate the model for each aspect
mse = {}
for aspect in ["punctuality", "comfort", "food_quality"]:
    y_test_aspect = [scores[aspect] for scores in y_test]
    mse[aspect] = mean_squared_error(y_test_aspect, y_pred[aspect])
    print("Aspect:", aspect)
    print("Mean Squared Error:", mse[aspect])
    print()
#We annotate each sentence in the provided responses with aspect-specific sentiment scores based on the presence of specific aspects mentioned in each sentence.
#We split the annotated data into training and testing sets.
#We vectorize the sentences using TF-IDF.
#We train a linear regression model for each aspect using the training data.
#We predict aspect-specific sentiment scores for the test data using the trained models.
#Finally, we evaluate the models' performance for each aspect using mean squared error (MSE) as the evaluation metric.


Aspect: punctuality
Mean Squared Error: 0.3333333333333333

Aspect: comfort
Mean Squared Error: 0.0539099585215691

Aspect: food_quality
Mean Squared Error: 0.12312194330811421



In [14]:
#Agreegate Sentiment Scores
from nltk.sentiment import SentimentIntensityAnalyzer
from nltk.tokenize import sent_tokenize

# Sample responses
responses = [
    "The flight was on time. However, the food was terrible.",
    "It was crowded and hot, and the humidity was awful. The gate area didn't have enough seats, and the flight was delayed, making customers very uncomfortable.",
    "The food was awful. It looked awful and tasted awful. I will be sending a letter and will attach pictures.",
    "Crepes and pasta. I love United, but this trip I was disappointed.",
    "On-time departure and arrival, effective boarding. Negative: one restroom was out of order."
]

# Initialize Sentiment Intensity Analyzer (SIA)
sia = SentimentIntensityAnalyzer()

# Calculate aggregate sentiment scores for each response
aggregate_sentiment_scores = []

for response in responses:
    # Tokenize response into sentences
    sentences = sent_tokenize(response)
    # Calculate sentiment score for each sentence and average them
    total_score = sum(sia.polarity_scores(sentence)['compound'] for sentence in sentences)
    average_score = total_score / len(sentences) if len(sentences) > 0 else 0
    aggregate_sentiment_scores.append(average_score)

# Print aggregate sentiment scores for each response
for i, response in enumerate(responses):
    print("Response:", response)
    print("Aggregate Sentiment Score:", aggregate_sentiment_scores[i])
    print()
#We import the SentimentIntensityAnalyzer from NLTK for sentiment analysis.
#We tokenize each response into sentences using NLTK's sent_tokenize function.
#We calculate the sentiment score for each sentence using the Sentiment Intensity Analyzer (SIA) and average them to get the aggregate sentiment score for the response.
#Finally, we print out the aggregate sentiment scores for each


Response: The flight was on time. However, the food was terrible.
Aggregate Sentiment Score: -0.23835

Response: It was crowded and hot, and the humidity was awful. The gate area didn't have enough seats, and the flight was delayed, making customers very uncomfortable.
Aggregate Sentiment Score: -0.5218499999999999

Response: The food was awful. It looked awful and tasted awful. I will be sending a letter and will attach pictures.
Aggregate Sentiment Score: -0.3924

Response: Crepes and pasta. I love United, but this trip I was disappointed.
Aggregate Sentiment Score: -0.08275

Response: On-time departure and arrival, effective boarding. Negative: one restroom was out of order.
Aggregate Sentiment Score: -0.047599999999999976



In [15]:
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords

# Sample responses
responses = [
    "The flight was on time. However, the food was terrible.",
    "It was crowded and hot, and the humidity was awful. The gate area didn't have enough seats, and the flight was delayed, making customers very uncomfortable.",
    "The food was awful. It looked awful and tasted awful. I will be sending a letter and will attach pictures.",
    "Crepes and pasta. I love United, but this trip I was disappointed.",
    "On-time departure and arrival, effective boarding. Negative: one restroom was out of order."
]

# Custom sentiment lexicons (word: sentiment_score)
custom_lexicons = {
    'on time': 1,
    'terrible': -1,
    'crowded': -1,
    'awful': -1,
    'uncomfortable': -1,
    'love': 1,
    'disappointed': -1
}

# Initialize sentiment score for each review
aggregate_sentiment_scores = []

# Function to calculate sentiment score for a sentence using custom lexicons
def calculate_sentiment_score(sentence):
    tokens = word_tokenize(sentence.lower())
    sentiment_score = 0
    for token in tokens:
        if token in custom_lexicons:
            sentiment_score += custom_lexicons[token]
    return sentiment_score

# Calculate aggregate sentiment score for each review
for response in responses:
    sentences = sent_tokenize(response)
    total_score = 0
    for sentence in sentences:
        total_score += calculate_sentiment_score(sentence)
    aggregate_sentiment_scores.append(total_score)

# Print aggregate sentiment scores for each review
for i, response in enumerate(responses):
    print("Response:", response)
    print("Aggregate Sentiment Score:", aggregate_sentiment_scores[i])
    print()
#We define custom sentiment lexicons with associated sentiment scores.
#We tokenize each response into sentences using NLTK's sent_tokenize function and tokenize each sentence into words using word_tokenize.
#We calculate the sentiment score for each sentence by summing the sentiment scores of the words present in the custom lexicons.
#Finally, we print out the aggregate sentiment scores for each response.


Response: The flight was on time. However, the food was terrible.
Aggregate Sentiment Score: -1

Response: It was crowded and hot, and the humidity was awful. The gate area didn't have enough seats, and the flight was delayed, making customers very uncomfortable.
Aggregate Sentiment Score: -3

Response: The food was awful. It looked awful and tasted awful. I will be sending a letter and will attach pictures.
Aggregate Sentiment Score: -3

Response: Crepes and pasta. I love United, but this trip I was disappointed.
Aggregate Sentiment Score: 0

Response: On-time departure and arrival, effective boarding. Negative: one restroom was out of order.
Aggregate Sentiment Score: 0



In [16]:
#Handling Negotiations and Intensifiers
from nltk.tokenize import sent_tokenize, word_tokenize

# Sample responses
responses = [
    "The flight was on time. However, the food was terrible.",
    "It was crowded and hot, and the humidity was awful. The gate area didn't have enough seats, and the flight was delayed, making customers very uncomfortable.",
    "The food was awful. It looked awful and tasted awful. I will be sending a letter and will attach pictures.",
    "Crepes and pasta. I love United, but this trip I was disappointed.",
    "On-time departure and arrival, effective boarding. Negative: one restroom was out of order."
]

# Custom sentiment lexicons (word: sentiment_score)
custom_lexicons = {
    'on time': 1,
    'terrible': -1,
    'crowded': -1,
    'awful': -1,
    'uncomfortable': -1,
    'love': 1,
    'disappointed': -1
}

# Initialize aggregate sentiment scores for all responses
aggregate_sentiment_scores = []

# Function to calculate sentiment score for a sentence using custom lexicons and handling negation and intensifiers
def calculate_sentiment_score(sentence):
    tokens = word_tokenize(sentence.lower())
    sentiment_score = 0
    negation = False
    for token in tokens:
        if token == 'not' or token == "no":
            negation = True
        elif token in custom_lexicons:
            score = custom_lexicons[token]
            if negation:
                score *= -1  # Reverse the sentiment score for negated words
                negation = False  # Reset negation flag
            sentiment_score += score
    return sentiment_score

# Calculate aggregate sentiment score for each response
for response in responses:
    sentences = sent_tokenize(response)
    total_score = 0
    for sentence in sentences:
        total_score += calculate_sentiment_score(sentence)
    aggregate_sentiment_scores.append(total_score)

# Print aggregate sentiment scores for each response
for i, response in enumerate(responses):
    print("Response:", response)
    print("Aggregate Sentiment Score:", aggregate_sentiment_scores[i])
    print()
#To incorporate handling of negation and intensifiers in sentiment analysis, we can modify the sentiment scoring based on the presence of negation words (e.g., "not", "no") and intensifiers (e.g., "very", "extremely"). Here's how you can implement it for all responses:
#We define custom sentiment lexicons with associated sentiment scores.
#We tokenize each response into sentences using NLTK's sent_tokenize function and tokenize each sentence into words using word_tokenize.
#We calculate the sentiment score for each sentence by summing the sentiment scores of the words present in the custom lexicons and handle negation by reversing the sentiment score of words following negation words.
#Finally, we print out the aggregate sentiment scores for each response.


Response: The flight was on time. However, the food was terrible.
Aggregate Sentiment Score: -1

Response: It was crowded and hot, and the humidity was awful. The gate area didn't have enough seats, and the flight was delayed, making customers very uncomfortable.
Aggregate Sentiment Score: -3

Response: The food was awful. It looked awful and tasted awful. I will be sending a letter and will attach pictures.
Aggregate Sentiment Score: -3

Response: Crepes and pasta. I love United, but this trip I was disappointed.
Aggregate Sentiment Score: 0

Response: On-time departure and arrival, effective boarding. Negative: one restroom was out of order.
Aggregate Sentiment Score: 0



In [18]:
#Contextual Analysis
import nltk
nltk.download('averaged_perceptron_tagger')
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords

# Sample responses
responses = [
    "The flight was on time. However, the food was terrible.",
    "It was crowded and hot, and the humidity was awful. The gate area didn't have enough seats, and the flight was delayed, making customers very uncomfortable.",
    "The food was awful. It looked awful and tasted awful. I will be sending a letter and will attach pictures.",
    "Crepes and pasta. I love United, but this trip I was disappointed.",
    "On-time departure and arrival, effective boarding. Negative: one restroom was out of order."
]

# Custom sentiment lexicons (word: sentiment_score)
custom_lexicons = {
    'on time': 1,
    'terrible': -1,
    'crowded': -1,
    'awful': -1,
    'uncomfortable': -1,
    'love': 1,
    'disappointed': -1
}

# Initialize aggregate sentiment scores for all responses
aggregate_sentiment_scores = []

# Function to calculate sentiment score for a sentence using custom lexicons and context analysis
def calculate_sentiment_score(sentence):
    tokens = word_tokenize(sentence.lower())
    tagged_tokens = nltk.pos_tag(tokens)
    sentiment_score = 0
    negation = False
    for token, tag in tagged_tokens:
        if token == 'not' or token == "no":
            negation = True
        elif token in custom_lexicons:
            score = custom_lexicons[token]
            if negation:
                score *= -1  # Reverse the sentiment score for negated words
                negation = False  # Reset negation flag
            if tag.startswith('JJ'):  # Adjective
                score *= 2  # Double the sentiment score for adjectives
            sentiment_score += score
    return sentiment_score

# Calculate aggregate sentiment score for each response
for response in responses:
    sentences = sent_tokenize(response)
    total_score = 0
    for sentence in sentences:
        total_score += calculate_sentiment_score(sentence)
    aggregate_sentiment_scores.append(total_score)

# Print aggregate sentiment scores for each response
for i, response in enumerate(responses):
    print("Response:", response)
    print("Aggregate Sentiment Score:", aggregate_sentiment_scores[i])
    print()
#For contextual analysis, we can consider the sentiment of a word based on its context within the sentence. We'll use NLTK's Part-of-Speech (POS) tagging to identify the context of each word and adjust the sentiment score accordingly. Here's how you can implement it for all responses:
#We tokenize each response into sentences using NLTK's sent_tokenize function and tokenize each sentence into words using word_tokenize.
#We use NLTK's Part-of-Speech (POS) tagging to identify the context of each word in the sentence.
#We adjust the sentiment score based on the context of each word. Adjectives (words tagged with 'JJ') are given double the sentiment score.
#Finally, we print out the aggregate sentiment scores for each response.


[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


Response: The flight was on time. However, the food was terrible.
Aggregate Sentiment Score: -2

Response: It was crowded and hot, and the humidity was awful. The gate area didn't have enough seats, and the flight was delayed, making customers very uncomfortable.
Aggregate Sentiment Score: -5

Response: The food was awful. It looked awful and tasted awful. I will be sending a letter and will attach pictures.
Aggregate Sentiment Score: -5

Response: Crepes and pasta. I love United, but this trip I was disappointed.
Aggregate Sentiment Score: 0

Response: On-time departure and arrival, effective boarding. Negative: one restroom was out of order.
Aggregate Sentiment Score: 0



In [19]:
#Normalize Scores
from nltk.tokenize import sent_tokenize, word_tokenize

# Sample responses
responses = [
    "The flight was on time. However, the food was terrible.",
    "It was crowded and hot, and the humidity was awful. The gate area didn't have enough seats, and the flight was delayed, making customers very uncomfortable.",
    "The food was awful. It looked awful and tasted awful. I will be sending a letter and will attach pictures.",
    "Crepes and pasta. I love United, but this trip I was disappointed.",
    "On-time departure and arrival, effective boarding. Negative: one restroom was out of order."
]

# Custom sentiment lexicons (word: sentiment_score)
custom_lexicons = {
    'on time': 1,
    'terrible': -1,
    'crowded': -1,
    'awful': -1,
    'uncomfortable': -1,
    'love': 1,
    'disappointed': -1
}

# Initialize aggregate sentiment scores for all responses
aggregate_sentiment_scores = []

# Function to calculate sentiment score for a sentence using custom lexicons
def calculate_sentiment_score(sentence):
    tokens = word_tokenize(sentence.lower())
    sentiment_score = 0
    negation = False
    for token in tokens:
        if token == 'not' or token == "no":
            negation = True
        elif token in custom_lexicons:
            score = custom_lexicons[token]
            if negation:
                score *= -1  # Reverse the sentiment score for negated words
                negation = False  # Reset negation flag
            sentiment_score += score
    return sentiment_score

# Calculate aggregate sentiment score for each response
for response in responses:
    sentences = sent_tokenize(response)
    total_score = 0
    for sentence in sentences:
        total_score += calculate_sentiment_score(sentence)
    aggregate_sentiment_scores.append(total_score)

# Normalize sentiment scores to a range between -5 and 5
min_score = min(aggregate_sentiment_scores)
max_score = max(aggregate_sentiment_scores)
normalized_scores = [((score - min_score) / (max_score - min_score)) * 10 - 5 for score in aggregate_sentiment_scores]

# Print normalized sentiment scores for each response
for i, response in enumerate(responses):
    print("Response:", response)
    print("Normalized Sentiment Score:", normalized_scores[i])
    print()

#To normalize sentiment scores, we'll scale the aggregate sentiment scores for each response to a range between -5 and 5. Here's how you can implement it for all responses:
#We calculate the aggregate sentiment score for each response using the custom sentiment lexicons.
#We normalize the sentiment scores to a range between -5 and 5 by scaling the scores based on the minimum and maximum scores.
#Finally, we print out the normalized sentiment scores for each response.

Response: The flight was on time. However, the food was terrible.
Normalized Sentiment Score: 1.666666666666666

Response: It was crowded and hot, and the humidity was awful. The gate area didn't have enough seats, and the flight was delayed, making customers very uncomfortable.
Normalized Sentiment Score: -5.0

Response: The food was awful. It looked awful and tasted awful. I will be sending a letter and will attach pictures.
Normalized Sentiment Score: -5.0

Response: Crepes and pasta. I love United, but this trip I was disappointed.
Normalized Sentiment Score: 5.0

Response: On-time departure and arrival, effective boarding. Negative: one restroom was out of order.
Normalized Sentiment Score: 5.0



In [20]:
!pip install transformers
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Sample responses
responses = [
    "The flight was on time. However, the food was terrible.",
    "It was crowded and hot, and the humidity was awful. The gate area didn't have enough seats, and the flight was delayed, making customers very uncomfortable.",
    "The food was awful. It looked awful and tasted awful. I will be sending a letter and will attach pictures.",
    "Crepes and pasta. I love United, but this trip I was disappointed.",
    "On-time departure and arrival, effective boarding. Negative: one restroom was out of order."
]

# Initialize tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment")
model = AutoModelForSequenceClassification.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment")

# Function to calculate sentiment score for a given text
def calculate_sentiment_score(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    outputs = model(**inputs)
    scores = torch.softmax(outputs.logits, dim=1).detach().numpy()[0]
    sentiment_score = (scores.argmax().item() - 2) * scores.max().item()
    return sentiment_score

# Calculate sentiment score for each response
sentiment_scores = [calculate_sentiment_score(response) for response in responses]

# Print sentiment scores for each response
for i, response in enumerate(responses):
    print("Response:", response)
    print("Sentiment Score:", sentiment_scores[i])
    print()
#We import necessary modules from the transformers library.
#We load a pre-trained sentiment analysis model and tokenizer from Hugging Face's model hub.
#We define a function to calculate the sentiment score for a given text using the loaded model and tokenizer.
#We calculate the sentiment score for each response using the defined function.
#Finally, we print out the sentiment scores for each response.


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/39.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/953 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/872k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/669M [00:00<?, ?B/s]

Response: The flight was on time. However, the food was terrible.
Sentiment Score: -0.45169350504875183

Response: It was crowded and hot, and the humidity was awful. The gate area didn't have enough seats, and the flight was delayed, making customers very uncomfortable.
Sentiment Score: -1.2442688941955566

Response: The food was awful. It looked awful and tasted awful. I will be sending a letter and will attach pictures.
Sentiment Score: -1.766735553741455

Response: Crepes and pasta. I love United, but this trip I was disappointed.
Sentiment Score: -0.4240681231021881

Response: On-time departure and arrival, effective boarding. Negative: one restroom was out of order.
Sentiment Score: 0.0

