In [1]:
import pandas as pd
import ast
import re

# Load the input CSV (judging by the file name, TextBlob results for 3-star
# reviews -- TODO confirm). The loop further down reads an 'aspect_sentiment'
# column and per-topic 'topic_<n>' columns from this frame.
df = pd.read_csv('../../../data/textblob/dp_textblob_3_star.csv')

# Function to get the polarity based on the threshold
def get_polarity(topic_number, threshold=0):
    """Map a numeric score to a sentiment label relative to ``threshold``.

    Despite its name, ``topic_number`` is the score being classified (the
    caller in this notebook passes a polarity value), not a topic id.

    Args:
        topic_number: Numeric score to classify.
        threshold: Value regarded as neutral. Defaults to 0.

    Returns:
        "Positive" if the score is above the threshold, "Neutral" if it is
        equal to it, and "Negative" in every other case (which includes
        scores that compare as neither greater nor equal, such as NaN).
    """
    if topic_number > threshold:
        return "Positive"
    if topic_number == threshold:
        return "Neutral"
    return "Negative"

# Collect one record per (aspect, topic) entry and build the DataFrame once
# after the loop. Appending with `new_df.loc[len(new_df)] = ...` re-allocates
# the frame on every insert, which is quadratic in the number of entries.
records = []

# Iterate through each row and collect its aspect entries
for index, row in df.iterrows():
    aspect_sentiment_str = row['aspect_sentiment']

    try:
        # Wrap every \w+ token in double quotes so ast.literal_eval can parse
        # the cell. This lives inside the try so that a missing / non-string
        # cell (e.g. NaN) is reported and skipped like any other malformed row
        # instead of aborting the whole loop with a TypeError from re.sub.
        # Because \w also matches digits, numeric tokens are quoted too, so
        # topic_number comes back as a string (e.g. "3").
        # NOTE(review): adjacent quoted tokens are merged by Python's implicit
        # string concatenation, so a multi-word aspect would lose its spaces --
        # confirm against the actual cell format.
        aspect_sentiment_str_fixed = re.sub(r'(\b\w+\b)', r'"\1"', aspect_sentiment_str)

        aspect_sentiment_list = ast.literal_eval(aspect_sentiment_str_fixed)
        for aspect_entry in aspect_sentiment_list:
            aspect = aspect_entry[0]
            topic_number = aspect_entry[2]
            # The polarity for this entry is stored in the row's topic_<n> column.
            polarity = df.at[index, f'topic_{topic_number}']
            sentiment = get_polarity(polarity)

            records.append({'topic_number': topic_number, 'aspect': aspect, 'sentiment': sentiment})
    except Exception as e:
        # Deliberate best-effort: report the offending row and keep going.
        # Entries collected from the row before the failure are kept.
        print(f"Error processing row {index}: {e}")

# DataFrame holding aspect, topic number, and sentiment for every parsed entry
new_df = pd.DataFrame(records, columns=['topic_number', 'aspect', 'sentiment'])

# Group by 'topic_number' and 'sentiment', then count occurrences
# (rows = topic_number, columns = sentiment label, missing combinations = 0)
textblob_sentiment_counts = new_df.groupby(['topic_number', 'sentiment']).size().unstack(fill_value=0)

# Display the counts
print("Sentiment Counts:")
# Summing up the values for each sentiment across all topics
total_sentiments = textblob_sentiment_counts.sum(axis=0)

# Display the result
print(total_sentiments)
# DataFrame.sum() defaults to axis=0, so this prints the same totals a second
# time; kept so the cell's printed output is unchanged.
print(textblob_sentiment_counts.sum())


Sentiment Counts:
sentiment
Negative     6161
Neutral      4780
Positive    35894
dtype: int64
sentiment
Negative     6161
Neutral      4780
Positive    35894
dtype: int64


In [3]:
import pandas as pd
import ast

# Load the CSV whose 'topics' column holds, per row, the string form of a list
# of entries; entry[0] is used as the topic number and entry[1] as the
# sentiment label (file name suggests pyABSA output -- TODO confirm).
other_df = pd.read_csv('../../aspect_modelling/lda/3star_pyABSA_updated.csv')

# Collect one record per topic entry and build the DataFrame once after the
# loop. Appending with `other_new_df.loc[len(other_new_df)] = ...` re-allocates
# the frame on every insert, which is quadratic in the number of entries.
other_records = []

# Iterate through each row and collect its topic entries
for index, row in other_df.iterrows():
    topics_str = row['topics']

    try:
        topics_list = ast.literal_eval(topics_str)
        for topic_entry in topics_list:
            topic_number = topic_entry[0]
            sentiment = topic_entry[1]

            other_records.append({'topic_number': topic_number, 'sentiment': sentiment})
    except Exception as e:
        # Deliberate best-effort: report the offending row and keep going.
        # Entries collected from the row before the failure are kept.
        print(f"Error processing row {index}: {e}")

# DataFrame holding topic number and sentiment for every parsed entry
other_new_df = pd.DataFrame(other_records, columns=['topic_number', 'sentiment'])

# Group by 'topic_number' and 'sentiment', then count occurrences
# (rows = topic_number, columns = sentiment label, missing combinations = 0)
pyabsa_sentiment_counts = other_new_df.groupby(['topic_number', 'sentiment']).size().unstack(fill_value=0)

# Display the counts
print("Sentiment Counts:")
# Summing up the values for each sentiment across all topics
total_sentiments_pyabsa = pyabsa_sentiment_counts.sum(axis=0)

# Display the result
print(total_sentiments_pyabsa)

Sentiment Counts:
sentiment
Negative     8398
Neutral      4855
Positive    19449
dtype: int64


In [4]:
# Columns that hold the raw per-sentiment counts. Every aggregate below is
# computed from these columns only, so the derived 'Total' / 'Percentage'
# columns are never folded back into a sum. (Summing the whole frame after
# 'Total' had been added doubled the stored totals and made the cell give
# different results when re-run.)
sentiment_columns = ['Negative', 'Neutral', 'Positive']

# Total number of sentiments for each topic
pyabsa_sentiment_counts['Total'] = pyabsa_sentiment_counts[sentiment_columns].sum(axis=1)

# Percentage of negative, neutral, and positive sentiments within each topic
df_percentage = pyabsa_sentiment_counts[sentiment_columns].div(pyabsa_sentiment_counts['Total'], axis=0) * 100

# Share of each topic in the overall number of sentiments, in percent
pyabsa_sentiment_counts['Percentage'] = pyabsa_sentiment_counts['Total'] / pyabsa_sentiment_counts['Total'].sum() * 100

# Print the per-topic share
print(pyabsa_sentiment_counts["Percentage"])

# Print the per-topic sentiment breakdown
print(df_percentage)

topic_number
0    14.950156
1    15.641245
2    29.915601
3    39.492997
Name: Percentage, dtype: float64
sentiment      Negative    Neutral   Positive
topic_number                                 
0             38.494580  14.726938  46.778482
1             34.134897   8.973607  56.891496
2             14.831851  24.358581  60.809568
3             25.698800  10.011614  64.289586


In [5]:
# Columns that hold the raw per-sentiment counts. Every aggregate below is
# computed from these columns only, so the derived 'Total' / 'Percentage'
# columns are never folded back into a sum. (Summing the whole frame after
# 'Total' had been added doubled the stored totals and made the cell give
# different results when re-run.)
sentiment_columns = ['Negative', 'Neutral', 'Positive']

# Total number of sentiments for each topic
textblob_sentiment_counts['Total'] = textblob_sentiment_counts[sentiment_columns].sum(axis=1)

# Percentage of negative, neutral, and positive sentiments within each topic
df_percentage = textblob_sentiment_counts[sentiment_columns].div(textblob_sentiment_counts['Total'], axis=0) * 100

# Share of each topic in the overall number of sentiments, in percent
textblob_sentiment_counts['Percentage'] = textblob_sentiment_counts['Total'] / textblob_sentiment_counts['Total'].sum() * 100

# Print the per-topic share
print(textblob_sentiment_counts["Percentage"])

# Print the per-topic sentiment breakdown
print(df_percentage)


topic_number
0    28.378350
1    15.430768
2    29.578307
3    26.612576
Name: Percentage, dtype: float64
sentiment      Negative    Neutral   Positive
topic_number                                 
0             17.733805  10.774208  71.491987
1             17.711360  12.370278  69.918362
2              9.333718   8.351982  82.314300
3              9.876444  10.405969  79.717587
