In [1]:
import pandas as pd
import ast
import re

# Load the review data from the TextBlob output CSV (the 4-star file, per its
# path). The code further down reads the 'aspect_sentiment' column and the
# per-topic 'topic_<n>' columns from this frame.
df = pd.read_csv('../../../data/textblob/dp_textblob_4_star.csv')

# Function to get the polarity based on the threshold
def get_polarity(topic_number, threshold=0):
    """Classify a numeric score as "Positive", "Neutral" or "Negative".

    Args:
        topic_number: The value to classify. Despite the name, the caller in
            this notebook passes the polarity score read from a
            ``topic_<n>`` column, not a topic index.
        threshold: The neutral point. Values equal to it are "Neutral",
            values above it are "Positive".

    Returns:
        One of the strings "Positive", "Neutral" or "Negative". Any value that
        is neither greater than nor equal to ``threshold`` (including NaN)
        falls through to "Negative".
    """
    if topic_number > threshold:
        return "Positive"
    if topic_number == threshold:
        return "Neutral"
    return "Negative"

# Build one record per aspect mention (topic number, aspect, sentiment label).
# Records are accumulated in a plain list and converted to a DataFrame once:
# growing a DataFrame with `.loc[len(df)] = ...` re-allocates on every insert,
# which is quadratic in the number of records.
textblob_records = []

# Iterate through each row and collect its aspect records
for index, row in df.iterrows():
    aspect_sentiment_str = row['aspect_sentiment']

    try:
        # Wrap every \w+ token in double quotes so the cell text can be parsed
        # as a Python literal. This also quotes numeric tokens, so the topic
        # number extracted below is a string. Done inside the `try` so that a
        # non-string cell (e.g. a missing value) is reported and skipped like
        # any other malformed row instead of aborting the whole loop.
        aspect_sentiment_str_fixed = re.sub(r'(\b\w+\b)', r'"\1"', aspect_sentiment_str)

        aspect_sentiment_list = ast.literal_eval(aspect_sentiment_str_fixed)
        for aspect_entry in aspect_sentiment_list:
            # Each entry is indexed positionally: [0] is the aspect and [2] is
            # the topic number used to pick the matching 'topic_<n>' column.
            aspect = aspect_entry[0]
            topic_number = aspect_entry[2]
            polarity = df.at[index, f'topic_{topic_number}']
            sentiment = get_polarity(polarity)

            textblob_records.append(
                {'topic_number': topic_number, 'aspect': aspect, 'sentiment': sentiment}
            )
    except Exception as e:
        # Best-effort: report the offending row and keep going. Records already
        # collected from this row before the failure are kept.
        print(f"Error processing row {index}: {e}")

# Create the DataFrame of aspect, topic number, and sentiment in one step
new_df = pd.DataFrame(textblob_records, columns=['topic_number', 'aspect', 'sentiment'])

# Group by 'topic_number' and 'sentiment', then count occurrences
textblob_sentiment_counts = new_df.groupby(['topic_number', 'sentiment']).size().unstack(fill_value=0)

# Display the counts
print("Sentiment Counts:")
textblob_sentiment_counts


Sentiment Counts:


sentiment,Negative,Neutral,Positive
topic_number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,1527,1611,11464
1,507,767,6481
2,982,2041,11905
3,1825,1904,14189
4,361,1420,4529


In [2]:
import pandas as pd
import ast

# Load the pyABSA output CSV (the 4-star file, per its path). Only the
# 'topics' column is read below.
other_df = pd.read_csv('../../aspect_modelling/lda/4star_pyABSA_updated.csv')

# Build one record per topic mention (topic number, sentiment label).
# Records are accumulated in a plain list and converted to a DataFrame once:
# growing a DataFrame with `.loc[len(df)] = ...` re-allocates on every insert,
# which is quadratic in the number of records.
pyabsa_records = []

# Iterate through each row and collect its topic records
for index, row in other_df.iterrows():
    topics_str = row['topics']

    try:
        # The cell text is parsed as a Python literal; each entry is indexed
        # positionally: [0] is the topic number and [1] is the sentiment label.
        topics_list = ast.literal_eval(topics_str)
        for topic_entry in topics_list:
            topic_number = topic_entry[0]
            sentiment = topic_entry[1]

            pyabsa_records.append({'topic_number': topic_number, 'sentiment': sentiment})
    except Exception as e:
        # Best-effort: report the offending row and keep going. Records already
        # collected from this row before the failure are kept.
        print(f"Error processing row {index}: {e}")

# Create the DataFrame of topic number and sentiment in one step
other_new_df = pd.DataFrame(pyabsa_records, columns=['topic_number', 'sentiment'])

# Group by 'topic_number' and 'sentiment', then count occurrences
pyabsa_sentiment_counts = other_new_df.groupby(['topic_number', 'sentiment']).size().unstack(fill_value=0)

# Display the counts
print("Sentiment Counts:")
pyabsa_sentiment_counts

Sentiment Counts:


sentiment,Negative,Neutral,Positive
topic_number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,1128,2671,5723
1,2585,1254,5661
2,1602,1044,5019
3,150,62,436
4,2318,1915,10204


In [3]:
# Summarise the TextBlob counts: the sentiment split within each topic, and
# each topic's share of all mentions.
#
# The total is taken over the three sentiment columns only. Summing every
# column with `textblob_sentiment_counts.sum(axis=1)` would fold an already
# added 'Total' (or 'Percentage') column back into the total, doubling it and
# making the cell produce different numbers each time it is re-run.
sentiment_columns = ['Negative', 'Neutral', 'Positive']

# Calculate the total number of sentiments for each topic
textblob_sentiment_counts['Total'] = textblob_sentiment_counts[sentiment_columns].sum(axis=1)

# Calculate the percentage of positive, neutral, and negative sentiments for each topic
df_percentage = textblob_sentiment_counts[sentiment_columns].div(textblob_sentiment_counts['Total'], axis=0) * 100

# Calculate the percentage representation of each topic with respect to the 'Total' column
textblob_sentiment_counts['Percentage'] = textblob_sentiment_counts['Total'] / textblob_sentiment_counts['Total'].sum() * 100

# Print each topic's share of all mentions
print(textblob_sentiment_counts["Percentage"])

# Print the per-topic sentiment percentages
print(df_percentage)


topic_number
0    23.738072
1    12.607091
2    24.268041
3    29.128802
4    10.257994
Name: Percentage, dtype: float64
sentiment      Negative    Neutral   Positive
topic_number                                 
0             10.457472  11.032735  78.509793
1              6.537718   9.890393  83.571889
2              6.578242  13.672294  79.749464
3             10.185289  10.626186  79.188526
4              5.721078  22.503962  71.774960


In [4]:
# Summarise the pyABSA counts: the sentiment split within each topic, and
# each topic's share of all mentions.
#
# The total is taken over the three sentiment columns only. Summing every
# column with `pyabsa_sentiment_counts.sum(axis=1)` would fold an already
# added 'Total' (or 'Percentage') column back into the total, doubling it and
# making the cell produce different numbers each time it is re-run.
sentiment_columns = ['Negative', 'Neutral', 'Positive']

# Calculate the total number of sentiments for each topic
pyabsa_sentiment_counts['Total'] = pyabsa_sentiment_counts[sentiment_columns].sum(axis=1)

# Calculate the percentage of positive, neutral, and negative sentiments for each topic
df_percentage = pyabsa_sentiment_counts[sentiment_columns].div(pyabsa_sentiment_counts['Total'], axis=0) * 100

# Calculate the percentage representation of each topic with respect to the 'Total' column
pyabsa_sentiment_counts['Percentage'] = pyabsa_sentiment_counts['Total'] / pyabsa_sentiment_counts['Total'].sum() * 100

# Print each topic's share of all mentions
print(pyabsa_sentiment_counts["Percentage"])

# Print the per-topic sentiment percentages
print(df_percentage)

topic_number
0    22.795174
1    22.742507
2    18.349612
3     1.551278
4    34.561429
Name: Percentage, dtype: float64
sentiment      Negative    Neutral   Positive
topic_number                                 
0             11.846251  28.050830  60.102920
1             27.210526  13.200000  59.589474
2             20.900196  13.620352  65.479452
3             23.148148   9.567901  67.283951
4             16.055967  13.264529  70.679504
