In [None]:
pip install textblob

In [None]:
import pandas as pd
from textblob import TextBlob

In [None]:
# Load the two edge lists from CSV, science first and politics second.
# The relative file names are resolved against the current working directory.
df1, df2 = (
    pd.read_csv(csv_name)
    for csv_name in ('Science_edge.csv', 'Politics_edge.csv')
)


In [None]:
# Tag every row with the edge list it came from so the rows can still be told
# apart after the two frames are combined.
for _frame, _label in ((df1, 'science'), (df2, 'politics')):
    _frame['edge_list'] = _label

In [None]:
# Stack the science rows on top of the politics rows. Each frame keeps its own
# row labels (ignore_index=False), so labels may repeat in the result.
merged_df = pd.concat(objs=[df1, df2], axis=0, ignore_index=False)

In [None]:

# Recover each source's rows from the merged frame via the identifier column:
# edge list 1 is science, edge list 2 is politics.
df_edge_list1 = merged_df.loc[merged_df['edge_list'].eq('science')]
df_edge_list2 = merged_df.loc[merged_df['edge_list'].eq('politics')]

# Bucket the rows of each edge list by topic title.
grouped_topics1 = df_edge_list1.groupby(by='topic_title')
grouped_topics2 = df_edge_list2.groupby(by='topic_title')

# Mean sentiment score per topic, one Series per edge list.
average_sentiment1 = grouped_topics1['sentiment_score'].mean()
average_sentiment2 = grouped_topics2['sentiment_score'].mean()

# Align the two Series side by side on topic title, labelling one column per
# edge list.
comparison = pd.concat(
    [average_sentiment1, average_sentiment2],
    axis=1,
    keys=[
        'Average Sentiment Score (Edge List 1)',
        'Average Sentiment Score (Edge List 2)',
    ],
)

# Show the side-by-side table.
print(comparison)

In [None]:
# Print the index of `comparison` (the topic titles) in the table's current
# row order.
# NOTE(review): nothing in this cell sorts by sentiment variance, so the order
# is whatever the cell that built `comparison` produced — confirm whether a
# sort was intended here.
print(comparison.index)


In [None]:
import matplotlib.pyplot as plt

In [None]:
# Draw one group of bars per row of `comparison` on a 10x6 inch figure and
# keep a handle on the Axes so the rest of the styling is explicit.
ax = comparison.plot.bar(figsize=(10, 6))

# Title and axis captions.
ax.set_title('Comparison of Average Sentiment Scores')
ax.set_xlabel('topic_title')
ax.set_ylabel('Average Sentiment Score')

# Tilt the x tick labels so long names are easier to read.
plt.setp(ax.get_xticklabels(), rotation=45)

# Legend identifying the plotted columns.
ax.legend()

# Tighten the spacing, then write the figure to disk at 300 DPI.
ax.figure.tight_layout()
ax.figure.savefig('figure.png', dpi=300)

# Render the figure in the notebook.
plt.show()


In [None]:
# Rank topics by how far apart the two edge lists' average sentiment scores
# are, then plot the N most divergent topics.
#
# The table is rebuilt here from the per-topic means (`average_sentiment1` is
# science, `average_sentiment2` is politics). Previously this cell sorted on a
# 'Sentiment Difference' column and plotted '(Science)'/'(Politics)' columns
# that did not exist yet at this point in the notebook, so it raised KeyError
# on a fresh Restart & Run All. Rebuilding also makes the cell safe to re-run.
comparison = pd.concat(
    [average_sentiment1, average_sentiment2],
    axis=1,
    keys=['Average Sentiment Score (Science)', 'Average Sentiment Score (Politics)'],
)

# Absolute gap between the two averages. A topic that appears in only one edge
# list has NaN in the other column and therefore a NaN difference.
comparison['Sentiment Difference'] = (
    comparison['Average Sentiment Score (Science)']
    - comparison['Average Sentiment Score (Politics)']
).abs()

# Largest gap first; sort_values places NaN differences at the end.
comparison = comparison.sort_values('Sentiment Difference', ascending=False)

# Select the top N topics with the largest sentiment difference
N = 20  # Number of topics to plot; change to your desired value
top_topics = comparison.head(N)

# Plot the comparison for the top topics
top_topics.plot(kind='bar', y=['Average Sentiment Score (Science)', 'Average Sentiment Score (Politics)'], figsize=(10, 6))
plt.title('Comparison of Average Sentiment Scores')
plt.xlabel('Topics')
plt.ylabel('Average Sentiment Score')
plt.xticks(rotation=45)
plt.legend()
plt.tight_layout()
plt.show()


In [None]:
# Compare the average sentiment per repeated word between the two edge lists
# and plot the N words whose averages differ the most.
#
# pd.merge() only accepts DataFrame/Series inputs, so merging the two GroupBy
# objects (as this cell did before) raised TypeError. Instead, compute the mean
# sentiment per repeated word for each edge list and align the two Series.
# NOTE(review): assumes both CSVs contain a 'repeated_word' column (the key the
# original merge named) — confirm against the data.
average_word_sentiment1 = df_edge_list1.groupby('repeated_word')['sentiment_score'].mean()
average_word_sentiment2 = df_edge_list2.groupby('repeated_word')['sentiment_score'].mean()

# Outer alignment on the word: a word seen in only one edge list is kept, with
# NaN in the other column.
comparison = pd.concat(
    [average_word_sentiment1, average_word_sentiment2],
    axis=1,
    keys=['Average Sentiment Score (Science)', 'Average Sentiment Score (Politics)'],
)

# Calculate the absolute difference in sentiment scores
comparison['Sentiment Difference'] = (
    comparison['Average Sentiment Score (Science)']
    - comparison['Average Sentiment Score (Politics)']
).abs()

# Sort by sentiment difference in descending order (NaN differences go last)
comparison = comparison.sort_values('Sentiment Difference', ascending=False)

# Select the top N repeated words with the largest sentiment difference
N = 20
top_repeated_words = comparison.head(N)

# Plot the comparison
top_repeated_words.plot(kind='bar', y=['Average Sentiment Score (Science)', 'Average Sentiment Score (Politics)'], figsize=(10, 6))
plt.title('Comparison of Average Sentiment Scores (Science vs Politics)')
plt.xlabel('Repeated Words')
plt.ylabel('Average Sentiment Score')
plt.xticks(rotation=45)
plt.legend()
plt.tight_layout()
# NOTE(review): this writes to the same 'figure.png' as the earlier
# topic-comparison plot and overwrites it; use a distinct file name if both
# images are needed.
plt.savefig('figure.png',dpi=300)
plt.show()