In [None]:
pip install textblob

In [None]:
import pandas as pd
from textblob import TextBlob

In [None]:
df = pd.read_csv('edge_list.csv')
df.head()

In [None]:
# Define a function to calculate sentiment score using TextBlob
def get_sentiment(body):
    """Return the TextBlob sentiment polarity of ``body``.

    Parameters
    ----------
    body : str
        Text to analyse. Any non-string value (for example the NaN that pandas
        stores for an empty CSV cell) is treated as missing text.

    Returns
    -------
    float
        ``TextBlob(body).sentiment.polarity`` (range from -1 to 1) for string
        input; NaN for non-string input.
    """
    # TextBlob only accepts strings and raises TypeError for anything else.
    # Returning NaN keeps missing texts out of describe()/corr(), which skip NaN,
    # instead of aborting the whole column computation.
    if not isinstance(body, str):
        return float('nan')

    # Build the TextBlob and read the polarity component of its sentiment
    return TextBlob(body).sentiment.polarity

# Score every entry of the 'body' column with get_sentiment and store the
# results in a new 'sentiment_score' column of the DataFrame
df['sentiment_score'] = df['body'].map(get_sentiment)


In [None]:
# describe() summaries of the 'score' and 'sentiment_score' columns
score_stats, sentiment_stats = (
    df[column].describe() for column in ('score', 'sentiment_score')
)

# Print each summary under its own heading (heading and table on separate lines);
# the leading "\n" of the second heading leaves a blank line between the reports
print("Score Statistics:", score_stats, sep="\n")
print("\nSentiment Statistics:", sentiment_stats, sep="\n")

In [None]:
# Correlation between 'score' and 'sentiment_score' (pandas' default method)
correlation = df['score'].corr(other=df['sentiment_score'])

# Report the coefficient
print(f"Correlation coefficient: {correlation}")

In [None]:
import matplotlib.pyplot as plt

# Scatter plot of the 'score' column (x-axis) against the 'sentiment_score'
# column (y-axis) of df
x = df['score']
y = df['sentiment_score']

# Draw on an explicitly created figure/axes pair
fig, ax = plt.subplots()
ax.scatter(x, y)
ax.set(
    xlabel='Score',
    ylabel='Sentiment',
    title='Relationship between Score and Sentiment',
)
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Store the 'topics' column as strings
df['topics'] = df['topics'].astype(str)

# Mean 'score' and mean 'sentiment_score' per topic, one row per topic
grouped_data = (
    df.groupby('topics')[['score', 'sentiment_score']]
    .mean()
    .reset_index()
)

# One bar chart per averaged column: (column, y-axis label, chart title)
for column, y_label, chart_title in (
    ('score', 'Average Score', 'Average Score by Topic'),
    ('sentiment_score', 'Average Sentiment', 'Average Sentiment by Topic'),
):
    plt.figure(figsize=(12, 6))
    plt.bar(grouped_data['topics'], grouped_data[column])
    plt.xlabel('Topic')
    plt.ylabel(y_label)
    plt.title(chart_title)
    plt.xticks(rotation=90)  # rotate the topic labels on the x-axis by 90 degrees
    plt.show()


In [None]:
# Distinct values of the 'topics' column; the bare name on the last line
# makes the notebook display them as the cell output
unique_topics = pd.unique(df['topics'])
unique_topics

In [None]:
import numpy as np
from scipy.stats import ttest_ind, f_oneway

# Topics that make up the first comparison group.
# NOTE(review): 'topic1'/'topic2'/'topic3' look like placeholders — replace them with
# labels that actually occur in df['topics'], otherwise group 1 is empty.
group1_topics = ['topic1', 'topic2', 'topic3']
in_group1 = df['topics'].isin(group1_topics)

# Group 1: scores of the listed topics. Group 2: scores of all remaining topics.
# The two samples have to be disjoint: testing a selection against itself
# always gives t = 0 and p = 1, whatever the data look like.
# NOTE(review): if a specific second set of topics was intended, filter on that set instead.
topic1_scores = df.loc[in_group1, 'score']
topic2_scores = df.loc[~in_group1, 'score']

In [None]:
# Independent two-sample t-test on the two score samples
t_statistic, t_p_value = ttest_ind(topic1_scores, topic2_scores)

# One-way ANOVA on the same two samples
f_statistic, anova_p_value = f_oneway(topic1_scores, topic2_scores)

# Each test keeps its own p-value so neither result is overwritten.
# `p_value` is the name read by the following cell; it carries the ANOVA result.
# With exactly two groups the ANOVA and the equal-variance t-test are equivalent (F = t**2).
p_value = anova_p_value


In [None]:
# Significance level for the test
alpha = 0.05

if pd.isna(p_value):
    # A NaN p-value means the test could not be computed (e.g. an empty group).
    # NaN < alpha is False, so without this branch it would be reported as
    # "no significant difference".
    print("The test could not be evaluated (p-value is NaN); check that both groups contain data.")
elif p_value < alpha:
    print("There is a significant difference between the topics.")
else:
    print("There is no significant difference between the topics.")

In [None]:
import numpy as np
from scipy.stats import ttest_ind

def _significant_topic_pairs(frame, alpha=0.05, min_samples=2):
    """Return the topic pairs whose 'score' samples differ under a t-test.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain the columns 'topics' and 'score'.
    alpha : float
        Significance level applied to each individual pairwise test.
    min_samples : int
        Topics with fewer scores than this are skipped; a t-test on such a
        group yields a NaN p-value and can never be reported as significant.

    Returns
    -------
    list of tuple
        ``(topic_a, topic_b)`` pairs with p-value < ``alpha``, ordered by the
        first appearance of the topics in ``frame``.

    Notes
    -----
    Every pair is tested at ``alpha`` without a multiple-comparison correction,
    so with many topics some pairs are expected to pass by chance alone.
    """
    # Split the scores by topic once instead of re-filtering the frame for every pair
    scores_by_topic = {
        topic: scores
        for topic, scores in frame.groupby('topics', sort=False)['score']
    }

    # Keep the order of first appearance and drop the topics that cannot be tested
    testable = [
        topic
        for topic in frame['topics'].unique()
        if topic in scores_by_topic and len(scores_by_topic[topic]) >= min_samples
    ]

    pairs = []
    for position, first in enumerate(testable):
        for second in testable[position + 1:]:
            # Only the p-value is needed; the t statistic is discarded
            _, pair_p_value = ttest_ind(scores_by_topic[first], scores_by_topic[second])
            if pair_p_value < alpha:
                pairs.append((first, second))
    return pairs


# List of unique topics
topics = df['topics'].unique()

# Significantly different topic pairs. The helper keeps its intermediate values
# local, so names set by earlier cells (p_value, topic1_scores, ...) are not overwritten.
significant_topics = _significant_topic_pairs(df)

# Print the significantly different topic pairs
for first_topic, second_topic in significant_topics:
    print(f"There is a significant difference between topics: {first_topic} and {second_topic}")
