In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import spacy
from sklearn.feature_extraction.text import TfidfVectorizer
from textblob import TextBlob

# Load spaCy's medium English pipeline. The 'en_core_web_md' package must be
# installed in the kernel's environment before this cell runs.
nlp = spacy.load('en_core_web_md')

# English stop words, read from the loaded pipeline's language defaults.
# This avoids reaching through `spacy.lang.en.stop_words` by attribute access,
# which depends on that submodule having been imported as a side effect.
stop_words = nlp.Defaults.stop_words

# Function to preprocess text
def preprocess(text):
    """Lemmatize ``text`` and drop stop words.

    The text is run through the module-level pipeline ``nlp``. Every token
    whose lowercased form is not in the module-level ``stop_words`` collection
    is replaced by its lemma, and the lemmas are joined with single spaces.

    Stop-word matching is done on the lowercased token so that capitalised
    stop words (e.g. a sentence-initial "The") are removed as well; comparing
    the raw token text would let them through.

    Args:
        text: The raw text to process.

    Returns:
        str: The space-separated lemmas of the kept tokens ("" if none remain).
    """
    doc = nlp(text)
    return " ".join(token.lemma_ for token in doc if token.lower_ not in stop_words)

# Load the data from the CSV file
df = pd.read_csv('./data.csv')

# Add a preprocessed copy of each bot's text column
df['Bot1_processed'] = df['Bot1'].apply(preprocess)
df['Bot2_processed'] = df['Bot2'].apply(preprocess)


def _row_similarity(row):
    """Return the spaCy similarity between the two processed texts of one row."""
    first_doc = nlp(row['Bot1_processed'])
    second_doc = nlp(row['Bot2_processed'])
    return first_doc.similarity(second_doc)


# One similarity score per row
df['similarity'] = df.apply(_row_similarity, axis=1)

# Summary statistics of the similarity scores
similarity_scores = df['similarity']
mean, median = similarity_scores.mean(), similarity_scores.median()
max_value, min_value = similarity_scores.max(), similarity_scores.min()
std_dev = similarity_scores.std()

print(f"Mean: {mean}")
print(f"Median: {median}")
print(f"Max: {max_value}")
print(f"Min: {min_value}")
print(f"Standard Deviation: {std_dev}")

In [None]:
# Sentiment
def get_sentiment(text):
    """Return the sentiment polarity that TextBlob assigns to ``text``.

    Args:
        text: The text to analyse.

    Returns:
        The ``sentiment.polarity`` value of ``TextBlob(text)``.
    """
    return TextBlob(text).sentiment.polarity


# Score the raw text of each bot and store the polarity in a sibling column
for source_col, score_col in (('Bot1', 'Bot1s'), ('Bot2', 'Bot2s')):
    df[score_col] = df[source_col].apply(get_sentiment)

In [None]:
from scipy.stats import fisher_exact
import numpy as np

# Bot1 2x2 contingency table, laid out as [[a, b], [c, d]]
contingency_table = [[11, 26], [3, 34]]

# Fisher's exact test on the table (SciPy's default alternative hypothesis)
odds_ratio, p_value = fisher_exact(contingency_table)

# Extracting the four cell counts from the contingency table
a, b = contingency_table[0]
c, d = contingency_table[1]

# Standard error of the *log* odds ratio: sqrt(1/a + 1/b + 1/c + 1/d).
# All four counts must be non-zero, otherwise this divides by zero.
SE = np.sqrt(1/a + 1/b + 1/c + 1/d)

# 95% confidence interval for the odds ratio: the interval is built on the
# log scale (+/- 1.96 standard errors) and mapped back with exp().
lower_95CI = odds_ratio * np.exp(-1.96 * SE)
upper_95CI = odds_ratio * np.exp(1.96 * SE)

# Report each result once
print(f'Odds Ratio: {odds_ratio}')
print(f'P-value: {p_value}')
print(f'95% CI: ({lower_95CI}, {upper_95CI})')

In [None]:
from scipy.stats import fisher_exact
# numpy is imported here as well so this cell does not depend on an earlier
# cell having brought `np` into the kernel namespace.
import numpy as np

# Bot2 2x2 contingency table, laid out as [[a, b], [c, d]]
contingency_table = [[17, 20], [2, 35]]

# Fisher's exact test on the table (SciPy's default alternative hypothesis)
odds_ratio, p_value = fisher_exact(contingency_table)

# Extracting the four cell counts from the contingency table
a, b = contingency_table[0]
c, d = contingency_table[1]

# Standard error of the *log* odds ratio: sqrt(1/a + 1/b + 1/c + 1/d).
# All four counts must be non-zero, otherwise this divides by zero.
SE = np.sqrt(1/a + 1/b + 1/c + 1/d)

# 95% confidence interval for the odds ratio: the interval is built on the
# log scale (+/- 1.96 standard errors) and mapped back with exp().
lower_95CI = odds_ratio * np.exp(-1.96 * SE)
upper_95CI = odds_ratio * np.exp(1.96 * SE)

# Report each result once
print(f'Odds Ratio: {odds_ratio}')
print(f'P-value: {p_value}')
print(f'95% CI: ({lower_95CI}, {upper_95CI})')