# **Risk or Opportunity: Public Discourse on the Dangers of AI**
Sonia Nicoletti (A12922110)

Social Media and Social Network Analysis Project

## **Importing the dataset**

In [None]:
import pandas as pd
!pip install bertopic
from bertopic.representation import ZeroShotClassification
from bertopic import BERTopic
from transformers import pipeline
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, cohen_kappa_score
import chardet
from google.colab import files

In [None]:
# Read the comments CSV from the working directory into `df`
file_name = 'risks_comments.csv'
df = pd.read_csv(filepath_or_buffer=file_name)
# Show how many rows were loaded
df.shape[0]

## **Sentiment analysis of AI videos**

### Method 1: cardiffnlp/twitter-roberta-base-sentiment-latest

In [None]:
# Label coding used for this method: 0 -> Negative; 1 -> Neutral; 2 -> Positive
sentiment_pipe = pipeline(task="sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment-latest")

def analyze_sentiment(text):
    """Classify one comment with `sentiment_pipe`.

    Returns the `(label, score)` pair of the first prediction, or
    `(None, None)` if anything raises while predicting `text`.
    """
    try:
        first_prediction = sentiment_pipe(text)[0]
        outcome = (first_prediction['label'], first_prediction['score'])
    except Exception:
        # Best effort: a failing comment yields an empty prediction
        outcome = (None, None)
    return outcome

In [None]:
# Test with 100 entries
%%time
df_s = df.sample(n=100)
df_s['sentiment_label'], df_s['sentiment_score'] = zip(*df_s['textOriginal'].apply(analyze_sentiment))
df_s.head()

In [None]:
# Predicting all the entries
%%time
df['sentiment_label'], df['sentiment_score'] = zip(*df['textOriginal'].apply(analyze_sentiment))
df.head()

In [None]:
# Translate the numeric gold-standard codes into label names
code_to_name = {0: 'negative', 1: 'neutral', 2: 'positive'}
df['manual_classification'] = df['goldstandard'].replace(code_to_name)

# Score the predictions against the manual labels (both sides compared as strings)
gold_labels = df.manual_classification.astype(str)
predicted_labels = df.sentiment_label.astype(str)
eval_classes = ['negative', 'neutral', 'positive']
print("F1 Score is: ", f1_score(gold_labels, predicted_labels, labels=eval_classes, average='weighted'))
print("Precision is: ", precision_score(gold_labels, predicted_labels, labels=eval_classes, average='weighted'))
print("Recall is: ", recall_score(gold_labels, predicted_labels, labels=eval_classes, average='weighted'))
print("Kappa is: ", cohen_kappa_score(gold_labels, predicted_labels))

### Method 2: distilbert-base-multilingual-cased-sentiments-student

In [None]:
# labels: 'positive', 'neutral', 'negative'
sentiment_pipe = pipeline(model="lxyuan/distilbert-base-multilingual-cased-sentiments-student", return_all_scores=True)

def analyze_sentiment(text):
    """Score one comment for all three sentiments with `sentiment_pipe`.

    Returns a 6-tuple
    ('positive', positive_score, 'neutral', neutral_score, 'negative', negative_score),
    or six `None` values if anything raises while predicting `text`.

    Scores are looked up by the label the pipeline reports rather than by
    list position, so each score always lands under the right name even if
    the pipeline returns the labels in a different order.
    """
    try:
        per_label = sentiment_pipe(text)[0]
        score_of = {entry['label'].lower(): entry['score'] for entry in per_label}
        return ('positive', score_of['positive'],
                'neutral', score_of['neutral'],
                'negative', score_of['negative'])
    except Exception:
        # Best effort: a failing comment yields an empty prediction
        return None, None, None, None, None, None

In [None]:
# Trial run on 100 randomly drawn comments
df_s = df.sample(n=100)
sample_predictions = df_s['textOriginal'].apply(analyze_sentiment)
# Each prediction is a 6-tuple; transpose the tuples into six column-wise sequences
pos_name, pos_value, neu_name, neu_value, neg_name, neg_value = zip(*sample_predictions)
df_s['positive'] = pos_name
df_s['positive_score'] = pos_value
df_s['neutral'] = neu_name
df_s['neutral_score'] = neu_value
df_s['negative'] = neg_name
df_s['negative_score'] = neg_value
df_s.head()

In [None]:
# Score every comment in the data set
all_predictions = df['textOriginal'].apply(analyze_sentiment)
# Each prediction is a 6-tuple; transpose the tuples into six column-wise sequences
pos_name, pos_value, neu_name, neu_value, neg_name, neg_value = zip(*all_predictions)
df['positive'] = pos_name
df['positive_score'] = pos_value
df['neutral'] = neu_name
df['neutral_score'] = neu_value
df['negative'] = neg_name
df['negative_score'] = neg_value
df.head()

In [None]:
# Add a column for the sentiment with the highest score
def determine_label(row):
    """Return the name of the sentiment with the highest score in `row`.

    `row` must support `row[key]` for the keys 'positive_score',
    'neutral_score' and 'negative_score'.

    Returns 'positive', 'neutral' or 'negative'. On a tie the earlier name
    in that order wins. Returns 'NA' when any of the three scores is
    missing (None or NaN), so the result never depends on where the
    missing value sits.
    """
    scored = [(name, row[name + '_score']) for name in ('positive', 'neutral', 'negative')]
    if any(pd.isna(score) for _, score in scored):
        return 'NA'
    # max() keeps the first of several equal items, which gives the tie order above
    best_name, _ = max(scored, key=lambda pair: pair[1])
    return best_name

# Label every row with its highest-scoring sentiment, then preview the frame
row_labels = df.apply(determine_label, axis=1)
df['sentiment_label'] = row_labels
df.head()

In [None]:
# Translate the numeric gold-standard codes into label names
code_to_name = {0: 'negative', 1: 'neutral', 2: 'positive'}
df['manual_classification'] = df['goldstandard'].replace(code_to_name)

# Score the predictions against the manual labels (both sides compared as strings)
gold_labels = df.manual_classification.astype(str)
predicted_labels = df.sentiment_label.astype(str)
eval_classes = ['negative', 'neutral', 'positive']
print("F1 Score is: ", f1_score(gold_labels, predicted_labels, labels=eval_classes, average='weighted'))
print("Precision is: ", precision_score(gold_labels, predicted_labels, labels=eval_classes, average='weighted'))
print("Recall is: ", recall_score(gold_labels, predicted_labels, labels=eval_classes, average='weighted'))
print("Kappa is: ", cohen_kappa_score(gold_labels, predicted_labels))

### Method 3: bertweet-base-sentiment-analysis

In [None]:
# This model reports its classes as 'POS', 'NEG', 'NEU'
sentiment_pipe = pipeline(task="text-classification", model='finiteautomata/bertweet-base-sentiment-analysis')

def analyze_sentiment(text):
    """Classify one comment with `sentiment_pipe`.

    Returns the `(label, score)` pair of the first prediction, or
    `(None, None)` if anything raises while predicting `text`.
    """
    try:
        first_prediction = sentiment_pipe(text)[0]
        outcome = (first_prediction['label'], first_prediction['score'])
    except Exception:
        # Best effort: a failing comment yields an empty prediction
        outcome = (None, None)
    return outcome

In [None]:
# Trial run on a random sample of 15000 comments
df_s = df.sample(n=15000)
sample_predictions = df_s['textOriginal'].apply(analyze_sentiment)
# Split the (label, score) pairs into two columns
df_s['Bertweet_sentiment'], df_s['Bertweet_score'] = zip(*sample_predictions)
df_s.head()

In [None]:
# Score every comment in the data set
all_predictions = df['textOriginal'].apply(analyze_sentiment)
# Split the (label, score) pairs into two columns
df['Bertweet_sentiment'], df['Bertweet_score'] = zip(*all_predictions)
df.head()

In [None]:
# Two-class evaluation: positive is folded into neutral on both sides
gold_code_to_name = {0: 'negative', 1: 'neutral', 2: 'neutral'}
model_tag_to_name = {'NEG': 'negative', 'NEU': 'neutral', 'POS': 'neutral'}
df['manual_classification'] = df['goldstandard'].replace(gold_code_to_name)
df['sentiment_label'] = df['Bertweet_sentiment'].replace(model_tag_to_name)

# Score the predictions against the manual labels (both sides compared as strings)
gold_labels = df.manual_classification.astype(str)
predicted_labels = df.sentiment_label.astype(str)
eval_classes = ['negative', 'neutral']
print("F1 Score is: ", f1_score(gold_labels, predicted_labels, labels=eval_classes, average='weighted'))
print("Precision is: ", precision_score(gold_labels, predicted_labels, labels=eval_classes, average='weighted'))
print("Recall is: ", recall_score(gold_labels, predicted_labels, labels=eval_classes, average='weighted'))
print("Kappa is: ", cohen_kappa_score(gold_labels, predicted_labels))

In [None]:
# Three-class evaluation: map gold codes and model tags onto the same label names
gold_code_to_name = {0: 'negative', 1: 'neutral', 2: 'positive'}
model_tag_to_name = {'NEG': 'negative', 'NEU': 'neutral', 'POS': 'positive'}
df['manual_classification'] = df['goldstandard'].replace(gold_code_to_name)
df['sentiment_label'] = df['Bertweet_sentiment'].replace(model_tag_to_name)

# Score the predictions against the manual labels (both sides compared as strings)
gold_labels = df.manual_classification.astype(str)
predicted_labels = df.sentiment_label.astype(str)
eval_classes = ['negative', 'neutral', 'positive']
print("F1 Score is: ", f1_score(gold_labels, predicted_labels, labels=eval_classes, average='weighted'))
print("Precision is: ", precision_score(gold_labels, predicted_labels, labels=eval_classes, average='weighted'))
print("Recall is: ", recall_score(gold_labels, predicted_labels, labels=eval_classes, average='weighted'))
print("Kappa is: ", cohen_kappa_score(gold_labels, predicted_labels))

### Method 4: bart-large-mnli

In [None]:
pipe = pipeline(model="facebook/bart-large-mnli")

# Candidate labels substituted into the hypothesis template
labels = ["dangerous", "neutral"]

def analyse_sentiment(text):
    """Return the first entry of the "labels" list the pipeline produces for `text`.

    The module-level `labels` are passed as candidate labels; each one is
    inserted into the hypothesis 'This comment says AI is {}'.
    """
    ranked_labels = pipe(
        text,
        candidate_labels=labels,
        hypothesis_template='This comment says AI is {}',
    )["labels"]
    return ranked_labels[0]

In [None]:
# Test with 100 entries
%%time
df_s = df.sample(n=100)
df_s["y_pred"] = df_s.textOriginal.apply(analyse_sentiment)

In [None]:
# Run the model on all the data
%%time
df["y_pred"] = df.textOriginal.apply(analyse_sentiment)
df.head()

In [None]:
# Three-class evaluation of the zero-shot predictions
# NOTE(review): 'beneficial' is not among the candidate labels defined for this
# method above, so 'positive' looks like it can never be predicted here - confirm.
gold_code_to_name = {0: 'negative', 1: 'neutral', 2: 'positive'}
prediction_to_name = {'dangerous': 'negative', 'neutral': 'neutral', 'beneficial': 'positive'}
df['manual_classification'] = df['goldstandard'].replace(gold_code_to_name)
df['sentiment_label'] = df['y_pred'].replace(prediction_to_name)

# Score the predictions against the manual labels (both sides compared as strings)
gold_labels = df.manual_classification.astype(str)
predicted_labels = df.sentiment_label.astype(str)
eval_classes = ['negative', 'neutral', 'positive']
print("F1 Score is: ", f1_score(gold_labels, predicted_labels, labels=eval_classes, average='weighted'))
print("Precision is: ", precision_score(gold_labels, predicted_labels, labels=eval_classes, average='weighted'))
print("Recall is: ", recall_score(gold_labels, predicted_labels, labels=eval_classes, average='weighted'))
print("Kappa is: ", cohen_kappa_score(gold_labels, predicted_labels))

In [None]:
# Two-class evaluation (negative vs. neutral/positive): gold 'positive' is folded into 'neutral'
gold_code_to_name = {0: 'negative', 1: 'neutral', 2: 'neutral'}
prediction_to_name = {'dangerous': 'negative', 'neutral': 'neutral'}
df['manual_classification'] = df['goldstandard'].replace(gold_code_to_name)
df['sentiment_label'] = df['y_pred'].replace(prediction_to_name)

# Score the predictions against the manual labels (both sides compared as strings)
gold_labels = df.manual_classification.astype(str)
predicted_labels = df.sentiment_label.astype(str)
eval_classes = ['negative', 'neutral']
print("F1 Score is: ", f1_score(gold_labels, predicted_labels, labels=eval_classes, average='weighted'))
print("Precision is: ", precision_score(gold_labels, predicted_labels, labels=eval_classes, average='weighted'))
print("Recall is: ", recall_score(gold_labels, predicted_labels, labels=eval_classes, average='weighted'))
print("Kappa is: ", cohen_kappa_score(gold_labels, predicted_labels))

## **Topic Model of Risks videos (LDA)**

In [None]:
import pandas as pd
# Read the comments again, this time from the Drive folder, and preview them
# NOTE(review): this path differs from the one used for the first load - confirm which is intended.
file_name = 'drive/MyDrive/AI Risks Datasets/risks_comments.csv'
df = pd.read_csv(filepath_or_buffer=file_name)
df.head(n=5)

In [None]:
# Drop the metadata columns that the topic model does not use
unused_columns = [
    'channelId', 'videoId', 'textDisplay', 'authorDisplayName',
    'authorProfileImageUrl', 'authorChannelUrl', 'authorChannelId.value',
    'canRate', 'viewerRating', 'likeCount', 'publishedAt', 'updatedAt',
    'id', 'parentId', 'moderationStatus', 'created_at',
]
df = df.drop(columns=unused_columns)
df.head()

In [None]:
# Keep only the rows whose comment text is an actual string (drops nulls)
is_text = df['textOriginal'].map(lambda value: isinstance(value, str))
df = df.loc[is_text]
df.shape[0]

In [None]:
# Filter comments that explicitly mention AI
import re  # imported here because this cell runs before the notebook's later `import re`

keywords = ['AI', 'A.I.', 'A.I', 'artificial intelligence', 'AIs']
# Escape every keyword so '.' is a literal dot, and require that the match is
# not embedded in a longer word. Without this, the case-insensitive pattern
# 'AI' also matches words such as "said" or "again" and nothing is filtered.
pattern = r'(?<!\w)(?:' + '|'.join(re.escape(keyword) for keyword in keywords) + r')(?!\w)'
mentions_ai = df['textOriginal'].str.contains(pattern, case=False, regex=True, na=False)
# .copy() so that columns added later do not raise SettingWithCopyWarning
df = df[mentions_ai].copy()
len(df)

In [None]:
# Remove punctuation/lower casing
import re
# Strip commas, periods, exclamation marks and question marks.
# Raw string: '\.' in a normal string literal is an invalid escape sequence.
df['paper_text_processed'] = df['textOriginal'].map(lambda x: re.sub(r'[,.!?]', '', x))
# Convert the comments to lowercase
df['paper_text_processed'] = df['paper_text_processed'].map(lambda x: x.lower())
# Show the first processed comments
df['paper_text_processed'].head()

In [None]:
# Word Cloud
from wordcloud import WordCloud
# Concatenate every processed comment into one comma-separated string
long_string = ','.join(df['paper_text_processed'].tolist())
# Build the cloud from that text and render it as an image
cloud_options = dict(background_color="white", max_words=5000, contour_width=3, contour_color='steelblue')
wordcloud = WordCloud(**cloud_options)
wordcloud.generate(long_string)
wordcloud.to_image()

In [None]:
# Prepare data for LDA Analysis
import gensim
from gensim.utils import simple_preprocess
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
# Start from NLTK's English stop words and append words specific to this corpus
extra_stop_words = [
    'from', 'to', 'subject', 're', 'one', 'like', 'video', 'comment', 'much',
    'please', 'lol', 'really', 'first', 'would', 'could', 'should', 'going', 'get',
    'take', 'are', 'is', 'what', 'know', 'even', 'something', 'way', 'really', 'say',
    'thing', 'anything', 'talk', 'actually', 'still', 'also', 'yet', 'let', 'make',
    'set', 'more', 'other', 'yes', 'no', 'im', 'thanks', 'thank', 'oh', 'ah', 'gonna',
    'yeah', 'ok', 'thought', 'tho', 'though', 'okay', 'look', 'much', 'looks', 'looking',
    'imma', 'hey', 'hi', 'likes', 'views', 'that', 'cant', 'doesnt', 'does', 'keep', 'tell',
    'dont', 'take', 'etc', 'say', 'says', 'said', 'told', 'well', 'just', 'come', 'came',
    'do', 'not', 'isnt', 'can', 'use', 'need', 'many', 'lot', 'made', 'want', 'think', 'will',
]
stop_words = stopwords.words('english') + extra_stop_words
def sent_to_words(sentences):
    """Lazily tokenize each item of `sentences`.

    Every item is converted with `str()` and passed to gensim's
    `simple_preprocess` with `deacc=True`; one token list is yielded per item.
    """
    tokenize = gensim.utils.simple_preprocess
    for raw_sentence in sentences:
        yield tokenize(str(raw_sentence), deacc=True)
def remove_stopwords(texts, stopword_list=None):
    """Remove stop words from every document in `texts`.

    Args:
        texts: iterable of documents. A document that is already a list or
            tuple of tokens is filtered as it is; anything else is converted
            with `str()` and tokenized with `simple_preprocess` first.
        stopword_list: optional iterable of words to drop. Defaults to the
            module-level `stop_words`.

    Returns:
        A list with one list of remaining tokens per document.
    """
    # A set gives constant-time membership tests instead of scanning a list per token
    blocked = frozenset(stop_words if stopword_list is None else stopword_list)
    cleaned = []
    for doc in texts:
        if isinstance(doc, (list, tuple)):
            # Already tokenized: no need to stringify the list and tokenize its repr again
            tokens = doc
        else:
            tokens = simple_preprocess(str(doc))
        cleaned.append([word for word in tokens if word not in blocked])
    return cleaned
# Tokenize the processed comments, then strip the stop words
data = df['paper_text_processed'].tolist()
data_words = remove_stopwords(list(sent_to_words(data)))
# Preview up to 30 tokens of the first comment
print(data_words[0][:30])

In [None]:
import gensim.corpora as corpora
# Dictionary built from the tokenized comments
id2word = corpora.Dictionary(data_words)
# The tokenized comments are the texts of the corpus
texts = data_words
# Bag-of-words representation of every text
corpus = list(map(id2word.doc2bow, texts))
# Preview up to 30 entries of the first document
print(corpus[0][:30])

In [None]:
# LDA model training
from pprint import pprint
# number of topics
num_topics = 10
# Build LDA model
# random_state fixes the seed so that re-running the notebook does not produce
# a completely different set of topics.
# NOTE(review): multicore training may still not be bit-for-bit identical - confirm.
lda_model = gensim.models.LdaMulticore(corpus=corpus,
                                       id2word=id2word,
                                       num_topics=num_topics,
                                       random_state=42)
# Print the keywords of each topic
pprint(lda_model.print_topics())
# Topic distribution of every document in the corpus
doc_lda = lda_model[corpus]

In [None]:
# Analyzing LDA model results
!pip install pyLDAvis
!pip install pandas==1.5.3
!mkdir results
import pyLDAvis.gensim
import pickle
import pyLDAvis
import os

# Visualize the topics
pyLDAvis.enable_notebook()
LDAvis_data_filepath = os.path.join('./results/ldavis_prepared_'+str(num_topics))
# # this is a bit time consuming - make the if statement True
# # if you want to execute visualization prep yourself
if 1 == 1:
    LDAvis_prepared = pyLDAvis.gensim.prepare(lda_model, corpus, id2word)
    with open(LDAvis_data_filepath, 'wb') as f:
        pickle.dump(LDAvis_prepared, f)

pyLDAvis.save_html(LDAvis_prepared, './results/ldavis_prepared_'+ str(num_topics) +'.html')
LDAvis_prepared

## **Classification of Risks videos comments**

### Method 1: bart-large-mnli

In [None]:
pipe = pipeline(model="facebook/bart-large-mnli")

# Risk categories substituted into the hypothesis template
labels = [
    "job loss", "privacy and security", "bias", "regulatory and legal challenges",
    "manipulation and misinformation", "future catastrophic risks", "accidents",
    "plagiarism in art", "other",
]

def analyse_sentiment(text):
    """Return the first entry of the "labels" list the pipeline produces for `text`.

    The module-level `labels` are passed as candidate labels; each one is
    inserted at the `{}` of the hypothesis template.
    """
    ranked_labels = pipe(
        text,
        candidate_labels=labels,
        hypothesis_template='This comment says AI is dangerous because of {}',
    )["labels"]
    return ranked_labels[0]

In [None]:
# Test with 100 entries
%%time
df_s = df.sample(n=100)
df_s["y_pred"] = df_s.textOriginal.apply(analyse_sentiment)

In [None]:
# Run the model on all the data
%%time
df["y_pred"] = df.textOriginal.apply(analyse_sentiment)
df.head()

In [None]:
# Translate the numeric gold-standard codes into risk category names
risk_code_to_name = {
    0: "other", 1: "job loss", 2: "privacy and security", 3: "bias",
    4: "regulatory and legal challenges", 5: "manipulation and misinformation",
    6: "future catastrophic risks", 7: "accidents", 8: "plagiarism in art",
}
df['manual_classification'] = df['goldstandard'].replace(risk_code_to_name)
df['sentiment_label'] = df['y_pred']

# Score the predictions against the manual labels (both sides compared as strings)
gold_labels = df.manual_classification.astype(str)
predicted_labels = df.sentiment_label.astype(str)
risk_classes = ["job loss", "privacy and security", "bias", "regulatory and legal challenges", "manipulation and misinformation", "future catastrophic risks", "accidents", "plagiarism in art", "other"]
print("F1 Score is: ", f1_score(gold_labels, predicted_labels, labels=risk_classes, average='weighted'))
print("Precision is: ", precision_score(gold_labels, predicted_labels, labels=risk_classes, average='weighted'))
print("Recall is: ", recall_score(gold_labels, predicted_labels, labels=risk_classes, average='weighted'))
print("Kappa is: ", cohen_kappa_score(gold_labels, predicted_labels))

### Method 2: Dictionary method

In [None]:
# Remove the entries that are null (not string)
import re  # imported in this cell so it runs on its own

df = df.loc[df['textOriginal'].apply(lambda x: isinstance(x, str))]
# Filter comments that explicitly mention AI
keywords = ['AI', 'A.I.', 'A.I', 'artificial intelligence', 'AIs']
# Escape every keyword so '.' is a literal dot, and require that the match is
# not embedded in a longer word. Without this, the case-insensitive pattern
# 'AI' also matches words such as "said" or "again" and nothing is filtered.
pattern = r'(?<!\w)(?:' + '|'.join(re.escape(keyword) for keyword in keywords) + r')(?!\w)'
mentions_ai = df['textOriginal'].str.contains(pattern, case=False, regex=True, na=False)
# .copy() so that later column assignments do not raise SettingWithCopyWarning
df = df[mentions_ai].copy()
len(df)

In [None]:
# Keywords for each category
keywords_1 = ['Employment', 'Occupation', 'Career', 'Job', 'Economic impact', 'Labor', 'Labour', 'Unemployment', 'Workforce', 'Displacement']
keywords_2 = ['Security', 'Privacy', 'Data protection', 'Confidentiality', 'Cybersecurity', 'Vulnerability', 'Vulnerabilities', 'Encryption', 'GDPR', 'Data sourcing', 'Hacking', 'Hacker', 'Open source']
keywords_3 = ['Bias', 'Discrimination', 'Race', 'Discriminatory', 'Fairness', 'Social justice', 'Equity']
keywords_4 = ['Regulatory', 'Legal', 'Corporation', 'Government', 'Gov', 'Law', 'Tax', 'Regulation', 'Liability', 'Accountability', 'Governance', 'Regulate']
keywords_5 = ['Manipulation', 'Misinformation', 'Deepfake', 'Faux news', 'Fake news', 'Disinformation']
keywords_6 = ['Future', 'Existential', 'Uncontrollable', 'Loss of control', 'Uncontrolled', 'Catastrophic', 'Catastrophe', 'Takeover', 'Out of control', 'Extinction', 'Control us', 'Apocalypse', 'Extinction', 'Apocalyptic', 'Screwed', 'Doomed', 'Depopulation', 'Destroy', 'Destruction', 'Annihilated', 'Disaster', 'Kill']
keywords_7 = ['Accident', 'Autonomous vehicle', 'Car', 'Self-driving', 'Self driving', 'Crash', 'Incident', 'Road', 'Collision', 'Vehicle', 'Drive', 'Driving']
keywords_8 = ['Plagiarism', 'Art', 'Artist', 'Artwork', 'Imitation', 'Copyright', 'Intellectual property', 'Ownership']

keyword_list = [keywords_1, keywords_2, keywords_3, keywords_4, keywords_5, keywords_6, keywords_7, keywords_8]

# Convert text in 'textOriginal' column and keywords to lowercase
df['textOriginal'] = df['textOriginal'].str.lower()
# The loop variable must not be called `list`: that would shadow the builtin
# and break every later `list(...)` call in the notebook.
keyword_list = [[keyword.lower() for keyword in group] for group in keyword_list]

# Number of comments mentioning at least one of the keywords for each category.
# NOTE(review): this is plain substring matching, so e.g. 'art' also counts
# "artificial" and 'car' counts "scary" - confirm whether whole-word matching is wanted.
num_comments = [
    int(df['textOriginal'].str.contains('|'.join(group), regex=True, na=False).sum())
    for group in keyword_list
]

print(num_comments)

In [None]:
# Visualize the results
import matplotlib.pyplot as plt

bar_titles = ['Job loss', 'Security', 'Bias', 'Regulations', 'Misinformation', 'Existential risks', 'Accidents', 'Plagiarism']
# Order the bars from the least to the most mentioned category
sorted_data, sorted_titles = zip(*sorted(zip(num_comments, bar_titles)))

fig, ax = plt.subplots()
ax.bar(sorted_titles, sorted_data, color='#4287f5')

# The bars count comments (see num_comments), so the labels say comments, not videos/titles
ax.set_xlabel('Categories')
ax.set_ylabel('Number of comments')
ax.set_title('Most discussed risks in the comments')
ax.tick_params(axis='x', labelrotation=90)

plt.show()

## **Downloading the new dataset**

In [None]:
# Write the labelled frame to CSV (without the index) and hand it to the Colab download helper
# NOTE(review): this exports `df_s`, the sampled frame, not the full `df` - confirm that is intended.
file_name = 'risks_comments_labeled.csv'
df_s.to_csv(path_or_buf=file_name, index=False)
files.download(file_name)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>