<a href="https://colab.research.google.com/github/AfsanehHabibi/reddit-conversation-quality/blob/main/Reasoning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [41]:
!pip install anytree



In [42]:
from google.colab import drive
# Mount Google Drive at /content/drive; base_path (defined in a later cell) points into this mount.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [43]:
# Folder on the mounted Drive that holds every pickle this notebook reads or writes.
# File names are appended directly to this string, so the trailing slash is required.
base_path = "/content/drive/MyDrive/University/RedditData/"

In [44]:
import pickle

# Load the previously saved conversations object (indexed by conversation id) from Drive.
# NOTE: pickle.load can run arbitrary code while deserializing; only open trusted files.
with open(f"{base_path}conversations_with_reasoning.pkl", 'rb') as file:
    conversations = pickle.load(file)

# Report how many conversations were loaded.
print("Len conversations:", len(conversations))

Len conversations: 32990


In [45]:
import spacy

# Small English spaCy pipeline; contains_reasoning uses it to split text into tokens.
nlp = spacy.load("en_core_web_sm")

def contains_reasoning(text):
    """Return True when ``text`` contains at least one reasoning cue word or phrase.

    The text is tokenized with the module-level spaCy pipeline ``nlp`` and the
    lower-cased tokens are scanned for discourse markers such as "because" or
    "as a result". Matching is purely lexical: no part-of-speech or dependency
    information is consulted.

    Args:
        text: Comment body to inspect.

    Returns:
        bool: True if any single-word or multi-word cue occurs, False otherwise.
    """
    single_word_cues = {
        "because", "since", "therefore", "consequently", "thus",
        "so", "hence", "accordingly",
    }
    multi_word_cues = (
        ("due", "to"),
        ("as", "a", "result"),
        ("for", "this", "reason"),
        ("in", "conclusion"),
        ("owing", "to"),
        ("on", "account", "of"),
        ("resulting", "in"),
        ("in", "light", "of"),
        ("on", "the", "grounds", "that"),
    )

    # Whitespace tokens (extra spaces, newlines) are dropped so they cannot
    # split a phrase such as "due  to" into non-adjacent tokens.
    tokens = [token.text.lower() for token in nlp(text) if not token.is_space]

    if any(token in single_word_cues for token in tokens):
        return True

    # A phrase spans several tokens, so it can never equal one token's text;
    # compare each phrase against every window of consecutive tokens instead.
    for phrase in multi_word_cues:
        width = len(phrase)
        for start in range(len(tokens) - width + 1):
            if tuple(tokens[start:start + width]) == phrase:
                return True

    return False

In [46]:
from transformers import pipeline
# Zero-shot classification pipeline loaded from the facebook/bart-large-mnli checkpoint;
# reasoning_based_on_one_shot_classification calls it once per comment text.
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

In [47]:
# NOTE(review): VERBOSE is not referenced by any code visible in this notebook;
# confirm whether it is still needed.
VERBOSE = 3000

In [48]:
import pickle

def write_conversations_with_reasoning():
  """Pickle the notebook-global ``conversations`` to ``conversations_reasoning.pkl``.

  The file is created under the global ``base_path`` and overwritten if it
  already exists. Nothing is returned; a confirmation line is printed.

  NOTE(review): the file name here ("conversations_reasoning.pkl") differs from
  the "conversations_with_reasoning.pkl" name used elsewhere in this notebook;
  confirm that a separate file is intended.
  """
  target_path = f"{base_path}conversations_reasoning.pkl"
  with open(target_path, 'wb') as out_file:
    pickle.dump(conversations, out_file)
  print("Variables written to the file.")

In [49]:
def extract_comments(conversations, num_break_points):
  """Flatten every conversation's comments into one list and record chunk boundaries.

  Conversations are visited in iteration order. Before every ``break_length``-th
  conversation (starting with the first) the current length of the flat list is
  stored as a break point, so consecutive break points delimit chunks that never
  split a conversation. When ``len(conversations)`` is not an exact multiple of
  ``num_break_points`` more than ``num_break_points`` break points are produced.

  Args:
    conversations: Mapping of conversation id to a dict with a 'comments' list.
    num_break_points: Desired number of chunks; must be non-zero.

  Returns:
    tuple: ``(flat_comments_list, break_points_index)`` where the second item
    holds the start offset of each chunk inside the first.
  """
  # Clamp to 1: with fewer conversations than requested chunks the integer
  # division yields 0, which would make the modulo below raise ZeroDivisionError.
  break_length = max(1, len(conversations) // num_break_points)
  print("break len ", break_length)

  flat_comments_list = []
  break_points_index = []
  for position, conversation_id in enumerate(conversations):
    if position % break_length == 0:
      break_points_index.append(len(flat_comments_list))
    flat_comments_list.extend(conversations[conversation_id]['comments'])

  return flat_comments_list, break_points_index

In [50]:
import pickle

def write_reasoning_dic(reasoning_dic, file_path):
  """Pickle ``reasoning_dic`` to ``file_path``, replacing any existing file.

  Args:
    reasoning_dic: Object to serialize (the callers in this notebook pass a
      dict keyed by comment id).
    file_path: Destination path, opened in binary write mode.

  Returns:
    None. A confirmation line is printed after the file has been closed.
  """
  with open(file_path, 'wb') as out_file:
    pickle.dump(reasoning_dic, out_file)
  print("Variables written to the file.")

In [51]:
import pickle

def merge_conversations_with_reasoning(conversations, file_pathes, method):
  """Attach stored reasoning values to every live comment and save the result.

  All pickles listed in ``file_pathes`` are loaded and merged into a single
  dict keyed by comment id (later files win on duplicate ids). Each comment
  whose body is not '[deleted]' or '[removed]' is then updated in place:
  the value is stored under 'has_reasoning' for the "flasher" method and under
  'has_reasoning' + method (no separator, e.g. 'has_reasoningone-shot') for
  any other method. Finally ``conversations`` is pickled to
  ``conversations_with_reasoning.pkl`` under the global ``base_path``.

  Args:
    conversations: Mapping of conversation id to a dict with a 'comments' list.
    file_pathes: Iterable of pickle paths holding {comment id: value} dicts.
    method: Name of the scoring method that produced the values.

  Raises:
    KeyError: If a live comment's id is missing from the merged dict.
  """
  # Combine the per-chunk dictionaries into one lookup table.
  reasoning_dic = {}
  for chunk_path in file_pathes:
    with open(chunk_path, 'rb') as chunk_file:
      reasoning_dic.update(pickle.load(chunk_file))

  target_key = 'has_reasoning' if method == "flasher" else 'has_reasoning' + method
  placeholder_bodies = ('[deleted]', '[removed]')

  for conversation_id in conversations:
    for comment in conversations[conversation_id]['comments']:
      if comment['body'] in placeholder_bodies:
        continue
      comment[target_key] = reasoning_dic[comment['id']]

  with open(f"{base_path}conversations_with_reasoning.pkl", 'wb') as out_file:
    pickle.dump(conversations, out_file)
  print("Variables written to the file.")

In [57]:
def reasoning_based_on_one_shot_classification(texts):
  """Collect the "Reasoning" score the global ``classifier`` assigns to each text.

  Every text is classified on its own against the candidate labels
  'Reasoning' and 'WithoutReasoning' with ``multi_label=False``. From each
  result, the score paired with the "Reasoning" label is kept.

  Args:
    texts: Iterable of strings to classify.

  Returns:
    list: Scores for the "Reasoning" label, in the order the texts were given.
  """
  candidate_labels = ['Reasoning', "WithoutReasoning"]
  reasoning_scores = []

  for text in texts:
    outcome = classifier(text, candidate_labels, multi_label=False)
    reasoning_scores.extend(
      score
      for label, score in zip(outcome["labels"], outcome["scores"])
      if label == "Reasoning"
    )
  return reasoning_scores

In [60]:
def calculate_reasoning_for_corpus(corpus, method):
  """Compute a reasoning value for every live comment in ``corpus``.

  Comments whose body is '[deleted]' or '[removed]' are ignored.

  Args:
    corpus: Sequence of comment dicts with at least 'id' and 'body' keys.
    method: "flasher" stores the boolean returned by ``contains_reasoning``;
      "one-shot" stores the score returned by
      ``reasoning_based_on_one_shot_classification``. Any other value yields
      an empty result.

  Returns:
    dict: Comment id mapped to its reasoning value.
  """
  placeholder_bodies = ('[deleted]', '[removed]')
  live_comments = [entry for entry in corpus if entry['body'] not in placeholder_bodies]

  if method == "flasher":
    return {entry['id']: contains_reasoning(entry['body']) for entry in live_comments}

  if method == "one-shot":
    # Classify all bodies in one call, then map each id back to its score.
    bodies = [entry['body'] for entry in live_comments]
    position_of = {entry['id']: index for index, entry in enumerate(live_comments)}
    scores = reasoning_based_on_one_shot_classification(bodies)
    return {entry['id']: scores[position_of[entry['id']]] for entry in live_comments}

  return {}

In [54]:
import os

def calculate_reasoning(conversations, num_of_parts, rewrite, method):
  """Score all comments chunk by chunk, caching each chunk on disk, then merge.

  The comments are flattened and split at the break points returned by
  ``extract_comments``. Each chunk gets its own pickle under the global
  ``base_path``, named after the chunk's start and stop offsets (and after
  ``method`` unless it is "flasher"). A chunk is computed only when ``rewrite``
  is truthy or its pickle does not exist yet. Afterwards all chunk pickles
  are merged back into ``conversations``.

  Args:
    conversations: Mapping of conversation id to a dict with a 'comments' list.
    num_of_parts: Number of chunks requested from ``extract_comments``.
    rewrite: When truthy, recompute chunks even if their pickle exists.
    method: Scoring method name, e.g. "flasher" or "one-shot".
  """
  flat_comments, break_points_index = extract_comments(conversations, num_of_parts)
  print("len ", len(break_points_index))
  # Add the end offset so the last chunk has a closing boundary.
  break_points_index.append(len(flat_comments))

  file_pathes = []
  for chunk_start, chunk_stop in zip(break_points_index, break_points_index[1:]):
    print("range ", chunk_start, chunk_stop)
    if method == "flasher":
      file_path = f"{base_path}comments_reasoning_{chunk_start}_{chunk_stop}.pkl"
    else:
      file_path = f"{base_path}comments_{method}_reasoning_{chunk_start}_{chunk_stop}.pkl"
    file_pathes.append(file_path)
    print("path ", file_path)

    # Reuse the cached chunk unless a rewrite was requested.
    if not rewrite and os.path.exists(file_path):
      continue

    print("do")
    chunk_scores = calculate_reasoning_for_corpus(flat_comments[chunk_start:chunk_stop], method)
    write_reasoning_dic(chunk_scores, file_path)

  merge_conversations_with_reasoning(conversations, file_pathes, method)

In [55]:
# Keyword-based ("flasher") pass over 10 requested chunks; rewrite=False reuses chunk
# pickles that already exist. The annotated conversations are then also dumped to
# conversations_reasoning.pkl.
calculate_reasoning(conversations, 10, False, "flasher")
write_conversations_with_reasoning()

break len  3299
len  10
range  0 53506
path  /content/drive/MyDrive/University/RedditData/comments_reasoning_0_53506.pkl
range  53506 116967
path  /content/drive/MyDrive/University/RedditData/comments_reasoning_53506_116967.pkl
range  116967 178801
path  /content/drive/MyDrive/University/RedditData/comments_reasoning_116967_178801.pkl
range  178801 234319
path  /content/drive/MyDrive/University/RedditData/comments_reasoning_178801_234319.pkl
range  234319 300115
path  /content/drive/MyDrive/University/RedditData/comments_reasoning_234319_300115.pkl
range  300115 357006
path  /content/drive/MyDrive/University/RedditData/comments_reasoning_300115_357006.pkl
range  357006 424960
path  /content/drive/MyDrive/University/RedditData/comments_reasoning_357006_424960.pkl
range  424960 478226
path  /content/drive/MyDrive/University/RedditData/comments_reasoning_424960_478226.pkl
range  478226 533555
path  /content/drive/MyDrive/University/RedditData/comments_reasoning_478226_533555.pkl
range  53

In [61]:
# Zero-shot classifier ("one-shot") pass over the same 10 requested chunks, again
# reusing existing chunk pickles. The recorded output below ends in KeyboardInterrupt,
# i.e. this run was stopped while computing the first chunk.
calculate_reasoning(conversations, 10, False, "one-shot")
write_conversations_with_reasoning()

break len  3299
len  10
range  0 53506
path  /content/drive/MyDrive/University/RedditData/comments_one-shot_reasoning_0_53506.pkl
do


KeyboardInterrupt: 