In [2]:
import ollama
from ollama import chat
from ollama import ChatResponse

# System message sent with every request (it is passed as the 'system' role
# content by isCauseEffect). It asks the model to answer with a JSON object
# whose "class" field is either "causal" or "non_causal".
# NOTE(review): the text contains the typo "returnin" and reads "classify the
# event one causes the event two" (missing "whether"). It is left untouched
# here because any change to the prompt can change the model outputs.
system_prompt = """You are an AI for causal reasoning designed to detect causality between events. Your task is to classify the event one causes the event two, returnin a structured JSON format with the class "causal" or "non_causal". The class needs to be the strings "causal" or "non_causal".

Your output should be formatted as a JSON object. Below is an example of the expected output structure:

```json
{
  "class": "causal"
}
```"""

# User message template. `{cause}` and `{effect}` are placeholders that the
# calling cells fill in with str.replace; str.format cannot be used because the
# JSON example below contains literal braces.
# NOTE(review): the text opens with a double quote that is never closed. It is
# left untouched here because any change to the prompt can change the model
# outputs.
user_prompt = """
"Please read the following event text pairs and detect causality between the events. Your response should be the class representing if the event one causes the event two, in the form of a JSON, in which, the class can be "causal" or "non_causal", with the following structure:

```json
{
  "class": "non_causal"
}
```

Important: Your output must be in JSON format only. No additional text, explanations, or comments are allowed. Do not include any other information outside of the JSON structure.

Now, apply the same logic to the following event pairs.

### Event pairs to analyze:
Event one: {cause}
Event two: {effect}
"""

def isCauseEffect(model, system_prompt, user_prompt):
  """Ask an Ollama chat model one question and return its raw text answer.

  Args:
    model: Name of the model handed to `chat` (e.g. 'phi3:14b').
    system_prompt: Text sent as the 'system' message.
    user_prompt: Text sent as the 'user' message.

  Returns:
    The content of the reply message with leading and trailing whitespace
    removed. No parsing or validation is done here.
  """
  conversation = [
    {'role': 'system', 'content': system_prompt},
    {'role': 'user', 'content': user_prompt},
  ]
  # Fixed sampling options: temperature 0 and a constant seed, with a
  # 10240-token context window.
  decoding_options = {
    'temperature': 0,
    'num_ctx': 10240,
    'seed': 81,
  }
  response: ChatResponse = chat(
    model=model,
    messages=conversation,
    options=decoding_options,
  )
  reply_text = response['message']['content']
  return reply_text.strip()

In [4]:
import json

# Smoke test: classify one hand-picked pair of headlines with a single model.
cause_text = 'New York Democrats cancel 2020 primary, kicking Bernie Sanders off the ballot'
effect_text = 'Bernie Sanders campaign says New York should lose its delegates after cancellation of presidential primary'
filled_prompt = user_prompt.replace('{cause}', cause_text).replace('{effect}', effect_text)

r = isCauseEffect('phi3:14b', system_prompt, filled_prompt)
# Remove the Markdown code fence (with or without the "json" tag) that may wrap
# the answer, so that only the JSON text is left for the parser.
r = r.replace('```json', '').replace('```', '')
json_obj = json.loads(r)
json_obj

{'class': 'causal'}

In [None]:
import json
import os

import pandas as pd

datasets = ['FinCausal', 'Risk', 'Headlines', 'Twitter']

# Rows (by DataFrame index) that are never sent to the models.
# NOTE(review): presumably rows for which no usable answer could be obtained;
# confirm with the author before changing this list.
SKIPPED_ROWS = {
    'Headlines': {450, 1737, 1870, 2198, 2322},
    'Twitter': {34, 173, 322, 425, 657, 758, 773},
}
VALID_CLASSES = ('causal', 'non_causal')
# isCauseEffect uses temperature 0 and a fixed seed, so an unparseable reply is
# likely to repeat on every retry. Cap the attempts instead of looping forever.
MAX_ATTEMPTS = 10

# Make sure the output directory exists before hours of inference are spent.
os.makedirs('results', exist_ok=True)

for model in ['llama3:8b', 'llama3:70b', 'phi3:14b', 'gemma2:27b', 'qwen2:7b']:
    print(model)
    for dataset in datasets:
        print(dataset)
        df = pd.read_csv('./datasets/' + dataset + '.csv')
        skipped = SKIPPED_ROWS.get(dataset, ())
        for index, row in df.iterrows():
            if index in skipped:
                continue
            # The prompt only depends on the row, so build it once per row.
            prompt = user_prompt.replace('{cause}', str(row[' Cause'])).replace('{effect}', str(row[' Effect']))
            for _ in range(MAX_ATTEMPTS):
                r = isCauseEffect(model, system_prompt, prompt)
                # Drop the Markdown code fence that may wrap the JSON answer.
                r = r.replace('```json', '').replace('```', '')
                try:
                    json_obj = json.loads(r)
                    llm_class = json_obj['class']
                except (ValueError, TypeError, KeyError):
                    # ValueError covers json.JSONDecodeError; TypeError/KeyError
                    # cover valid JSON that is not an object with a "class" key.
                    # Anything else (e.g. KeyboardInterrupt) must propagate.
                    print(r)
                    continue
                if llm_class in VALID_CLASSES:
                    df.at[index, 'llm_class'] = llm_class
                    break
                print(row[' Cause'])
                print(row[' Effect'])
                print(index, r)
            else:
                # No valid answer: the row keeps an empty llm_class.
                print(f'{dataset} row {index}: no valid class after {MAX_ATTEMPTS} attempts, llm_class left empty')
        df.to_csv('results/' + dataset + '_' + model + '.csv')

In [11]:
import numpy as np 
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import StratifiedKFold
import pandas as pd
import networkx as nx

def generate_homogeneous_graph(df, model, llm_class = False, cause_col = ' Cause', effect_col = ' Effect', label_col = 'Label', llm_col = 'llm_class'):
    """Build a directed graph with one `event -> relation -> event` path per row.

    For every row, three nodes are touched: 'event:<cause>',
    'relation: <cause>_<effect>' and 'event:<effect>'. Node keys are built from
    the text itself, so rows that repeat a text share the node and the later
    row overwrites its attributes.

    Node attributes:
        label: 'aux' on event nodes, the row's `label_col` value on the
            relation node.
        llm_label: only when `llm_class` is true; 'aux' on event nodes, the
            row's `llm_col` value on the relation node.
        embedding: float64 array; the encoded text on event nodes, the mean of
            the two event embeddings on the relation node.

    Args:
        df: DataFrame holding the columns named below. It is not modified.
        model: Object whose `encode(list_of_str)` returns one vector per text.
        llm_class: Whether to also store the `llm_label` attribute.
        cause_col: Column with the cause text.
        effect_col: Column with the effect text.
        label_col: Column with the reference label.
        llm_col: Column with the LLM label (read only when `llm_class`).

    Returns:
        The populated `networkx.DiGraph`.
    """
    graph = nx.DiGraph()

    # Work on plain lists instead of writing back into `df`, so the caller's
    # frame is left untouched and encoding does not depend on its index labels.
    causes = df[cause_col].astype(str).tolist()
    effects = df[effect_col].astype(str).tolist()
    labels = df[label_col].tolist()
    llm_labels = df[llm_col].tolist() if llm_class else [None] * len(causes)

    cause_vectors = model.encode(causes)
    effect_vectors = model.encode(effects)

    rows = zip(causes, effects, labels, llm_labels, cause_vectors, effect_vectors)
    for cause, effect, label, llm_label, cause_vec, effect_vec in rows:
        cause_node = 'event:' + cause
        effect_node = 'event:' + effect
        relation_node = 'relation: ' + cause + '_' + effect

        graph.add_edge(cause_node, relation_node)
        graph.add_edge(relation_node, effect_node)

        graph.nodes[cause_node]['label'] = 'aux'
        graph.nodes[effect_node]['label'] = 'aux'
        graph.nodes[relation_node]['label'] = label
        if llm_class:
            graph.nodes[cause_node]['llm_label'] = 'aux'
            graph.nodes[effect_node]['llm_label'] = 'aux'
            graph.nodes[relation_node]['llm_label'] = llm_label

        cause_emb = np.asarray(cause_vec, dtype=np.float64)
        effect_emb = np.asarray(effect_vec, dtype=np.float64)
        graph.nodes[cause_node]['embedding'] = cause_emb
        graph.nodes[effect_node]['embedding'] = effect_emb
        graph.nodes[relation_node]['embedding'] = np.mean([cause_emb, effect_emb], axis=0)

    return graph
              

def plot_confusion_matrix(y_true, y_pred, labels=('causal', 'non_causal')):
    """Plot the confusion matrix of `y_pred` against `y_true`.

    Args:
        y_true: Reference labels.
        y_pred: Predicted labels, aligned with `y_true`.
        labels: Class names, in the order used for the matrix rows/columns and
            for the tick labels.

    The same `labels` sequence is given to both `confusion_matrix` and
    `ConfusionMatrixDisplay`, so the tick labels always describe the right
    rows and columns, even when a class is absent from the data.
    """
    labels = list(labels)
    cm = confusion_matrix(y_true, y_pred, labels=labels)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)
    disp.plot(cmap=plt.cm.Blues)
    plt.show()

def fold_analysis(graph):
    """Score the LLM labels stored on a graph with a pooled 5-fold macro F1.

    Reads the 'label' and 'llm_label' attributes of every node, discards the
    nodes whose 'label' is 'aux', splits the remaining ones with a shuffled
    `StratifiedKFold` (5 splits, random_state=42) and pools the reference and
    LLM labels of all test folds.

    Args:
        graph: Graph whose nodes all carry 'label' and 'llm_label' attributes.

    Returns:
        The macro-averaged F1 score of the pooled labels, rounded to three
        decimals.
    """
    node_ids = list(graph.nodes())
    frame = pd.DataFrame({
        'y': [graph.nodes[node_id]['label'] for node_id in node_ids],
        'x': node_ids,
        'y_llm': [graph.nodes[node_id]['llm_label'] for node_id in node_ids],
    })

    # Only the non-'aux' nodes carry a real label.
    relations = frame[frame['y'] != 'aux'].reset_index(drop=True)

    splitter = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

    gold, predicted = [], []
    for _, held_out in splitter.split(relations['x'], relations['y']):
        in_fold = relations.index.isin(held_out)
        gold = np.concatenate([gold, relations.loc[in_fold, 'y'].to_list()])
        predicted = np.concatenate([predicted, relations.loc[in_fold, 'y_llm'].to_list()])

    report = classification_report(gold, predicted, output_dict=True)
    return round(report['macro avg']['f1-score'], 3)

In [13]:
from sentence_transformers import SentenceTransformer
import pickle as pkl

# Sentence encoder used to embed the cause and effect texts.
m = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

table_models = ['llama3:8b', 'phi3:14b', 'gemma2:27b', 'qwen2:7b', 'llama3:70b']
table_datasets = ['Risk', 'Twitter', 'Headlines', 'FinCausal']

# One output line per model, with the scores separated by " & ".
for model in table_models:
    print(model)
    for dataset in table_datasets:
        results_path = f'results/{dataset}_{model}.csv'
        # Rows with any missing value are dropped before the graph is built.
        df = pd.read_csv(results_path).dropna().reset_index(drop=True)
        graph = generate_homogeneous_graph(df, m, llm_class=True)
        f1_macro = fold_analysis(graph)
        print(round(f1_macro, 3), end=" & ")
    print('\n')

llama3:8b
0.727 & 0.57 & 0.61 & 0.761 & 

phi3:14b
0.659 & 0.509 & 0.626 & 0.695 & 

gemma2:27b
0.72 & 0.63 & 0.734 & 0.822 & 

qwen2:7b
0.691 & 0.472 & 0.654 & 0.719 & 

llama3:70b
0.726 & 0.623 & 0.728 & 0.767 & 

