<a href="https://colab.research.google.com/github/IsaacFigNewton/Text-to-Timeline/blob/main/Sample_Notebook.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Download Packages

In [None]:
# Download the NLTK corpora this notebook relies on.
# 'stopwords' is read further down through nltk.corpus.stopwords; 'wordnet' is not
# referenced directly in this notebook -- presumably it is required by the project
# modules imported later (TODO confirm).
import nltk
nltk.download('wordnet')
nltk.download('stopwords')

In [None]:
# Broken libraries I've tried: AllenNLP, Coreferee, neuralcoref, spacy-experimental, stanford-corenlp (only api available)
!pip install fastcoref
!pip install intervaltree
!pip install rdflib flufl.enum

In [None]:
!git clone https://github.com/IsaacFigNewton/Text-to-Timeline.git
!wget https://raw.githubusercontent.com/IsaacFigNewton/fredlib-updated/refs/heads/main/fredlib.py

# Import Libraries

In [None]:
import sys
import os
import json
import spacy
import networkx as nx
import matplotlib.pyplot as plt

In [None]:
import math
import string
import numpy as np
import requests
import json
import pandas as pd
import collections
import pprint

import networkx as nx
from networkx import Graph as NXGraph
import matplotlib.pyplot as plt
import statistics
import collections

import fredlib as fred
from rdflib import Graph as RDFGraph
from rdflib.extras.external_graph_libs import rdflib_to_networkx_graph

from collections import defaultdict, deque
from intervaltree import IntervalTree
from intervaltree import Interval as ITInterval

from nltk.corpus import stopwords
from fastcoref import FCoref, spacy_component
# fastcoref_coref_resolution is neither installed by the pip cell nor downloaded into
# the working directory; presumably it ships in the cloned repository's modules
# directory (TODO confirm). That directory is only appended to sys.path by a later
# cell, so it is registered here first to keep this import working on a fresh
# kernel (Restart & Run All). The guard makes the later append a no-op.
_modules_dir = os.path.abspath('./Text-to-Timeline/modules')
if _modules_dir not in sys.path:
    sys.path.append(_modules_dir)
from fastcoref_coref_resolution import resolve_text

In [None]:
# Expose the cloned repository's modules directory to the import system
# (only once, so re-running the cell does not grow sys.path).
module_path = os.path.abspath('./Text-to-Timeline/modules')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

# Project helpers used by the cells below.
# NOTE(review): the star imports hide which module provides each helper, and on a
# name clash a later star import silently shadows an earlier one -- keep this order.
from pipeline import *
from utils import *
from clean_rdf_graph import *
from timeline_construction import get_timeline

In [None]:
from google.colab import userdata

# The API key handed to get_fred_nx_digraph further down is read from the Colab
# secret store, so it never appears in the notebook itself.
fred_secret_name = "fred_api"
fred_api_key = userdata.get(fred_secret_name)

In [None]:
# NOTE(review): file_prefix is not referenced by any cell visible in this notebook.
file_prefix = "test_"
# Passed as the `prefix` argument to disambiguate_predicates and get_timeline below.
rdf_temp_prefix = "boxer.owl: temp_"
# English stop-word list from the NLTK 'stopwords' corpus downloaded at the top.
stop_words = stopwords.words('english')

# Config

In [None]:
# Input texts for the cells below. Only the first entry is active; the remaining
# candidate texts are kept commented out so they can be re-enabled individually.
tests = [
    (
        "The frog jumped over the goose. "
        "Mr. Holmes is gay. "
        "Then the frog fell into the abyss. "
        "The goose followed the frog into the abyss and after that ate a different frog."
    ),
    # "Although he was very busy with his work, Peter had had enough of it. He and his wife decided they needed a holiday. They travelled to Spain because they loved the country very much.",
    # "John met Paul after he finished work. He suggested they grab a drink.",
    # "The book was on the table when Sarah handed it to Mary. She smiled and thanked her.",
    # "Tom told Jerry that he had failed the exam.",
    # "The city council refused the demonstrators a permit because they feared violence.",
    # "Anna told Lucy that her idea was brilliant.",
    # "The scientist interviewed the assistant while she was setting up the experiment.",
    # "After the dog bit the man, he ran away.",
    # "The mechanic fixed the car while it was raining. He was soaked by the end.",
    # "David thanked Michael after his birthday party.",
    # "Karen lent her book to Julie because she needed it for class."
]

In [None]:
# Full spaCy pipeline, passed as `nlp_model` to get_text_info_json below.
default_nlp_model = spacy.load("en_core_web_sm")

# Stand-alone fastcoref model built with its default constructor arguments.
fastcoref_model = FCoref()

# Second spaCy pipeline loaded without the listed components and extended with the
# "fastcoref" pipe; this is the model handed to resolve_text.
excluded_components = ["parser", "lemmatizer", "ner", "textcat"]
coref_resolution_model = spacy.load("en_core_web_sm", exclude=excluded_components)
coref_resolution_model.add_pipe("fastcoref")

In [None]:
def _read_json(path):
    """Return the parsed contents of the UTF-8 encoded JSON file at ``path``."""
    with open(path, 'r', encoding='utf-8') as handle:
        return json.load(handle)


# Tag definitions; only the "rel_pos_tags" entry is pulled out separately.
all_tags = _read_json('./Text-to-Timeline/maps/tags.json')
rel_pos_tags = all_tags["rel_pos_tags"]

# Each entry of the interval file is reduced to a (start, end) tuple;
# a missing "start" or "end" key yields None in that position.
allen_intervals = _read_json('./Text-to-Timeline/maps/temporal_relations/allen_intervals.json')
temporal_relations_map = {
    relation: (spec.get("start"), spec.get("end"))
    for relation, spec in allen_intervals.items()
}

# Predicate map handed to disambiguate_predicates further down.
TEMPORAL_PREDICATE_MAP = _read_json('./Text-to-Timeline/maps/temporal_relations/predicate_map.json')

# Test entity relation extraction, ambiguation

In [None]:
# The same three models are used for every text, so bundle them once.
shared_models = {
    "nlp_model": default_nlp_model,
    "fastcoref_model": fastcoref_model,
    "coref_resolution_model": coref_resolution_model,
}

# Collect one get_text_info_json result per test text, in the order of `tests`.
test_infos = []
for test in tests:
    test_info = get_text_info_json(text=test, **shared_models)
    test_infos.append(test_info)

In [None]:
# Plot the "edges" entry extracted for the first test text.
first_test_edges = test_infos[0]["edges"]
plot_graph_from_edge_list(first_test_edges)

In [None]:
# Display the "ambiguated" entry of the first test text's result.
first_test_info = test_infos[0]
first_test_info["ambiguated"]

# Test FRED Timeline Extraction

In [None]:
# Run the first test text through resolve_text, then build its FRED graph.
# NOTE(review): "test_0.rdf" is presumably the file the FRED response is written to -- confirm.
test_0 = resolve_text(tests[0], coref_resolution_model=coref_resolution_model)
G0 = get_fred_nx_digraph(test_0, "test_0.rdf", fred_api_key)
propagate_types(G0)

# Node and edge types that prune_subgraph_types is asked to drop from the graph.
dropped_node_types = {
    "org#ont#framenet#abox#frame:",
    "owl: Theme",
    "owl: Cotheme",
}
dropped_edge_types = {
    'owl: equivalentClass',
    'owl: hasDeterminer',
    'owl: differentFrom',
    'cotheme',
}
prune_subgraph_types(
    g=G0,
    node_types_to_drop=dropped_node_types,
    edge_types_to_drop=dropped_edge_types,
)

# G0 is rebound to whatever disambiguate_predicates returns.
G0 = disambiguate_predicates(
    G0,
    predicate_map=TEMPORAL_PREDICATE_MAP,
    prefix=rdf_temp_prefix,
)

In [None]:
# Options for the timeline build, gathered in one place so they are easy to tweak.
timeline_options = dict(
    drop_event_edges=False,
    rel_pos_tags=rel_pos_tags,
    temporal_relations_map=temporal_relations_map,
    prefix=rdf_temp_prefix,
)
timeline = get_timeline(g=G0, **timeline_options)

# Draw the resulting timeline without grid lines.
plot_interval_tree(timeline, grid=False)

In [None]:
# Display the text returned by resolve_text for the first test, i.e. the exact
# input that was passed to get_fred_nx_digraph above.
test_0