<a href="https://colab.research.google.com/github/LeanKhan/quiztory/blob/main/NG_History_Timeline_API.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Algorithm:

- Break the text down into part-of-speech (POS) tags
- Extract the past-tense verb (VBD) in the sentence, if any
- Get the base form (lemma) of that verb. This is the thing that "happened" on that date
- Run named-entity recognition (NER) on the text and extract the DATE entity
- **Construct question**
- Remove the date and the preposition that introduces it (e.g. "in")
- Add "When " to the beginning of the question and replace the VBD with its base form
- *profit*



Install deps

In [None]:
import spacy

# import nltk
import re
import requests

# from nltk.tokenize import word_tokenize
# from nltk.tag import pos_tag
# from nltk.stem import WordNetLemmatizer

# nltk.download('punkt')
# nltk.download('averaged_perceptron_tagger')
# nltk.download('words')
# nltk.download('wordnet')
# nltk.download('maxent_ne_chunker')

# install wikipedia article api wrapper
import sys
!{sys.executable} -m pip install wikipedia
!{sys.executable} -m pip install dateparser
!{sys.executable} -m pip install daterangeparser

# fast api
!{sys.executable} -m pip install colabcode
!{sys.executable} -m pip install fastapi
!{sys.executable} -m pip install jinja2

# node
!npm install -g localtunnel

# import installed deps
import wikipedia as wp
from dateparser import parse
from dateparser import parse as parse_range



Setup Server

Initialize text

In [2]:
def extract_all_events(text, extract_function):
  """Turn every dated event found in ``text`` into a "When did ...?" question.

  Args:
    text: Source text. It is processed one line at a time (``str.splitlines``).
    extract_function: Callable that takes a single line and returns an
      iterable of ``(sort_key, answer, description)`` tuples, e.g.
      ``extract_events_regex`` or ``extract_events_spacy``.

  Returns:
    A list of ``{"q": "When did <description>?", "ans": <answer>}`` dicts,
    ordered by each event's sort key.
  """
  all_events = []
  for processed, line in enumerate(text.splitlines()):
    # extend() grows the list in place instead of copying it for every line.
    all_events.extend(extract_function(line))
    if processed % 100 == 0:
      print('Processed: {}'.format(processed))

  print("Extracted {} events.".format(len(all_events)))

  try:
    ordered = sorted(all_events, key=lambda e: e[0])
  except TypeError:
    # Sort keys of different types (e.g. a parsed datetime next to a raw
    # string) cannot be compared directly; order by their text form rather
    # than failing the whole request.
    ordered = sorted(all_events, key=lambda e: str(e[0]))

  # event[2] is the description used to formulate the question,
  # event[1] is the date text shown as the answer.
  return [
      {"q": "When did {}?".format(event[2]), "ans": event[1]}
      for event in ordered
  ]

Get doc ready

Get NER and get date as well as preposition

In [None]:
# Print every named entity found in `doc` as "<text> -> <label>".
# NOTE(review): `doc` is not assigned in any visible cell; presumably it came
# from an `nlp(...)` call in a cell that was removed - confirm, since this
# cell fails on a fresh kernel otherwise.
for ent in doc.ents:
  print("{} -> {}".format(ent.text,ent.label_))

In [None]:
# Render the entities of `doc` inline with displaCy.
# NOTE(review): relies on a `doc` that no visible cell creates - confirm.
spacy.displacy.render(doc, style="ent",jupyter=True)

Construct question

In [None]:
import IPython

# Render the dependency parse of `doc` (compact layout, full HTML page) and
# wrap the result in an IPython HTML object for display.
# NOTE(review): relies on a `doc` that no visible cell creates - confirm.
IPython.display.HTML(spacy.displacy.render(doc,style="dep", page=True, options={"compact":True}))

Dependency Parsing, thank you Jesus!

In [3]:
# Matches "In <year>[,] <event text up to the next full stop>". The year is
# three or four digits and may be followed by "/" or an en dash plus up to
# four more digits (e.g. "1975–1976"). Written as a raw string so the regex
# escapes are not treated as (invalid) Python string escapes.
_YEAR_EVENT_RE = re.compile(r'In (\d{3,4}[/–]?\d{0,4}),? ?([^.]*)')

def extract_events_regex(line):
  """Extract "In <year> ..." events from one line of text using a regex.

  Args:
    line: A single line of text.

  Returns:
    A list of ``(sort_key, year_text, description)`` tuples, one per match.
    ``sort_key`` is the year text, left-padded with "0" when it is only three
    characters long so that three-digit years sort before four-digit ones.
    The list is empty when nothing matches.
  """
  return [
      (year if len(year) > 3 else "0" + year, year, description)
      for year, description in _YEAR_EVENT_RE.findall(line)
  ]

def dep_subtree(token, dep):
  """Return the text of the first child of ``token`` attached with relation ``dep``.

  Args:
    token: A token-like object exposing ``children``; each child exposes
      ``dep_``, ``text`` and ``subtree``.
    dep: Dependency label to look for (e.g. "nsubj").

  Returns:
    The texts of every token in the matching child's subtree joined by single
    spaces, or "" when no child carries that label.
  """
  for child in token.children:
    if child.dep_ == dep:
      return " ".join(node.text for node in child.subtree)
  return ""

# Strips citation markers such as "[91]", which otherwise cause problems with spaCy.
p = re.compile(r'\[\d+\]')

# Dependency relations whose subtrees are placed before / after the root word
# when an event description is assembled, in output order.
_DEPS_BEFORE_ROOT = ("nsubj", "nsubjpass", "auxpass", "amod", "det")
_DEPS_AFTER_ROOT = ("acl", "dobj", "attr", "advmod")

def extract_events_spacy(line):
  """Extract one event per DATE entity that spaCy finds in ``line``.

  Relies on the module-level ``nlp`` pipeline (assigned by the server's
  startup hook) and on ``parse_range`` from the import cell.

  Args:
    line: One line of raw text; citation markers like "[91]" are removed first.

  Returns:
    A list of ``(start, date_text, description)`` tuples. ``start`` is the
    first value returned by ``parse_range`` when the entity text unpacks as a
    ``(start, end)`` pair, otherwise the raw entity text. ``description`` is
    built from the sentence root and selected dependency subtrees.
  """
  line = p.sub('', line)
  events = []
  doc = nlp(line)
  for ent in doc.ents:
    if ent.label_ != 'DATE':
      continue

    try:
      start, _end = parse_range(ent.text)
    except Exception:
      # Could not parse the date as a range: fall back to the raw text.
      # NOTE(review): the import cell binds parse_range to dateparser.parse;
      # presumably that returns a single value rather than a (start, end)
      # pair, in which case this fallback is always taken - confirm whether
      # daterangeparser.parse was intended.
      start = ent.text

    # Walk up the dependency tree to the root of the sentence.
    current = ent.root
    while current.dep_ != "ROOT":
      current = current.head

    parts = (
        [dep_subtree(current, dep) for dep in _DEPS_BEFORE_ROOT]
        + [current.text]
        + [dep_subtree(current, dep) for dep in _DEPS_AFTER_ROOT]
    )
    # filter(None, ...) drops the relations that were absent (empty strings).
    events.append((start, ent.text, " ".join(filter(None, parts))))
  return events

All together now!

In [None]:
# extract_all_events(summary,extract_events_spacy)

# extract_events_spacy("The Protestant Reformation was the first successful challenge to the Catholic Church and began in 1521 - 1600 as Luther was outlawed at the Diet of Worms after his refusal to repent. ")


Get templates! Thank you Jesus!

In [None]:
# raw_text="""Nigeria gained her independence in 1960."""
# raw_text = """Frank Roffolo was killed on December 2019"""
# response = requests.get('https://raw.githubusercontent.com/qualicen/timeline/master/history_of_germany.txt')

# summary = wp.summary("Colonial Nigeria")
# summary = response.content

# # Create NER text
# text = NER(raw_text)

# lem = WordNetLemmatizer()


In [4]:
from fastapi import FastAPI
from fastapi.responses import HTMLResponse
from fastapi.middleware.cors import CORSMiddleware
from jinja2 import Template

# The API application object; the routes below are registered on it.
app = FastAPI()

# CORS is left wide open: every origin, method and header is accepted.
# NOTE(review): a wildcard origin together with allow_credentials=True is
# generally discouraged - confirm whether any client actually needs to send
# credentials, and restrict the origins if so.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

@app.on_event("startup")
def load_model():
    """Load the "en_core_web_sm" spaCy pipeline into the module-level ``nlp`` on application startup."""
    global nlp
    nlp = spacy.load("en_core_web_sm")

@app.get('/')
def index():
    """Return a fixed greeting message for the API root."""
    return {'message': 'This is the homepage of the API '}

@app.get('/home', response_class=HTMLResponse)
def home():
    """Serve the landing page, rendered from the template hosted on GitHub.

    Returns:
        The rendered HTML, or a 502 HTML response when the template cannot
        be downloaded.
    """
    try:
        # The timeout keeps a stalled download from blocking the request
        # indefinitely; raise_for_status() stops an upstream error body from
        # being rendered as if it were the page.
        response = requests.get(
            'https://raw.githubusercontent.com/LeanKhan/congenial-lamp/main/index.html',
            timeout=10,
        )
        response.raise_for_status()
    except requests.RequestException:
        return HTMLResponse('Could not load the page template.', status_code=502)

    return Template(response.text).render()


@app.get('/questions', response_class=HTMLResponse)
def questions(article: str):
    """Render the questions page for a Wikipedia article.

    Args:
        article: Title of the Wikipedia article to generate questions from.

    Returns:
        The rendered HTML page. A 404 HTML response is returned when the
        article cannot be loaded, and a 502 HTML response when the page
        template cannot be downloaded.
    """
    global wp_page

    try:
        page = wp.page(article)
    except Exception:
        # This route produces HTML, so the failure is reported as HTML too;
        # a plain dict cannot be rendered by HTMLResponse.
        return HTMLResponse('Could not load a Wikipedia article for that title.', status_code=404)

    # The global is still updated so the last page can be inspected from the
    # notebook, but the response is built from the local `page` so that
    # concurrent requests cannot see each other's article.
    wp_page = page

    try:
        response = requests.get(
            'https://raw.githubusercontent.com/LeanKhan/congenial-lamp/main/questions.html',
            timeout=10,
        )
        response.raise_for_status()
    except requests.RequestException:
        return HTMLResponse('Could not load the page template.', status_code=502)

    qs = extract_all_events(page.summary, extract_events_spacy)

    return Template(response.text).render(page=page, questions=qs)


@app.get('/api/questions')
def get_questions(article: str):
    """Return generated questions and page metadata for a Wikipedia article as JSON.

    Args:
        article: Title of the Wikipedia article to generate questions from.

    Returns:
        ``{"error": True}`` when the article cannot be loaded; otherwise a
        dict with ``success``, ``questions``, ``links``, ``images``, ``title``
        and ``url``.
    """
    global wp_page

    try:
        page = wp.page(article)
    except Exception:
        # Catch Exception rather than everything, so KeyboardInterrupt and
        # SystemExit are not swallowed.
        return {"error": True}

    # The global is still updated so the last page can be inspected from the
    # notebook, but the response is built from the local `page` so that
    # concurrent requests cannot see each other's article.
    wp_page = page

    qs = extract_all_events(page.summary, extract_events_spacy)

    return {
        'success': True,
        'questions': qs,
        'links': page.links,
        'images': page.images,
        'title': page.title,
        'url': page.url,
    }




In [None]:
!npm i -g fastify
!npm i -g fastify-cli

Run Server! Thank you Jesus!

In [None]:
# JavaScript source that opens a localtunnel on port 5000 and logs its public
# URL. It is currently unused: the write to tunnel.js further down is
# commented out.
code = """
module.paths.push("/tools/node/lib/node_modules");
const localtunnel = require('localtunnel');

(async () => {
  const tunnel = await localtunnel({ port: 5000 });

  // the assigned public url for your tunnel
  // i.e. https://abcdefgjhij.localtunnel.me
  tunnel.url;
  console.log('Public URL at => ', tunnel.url);

  tunnel.on('close', () => {
    // tunnels are closed
    console.log('Closeeeed tunnel');
  });
})();
"""

# JavaScript source for a minimal Fastify server: a single "/" route,
# listening on port 5000. Written to fast.js below.
fast_code = """
module.paths.push("/tools/node/lib/node_modules");

// CommonJs
const fastify = require('fastify')({
  logger: true
})

// Declare a route
fastify.get('/', function (request, reply) {
  reply.send({ hello: 'world' })
})

// Run the server!
fastify.listen(5000, function (err, address) {
  if (err) {
    fastify.log.error(err)
    process.exit(1)
  }
  // Server is now listening on ${address}
})
"""

# with open("tunnel.js", "w") as js:
#   js.write(code)

# Write the Fastify script to disk (overwrites any existing fast.js).
with open("fast.js", "w") as js:
  js.write(fast_code)


# Python source written to api.py. Everything except `print(dir())` is
# commented out, so running the script only prints its module namespace.
api_code = """
# from fastapi import FastAPI
# import uvicorn

# app = FastAPI()

print(dir())


# @app.get("/")
# async def root():
#     return {"message": "Hello World"}

# uvicorn.run(app, host="127.0.0.1", port=5000, workers=1)
"""

# Write the script to disk (overwrites any existing api.py).
with open("api.py", "w") as file:
  file.write(api_code)


# !nohup node ./fast.js & lt --port 5000 --subdomain quiztory-1234 & tail -f nohup.out
# Run the generated script in a separate Python process.
!python ./api.py
# !nohup python ./api.py & lt --port 5000 --subdomain quiztory-1234 & tail -f nohup.out

# nest_asyncio.apply()

# uvicorn.run(app, host="127.0.0.1", port=5000, workers=1)

# !npm -g ls --depth=0 --parseable

['__annotations__', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__spec__']


In [None]:
# TESTT
# !wget https://raw.githubusercontent.com/abhishekq61/tunnel-client/master/linux/staqlab-tunnel.zip

# !unzip ./staqlab-tunnel.zip

import subprocess
import nest_asyncio
import uvicorn
import os

# subprocess.run(["!node", "port=10000", "hostname=fish1234"], stdout=subprocess.PIPE, shell=True)

# !sudo lsof -i -P -n | grep LISTEN

# !sudo lsof -t -i tcp:5000 | xargs kill -9
# !nohup lt --port 3000 --subdomain quiztory-1234 & tail -f nohup.out
# !nohup lt --port 3000 --subdomain quiztory-1234 & uvicorn .:app --reload & tail -f nohup.out

# os.system("!nohup lt --port 3000 --subdomain quiztory-1234 & tail -f nohup.out")

# nest_asyncio.apply()

# uvicorn.run(app, host="127.0.0.1", port=3000, workers=1)


# !uvicorn main:app --reload

# uvicorn.run(app, host="127.0.0.1", port=5000, workers=1)

# !./staqlab-tunnel port=10000


CompletedProcess(args=['!node', 'port=10000', 'hostname=fish1234'], returncode=127, stdout=b'')

In [None]:
from colabcode import ColabCode

# Serve the FastAPI `app` through ColabCode on port 10000.
# NOTE(review): presumably code=False skips launching the bundled code-server
# and run_app() blocks while serving - confirm against the colabcode docs.
# The captured output shows an ngrok tunnel opened to http://localhost:10000.
server = ColabCode(port=10000, code=False)

server.run_app(app=app)





INFO:     Started server process [68]
INFO:     Waiting for application startup.


Public URL: NgrokTunnel: "https://53ab-35-201-209-198.ngrok.io" -> "http://localhost:10000"


INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:10000 (Press CTRL+C to quit)


INFO:     41.223.139.40:0 - "GET /home HTTP/1.1" 200 OK
INFO:     41.223.139.40:0 - "GET /style.css HTTP/1.1" 404 Not Found
INFO:     41.223.139.40:0 - "GET /script.js HTTP/1.1" 404 Not Found
INFO:     41.223.139.40:0 - "GET /style.css HTTP/1.1" 404 Not Found
INFO:     41.223.139.40:0 - "GET /script.js HTTP/1.1" 404 Not Found
Processed: 0
Extracted 21 events.
INFO:     41.223.139.40:0 - "GET /questions?article=History+of+Nigeria HTTP/1.1" 200 OK
INFO:     41.223.139.40:0 - "GET /style.css HTTP/1.1" 404 Not Found
INFO:     41.223.139.40:0 - "GET /script.js HTTP/1.1" 404 Not Found
Processed: 0
Extracted 4 events.
INFO:     41.223.139.40:0 - "GET /questions?article=Nok+Culture HTTP/1.1" 200 OK
INFO:     41.223.139.40:0 - "GET /style.css HTTP/1.1" 404 Not Found
INFO:     41.223.139.40:0 - "GET /script.js HTTP/1.1" 404 Not Found
Processed: 0
Extracted 8 events.
INFO:     41.223.139.40:0 - "GET /questions?article=Nigerian+Civil+War HTTP/1.1" 200 OK
INFO:     41.223.139.40:0 - "GET /style.css