In [1]:
# Colab-only: attach Google Drive to this runtime so the project files are reachable by path.
from google.colab import drive
# Mount at /content/drive; force_remount=True asks Colab to remount even if Drive is already mounted.
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


### Set working paths and load JSON

What it does
- Defines:
  - `BASE_DIR` (root of your Drive mount)
  - `working_dir` (project subfolder inside `BASE_DIR`) — edit to your folder
  - `json_filename` (the displacy JSON file produced by the pipeline)
- Changes current working directory to the project folder and opens the JSON file into `data`.
- Initializes `i = 0` which is used as the index of the entry to inspect.

Expectations about the JSON
- `data` should be a list of entries. Each entry typically contains:
  - `file_name` (source file id)
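  - `paragraph` (the paragraph text; note that the render cell uses `displacy.render(..., manual=True)`, which reads the text from a `text` key, so confirm your entries provide one)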
  - `ents` (list of displacy entity dicts: `{ "start": int, "end": int, "label": "PER"|"LOC", "text": str, ... }`)

Tip
- Edit `working_dir` and `json_filename` to match your Drive layout before running.

In [None]:
import os
import json

# Root of the mounted Google Drive.
BASE_DIR = '/content/drive/MyDrive'
# Project folder relative to BASE_DIR -- change this to your own Drive folder.
working_dir = 'Projects/NER.ManualValidation'
# displaCy JSON file to inspect.
json_filename = "FE.66.displacy.json"

# Work from inside the project folder, then read the whole JSON file into `data`.
project_dir = os.path.join(BASE_DIR, working_dir)
os.chdir(project_dir)
json_path = os.path.join(project_dir, json_filename)
with open(json_path, mode="r", encoding="utf-8") as f:
    data = json.loads(f.read())

# Index of the entry to inspect; (re)running this cell resets it to the first entry.
i = 0

### Render an entry with spaCy displacy and inspect entities

What it does
- Imports `spacy.displacy`.
- Selects `entry = data[i]` and sets a display title including the file name.
- Renders the entry using `displacy.render(..., manual=True)` (manual mode expects the `{"text": ..., "ents": [...]}` format).
- Increments `i` so you can step through entries.
- Prints the `ents` list as pretty JSON for quick debugging.

Usage tips
- To view the next entry, run the cell again (it increments `i`). To start over from the first entry, re-run the loading cell above, which resets `i = 0`.
- If the cell fails, confirm that the selected `entry` has the keys it reads: `"file_name"` (used for the title), `"text"`, and `"ents"`, where each ent has `start`/`end` offsets and a `label`.
- For batch inspection consider wrapping the render/print calls in a short loop or using `display(HTML(...))` for more control in notebooks.

In [43]:
from spacy import displacy

# Render the entry at the current index `i`, then advance `i` so that
# re-running this cell steps through the entries one at a time.
if i >= len(data):
    # Stepping past the last entry (or an empty file) used to raise a bare
    # IndexError; say what happened and how to recover instead.
    print(f'No entry at index {i}: data has {len(data)} entries. Set i = 0 to start over.')
else:
    entry = data[i]
    # The title is stored on the entry itself, so it is rendered with the text.
    entry['title'] = f'Entry: {i} Located in: {entry["file_name"]}'
    displacy.render(entry, style="ent", jupyter=True, manual=True)
    i += 1

    # Dump the raw entity dicts under the rendering for debugging.
    # ensure_ascii=False keeps non-ASCII characters in entity text readable
    # instead of printing them as \uXXXX escapes.
    print(json.dumps(entry['ents'], indent=2, ensure_ascii=False))

[
  {
    "text": "Adam Loftum",
    "start": 0,
    "end": 11,
    "label": "PER"
  },
  {
    "text": "Dublin",
    "start": 32,
    "end": 38,
    "label": "LOC"
  },
  {
    "text": "Ireland",
    "start": 89,
    "end": 96,
    "label": "LOC"
  },
  {
    "text": "Irelande",
    "start": 89,
    "end": 97,
    "label": "LOC"
  },
  {
    "text": "James Pourdon",
    "start": 121,
    "end": 134,
    "label": "PER"
  },
  {
    "text": "Dudley Lo",
    "start": 167,
    "end": 176,
    "label": "PER"
  },
  {
    "text": "Dudley Lof",
    "start": 167,
    "end": 177,
    "label": "PER"
  },
  {
    "text": "Edward Lo",
    "start": 187,
    "end": 196,
    "label": "PER"
  },
  {
    "text": "Adam Lo",
    "start": 209,
    "end": 216,
    "label": "PER"
  },
  {
    "text": "Thomas Loft",
    "start": 227,
    "end": 238,
    "label": "PER"
  },
  {
    "text": "Thomas Loftus",
    "start": 227,
    "end": 240,
    "label": "PER"
  },
  {
    "text": "Henry Loftus",
    "start": 