In [1]:
import sys

!{sys.executable} -m pip install spacy
!{sys.executable} -m spacy download en_core_web_sm
!{sys.executable} -m pip install tracery


Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')


In [3]:
import spacy
from collections import Counter
from spacy.lang.en.stop_words import STOP_WORDS
import random

# Tracery
import tracery
from tracery.modifiers import base_english

# spaCy 
nlp = spacy.load("en_core_web_sm")

# text file
file_path = "harry_potter1_summary.txt"

with open(file_path, "r", encoding="utf-8") as f:
    text = f.read()

print(text[:500], "\n...\n") 


Harry Potter and the Philosopher’s Stone 

Chapter 1 – The Boy Who Lived The story begins with the Dursley family
living ordinary lives on Privet Drive. Vernon Dursley notices strange
people in cloaks celebrating across the city. That night, Albus
Dumbledore meets Professor McGonagall and Hagrid. They bring baby Harry
Potter, the only survivor of an attack by Lord Voldemort. Harry is left
on the Dursleys’ doorstep with a letter explaining his tragic past.

Chapter 2 – The Vanishing Glass Ten yea 
...



In [5]:
doc = nlp(text)

tokens_alpha = [t for t in doc if t.is_alpha]

# Lemmatize, lowercase, remove stopwords
clean_words = [
    t.lemma_.lower()
    for t in tokens_alpha
    if not t.is_stop
]

# Frequency counter after preprocessing
freq_clean = Counter(clean_words)

print("< Top 20 frequent content words >")
for w, c in freq_clean.most_common(20):
    print(w, ":", c)


< Top 20 frequent content words >
harry : 32
chapter : 17
hermione : 7
stone : 6
ron : 6
snape : 6
hagrid : 5
voldemort : 5
hogwarts : 5
letter : 4
explain : 4
mirror : 4
philosopher : 3
life : 3
dumbledore : 3
meet : 3
professor : 3
mysterious : 3
guard : 3
magical : 3


In [8]:
# extract entities (characters, places ETC)
characters = set()
places = set()
other_ents = set()

for ent in doc.ents:
    if ent.label_ == "PERSON":
        characters.add(ent.text)
    elif ent.label_ in ("GPE", "LOC"):
        places.add(ent.text)
    else:
        other_ents.add(ent.text)

print("< CHARACTERS >\n", characters)
print("\n< PLACES >\n", places)
print("\n< OTHER ENTITIES (sample) >\n", list(other_ents)[:20])


< CHARACTERS >
 {'Harry\nPotter', 'Nicolas Flamel', 'Ron', 'Quirrell', 'Quidditch Harry', 'Nicolas Flamel Harry', 'Harry, Ron, Hermione', 'Harry Potter', 'Ron’s', 'Dudley', 'Harry', 'Charlie', 'Norbert', 'Ron Weasley', 'McGonagall', 'Gryffindor', 'Harry’s Hogwarts', 'Dursley', 'Vernon Dursley', 'Hermione Granger', 'Hogwarts', 'the Hogwarts\nExpress', 'Dursleys'}

< PLACES >
 {'London', 'Hagrid'}

< OTHER ENTITIES (sample) >
 ['first', 'Chapter 13', 'Snape', 'Chapter 1', 'Ollivanders', 'Draco Malfoy', 'Chapter 12', 'the Philosopher’s Stone', 'Hermione', 'Chapter 9', 'Snape’s', 'Chapter 16', 'the Sorting Ceremony', 'Ten years later', 'Norwegian', 'Chapter 8', 'Chapter 7', 'Chapter 15', 'Chapter 17', 'Privet Drive']


In [10]:
# Collect all nouns (lemmatized)
nouns = [
    t.lemma_
    for t in doc
    if t.pos_ == "NOUN" and t.is_alpha and not t.is_stop
]

# Frequency count of nouns
freq_nouns = Counter(nouns)

# Top 50 nouns used as object candidates
important_nouns = [w for w, c in freq_nouns.most_common(50)]

print("<Important Nouns (Top 50) >")
print(important_nouns)


<Important Nouns (Top 50) >
['chapter', 'letter', 'life', 'family', 'identity', 'parent', 'school', 'item', 'vault', 'friendship', 'student', 'duel', 'dog', 'trapdoor', 'match', 'mirror', 'friend', 'dragon', 'story', 'people', 'cloak', 'city', 'night', 'baby', 'survivor', 'attack', 'doorstep', 'past', 'year', 'cousin', 'visit', 'zoo', 'glass', 'snake', 'enclosure', 'vanish', 'incident', 'panic', 'location', 'hut', 'island', 'midnight', 'burst', 'admission', 'wizard', 'death', 'hand', 'world', 'supply', 'goblin']


In [12]:
# Convert sets into lists 
character_list = list(characters) or ["Harry", "Ron", "Hermione", "Dumbledore"]
place_list = list(places) or ["Hogwarts", "the forest", "the castle", "the Great Hall"]
noun_list = important_nouns or ["stone", "wand", "mirror", "forest", "troll", "dragon"]

# Additional hand-made action and event lists for variety
actions = [
    "guarded the #object#",
    "whispered about the #object#",
    "fought bravely for the #object#",
    "tried to protect the #object#",
    "discovered a secret about the #object#"
]

events = [
    "#hero# and #friend# went to #place# at night.",
    "In #place#, #hero# faced #enemy# alone.",
    "During a quiet evening in #place#, #hero# found the #object#.",
    "#hero# realized that #enemy# was after the #object# in #place#."
]

reactions = [
    "#hero# felt afraid but determined.",
    "#hero# was confused, yet curious.",
    "#hero# knew there was no way back.",
    "#hero# decided to trust #friend#.",
    "#hero# understood that everything had changed."
]

# Tracery grammar dictionary
grammar = {
    "origin": [
        "#scene#",
        "#scene# #reaction#",
        "#event# #reaction#"
    ],
    "scene": [
        "At #place#, #hero# and #friend# #action#",
        "Near #place#, #hero# discovered that #enemy# had followed them",
        "Inside #place#, #hero# could feel the magic of the #object#"
    ],
    "event": events,
    "reaction": reactions,
    "hero": character_list,
    "friend": character_list,
    "enemy": character_list,
    "place": place_list,
    "object": noun_list,
    "action": actions
}

# Build Tracery grammar
hp_grammar = tracery.Grammar(grammar)
hp_grammar.add_modifiers(base_english)


In [13]:
# Generate random HarryPotter-Style sentences
def generate_hp_sentences(n=10):
    """Generate n random Harry Potter–style sentences using Tracery."""
    for i in range(n):
        sentence = hp_grammar.flatten("#origin#")
        print(f"{i+1}. {sentence}\n")

# Generate 10 sample sentences
generate_hp_sentences(10)


1. the Hogwarts
Express realized that Ron was after the survivor in Hagrid. Harry understood that everything had changed.

2. Inside Hagrid, Harry could feel the magic of the hut Harry decided to trust Nicolas Flamel.

3. In Hagrid, Charlie faced Nicolas Flamel alone. Dursleys decided to trust Dudley.

4. Inside Hagrid, Harry could feel the magic of the mirror

5. Inside London, Harry Potter could feel the magic of the enclosure

6. In London, Quidditch Harry faced Harry’s Hogwarts alone. Nicolas Flamel Harry understood that everything had changed.

7. During a quiet evening in London, Harry Potter found the match. Charlie understood that everything had changed.

8. Near Hagrid, Nicolas Flamel discovered that Harry Potter had followed them

9. At London, Hermione Granger and Gryffindor discovered a secret about the mirror Charlie felt afraid but determined.

10. At London, Ron’s and Quidditch Harry tried to protect the baby

