In [13]:
!pip install langchain sentence-transformers faiss-cpu



In [14]:
!pip install -U langchain-community



In [15]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from sentence_transformers import SentenceTransformer

# Load Sentence Transformer model (no API key required)
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [16]:
import os

# Path to the directory containing your story files
story_files = ['a-mother.txt', 'sorrow.txt', 'the-lantern-keepers.txt', 'the-poor-relations-story.txt', 'the-schoolmistress.txt']

# Read the stories into a list of strings
stories = []
for file in story_files:
    with open(file, 'r') as f:
        stories.append(f.read())

# Print the first story for verification
print(stories[0])

A Mother

Mr Holohan, assistant secretary of the Eire Abu Society, had been walking up and down Dublin for nearly a month, with his hands and pockets full of dirty pieces of paper, arranging about the series of concerts. He had a game leg and for this his friends called him Hoppy Holohan. He walked up and down constantly, stood by the hour at street corners arguing the point and made notes; but in the end it was Mrs Kearney who arranged everything.

Miss Devlin had become Mrs Kearney out of spite. She had been educated in a high-class convent, where she had learned French and music. As she was naturally pale and unbending in manner she made few friends at school. When she came to the age of marriage she was sent out to many houses where her playing and ivory manners were much admired. She sat amid the chilly circle of her accomplishments, waiting for some suitor to brave it and offer her a brilliant life. But the young men whom she met were ordinary and she gave them no encouragement, 

In [17]:
# Compute the embeddings for each story
embeddings = embedding_model.embed_documents(stories)

In [18]:
import faiss
import numpy as np

# Convert embeddings to numpy array for FAISS compatibility
embedding_matrix = np.array(embeddings)

# Initialize a FAISS index
index = faiss.IndexFlatL2(embedding_matrix.shape[1])  # L2 distance metric for similarity

# Add the embeddings to the index
index.add(embedding_matrix)

In [21]:
def query_character_info(character_name, k=1):
    # Embed the query (character name or description)
    query_embedding = embedding_model.embed_query(character_name)

    # Perform the search in the FAISS index
    D, I = index.search(np.array([query_embedding]), k)  # D = distances, I = indices

    # Retrieve the most similar story
    most_similar_story_idx = I[0][0]
    most_similar_story = stories[most_similar_story_idx]

    return most_similar_story

# Example usage: Search for a character by name
character_name = "Callum"
result = query_character_info(character_name)
print(result)

The Lantern Keepers

In the quiet town of Bramblewick, perched atop rolling hills blanketed in wildflowers, stood a lighthouse far from any sea. Its lantern burned nightly, a mystery to all who came to the town. Locals called it "The Beacon of Bramblewick," though no one could recall why it was first lit.

The keeper of the lantern, a stooped man named Callum, had tended to it for decades. Callum rarely spoke to anyone, his gnarled hands and thick spectacles more suited to the constant care of the light. His only companion was a small black cat, Ember, who followed him like a shadow.

One late summer evening, Eliza, a young artist new to Bramblewick, climbed the hill to sketch the lighthouse. She found Callum outside, polishing the brass lantern. He glanced up, his face etched with lines like an old map.

“You’re new,” he said.

“I am,” Eliza replied, smiling. “What’s the light for?”

Callum hesitated before answering. “To guide those who are lost.”

Intrigued, Eliza decided to learn m

In [22]:
import json

def extract_structured_info(character_name, story):
    # Example extraction: manually extract relationships and role from the story (this can be improved)
    relationships = []
    if character_name in story:
        # This is a simplified assumption where you manually parse relationships
        relationships.append({"name": "Arya Stark", "relation": "Sister"})
        relationships.append({"name": "Eddard Stark", "relation": "Father"})

    # Create a structured JSON object
    character_info = {
        "name": character_name,
        "storyTitle": "A Song of Ice and Fire",  # Assume a title or extract from the story text
        "summary": story[:500],  # First 500 characters as a summary (can be improved)
        "relations": relationships,
        "characterType": "Protagonist"  # Can be dynamically assigned based on story parsing
    }

    return json.dumps(character_info, indent=4)

# Example usage: Get structured data for the character
structured_info = extract_structured_info(character_name, result)
print(structured_info)

{
    "name": "Callum",
    "storyTitle": "A Song of Ice and Fire",
    "summary": "The Lantern Keepers\n\nIn the quiet town of Bramblewick, perched atop rolling hills blanketed in wildflowers, stood a lighthouse far from any sea. Its lantern burned nightly, a mystery to all who came to the town. Locals called it \"The Beacon of Bramblewick,\" though no one could recall why it was first lit.\n\nThe keeper of the lantern, a stooped man named Callum, had tended to it for decades. Callum rarely spoke to anyone, his gnarled hands and thick spectacles more suited to the constant care of t",
    "relations": [
        {
            "name": "Arya Stark",
            "relation": "Sister"
        },
        {
            "name": "Eddard Stark",
            "relation": "Father"
        }
    ],
    "characterType": "Protagonist"
}


In [24]:
import os
import json
import numpy as np
from sentence_transformers import SentenceTransformer
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
import faiss
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.chains import LLMChain

# 1. Load SentenceTransformer Model (Free, Open-Source Model)
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# 2. Define Helper Functions to Read Story Files and Process Them
def load_stories(story_files):
    stories = []
    for file in story_files:
        with open(file, 'r') as f:
            content = f.readlines()
            story_title = content[0].strip()  # First line is the title
            story_text = "".join(content[1:]).strip()  # Rest is the story
            stories.append((story_title, story_text))
    return stories

# 3. Compute Embeddings for Stories
def compute_embeddings(stories):
    embeddings = []
    for title, text in stories:
        story_embedding = embedding_model.embed_documents([text])
        embeddings.append(story_embedding[0])
    return np.array(embeddings)

# 4. Create FAISS Index
def create_faiss_index(embeddings):
    index = faiss.IndexFlatL2(embeddings.shape[1])  # L2 distance metric for similarity
    index.add(embeddings)
    return index

# 5. Query FAISS Index and Retrieve Story Details
def query_character_info(character_name, index, stories, k=1):
    query_embedding = embedding_model.embed_query(character_name)
    D, I = index.search(np.array([query_embedding]), k)  # D = distances, I = indices

    # Retrieve the most similar story
    most_similar_story_idx = I[0][0]
    most_similar_story = stories[most_similar_story_idx]

    return most_similar_story

# 6. Set up Langchain to Extract Structured Information using an Open Source Model (No API key required)
def extract_structured_info_from_story(character_name, story_title, story_text):
    # Set up the prompt template for the character extraction task
    prompt = """
    You are an expert in analyzing story texts. Your task is to extract structured details about a character from the story. The details you need to provide are:

    - The character's name
    - A summary of the character's story (the main plot involving the character)
    - The character's relationships with other characters
    - The role of the character (e.g., protagonist, antagonist, side character)

    Story Title: {story_title}
    Story Text: {story_text}

    Character Name: {character_name}
    """

    # Initialize Langchain with ChatGPT Model for extracting details
    chat_model = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)  # Open source model setup
    chain = LLMChain(llm=chat_model, prompt=PromptTemplate.from_template(prompt))

    # Generate the output for the character details
    response = chain.run({
        "story_title": story_title,
        "story_text": story_text,
        "character_name": character_name
    })

    return response

# 7. Main Logic to Execute Query and Return Structured Information
def process_stories_and_generate_info(story_files, character_name):
    # Load stories
    stories = load_stories(story_files)

    # Compute embeddings for the stories
    embeddings = compute_embeddings(stories)

    # Create FAISS index
    index = create_faiss_index(embeddings)

    # Query for character info from FAISS
    result = query_character_info(character_name, index, stories)

    # Extract structured info for the character from the best matched story
    story_title, story_text = result
    structured_info = extract_structured_info_from_story(character_name, story_title, story_text)

    return structured_info

# Example usage:
story_files = ['a-mother.txt', 'sorrow.txt', 'the-lantern-keepers.txt', 'the-poor-relations-story.txt', 'the-schoolmistress.txt']
character_name = "Callum"

structured_info = process_stories_and_generate_info(story_files, character_name)
print(structured_info)

  chat_model = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)  # Open source model setup


ValidationError: 1 validation error for ChatOpenAI
  Value error, Did not find openai_api_key, please add an environment variable `OPENAI_API_KEY` which contains it, or pass `openai_api_key` as a named parameter. [type=value_error, input_value={'temperature': 0, 'model...ne, 'http_client': None}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.10/v/value_error