In [31]:
import pandas as pd
import json
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain_openai import ChatOpenAI
from langchain_community.graphs import Neo4jGraph

import os
import glob
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())

In [32]:
# Chat-model client used by the extraction loop below (`chat.invoke`).
# temperature=0 is passed so replies are as repeatable as the API allows.
# NOTE(review): credentials are presumably picked up from the environment
# populated by load_dotenv() in the import cell — confirm.
chat = ChatOpenAI(temperature=0)

In [33]:
# System prompt instructing the model to extract (name, role, movie) triples
# from an article's cast section and to return them as JSON under "cast".
# The text is sent verbatim to the model, so it is kept exactly as authored.
template = (
    """Given the article about movie, Carefully read cast section of the article, and I want you to create:
         1. actor/actress name
         2. his/her role
         3. movie title
         - NEVER Impute missing values.
         - DO NOT MISS out any actor/actress related information. There're more than one actor in a movie
         return information in json format with fields name 'name' , 'role', 'movie' under element 'cast'
     """
)
system_message_prompt = SystemMessagePromptTemplate.from_template(template)
human_template = "{text}"
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

chat_prompt = ChatPromptTemplate.from_messages(
    [system_message_prompt, human_message_prompt]
)


def _parse_cast_reply(reply_text):
    """Return the list stored under "cast" in a model reply.

    The reply may be bare JSON or JSON wrapped in a Markdown code fence
    (with or without a ``json`` tag) or surrounded by extra prose, so only
    the span from the first "{" to the last "}" is parsed.

    Raises:
        ValueError: no JSON object is present or it cannot be decoded
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
        KeyError: the decoded object has no "cast" element.
    """
    start = reply_text.find("{")
    end = reply_text.rfind("}")
    if start == -1 or end < start:
        raise ValueError("no JSON object found in model reply")
    return json.loads(reply_text[start:end + 1])["cast"]


# One DataFrame per successfully processed article; concatenated once at the
# end instead of growing a frame inside the loop.
cast_frames = []
txt_files = glob.glob("data/*.txt")
for file_path in txt_files:
    # Only the file read is guarded here, so a missing file is reported
    # instead of being silently ignored.
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            content = file.read()
    except FileNotFoundError:
        print(f"Skipping {file_path}: file not found.")
        continue

    # Get a chat completion for this article from the formatted messages.
    ret = chat.invoke(
        chat_prompt.format_prompt(
            text=content
        ).to_messages()
    )

    # A single malformed reply should not discard the results (and API
    # spend) of every other article, so report it and move on.
    try:
        cast_frames.append(pd.DataFrame(_parse_cast_reply(ret.content)))
    except (ValueError, KeyError) as err:
        print(f"Skipping {file_path}: could not parse model reply ({err!r}).")

# Row labels intentionally restart at 0 for each article, as before.
casts_df = pd.concat(cast_frames) if cast_frames else pd.DataFrame()


In [34]:
# Display the extracted cast table (columns: name, role, movie).
casts_df

Unnamed: 0,name,role,movie
0,Christian Bale,Bruce Wayne / Batman,Batman Begins
1,Michael Caine,Alfred Pennyworth,Batman Begins
2,Liam Neeson,Henri Ducard / Ra's al Ghul,Batman Begins
3,Katie Holmes,Rachel Dawes,Batman Begins
4,Gary Oldman,James 'Jim' Gordon,Batman Begins
...,...,...,...
8,Roger Rees,Owens,The Prestige
9,W. Morgan Sheppard,Merrit,The Prestige
10,Samantha Mahurin,Jess Borden,The Prestige
11,Daniel Davis,Judge,The Prestige


In [30]:
# Connect to Neo4j; every connection setting is read from the environment.
graph = Neo4jGraph(
    url=os.getenv('NEO4J_URI'),
    username=os.getenv('NEO4J_USER'),
    password=os.getenv('NEO4J_PASSWORD'),
    database=os.getenv('NEO4J_DATABASE'),
)

# Normalise the extracted table before writing:
#  - reindex guarantees the three columns exist even when casts_df is empty;
#  - rows without a movie or a name are dropped, because MERGE cannot match
#    a node on a null property value.
cast_rows = (
    casts_df
    .reindex(columns=["movie", "role", "name"])
    .dropna(subset=["movie", "name"])
    .reset_index(drop=True)
)
has_role = cast_rows["role"].notna()
rows_with_role = cast_rows[has_role].to_dict("records")
rows_without_role = cast_rows.loc[~has_role, ["movie", "name"]].to_dict("records")

# Send all rows in one parameterised UNWIND query rather than one round trip
# per row. MERGE keeps the write idempotent across re-runs.
if rows_with_role:
    graph.query(
        """
    UNWIND $rows AS row
    MERGE (m:movie {title: row.movie})
    MERGE (n:cast {name: row.name})
    MERGE (n)-[:ACTED_IN {role: row.role}]->(m)
    """,
        {"rows": rows_with_role},
    )

# The prompt forbids imputing, so a role can be missing. Those actors are
# still linked to their movie, just without a role property (merging a
# relationship on a null property would raise an error).
if rows_without_role:
    graph.query(
        """
    UNWIND $rows AS row
    MERGE (m:movie {title: row.movie})
    MERGE (n:cast {name: row.name})
    MERGE (n)-[:ACTED_IN]->(m)
    """,
        {"rows": rows_without_role},
    )
