<a href="https://colab.research.google.com/github/Mircus/WalkGPT/blob/main/WalkGPT_v1_0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from os import sched_get_priority_min
# Notebook shell commands: install the third-party packages used by the cells below.
!pip install sparqlwrapper
!pip install torch torchvision
# The next four wheels are fetched from the index named for torch-2.0.1+cu118.
# NOTE(review): torch itself is installed unpinned on the line above — confirm
# the runtime's torch/CUDA build actually matches this wheel index.
# NOTE(review): no torch_geometric import is visible in this section — confirm
# these PyTorch Geometric installs are still needed.
!pip install -q torch-scatter -f https://pytorch-geometric.com/whl/torch-2.0.1+cu118.html
!pip install -q torch-sparse -f https://pytorch-geometric.com/whl/torch-2.0.1+cu118.html
!pip install -q torch-cluster -f https://pytorch-geometric.com/whl/torch-2.0.1+cu118.html
!pip install -q torch-spline-conv -f https://pytorch-geometric.com/whl/torch-2.0.1+cu118.html
!pip install -q torch-geometric
!pip install transformers

# Necessary imports
import pandas as pd
import networkx as nx
import random
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from SPARQLWrapper import SPARQLWrapper, JSON

# Fetch up to 1000 English-labelled actor/film rows (plus the film's abstract
# and genre label) from the public DBpedia SPARQL endpoint.
# The query text declares no PREFIX lines, so it relies on the endpoint
# resolving the rdf:, rdfs: and dbo: prefixes itself.
actor_movie_query = """
    SELECT ?actor ?actorLabel ?movie ?movieLabel ?abstract ?genreLabel
    WHERE {
        ?actor rdf:type dbo:Actor .
        ?movie rdf:type dbo:Film .
        ?movie dbo:starring ?actor .
        ?actor rdfs:label ?actorLabel .
        ?movie rdfs:label ?movieLabel .
        ?movie dbo:abstract ?abstract .
        ?movie dbo:genre ?genre .
        ?genre rdfs:label ?genreLabel .
        FILTER (LANG(?actorLabel) = 'en')
        FILTER (LANG(?movieLabel) = 'en')
        FILTER (LANG(?abstract) = 'en')
        FILTER (LANG(?genreLabel) = 'en')
    }
    LIMIT 1000
"""

sparql = SPARQLWrapper("http://dbpedia.org/sparql")
sparql.setQuery(actor_movie_query)
sparql.setReturnFormat(JSON)

# Run the query, then decode the JSON response into nested dicts/lists.
query_response = sparql.query()
results = query_response.convert()

# One entry per result row; each entry maps a selected variable to its binding.
rows = results['results']['bindings']

# Load the raw SPARQL bindings: one row per result, one column per variable.
bindings_df = pd.DataFrame(rows)

# Output column -> SPARQL variable it is taken from (order is the final
# column order of df).
column_sources = {
    'actor_name': 'actorLabel',
    'title_name': 'movieLabel',
    'abstract': 'abstract',
    'genre': 'genreLabel',
}


def _binding_value(cell):
    """Return the 'value' entry of a single binding cell."""
    return cell['value']


# Keep only the plain values of the four columns used downstream.
df = pd.DataFrame({
    target: bindings_df[source].apply(_binding_value)
    for target, source in column_sources.items()
})

# Undirected graph linking actor names to the titles they appear in.
G = nx.Graph()

# Every DataFrame row contributes an actor node, a movie node and one edge
# whose 'relation' attribute is a sentence describing that pairing.
row_fields = zip(df['actor_name'], df['title_name'], df['genre'], df['abstract'])
for actor, title, genre, abstract in row_fields:
    G.add_node(actor, type='actor')
    G.add_node(title, type='movie')
    sentence = f"{actor} starred in {title}, a {genre}, which is about: {abstract}"
    G.add_edge(actor, title, relation=sentence)

# Define function for random walks
def generate_random_walks(G, num_walks=10, walk_length=10, rng=None):
    """Sample random walks over ``G`` that never step straight back.

    Each walk starts at a uniformly chosen node and takes up to
    ``walk_length`` steps. A step picks uniformly among the current node's
    neighbors, excluding the current node itself and the node visited just
    before it (no immediate backtracking). A walk stops early when no such
    neighbor is left.

    Args:
        G: Graph exposing ``nodes()`` and accepted by ``nx.all_neighbors``.
        num_walks: Number of walks to sample.
        walk_length: Maximum number of steps per walk, so a walk holds at
            most ``walk_length + 1`` nodes.
        rng: Optional object providing ``choice(seq)`` (for example a seeded
            ``random.Random``) for reproducible walks. Defaults to the
            module-level ``random`` generator.

    Returns:
        A list of ``num_walks`` walks, each a list of nodes. An empty list
        is returned when ``G`` has no nodes.
    """
    chooser = random if rng is None else rng
    nodes = list(G.nodes())
    if not nodes:
        # choice() raises IndexError on an empty sequence; an empty graph
        # simply has no walks.
        return []

    walks = []
    for _ in range(num_walks):
        walk = [chooser.choice(nodes)]
        for _step in range(walk_length):
            cur_node = walk[-1]
            # The last two entries are the current node and its predecessor.
            recent = walk[-2:]
            candidates = [
                n for n in nx.all_neighbors(G, cur_node) if n not in recent
            ]
            if not candidates:
                break
            walk.append(chooser.choice(candidates))
        walks.append(walk)
    return walks

# Sample walks over the actor/movie graph with the default settings.
random_walks = generate_random_walks(G)

# Turn each walk into text: look up the 'relation' sentence on every edge the
# walk traverses and join them with spaces; walks are then joined with ". ".
walk_texts = []
for walk in random_walks:
    edge_sentences = [
        G[src][dst]['relation'] for src, dst in zip(walk, walk[1:])
    ]
    walk_texts.append(" ".join(edge_sentences))
seed_text = ". ".join(walk_texts)

# Prepend the instruction header that frames the walk text for the model.
seed_text1 = """
Given the following details, imagine a suspenseful thriller movie where the narratives from each series are intertwined, and the actors mentioned take on new roles. Create a plot full of unexpected twists and resolutions:
""" + seed_text

print(seed_text1)

In [None]:
# Trim the seed text to a specified maximum length (in characters), cutting
# at a sentence boundary when one is available.
max_length_seed = 500
if len(seed_text1) > max_length_seed:
    # Only accept a period that lies after the instruction header: the header
    # itself contains a period, and cutting there would drop the rest of the
    # instructions together with all of the walk-derived text.
    header_length = max(len(seed_text1) - len(seed_text), 0)
    end_position = seed_text1.rfind(".", header_length, max_length_seed)
    if end_position > -1:
        # "+ 1" keeps the period so the prompt ends on a complete sentence.
        seed_text1 = seed_text1[:end_position + 1]
    else:
        # No sentence boundary fits in the budget: hard-cut so the prompt
        # still respects the limit instead of staying arbitrarily long.
        seed_text1 = seed_text1[:max_length_seed]

from transformers import AutoTokenizer, AutoModelForCausalLM

# Auto classes used by the narrative generator defined below
from transformers import AutoTokenizer, AutoModelForCausalLM

# Define a class for narrative generation
class NarrativeGenerator:
    """Generate text continuations with a pretrained causal language model.

    The tokenizer and model are loaded once in the constructor and reused by
    every call to :meth:`generate_narrative`.
    """

    def __init__(self, model_name):
        """Load the tokenizer and causal LM for ``model_name``.

        Args:
            model_name: Checkpoint name or path passed to ``from_pretrained``
                (for example ``'gpt2-medium'``).
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForCausalLM.from_pretrained(model_name)

    def generate_narrative(self, seed_text, max_length=1500, temperature=0.8):
        """Sample one continuation of ``seed_text``.

        Args:
            seed_text: Prompt text; it is truncated by the tokenizer if it
                exceeds the tokenizer's limit.
            max_length: Upper bound on the total sequence length in tokens
                (prompt plus generated text). It is capped to the model's
                context window when the model config reports one.
            temperature: Sampling temperature passed to ``generate``.

        Returns:
            The decoded sequence (prompt followed by the sampled
            continuation) with special tokens removed.
        """
        # Cap the requested length to the context window: positions beyond it
        # have no position embedding to look up (e.g. 1024 for GPT-2, which is
        # below this method's default of 1500).
        context_window = getattr(self.model.config, 'max_position_embeddings', None)
        if context_window:
            max_length = min(max_length, context_window)

        # Calling the tokenizer (rather than encode) also yields the attention
        # mask, so generate() does not have to guess it.
        encoded = self.tokenizer(seed_text, return_tensors='pt', truncation=True)

        # Tokenizers without a dedicated padding token fall back to EOS.
        pad_token_id = self.tokenizer.pad_token_id
        if pad_token_id is None:
            pad_token_id = self.tokenizer.eos_token_id

        outputs = self.model.generate(
            encoded['input_ids'],
            attention_mask=encoded.get('attention_mask'),
            pad_token_id=pad_token_id,
            do_sample=True,
            max_length=max_length,
            temperature=temperature,
            num_return_sequences=1,
        )
        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)

# Load GPT-2 Medium once; the generator object is reused for the call below.
model_name = 'gpt2-medium'
narrative_generator = NarrativeGenerator(model_name)

# Sample a continuation of the prepared prompt, limited to 1024 tokens in
# total, and show it.
narrative = narrative_generator.generate_narrative(
    seed_text=seed_text1,
    max_length=1024,
)
print(narrative)

