In [None]:
import pandas as pd

In [None]:
# Read the genre-labelled story dataset, explicitly requesting the 'python' parser engine.
df = pd.read_csv(
    "/content/1k_stories_100_genre.csv",
    engine="python",
)

In [None]:
# Prefix each story with its genre tag, producing "[GENRE : <genre>]<story>".
df["Formatted_input"] = (
    "[GENRE : "
    + df["genre"]
    + "]"
    + df["story"]
)

In [None]:
# Preview the first three rows to check the genre tag in front of each story.
print(df.loc[:, ["genre", "Formatted_input"]].head(3))

             genre                                    Formatted_input
0  Science Fiction  [GENRE : Science Fiction]In the year 2250, Ear...
1          Fantasy  [GENRE : Fantasy]In a land far away, where the...
2          Mystery  [GENRE : Mystery]Once upon a time, in a small,...


In [None]:
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

# Download the NLTK resources that the preprocessing cells below rely on.
nltk.download('punkt')      # tokenizer models — presumably used by word_tokenize; confirm for the installed NLTK version
nltk.download('stopwords')  # corpus read via stopwords.words('english')
nltk.download('wordnet')    # presumably the data behind WordNetLemmatizer — confirm
nltk.download('punkt_tab')  # presumably the tokenizer tables newer NLTK releases look for — confirm

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [None]:
# Data cleaning

def clean(text):
  """Normalise raw text ahead of tokenization.

  Steps, in order: cast to ``str`` and lower-case, remove digit runs, remove
  every character that is neither a word character nor whitespace, then
  squeeze each whitespace run down to a single space and trim both ends.

  Args:
    text: Value to clean. Any object is accepted because it is passed
      through ``str()`` first.

  Returns:
    The cleaned, lower-cased string with words separated by single spaces.
  """
  text = str(text).lower()
  text = re.sub(r'\d+', '', text)
  text = re.sub(r'[^\w\s]', '', text)
  # Replace whitespace runs with ONE space. Substituting the empty string
  # here would glue all words together and leave a single giant token.
  text = re.sub(r'\s+', ' ', text).strip()
  return text

In [None]:
# Tokenization and stopwords removal

# English stop-word list from NLTK, plus a frozenset snapshot of it so each
# membership test in the filter below is a hash lookup rather than a list scan.
# Rebuild the snapshot if stop_w is ever edited after this cell runs.
stop_w = stopwords.words('english')
_stop_w_lookup = frozenset(stop_w)

def tokenize_and_stopwords(text):
  """Tokenize ``text`` and keep only alphabetic, non-stop-word tokens.

  Args:
    text: String to split with ``word_tokenize``.

  Returns:
    List of tokens, in their original order, that are not in the English
    stop-word list and for which ``str.isalpha()`` is true.
  """
  # A distinct loop variable avoids rebinding the name of the sequence being
  # iterated, which made the original comprehension hard to read.
  return [
      token
      for token in word_tokenize(text)
      if token not in _stop_w_lookup and token.isalpha()
  ]


In [None]:
# Lemmatization

# Single lemmatizer instance shared by every call to lemmatize().
Lemma = WordNetLemmatizer()

def lemmatize(tokens):
  """Return a new list with ``Lemma.lemmatize`` applied to each token, in order."""
  result = []
  for word in tokens:
    result.append(Lemma.lemmatize(word))
  return result

In [None]:
# Combining all steps in one pipeline

def preprocess_text(text):
  """Clean, tokenize/filter and lemmatize ``text``, then re-join the tokens with spaces."""
  kept_tokens = tokenize_and_stopwords(clean(text))
  return " ".join(lemmatize(kept_tokens))

In [None]:
# Apply preprocessing to the story dataset

# Build the cleaned-text column by sending every story through the pipeline.
df["clean_story"] = df["story"].map(preprocess_text)

In [None]:
# Preview the first five rows of genre and preprocessed text.
print(df.loc[:, ["genre", "clean_story"]].head(5))

                  genre                                        clean_story
0       Science Fiction  intheyearearthhadmadesignificantstridesinspace...
1               Fantasy  inalandfarawaywherethesunshonebrighterandthegr...
2               Mystery  onceuponatimeinasmalltranquiltowncalledwhisper...
3  Historical Adventure  onceuponatimeinthethcenturyasmallvillagenestle...
4              Thriller  inthesundrenchedcoastalcityofstaugustineflorid...


In [None]:
# Feature Extraction

from sklearn.feature_extraction.text import TfidfVectorizer

# TF-IDF over the preprocessed stories, keeping at most 5000 vocabulary terms.
vectorizer = TfidfVectorizer(max_features=5000)
texts = df["clean_story"]
x = vectorizer.fit_transform(texts)

# Rows are documents, columns are retained terms.
print("feature matrix shape : ", x.shape)

feature matrix shape :  (1000, 996)


In [None]:
#GPT-2

# Disabled experiment: everything below is wrapped in a string literal, so
# running this cell only evaluates (and echoes) that string; no model is
# loaded and nothing is generated.
# NOTE(review): max_length=2500 looks larger than GPT-2's context window
# (1024 tokens, to be confirmed) — check before re-enabling this code.
'''from transformers import pipeline, set_seed

generator = pipeline("text-generation", model="gpt2")
set_seed(42)

#Example input
input_prompt = "[GENRE: suspense] Her heart raced as she unfolded the letter..."

#Generate story
story = generator(input_prompt, max_length=2500, do_sample=True, temperature=0.9)

# Display result
print("📝 Generated Story:\n", story[0]["generated_text"])'''

'from transformers import pipeline, set_seed\n\ngenerator = pipeline("text-generation", model="gpt2")\nset_seed(42)\n\n#Example input\ninput_prompt = "[GENRE: suspense] Her heart raced as she unfolded the letter..."\n\n#Generate story\nstory = generator(input_prompt, max_length=2500, do_sample=True, temperature=0.9)\n\n# Display result\nprint("📝 Generated Story:\n", story[0]["generated_text"])'

In [None]:
# Gemini

# Disabled example: the code is held in a string literal and is never executed.
# The API key is read from the GOOGLE_API_KEY environment variable so that no
# secret is stored in the notebook source or echoed into its outputs.
# NOTE(review): any key that was ever saved in this notebook should be revoked.
'''import os
import google.generativeai as genai

genai.configure(api_key = os.environ["GOOGLE_API_KEY"])

#Load the Model
Model = genai.GenerativeModel("gemini-1.5-pro")

# Content generation

Output = Model.generate_content("[GENRE: Thriller] A knock echoed at midnight...")
print(Output.text)'''

In [None]:
#!pip install anthropic

In [None]:
# Claude 4 sonnet

# Disabled example: the code is held in a string literal and is never executed.
# The API key is read from the ANTHROPIC_API_KEY environment variable so that
# no secret is stored in the notebook source or echoed into its outputs.
# NOTE(review): any key that was ever saved in this notebook should be revoked;
# the Anthropic client needs a key issued by Anthropic itself.
'''import os
import anthropic

# 🔑 Read your API key from the environment
client = anthropic.Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])

# 🔹 Build your prompt from extracted features
prompt = "I want to write a romantic thriller about Henry and Lisa, enemies who fall in love. Add suspense and give it a happy ending."

# 🔹 Send prompt to Claude
response = client.messages.create(
    model="claude-sonnet-4-20250514",
    max_tokens=3000,
    temperature=0.9,
    system="You are a genre-aware storytelling assistant.",
    messages=[
        {"role": "user", "content": prompt}
    ]
)

# 🔹 Display the generated story
print(response.content[0].text)'''

In [None]:
# GPT-4

import os

import openai
import pandas as pd

# 🔑 Read the OpenAI API key from the environment; never store it in the notebook.
# NOTE(review): any key that was ever saved in this notebook should be revoked.
openai.api_key = os.environ["OPENAI_API_KEY"]

# 🔹 Load the raw story dataset and rebuild the genre-tagged prompt column.
# A freshly read CSV only has the raw columns, so "Formatted_input" must be
# derived again here before it can be used as the prompt.
df = pd.read_csv("/content/1k_stories_100_genre.csv")  # Update path if needed
df["Formatted_input"] = "[GENRE : " + df["genre"] + "]" + df["story"]

# 🔹 Prepare a list to store outputs
generated_stories = []

# 🔁 Loop through prompts to generate stories (one API call per dataset row)
for idx, row in df.iterrows():
    prompt = row["Formatted_input"]  # Genre-tagged prompt; the column name is case-sensitive

    # Call GPT-4 with structured prompt
    # NOTE(review): openai.ChatCompletion is the pre-1.0 SDK interface —
    # confirm the installed openai version still provides it.
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a genre-aware storytelling assistant."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=3000,  # Upper bound on generated tokens per story
        temperature=0.9,
        top_p=1.0,
        frequency_penalty=0.2,
        presence_penalty=0.3
    )

    story = response['choices'][0]['message']['content']

    generated_stories.append({
        "genre": row["genre"],
        "prompt": prompt,
        "story": story
    })

# 🔹 Convert to DataFrame
gen_df = pd.DataFrame(generated_stories)

# 🔹 Save your generated anthology
gen_df.to_csv("/content/dreamquill_generated_stories.csv", index=False)

# 🔹 Preview the first few stories
print(gen_df.head(3))