In [None]:
import random
import re
import torch
import os
from datetime import datetime
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM


In [None]:
# Attach Google Drive to the Colab filesystem so checkpoints can be read and
# generated scripts can be written under /content/drive.
from google.colab import drive

DRIVE_MOUNT_POINT = "/content/drive"
drive.mount(DRIVE_MOUNT_POINT)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Inference is pinned to the CPU. To use a GPU runtime instead, set the device
# type to "cuda" when torch.cuda.is_available() reports True.
device_type = "cpu"
device = torch.device(device_type)

In [None]:
# Label of the model variant under test; it names the output folder on Drive.
MODEL_TAG = "GPT2_335M_LORA8_AB_TEST"

# Every generated script for this variant is written below this directory.
SAVE_DIR = "/content/drive/MyDrive/ab_test_outputs/{}".format(MODEL_TAG)

# Create the folder up front; an already existing folder is not an error.
os.makedirs(SAVE_DIR, exist_ok=True)

In [None]:
# Load the GPT-2 medium tokenizer and base language model.
model_name = "gpt2-medium"

tokenizer = AutoTokenizer.from_pretrained(model_name)
# GPT-2 ships without a padding token, so the end-of-sequence token is reused.
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(model_name)
model = model.to(device)

In [None]:
# Load the LoRA adapter checkpoint on top of a fresh GPT-2 medium base model.
# The resulting `model` replaces the plain base model loaded earlier.
peft_model_path = "/content/drive/MyDrive/SEP775NLP_Final_Project/Models/gpt2_335m_lorar8/checkpoint-750"

# Adapter configuration stored with the checkpoint (kept for inspection).
peft_config = PeftConfig.from_pretrained(peft_model_path)
base_model = AutoModelForCausalLM.from_pretrained("gpt2-medium")
model = PeftModel.from_pretrained(base_model, peft_model_path)
# GPT-2 has no dedicated padding token, so padding reuses the EOS id.
model.config.pad_token_id = model.config.eos_token_id

# The generation loop moves its inputs to `device`; the adapter-wrapped model
# must live on the same device or generate() fails once `device` is a GPU.
model = model.to(device)
# Inference only: make sure dropout and similar layers are disabled.
model.eval()



In [None]:
# Summarization pipeline
summarizer = pipeline(
    "summarization",
    model="google/pegasus-cnn_dailymail",  # checkpoint tuned for news summarization
    # Use the first GPU when CUDA is available; -1 selects the CPU, so the cell
    # also works on CPU-only runtimes instead of assuming device 0 exists.
    device=0 if torch.cuda.is_available() else -1,
)

# Matching tokenizer, used to measure input length before summarizing.
# Both names are ordinary module-level variables; no `global` statement is
# needed (or valid) here, because the cell already runs at module scope.
summarizer_tokenizer = AutoTokenizer.from_pretrained("google/pegasus-cnn_dailymail")

config.json:   0%|          | 0.00/1.12k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-cnn_dailymail and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


generation_config.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/88.0 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/1.91M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

Device set to use cpu


In [None]:
# Prompt-construction strategies compared in the A/B test.
VERSIONS = ["Base", "WithSummary", "WithGuide", "Summary+Guide"]

# Speaker tags (character name followed by a colon) used to cue the next line.
GUIDES = [f"{name}:" for name in ("Rachel", "Monica", "Phoebe", "Joey", "Chandler", "Ross")]

# Scene tag of the opening scene.
scene_memory = "[Scene: McMaster University Student Center. Rachel and Joey are talking]"

# Seed script every version starts from: title block, episode idea, the
# opening scene tag and the first two lines of dialogue.
base_prompt_text = "\n".join([
    "The One With McMaster University Party",
    "Written by: GPT2-medium",
    "Produced by: Robert Carlock & Wendy Knoller",
    "",
    "Episode Idea: Joey and Rachel are invited to speak at McMaster University. Joey mistakenly thinks it's a stand-up comedy gig.",
    "",
    "[Scene: McMaster University Student Center. Rachel and Joey are talking]",
    "Rachel: I can’t believe you brought a microphone.",
    "Joey: You said it was a gig! Of course I brought a microphone!",
])

In [None]:
# Extract the latest scene tag, the text after it, and the recent dialogue
def extract_scene_context(text, recent_lines=5, guides=None):
    """Split a script into its last scene tag, that scene's text and recent dialogue.

    Args:
        text: Full script text generated so far.
        recent_lines: Maximum number of trailing dialogue lines to return.
            Zero or a negative value yields no dialogue lines.
        guides: Speaker tags (e.g. "Joey:") that mark a line as dialogue.
            Defaults to the module-level GUIDES. Pass an empty list to treat
            every non-empty line as dialogue.

    Returns:
        A tuple (last_scene_tag, scene_text, recent_dialogues):
        - last_scene_tag: the last "[Scene: ...]" tag found, or "" if none.
        - scene_text: the stripped text after that tag (the whole input when
          no tag is present).
        - recent_dialogues: the last `recent_lines` dialogue lines joined
          with newlines ("" when no scene tag is present).
    """
    if guides is None:
        guides = GUIDES

    scenes = list(re.finditer(r'(\[Scene:.*?\])', text))

    if not scenes:
        return "", text, ""

    last_scene = scenes[-1]
    last_scene_tag = last_scene.group(1)
    scene_text = text[last_scene.end():].strip()

    lines = scene_text.split('\n')
    if guides:
        # A line counts as dialogue when it contains any speaker tag.
        dialogue_lines = [line.strip() for line in lines if any(guide in line for guide in guides)]
    else:
        dialogue_lines = [line.strip() for line in lines if line.strip()]

    # dialogue_lines[-0:] would return *every* line, so non-positive counts
    # are handled explicitly.
    recent = dialogue_lines[-recent_lines:] if recent_lines > 0 else []

    return last_scene_tag, scene_text, "\n".join(recent)

In [None]:
# Pick next speaker
def pick_next_speaker(recent_dialogues, guides=None):
    """Choose the speaker tag for the next line of dialogue.

    Speakers who already appear in the recent dialogue are avoided; if every
    known speaker has appeared, one is picked at random from all of them.

    Args:
        recent_dialogues: Recent dialogue, either as one newline-separated
            string (as returned by extract_scene_context) or as an iterable
            of lines. Iterating a raw string would walk it character by
            character and never recognise a speaker, so strings are split
            into lines first.
        guides: Candidate speaker tags such as "Joey:". Defaults to the
            module-level GUIDES.

    Returns:
        One speaker tag taken from `guides`.
    """
    if guides is None:
        guides = GUIDES

    if isinstance(recent_dialogues, str):
        lines = recent_dialogues.splitlines()
    else:
        lines = list(recent_dialogues)

    # The speaker is whatever precedes the first colon on the line.
    recent_speakers = {line.split(":", 1)[0].strip() + ":" for line in lines if ":" in line}
    candidates = [guide for guide in guides if guide not in recent_speakers]
    return random.choice(candidates if candidates else guides)

In [None]:
# Generate summary
def generate_summary(text):
    """Produce a short and a long abstractive summary of `text`.

    Uses the module-level `summarizer` pipeline and `summarizer_tokenizer`.
    Input longer than the summarizer's maximum length is cut into chunks,
    each chunk is summarized to a medium length, and the chunk summaries are
    concatenated before the final summaries are generated.

    Args:
        text: Text to summarize.

    Returns:
        A tuple (short_summary, long_summary). Both are "" for empty or
        whitespace-only input.
    """
    if not text or not text.strip():
        return "", ""

    def _summarize(source, max_length, min_length):
        # truncation=True is a backstop so an over-long input can never
        # exceed the model's position limit.
        result = summarizer(source, max_length=max_length, min_length=min_length,
                            do_sample=False, truncation=True)[0]["summary_text"]
        # Pegasus marks sentence breaks with a literal "<n>"; turn those into
        # spaces and normalise whitespace so the markers do not leak into
        # downstream prompts.
        return " ".join(result.replace("<n>", " ").split())

    max_input_tokens = summarizer_tokenizer.model_max_length  # maximum input length of the summarizer
    # Leave head-room for the special token(s) added when a decoded chunk is
    # tokenized again inside the pipeline.
    chunk_size = max(1, max_input_tokens - 2)
    token_ids = summarizer_tokenizer(text, truncation=False, add_special_tokens=False)["input_ids"]

    # If the input is too long, split it and summarize each piece to a medium length.
    if len(token_ids) > chunk_size:
        # Generation lengths must be integers; true division would pass floats.
        chunk_max_length = max(2, max_input_tokens // 4)
        chunk_min_length = min(max_input_tokens // 6, chunk_max_length - 1)
        summaries = [
            _summarize(summarizer_tokenizer.decode(token_ids[start:start + chunk_size], skip_special_tokens=True),
                       chunk_max_length, chunk_min_length)
            for start in range(0, len(token_ids), chunk_size)
        ]
        text = " ".join(summaries)

    # Final short and long summaries.
    short_summary = _summarize(text, 20, 15)
    long_summary = _summarize(text, 80, 70)

    return short_summary, long_summary



In [None]:
# Prompt constructor
def build_prompt(version, last_scene_tag, scene_text, recent_dialogues, next_speaker):
    """Build the generation prompt for one A/B-test prompt version.

    Args:
        version: One of "Base", "WithSummary", "WithGuide" or "Summary+Guide".
            Any other value is treated like "Base".
        last_scene_tag: The current "[Scene: ...]" tag ("" if there is none).
        scene_text: Text of the current scene; summarized for the summary versions.
        recent_dialogues: Recent dialogue, either as one newline-separated
            string or as an iterable of lines.
        next_speaker: Speaker tag (e.g. "Joey:") the prompt should end with.

    Returns:
        The prompt string, ending with `next_speaker`.
    """
    # Accept both representations. Joining a plain string with "\n" would put
    # every single character on its own line.
    if isinstance(recent_dialogues, str):
        dialogue_block = recent_dialogues
    else:
        dialogue_block = "\n".join(recent_dialogues)

    if version == "WithGuide":
        return last_scene_tag + "\n" + dialogue_block + "\n\n" + next_speaker

    if version in ("WithSummary", "Summary+Guide"):
        # Summarization is expensive, so it only runs for versions that use it.
        short_summary, long_summary = generate_summary(scene_text)

        # Re-open the scene tag and put the short summary inside it, separated
        # by a space so the two texts do not run together.
        trimmed_tag = last_scene_tag.rstrip(']')
        opening = trimmed_tag + " " if trimmed_tag else "["
        prompt = opening + short_summary + "]" + "\n" + long_summary
        if version == "Summary+Guide":
            prompt += "\n" + dialogue_block
        return prompt + "\n\n" + next_speaker

    return dialogue_block + "\n" + next_speaker

In [None]:
# A/B generation loop: for every prompt version, extend the seed script for
# `num_steps` rounds of up to `step_size` new tokens each, then save the result.
step_size = 256
num_steps = 2
final_outputs = {}

for version in VERSIONS:
    current_script = base_prompt_text

    for step in range(num_steps):
        last_scene_tag, scene_text, recent_dialogues = extract_scene_context(current_script)
        # recent_dialogues is one newline-joined string; the speaker picker
        # works on individual lines.
        next_speaker = pick_next_speaker(recent_dialogues.splitlines())

        if step == 0:
            # First round: prompt with the whole seed script.
            prompt = current_script + "\n" + next_speaker
        else:
            prompt = build_prompt(version, last_scene_tag, scene_text, recent_dialogues, next_speaker)

        inputs = tokenizer(prompt, return_tensors="pt", return_attention_mask=True).to(device)
        print(prompt, end="\n\n\n\n")
        output = model.generate(
            **inputs,
            max_new_tokens=step_size,
            do_sample=True,
            top_k=50,
            top_p=0.9,
            temperature=1.0,
            repetition_penalty=1.2,
            eos_token_id=tokenizer.eos_token_id,
            # Explicit padding id: same value generate() would fall back to,
            # without the per-call warning.
            pad_token_id=tokenizer.eos_token_id,
            no_repeat_ngram_size=3
        )

        # Keep only the newly generated tokens. Slicing the decoded string by
        # len(prompt) is fragile because decoding does not always reproduce
        # the prompt text exactly.
        prompt_token_count = inputs["input_ids"].shape[1]
        new_content = tokenizer.decode(output[0][prompt_token_count:], skip_special_tokens=True).strip()
        print("new_content")
        print(new_content)

        # Store the speaker tag together with its line in every round, so the
        # next round's dialogue extraction sees the generated text instead of
        # a bare tag (or nothing at all).
        current_script += "\n" + next_speaker + " " + new_content

    final_outputs[version] = current_script

    # Only save when none of the last five lines repeats an earlier line.
    # Blank lines are ignored; they occur in the seed prompt and would
    # otherwise always count as "repeated" content.
    script_lines = [line.strip() for line in current_script.split('\n') if line.strip()]
    tail_lines = set(script_lines[-5:])
    has_repetition = any(line in tail_lines for line in script_lines[:-5])

    if not has_repetition:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        # The script contains non-ASCII punctuation, so the encoding is explicit.
        with open(os.path.join(SAVE_DIR, f"{version}_{timestamp}.txt"), "w", encoding="utf-8") as f:
            f.write(current_script)
    else:
        print(f"[{version}] repeated lines detected at the end of the script; not saved.")

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


The One With McMaster University Party
Written by: GPT2-medium
Produced by: Robert Carlock & Wendy Knoller

Episode Idea: Joey and Rachel are invited to speak at McMaster University. Joey mistakenly thinks it's a stand-up comedy gig.

[Scene: McMaster University Student Center. Rachel and Joey are talking]
Rachel: I can’t believe you brought a microphone.
Joey: You said it was a gig! Of course I brought a microphone!
Joey:



new_content
Oh really? That means we're not supposed that night, right!? But hey—it wasn't anything serious so... um.... how is this even okay in your life?! And besides—we both live with one other now—"you know," he begins his apology as if what happened has no bearing on him or anyone else there "but uhm… It seems like everyone keeps telling me about the weird people who work for Mr.[phrases redacted], but guess whose name just came up every time"? Anyway--what do these guys think they'll get out of having us here together tonight?? (they laugh nervously) Well d

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Rachel: I can’t believe you brought a microphone.
Joey: You said it was a gig! Of course I brought a microphone!
Joey:
Phoebe:





Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


new_content
[looking around] Did anybody notice the two guys in that big suit? Are they from here or did someone tell them not to be there?? Joey thinks no one is going anywhere with him right now and walks away, leaving Phineas shocked at how well he knew everything but still feels this need for knowledge of his past lives where only time has changed all their choices entirely...  And then suddenly out of nowhere Jason starts playing on an organ!!! As if these weren't bad enough - when Daphne finds her mother's body lying next To see everyone else she throws down some food (except Jimmy!) She brings up George who didn 't even mention anything about being gay/straight until maybe 20 seconds before Joe asked whether Genny liked girls as opposed just talking things over . The rest comes quickly after including Eric helping Lisa finish eating dinner ... although we also know Wendy actually had something to do so could give Michael money by getting rid off Johnny again , etc.. It makes me 

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[Scene: McMaster University Student Center. Rachel and Joey are talkingA small party has sprung into existence amongst McGill Students,.<n>It quickly becomes apparent how]
A small party has sprung into existence amongst McGill Students,.<n>They come prepared—their costumes include tinfoil hats stuffed with eggs...and an entire room full--even those who didn ts know them before take note.<n>It quickly becomes apparent how absurd such activity truly isn't because only idiots such such activity truly isn't because only idiots.

Chandler:





Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


new_content
Okay so here's my problem .<sup>In the future</h><br />I want you all to sit back down in your seats at <a href="http://www.(?:(?:\\\)?$/b_p" target= "_blank">the [[The Office of Management & Budget|OMB]]! Yours is a temporary job offer by The Business)</strong></n>" I'm going on record saying this too since it does have two possible outcomes:- You will be banned from Mizzou forever until someone solves that pesky math thing where they add up every number for example 10^{35}^1 + 1 - 35%+ 5-10\ldots = 11-(37)x33+(43)\), or even worse 3*13=(5)(3). It would almost certainly not work however once other factors were factored into the equation (like one could argue some degree more accurately than what we're now doing)- Oh well as long paul keeps working his magic he probably won' d get through whatever crap happens soon enough after our announcement day.- That sounds like something worth being happy about having happen right?! All while maintaining employment though oh boy if pe

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[Scene: McMaster University Student Center. Rachel and Joey are talking]
R
a
c
h
e
l
:
 
I
 
c
a
n
’
t
 
b
e
l
i
e
v
e
 
y
o
u
 
b
r
o
u
g
h
t
 
a
 
m
i
c
r
o
p
h
o
n
e
.


J
o
e
y
:
 
Y
o
u
 
s
a
i
d
 
i
t
 
w
a
s
 
a
 
g
i
g
!
 
O
f
 
c
o
u
r
s
e
 
I
 
b
r
o
u
g
h
t
 
a
 
m
i
c
r
o
p
h
o
n
e
!


M
o
n
i
c
a
:

Joey:





Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


new_content
So you're telling me that, uh... That I'm not supposed to look like a boy? She's the one who said something about "you don't see girls on my campus." The guy at work told her it was stupid because she wasn' t an obvious male figure in his office. You think if there were guys here from high school or college just doing their thing they'd have any of us heard anything?! He didn''t want kids acting out; he wanted them being able ti use computers for entertainment . What is this idea we got today so far ? And even though your boyfriend has been spending time with someone new every weekend since last month ' , but now people keep asking why?? Why hasn ''he" decided after 2 weeks/months (when these men could be using computer equipment) - before all boys come into our room too-to go home together as friends !!!! We've had discussions already how hard *we* spend everyday sitting around playing games ... while **boyfriends** always seem to stop by ...... How do most teens hangout a

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[Scene: McMaster University Student Center. Rachel and Joey are talkingJoe and Rachel take a look back at some of the most memorable moments in pop culture ]
Joe and Rachel take a look back at some of the most memorable moments in pop culture .<n>From Michael Jackson's return to the presidency to Donald Trump's election victory, this is a look at some of the biggest stories of the week .<n>Stay tuned for the next two days of this week's Instragram Live .
R
a
c
h
e
l
:
 
I
 
c
a
n
’
t
 
b
e
l
i
e
v
e
 
y
o
u
 
b
r
o
u
g
h
t
 
a
 
m
i
c
r
o
p
h
o
n
e
.


J
o
e
y
:
 
Y
o
u
 
s
a
i
d
 
i
t
 
w
a
s
 
a
 
g
i
g
!
 
O
f
 
c
o
u
r
s
e
 
I
 
b
r
o
u
g
h
t
 
a
 
m
i
c
r
o
p
h
o
n
e
!


R
a
c
h
e
l
:

Rachel:



new_content
The movie 'The Hunger Games' was actually quite funny; if you haven't seen it yet please be patient because its been pretty awesome on Imgur :D ! <strong><br />We decided that we should do something fun but also still relevant with our instagram post so let us know what your f