In [9]:
import os
import time

import openai
import pandas as pd
from tqdm import tqdm

In [None]:
# Configure the OpenAI API key.
# The key is read from the OPENAI_API_KEY environment variable so the secret
# does not have to be stored in the notebook; the "####" placeholder is kept
# only as a fallback for when the variable is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY", "####")  # <-- set OPENAI_API_KEY (or replace the placeholder) with your OpenAI API key

In [None]:
# Load the source CSV into a DataFrame; later cells read its "paragraph" column.
input_path = "~/original text.csv"
df = pd.read_csv(filepath_or_buffer=input_path)

In [4]:
# Restrict the run to the first 5 paragraphs of the "paragraph" column.
paragraphs = df["paragraph"].head(5).tolist()


In [5]:
# The 5 prompt templates. Each contains exactly one "{}" placeholder that is
# filled with the paragraph text via str.format. Adjacent string literals are
# concatenated by Python, so every template is still a single string.
prompts = [
    # 1. Bare rewrite instruction.
    'Rewrite the following paragraph:\n'
    'Paragraph: "{}"\n'
    'Rewritten version:',

    # 2. Question form, asking to preserve the original meaning.
    'How would you rephrase this paragraph while preserving its original meaning? \n'
    'Paragraph: "{}" \n'
    'Rephrased version:',

    # 3. Instruction form, asking to keep the main content.
    'Rephrase the following paragraph without changing the main content: \n'
    'Paragraph: "{}"\n'
    'Rephrased version:',

    # 4. Step-by-step instruction: split, rephrase, recombine.
    'Rephrase the following paragraph while preserving its meaning. Follow these steps:\n'
    '1️⃣ Split the paragraph into individual sentences.\n'
    '2️⃣ Rephrase each sentence naturally while keeping the overall flow.\n'
    '3️⃣ Combine the rephrased sentences into a coherent paragraph.\n'
    '\n'
    'Paragraph: "{}"\n'
    'Rephrased version:',

    # 5. Role-play framing addressed to the model itself.
    'Imagine you are an advanced language model capable of rephrasing text while preserving its original meaning. '
    'If this were your paragraph, how would you naturally rephrase it?\n'
    '\n'
    'Paragraph: "{}"\n'
    'Your rephrased version:',
]

In [10]:
# Storage for the rewrites: one list per prompt template ("Rephrased_1", ...),
# each receiving one entry per processed paragraph. The keys are derived from
# `prompts` so the dict always matches the templates iterated below.
results = {f"Rephrased_{i+1}": [] for i in range(len(prompts))}

# Ask gpt-4o-mini to rewrite each selected paragraph once per prompt template.
# NOTE(review): openai.ChatCompletion.create looks like the pre-1.0 openai SDK
# interface — confirm the installed openai version supports it.
for idx, paragraph in enumerate(tqdm(paragraphs, desc="Processing paragraphs")):
    print(f"Processing paragraph {idx + 1}")
    for i, prompt_template in enumerate(prompts):
        # Fill the template's single "{}" placeholder with the paragraph text.
        prompt = prompt_template.format(paragraph)
        try:
            response = openai.ChatCompletion.create(
                model="gpt-4o-mini",
                messages=[{"role": "user", "content": prompt}],
                temperature=1.0
            )
            reply = response['choices'][0]['message']['content'].strip()
        except Exception as e:
            # Best effort: report the failure and store an empty rewrite so
            # every result list stays aligned with `paragraphs`.
            print(f"Error: {e}")
            reply = ""
        results[f"Rephrased_{i+1}"].append(reply)
        # Pause between requests (presumably to stay under API rate limits).
        time.sleep(1.2)

Processing paragraphs:   0%|          | 0/5 [00:00<?, ?it/s]

Processing paragraph 1


Processing paragraphs:  20%|██        | 1/5 [00:35<02:21, 35.40s/it]

Processing paragraph 2


Processing paragraphs:  40%|████      | 2/5 [01:04<01:35, 31.75s/it]

Processing paragraph 3


Processing paragraphs:  60%|██████    | 3/5 [01:33<01:00, 30.40s/it]

Processing paragraph 4


Processing paragraphs:  80%|████████  | 4/5 [02:04<00:30, 30.55s/it]

Processing paragraph 5


Processing paragraphs: 100%|██████████| 5/5 [02:37<00:00, 31.51s/it]


In [None]:
# Merge the rewrites into the DataFrame, one column per prompt template.
# Only the first few paragraphs were rewritten, so each result list can be
# shorter than the frame; assigning a shorter list directly raises ValueError
# (length mismatch). Pad with None so unprocessed rows are simply left empty.
for i in range(5):
    column = f"Rephrased_{i+1}"
    rewrites = results[column]
    # A negative pad count yields an empty list, so an over-long result list
    # still fails loudly instead of being silently truncated.
    df[column] = rewrites + [None] * (len(df) - len(rewrites))

In [None]:
# Show the original paragraph next to its five rewrites for the first 5 rows.
print(
    df[
        [
            "paragraph",
            "Rephrased_1",
            "Rephrased_2",
            "Rephrased_3",
            "Rephrased_4",
            "Rephrased_5",
        ]
    ].head(5)
)

# Optionally, save the preview as a test file instead:
# df.head(5).to_csv("preview_rephrased_5x5.csv", index=False)