In [14]:
import json
import pandas as pd
import io
from openai import OpenAI

In [None]:
client = OpenAI(api_key = "My-Open AI-Key")

In [72]:
def summarize_titles(instructions):
    system_prompt = (
        # basic description of the task
        "You are a very helpful assistant.\n"
        "You are given a dataset file, which contains a list of news articles.\n"
        "Each article is a dictionary with fields: 'company', 'title', 'date', 'summary'.\n"
        "Your task is to use the 'company' and 'title' to generate a structured table with three columns: Company, Sentiment, and Summary.\n"

        # sentiment analysis
        "Use a deep language model (such as BERT) for sentiment analysis, focusing on the actual meaning rather than relying only on keywords.\n"
        "Sentiment should be Positive, Negative, or Neutral based on the title.\n"
        
        # summary generation
        "For the Summary column, rephrase and summarize the title based on deep semantic understanding, as if using a paraphrasing model like BART or T5.\n"
        "Rephrase each title into 1–2 concise sentences, capturing the key idea. Do NOT repeat the title verbatim, do NOT write generic placeholders such as 'Paraphrased summary of the title.'\n"
        "You must perform the analysis yourself. Do NOT define or refer to any additional functions (such as paraphrase_title()) or similar.\n"
        "DO NOT create summaries based on the company name or sentiment label.\n"
        "DO NOT use any generic templates or repetitive phrases like 'insights about operations' or 'provides outlook.'\n"
        "Do NOT generate or suggest any sample code. Only produce the final table content directly.\n"
        "Every row must reflect the actual information contained in the corresponding title.\n"
        "Summary should paraphrase the title in 1-2 sentences, without simply copying or repeating the original title wording.\n"
        "Every summary must reflect the actual content and meaning of the title, not generalize based on sentiment.\n"
        "Do NOT define or refer to any functions. Do NOT generate sample code. Only produce the final summarized content directly.\n"

        # instruction to the model
        "Do not include any explanation, sample code, or notes outside the table.\n"
        "Do not invent information not present in the title.\n"
        "Strictly follow the upper instructions and do not add any additional information."
        "If you do not strictly rephrase each title individually based on its actual content, the output will be considered invalid.\n"
    )

    user_prompt = (
        f"Instruction: {instruction}\n"
        "Return ONLY executable Python code, with brief explanations."
        "Output a Markdown table with columns: Company | Sentiment | Summary."
    )

    response = client.chat.completions.create(
        model="gpt-4-turbo",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ]
    )

    code = response.choices[0].message.content
    code = code.replace("```python", "").replace("```", "").strip()
    return code

In [67]:
# Ask for user input instruction
instruction = input("Enter your instruction: ")

In [68]:
print("Your instruction:", instruction)

Your instruction: Given a dataset company.json, pls give me the sentiment analysis and summary of the title as a Markdown table. DO NOT create summaries based on the company name or sentiment label. Do NOT repeat the title.


In [73]:
generated_code = summarize_titles(instruction)

In [74]:
print("Generated code:\n")
print(generated_code)

Generated code:

import json
from transformers import pipeline

# Load the dataset
with open('company.json', 'r') as file:
    data = json.load(file)

# Set up the sentiment analysis pipeline
sentiment_analyzer = pipeline("sentiment-analysis")

# Prepare the markdown table
markdown_table = "Company | Sentiment | Summary\n--- | --- | ---\n"

# Set up the paraphrasing model
paraphraser = pipeline("text2text-generation", model="t5-base")

for article in data:
    company = article['company']
    title = article['title']
    
    # Sentiment analysis
    sentiment_result = sentiment_analyzer(title)
    sentiment_label = sentiment_result[0]['label']
    if sentiment_label == 'LABEL_0':
        sentiment = 'Negative'
    elif sentiment_label == 'LABEL_1':
        sentiment = 'Neutral'
    else:
        sentiment = 'Positive'  # Assuming LABEL_2 is 'Positive'
    
    # Paraphrasing the title
    paraphrased_title = paraphraser(f"paraphrase: {title}", max_length=50)[0]['generated_text']
    


In [75]:
with open("generated_sentiment_code.txt", "w", encoding="utf-8") as f:
    f.write(generated_code)

print("Generated code has been saved to 'generated_sentiment_code.txt'")

Generated code has been saved to 'generated_sentiment_code.txt'
