In [1]:
from langchain_openai import ChatOpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate
from openai import OpenAI
import pandas as pd
import os
import json

In [2]:
personas = ['You are an expert social scientist with a PhD in political science. ',
            'You are highly intelligent. ',
            'You are a professor of economics. ']

encouragements = ['This will be fun! ',
                  'Think carefully about your answer. ',
                  'I really need your help. ']

general = '''You are conducting research on the policy positions that
European parties in parliamentary democracies take in their political
manifestos. Political manifestos are documents parties produce to explain
their policy positions to voters in an election. For the following text of
a party manifesto, please classify the party position on the overall
orientation of the party '''

ending = '''Only give the score and a one sentence explanation. Return it in the form of 
a valid json string  without any Markdown formatting: 
{
"score": 5,
"explanation": "The party takes a middle position on this policy."
} 
'''

policy_scales = {'european_union': '''toward the European Union. Classify the manifesto
on this policy using a seven point scale, where a 1 means strongly opposed,
a 2 means opposed, a 3 means somewhat opposed, a 4 means neutral, a 5 means
somewhat in favor, a 6 means in favor, and a 7 means strongly in favor of
the European Union. If the text of the manifesto does not provide a clear
position on the European Union, return the result of NA (meaning non-
applicable).''',

'taxation': '''toward Spending versus Taxation. Classify the
manifesto on this policy using a ten point scale, where a 1 means strongly
favors improving public services, a 5 means the party takes a position in
the middle or balanced position between raising spending and reducing
taxes, and a 10 means strongly favors reducing taxes. If the text of the
manifesto does not provide a clear position on spending vs taxation, return
the result of NA (meaning non-applicable).''',

'lifestyle': '''toward Social and Lifestyle policies (for example,
homosexuality). Classify the manifesto on this policy using a ten point
scale, where a 1 means strongly supports liberal social policies, a 5 means
the party takes a position in the middle or balanced position on social
policies, and a 10 means strongly opposes liberal social policies. If the
text of the manifesto does not provide a clear position on social and
lifestyle policies, return the result of NA (meaning non-applicable).''',

'immigration': '''toward Immigration. Classify the manifesto on
this policy using a ten point scale, where a 1 means strongly opposes a
tough immigration policy and wants more open borders, a 5 means the party
takes a position in the middle or balanced position on immigration, and a
10 means strongly favors a tough immigration policy that reduces the number
of immigrants to the country. If the text of the manifesto does not
provide a clear position on immigration, return the result of NA (meaning
non-applicable).''',

'environment': '''toward the Environment. Classify the manifesto on
this policy using a ten point scale, where a 1 means strongly supports
environmental protection even at the cost of economic growth, a 5 means the
party takes a position in the middle or balanced position between
protecting the environment and encouraging economic growth, and a 10 means
strongly supports economic growth even at the cost of environmental
protection. If the text of the manifesto does not provide a clear position
on the environment, return the result of NA (meaning non-applicable).''',

'decentralization': '''toward Political Decentralization to Regions.
Classify the manifesto on this policy using a ten point scale, where a 1
means strongly strongly favors political decentralization, a 5 means the
party takes a position in the middle or balanced position on
decentralization, and a 10 means strongly opposes political
decentralization. If the text of the manifesto does not provide a clear
position on political decentralization, return the result of NA (meaning
non-applicable).''',
}

policy_areas = {'european_union': 'the European Union and European integration',
                'taxation': 'taxation, public spending and government services',
                'lifestyle': 'social and lifestyle policies including issues like homosexuality and DEI issues',
                'immigration': 'immigration and border control',
                'environment': 'environmental protection and the trade-offs with economic growth',
                'decentralization': 'political decentralization and the role of regional governments'}

In [3]:
# EDIT THIS WITH YOUR OWN FILE
filepath = '../data/plaintext/0 Calibration - HUN 2010 Soc Dem.txt'

# OPTIONS ARE 'european_union', 'taxation', 'lifestyle', 'immigration', 'environment', 'decentralization'
issue_area = 'taxation'
if issue_area not in policy_scales:
    raise ValueError(f'Invalid issue area: {issue_area}')

# OPTIONS ARE 'gpt-4o', 'gpt-4', 'gpt-3.5-turbo'
model = "gpt-3.5-turbo"
if model not in ['gpt-4o', 'gpt-4', 'gpt-3.5-turbo']:
    raise ValueError(f'Invalid model: {model}')

# Read the long manifesto text from a .txt file
with open(filepath, 'r') as file:
    long_manifesto_text = file.read()

In [4]:
system_prompts = [ (persona + encouragement + general + policy_scales[issue_area] + ending).replace('\n', ' ') for persona in personas for encouragement in encouragements]

In [5]:
# Function to summarize text with a specified chunk size
def summarize_text(text, issue_area, chunk_size=5000, overlap=250):
    # Split the text into manageable chunks
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=overlap)
    chunks = text_splitter.split_text(text)
    
    # Summarize each chunk
    summaries = []
    for chunk in chunks:
        print(f'Summarized so far: {len(summaries)} out of {len(chunks)} chunks', end='\r')
        prompt = ChatPromptTemplate.from_messages(
            [
                (
                    "system",
                    '''You are a helpful text summarizer. You should detect the initial language and output the summaries in English 
                    you want to retain important policy information related to {issue_area}. Return the summaries as a list with minimal
                    formatting. ''',
                ),
                ("human", "{input}"),
            ]
        )
        llm=ChatOpenAI(temperature=0, model_name=model)
        summarize_chain = prompt | llm
        summary = summarize_chain.invoke({"input": chunk, "issue_area": issue_area})
        summaries.append(summary.content)

    # Combine all summaries into one final summary
    final_summary = " ".join(summaries)
    return summarize_chain.invoke({"input": final_summary, "issue_area": issue_area}).content


In [6]:
# Summarize the long manifesto text
manifesto_summary = summarize_text(long_manifesto_text, policy_areas.get('issue_area', 'general policy issues'))
print(manifesto_summary)

1. Offer a social democratic program focusing on equality, freedom, and national reconciliation while defending democracy and parliamentary republic.
2. Protect minority rights, oppose discrimination, support Hungarian minorities, and promote the use of the Hungarian language.
3. Increase state support in healthcare, education, culture, research, and science.
4. Create and protect jobs through state investments, ensure social security, tax high incomes, support workers, and promote workers' rights.
5. Establish free choice of healthcare institutions, increase healthcare workers' income, and uphold human dignity.
6. Create a health insurance fund with parliamentary oversight, maintain national social insurance in healthcare.
7. Guarantee pension value stability by the state, allow retirement after 40 years of service.
8. Improve education funding, ensure equal opportunities, academic freedom, link vocational training to wages, and enhance university support.
9. Ensure credible mass medi

In [7]:
# Check if the final summary is still too long, if so, summarize again
if len(manifesto_summary) > 5000:
    print(f'Condensing final summary since it is {len(manifesto_summary)} characters')
    manifesto_summary =  summarize_text(manifesto_summary, issue_area)

# Check again
if len(manifesto_summary) > 5000:
    print(f'Warning summary is still long: {len(manifesto_summary)} characters, continuing anyway')

In [8]:
# Define a function to call the OpenAI API with ChatGPT-4
def analyze_manifesto(messages):
    client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
    response = client.chat.completions.create(model=model,
                                                       messages=messages,
                                                       max_tokens=150,
                                                       temperature=0)

    return response.choices[0].message.content.strip()


def craft_prompt(system_prompt, manifesto):
    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"Analyze the following text:\n\n{manifesto}"}
    ]
# Define various prompt templates with examples for calibration
prompts = [craft_prompt(system_prompt, manifesto_summary) for system_prompt in system_prompts]


In [9]:
# Analyze the manifesto summary with different prompts
results = []
for prompt in prompts:
    response = analyze_manifesto(prompt)
    response_dict = json.loads(response)
    response_dict['prompt'] = prompt
    results.append(response_dict)


In [10]:

# Display the results in a DataFrame
df_results = pd.DataFrame(results)
df_results['filepath'] = filepath
df_results['issue_area'] = issue_area
df_results

Unnamed: 0,score,explanation,prompt,filepath,issue_area
0,1,The party strongly favors improving public ser...,"[{'role': 'system', 'content': 'You are an exp...",../data/plaintext/0 Calibration - HUN 2010 Soc...,taxation
1,1,The party strongly favors improving public ser...,"[{'role': 'system', 'content': 'You are an exp...",../data/plaintext/0 Calibration - HUN 2010 Soc...,taxation
2,1,The party strongly favors improving public ser...,"[{'role': 'system', 'content': 'You are an exp...",../data/plaintext/0 Calibration - HUN 2010 Soc...,taxation
3,1,The party strongly favors improving public ser...,"[{'role': 'system', 'content': 'You are highly...",../data/plaintext/0 Calibration - HUN 2010 Soc...,taxation
4,1,The party strongly favors improving public ser...,"[{'role': 'system', 'content': 'You are highly...",../data/plaintext/0 Calibration - HUN 2010 Soc...,taxation
5,1,The party strongly favors improving public ser...,"[{'role': 'system', 'content': 'You are highly...",../data/plaintext/0 Calibration - HUN 2010 Soc...,taxation
6,1,The party strongly favors improving public ser...,"[{'role': 'system', 'content': 'You are a prof...",../data/plaintext/0 Calibration - HUN 2010 Soc...,taxation
7,1,The party strongly favors improving public ser...,"[{'role': 'system', 'content': 'You are a prof...",../data/plaintext/0 Calibration - HUN 2010 Soc...,taxation
8,1,The party strongly favors improving public ser...,"[{'role': 'system', 'content': 'You are a prof...",../data/plaintext/0 Calibration - HUN 2010 Soc...,taxation


In [11]:
df_results.to_excel('manifesto_analysis_results.xlsx', index=False)