In [1]:
from openai import OpenAI
import os
import datetime
import json

from topic_defintions import topic_definitions
from example_response_dictionary import party_responses

import pprint
import json
import ast
import re

In [2]:
client = OpenAI(api_key="HIDDEN")

# client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

# ChatGPT API Function Call

In [3]:
LOG_PATH = "response_log.jsonl" 

def ask_gpt_with_search(prompt, model="gpt-4.1", log_path=LOG_PATH):
    # Make the request
    response = client.responses.create(
        model=model,
        tools=[{"type": "web_search_preview"}],
        input=prompt,
    )

    output = response.output_text

    # Try to extract sources from tool_uses
    sources = []
    if hasattr(response, "tool_uses") and response.tool_uses:
        for tool_use in response.tool_uses:
            if getattr(tool_use.tool_call, "type", None) == "web_search_preview":
                results = tool_use.outputs.get("results", [])
                for result in results:
                    sources.append({
                        "title": result.get("title"),
                        "url": result.get("url"),
                        "snippet": result.get("snippet")
                    })

    # Log it
    log_entry = {
        "timestamp": datetime.datetime.utcnow().isoformat(),
        "model": model,
        # "prompt": prompt,
        "output": output,
        "sources": sources
    }

    with open(log_path, "a") as f:
        f.write(json.dumps(log_entry) + "\n")

    return output


# Topic Definitions

In [2]:
Economic = """
Economic Policies
"Taxation": 
    -1:"Flat taxes, minimal redistribution, low corporate tax rates",
    1:"Progressive taxation, wealth taxes, high redistribution"
"Spending": 
    -1:"Austerity, reduce government spending, privatization",
    1:"Expansive public investment, social programs, stimulus spending"
"MinimumWage": 
    -1:"Abolish or minimize minimum wage; let markets decide",
    1:"Substantially increase minimum wage; living wage guarantees"
"FiscalDiscipline": 
    -1:"Low debt tolerance, strict balanced budgets, cut deficits",
    1:"Flexible deficits to support social or environmental goals"
"LabourPolicy": 
    -1:"Limit union power; flexible labor markets",
    1:"Strengthen unions, protections for gig economy and low-wage workers"
"""
Social1 = """
Social Policies 
"Healthcare":
    -1:"Privatized healthcare system, minimal public provision",
    1:"Fully public healthcare with universal access"
"Education": 
    -1:"Privatized schools, school choice, reduced state role",
    1:"Public education as a right, tuition-free post-secondary"
"CurriculumControl": 
    -1:"Parents should have strong oversight; restrict identity/sexuality content in K-12",
    1:"Support inclusive curriculum reflecting social diversity; trust educators"
"Housing": 
    -1:"Let the market solve housing; deregulation, tax credits",
    1:"Aggressive public housing, rent control, zoning reform"
"Indigenous": 
    -1:"Equal treatment under Canadian law, no special recognition",
    1:"Nation-to-nation status, land restitution, UNDRIP adoption"
"""
Social2 = """
Social Policies
"Immigration":
    -1:"Tightly restricted immigration, values screening",
    1:"High skilled and humanitarian immigration, multiculturalism"
"LGBTQ":
    -1:"Limit government support; oppose education/mandates",
    1:"Strong legal protections, cultural support, inclusive policy"
"Drugs": 
    -1:"Criminalize drug use, tough on crime, no harm reduction",
    1:"View addiction as public health issue, harm reduction
"DEI":
    -1:"Opposes DEI mandates; prefers individual merit, colorblind approaches",
    1:"Strongly supports DEI programs; believes systemic disparities require proactive correction"
"""
Environment = """
Environmental Policies
"Emissions": 
    -1:"Avoid emissions caps; prioritize economic growth",
    1:"Aggressively reduce emissions, international targets"
"CarbonTax":
    -1:"No carbon tax; burdens citizens and business",
    1:"Carbon tax is essential market solution to climate change"
"FossilFuels":
    -1:"Support oil/gas industry, reduce regulation, pipelines",
    1:"Phase out fossil fuels, divestment, end subsidies"
"GreenInvestment":
    -1:"Let market innovate green tech on its own",
    1:"Government should invest heavily in green transition"
"""
Foreign1 = """
Foreign Policy
"SpendingMilitary":
    -1:"Cut military spending, avoid foreign entanglements",
    1:"Increase defense funding, strong global presence"
"RefugeePolicy":
    -1:"Tight border controls; reduce intake to preserve resources and safety",
    1:"Expand humanitarian response; Canada has global responsibility"
"Aid": 
    -1:"Reduce foreign aid, focus on domestic issues",
    1:"Expand international aid, climate and human rights"
"""
Foreign2 = """
Foreign Policy
"Trade":
    -1:"Protect domestic industry, skeptical of globalization",
    1:"Pro free trade, liberalize markets, global partnerships"
"IsraelPalestine":
    -1:"Unconditional support for Israel, skeptical of Palestine",
    1:"Strong defense of Palestinian rights, cease occupation"
"UkraineRussia":
    -1:"Stay neutral or reduce involvement",
    1:"Strong support for Ukraine against Russian aggression"
"""
Justice = """
Justice and Law Enforcement Policies
"CriminalJustice": 
    -1:"Tough on crime, longer sentences, more policing",
    1:"Restorative justice, address systemic causes"
"FreeSpeech": 
    -1:"Minimal regulation; platform and individual freedom paramount",
    1:"Government oversight of speech to reduce harm/misinformation"
"PoliceReform": 
    -1:"Expand funding and tools for policing; prioritize enforcement and public order",
    1:"Reform or reduce police funding; invest in alternatives like mental health and community response"
"ReligiousLiberty":
    -1:"Strongly protects freedom of religion in all domains; faith-based institutions and expression should be fully accommodated",
    1:"Supports strict secularism; limits religious expression in public institutions {e.g. bans on symbols, prayer}"
"""
Science = """
"Research": 
    -1:"Private sector should fund most research",
    1:"Publicly funded science is essential to progress"
"AI": 
    -1:"Let industry regulate itself; avoid overregulation",
    1:"Proactive government regulation and AI investment"
"Internet": 
    -1:"Market-driven infrastructure; minimal subsidies",
    1:"Universal internet access as a public good"
"Privacy": 
    -1:"Minimal regulation of corporate or state data collection; prioritize security and innovation",
    1:"Strict data protection laws; transparency and user consent are fundamental rights"
"""
Goverenence = """
"Electoral": 
    -1:"Status quo Firt Past the Post; no need for reform",
    1:"Proportional representation, electoral modernization"
"Transparency": 
    -1:"Minimal public reporting; prioritize security",
    1:"Government must be transparent and accountable"
"Federalism": 
    -1:"Strong centralized federal government",
    1:"More power and autonomy to provinces"
"""

In [3]:
# combine strings into an iterable dictionary
policy_strings = {
    "Economic": Economic,
    "Social1": Social1,
    "Social2": Social2,
    "Environment": Environment,
    "Foreign1": Foreign1,
    "Foreign2": Foreign2,
    "Justice": Justice,
    "Science": Science,
    "Goverenence": Goverenence
}

# Prompt for Vote Compass

In [4]:
def generate_prompt(party_name, topic_string):
    return f"""
You are an assistant tasked with estimating where the **{party_name}** stands on a range of Canadian policy subtopics.

Each subtopic has been defined using a fixed ideological scale from -1.0 to 1.0:

- **-1.0** corresponds to strong alignment with the conservative, libertarian, or right-leaning interpretation.
- **+1.0** corresponds to strong alignment with the progressive, interventionist, or left-leaning interpretation.

Each subtopic is anchored by opposing ideological descriptions, as shown in the dictionary `topic_definitions`. Each entry includes:
- A description for `-1`
- A description for `1`
- Where 0 is assimed to be the status quo or neutral position.

{topic_string}

---

Your task is to:
1. Use only **recent, credible, and verifiable sources** (e.g. party platforms, official websites, news outlets, voting records), prioritizing content from **2023–2025**.
2. Remain **completely unbiased** and analytical.
3. If a party’s stance falls within a range (e.g. 0.5–0.6), round conservatively to the **lower bound**.
4. For each subtopic:
   - Determine the party's position on the subtopic using the definitions provided and assign a numerical value based on the topic scale between -1 and 1.
   - Avoid extreme -1 or 1 unless the party's position is unequivocally aligned with those extremes.
   - If the party's position is unclear or not well-defined, assign a value of **0.0**.

---

Your response needs to be in the following format:
1. Subtopic name 
    - "Justification: <A short one sentence justification for the position.>"
    - "Position: <A numerical value between -1 and 1>"
2. Repeat for each subtopic.
3. End each answer with a python dictionary containing the subtopic names and their corresponding positions.

Note:
- The numerical value for each subtopic position. Ensure that you provide a numerical value for each subtopic position, even if you are unsure.
- Ensure that you include a position. If there is no information online simple set the position to 0.0.

Begin your analysis for the **{party_name}** now.
"""

## Extraction the results

In [8]:
def extract_final_position_dicts(log_path="response_log.jsonl", output_txt="final_positions.txt"):
    with open(log_path, "r") as f:
        lines = f.readlines()

    all_positions = []

    for line in lines:
        data = json.loads(line)
        output = data.get("output", "")

        # Extract the dictionary inside the triple-backtick Python block
        match = re.search(r"```python\s*\n(.*?)```", output, re.DOTALL)
        if not match:
            continue  # skip if no block found

        dict_str = match.group(1).strip()

        try:
            parsed = ast.literal_eval(dict_str)
            for subtopic, position in parsed.items():
                all_positions.append({
                    "subtopic": subtopic,
                    "position": float(position)
                })
        except Exception as e:
            print(f"Failed to parse dictionary in one entry: {e}")
            continue

    # Write to .txt
    with open(output_txt, "w") as f:
        for entry in all_positions:
            f.write(f"subtopic: {entry['subtopic']}\n")
            f.write(f"position: {entry['position']}\n\n")

    print(f"Extracted {len(all_positions)} subtopic positions to {output_txt}")

# Test the API

In [9]:
# iterate through the topics and add responses to the dictionary
LOG_PATH = "liberal_responses.jsonl" 

party_name = "liberal party of Canada"

for key, value in policy_strings.items():
    prompt = generate_prompt(party_name, value)
    output = ask_gpt_with_search(prompt, model="gpt-4.1", log_path=LOG_PATH)
    print(f"Finished {key}")

extract_final_position_dicts(log_path="liberal_responses.jsonl", output_txt="liberal.txt")

  "timestamp": datetime.datetime.utcnow().isoformat(),


Finished Economic
Finished Social1
Finished Social2
Finished Environment
Finished Foreign1
Finished Foreign2
Finished Justice
Finished Science
Finished Goverenence
Extracted 32 subtopic positions to liberal.txt


In [10]:
# iterate through the topics and add responses to the dictionary
LOG_PATH = "conservative_responses.jsonl" 

party_name = "conservative party of Canada"

for key, value in policy_strings.items():
    prompt = generate_prompt(party_name, value)
    output = ask_gpt_with_search(prompt, model="gpt-4.1", log_path=LOG_PATH)
    print(f"Finished {key}")

extract_final_position_dicts(log_path="conservative_responses.jsonl", output_txt="conservative.txt")

  "timestamp": datetime.datetime.utcnow().isoformat(),


Finished Economic
Finished Social1
Finished Social2
Finished Environment
Finished Foreign1
Finished Foreign2
Finished Justice
Finished Science
Finished Goverenence
Extracted 35 subtopic positions to conservative.txt


In [11]:
# iterate through the topics and add responses to the dictionary
LOG_PATH = "ndp_responses.jsonl" 

party_name = "new democratic party of Canada"

for key, value in policy_strings.items():
    prompt = generate_prompt(party_name, value)
    output = ask_gpt_with_search(prompt, model="gpt-4.1", log_path=LOG_PATH)
    print(f"Finished {key}")

extract_final_position_dicts(log_path="ndp_responses.jsonl", output_txt="ndp.txt")

  "timestamp": datetime.datetime.utcnow().isoformat(),


Finished Economic
Finished Social1
Finished Social2
Finished Environment
Finished Foreign1
Finished Foreign2
Finished Justice
Finished Science
Finished Goverenence
Extracted 35 subtopic positions to ndp.txt


# Get the justifications

In [15]:
def extract_justification_blocks(
    log_path="response_log.jsonl",
    justifications_txt="justifications.txt"
):
    with open(log_path, "r") as f:
        lines = f.readlines()

    all_blocks = []

    for line in lines:
        data = json.loads(line)
        output = data.get("output", "")

        # Match everything that looks like: 1. **Subtopic**\n   - Justification: ...\n   - Position: ...
        blocks = re.findall(
            r"\d+\.\s+\*\*(.*?)\*\*\n(.*?)(?=\n\d+\.|\Z)",
            output,
            re.DOTALL
        )

        for subtopic, body in blocks:
            all_blocks.append({
                "subtopic": subtopic.strip(),
                "text": body.strip()
            })

    # Write to text file
    with open(justifications_txt, "w") as f:
        for entry in all_blocks:
            f.write(f"subtopic: {entry['subtopic']}\n")
            f.write(entry['text'] + "\n\n")

    print(f"Exported {len(all_blocks)} justification blocks to {justifications_txt}")


In [16]:
extract_justification_blocks(
    log_path="liberal_responses.jsonl",
    justifications_txt="liberal_justifications.txt"
)

extract_justification_blocks(
    log_path="conservative_responses.jsonl",
    justifications_txt="conservative_justifications.txt"
)

extract_justification_blocks(
    log_path="ndp_responses.jsonl",
    justifications_txt="ndp_justifications.txt"
)

Exported 35 justification blocks to liberal_justifications.txt
Exported 35 justification blocks to conservative_justifications.txt
Exported 35 justification blocks to ndp_justifications.txt


# Turn into dictionaries

In [16]:
def update_party_responses_from_txt(party_responses, txt_path):
    # Read and extract subtopic–position pairs
    with open(txt_path, "r") as f:
        text = f.read()

    # Find all subtopic-position pairs
    matches = re.findall(r"subtopic:\s*(.*?)\nposition:\s*([+-]?[0-9]*\.?[0-9]+)", text)

    for subtopic, pos_str in matches:
        position = float(pos_str)
        found = False

        for category in party_responses:
            if subtopic in party_responses[category]:
                party_responses[category][subtopic]["position"] = position
                found = True
                break

        if not found:
            print(f"[WARNING] Subtopic '{subtopic}' not found in any category.")

    return party_responses



In [17]:
import copy

ndp_positions = update_party_responses_from_txt(copy.deepcopy(party_responses), "ndp.txt")
liberal_positions = update_party_responses_from_txt(copy.deepcopy(party_responses), "liberal.txt")
conservative_positions = update_party_responses_from_txt(copy.deepcopy(party_responses), "conservative.txt")

# Save the Data

In [23]:
import pickle

with open('../data/GPT_liberal.pkl', 'wb') as f:
    pickle.dump(liberal_positions, f)

with open('../data/GPT_ndp.pkl', 'wb') as f:
    pickle.dump(ndp_positions, f)

with open('../data/GPT_conservative.pkl', 'wb') as f:
    pickle.dump(conservative_positions, f)