# Political Chatbot Analysis: Canadian Bill Importance Evaluation

This notebook performs an analysis of Canadian bills using a **text-based** approach. The goal is to evaluate the importance of each bill using **LLMs** and divide the text into smaller chunks to assess their significance in deciding whether to pass the bill.

In [12]:
import json
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
import openai
from dotenv import load_dotenv
import os
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field
from langchain.output_parsers import EnumOutputParser
from enum import Enum

# load the .env file
load_dotenv()
openai_api_key = os.getenv('OPENAI_API_KEY')
openai.api_key = openai_api_key

# load the bill data
with open("detailed_bills_with_full_text.json", "r", encoding="utf-8") as file:
    bills_data = json.load(file)

full_text = bills_data[0]['full_text'] 

# split bill into chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,  
    length_function=len,  
    is_separator_regex=False,  
)

chunks = text_splitter.split_text(full_text)


### Qualitative level using EnumOutputParser

In [27]:
class ImportanceLevel(str, Enum):
    LOW = "LOW"
    MEDIUM = "MEDIUM"
    HIGH = "HIGH"
parser = EnumOutputParser(enum=ImportanceLevel)

llm = ChatOpenAI(model="gpt-4o-mini", openai_api_key=openai_api_key, temperature=0.2)

prompt_template = """
You are a legislative assistant tasked with analyzing sections of a bill. Your goal is to assess the importance of the given section in deciding whether the bill should be passed.

Here is the section of the bill:
{text}

Classify the importance as either LOW, MEDIUM, or HIGH. Respond with only one of these words.
"""

prompt = PromptTemplate(input_variables=["text"], template=prompt_template)

def analyze_importance_level(chunk):
    formatted_prompt = prompt.format(text=chunk)
    result = llm.invoke(formatted_prompt).content
    importance_level = parser.parse(result.strip())
    return importance_level

### Rank one chunk (for testing)

In [28]:
importance_level = analyze_importance_level(chunks[18])
print(f"Chunk Text Preview: {chunks[0][:100]}...")
print(f"Importance Level: {importance_level}")

Chunk Text Preview: ENGLISH
Cover
Cover
Summary
Summary
BILL C-79
BILL C-79
First Session, Forty-fourth Parliament,
70-7...
Importance Level: MEDIUM


### Quantitative score using PydanticOutputParser

In [29]:
# Pydantic model for the response
class ImportanceScore(BaseModel):
    score: float = Field(..., ge=0, le=100, description="Quantitative importance score as a percentage (0 to 100).")

# PydanticOutputParser
parser = PydanticOutputParser(pydantic_object=ImportanceScore)

# initialize LLM
llm = ChatOpenAI(model="gpt-4o-mini", openai_api_key=openai_api_key, temperature=0.2)

prompt_template = """
You are a legislative assistant tasked with analyzing sections of a bill. Your goal is to assess the importance of the given section in deciding whether the bill should be passed.

Here is the section of the bill:
{text}

Provide a quantitative importance score as a percentage (0% to 100%) where:
- 0% means the section has no importance in deciding whether the bill should pass.
- 100% means the section is crucial for the bill's decision.
Respond with only the numeric percentage (no text).

Respond in valid JSON format as follows:
{{ "score": <percentage_value> }}
"""

prompt = PromptTemplate(input_variables=["text"], template=prompt_template)

def analyze_importance(chunk):
    """Analyze the importance of a bill chunk using LLM."""
    formatted_prompt = prompt.format(text=chunk)
    result = llm.invoke(formatted_prompt).content.strip()
    # importance = parser.parse(result.strip())
    return parser.parse(result).score


### Quantify one chunk (for testing)

In [30]:
importance = analyze_importance(chunks[18])
print(f"Chunk Text Preview: {chunks[0][:100]}...")
print(f"Importance Score: {importance}%")

Chunk Text Preview: ENGLISH
Cover
Cover
Summary
Summary
BILL C-79
BILL C-79
First Session, Forty-fourth Parliament,
70-7...
Importance Score: 65.0%


### Quantify all chunks

In [None]:
importances = [analyze_importance(chunk) for chunk in chunks]

for i, (chunk, importance) in enumerate(zip(chunks, importances)):
    print(f"\nChunk {i+1}:")
    print(f"Text preview: {chunk[:100]}...")
    print(f"Importance: {importance}")