In [None]:
import re
import json
import openai
import requests

import streamlit as st

from bs4 import BeautifulSoup

from utils import RenderJSON

In [None]:
def html_to_structured_text(html_string):
    '''Convert an HTML fragment into lightly structured plain text.

    Every tag of the parsed document is visited in document order and
    rendered according to its name:

    * ``<p>``         -> the paragraph text surrounded by blank lines
    * ``<h1>``-``<h6>`` -> the heading text underlined with ``=``
    * ``<ul>``        -> one ``* item`` line per list item
    * ``<ol>``        -> one ``N. item`` line per list item, numbered from 1

    Tags with any other name produce no output of their own, but the handled
    tags nested inside them are still visited.

    Args:
        html_string: HTML markup to convert.

    Returns:
        The structured text, or ``html_string`` unchanged when none of the
        handled tags produced any output (e.g. the input is plain text).
    '''
    soup = BeautifulSoup(html_string, 'html.parser')

    heading_tags = {'h1', 'h2', 'h3', 'h4', 'h5', 'h6'}
    list_tags = ['ul', 'ol']

    def own_items(list_tag):
        # Keep only the <li> elements whose nearest enclosing list is
        # `list_tag`. Items that belong to a nested list are rendered when the
        # main loop reaches that nested list, so collecting them here as well
        # would emit them more than once.
        return [
            li for li in list_tag.find_all('li')
            if li.find_parent(list_tags) is list_tag
        ]

    # Collect the pieces and join once instead of growing a string in a loop.
    chunks = []
    for tag in soup.find_all():
        tag_name = tag.name
        if tag_name == 'p':
            chunks.append(f'\n\n{tag.text}\n\n')
        elif tag_name in heading_tags:
            title = tag.text
            chunks.append(f'\n\n{title}\n{"=" * len(title)}\n\n')
        elif tag_name == 'ul':
            chunks.extend(f'* {li.text}\n' for li in own_items(tag))
        elif tag_name == 'ol':
            chunks.extend(
                f'{position}. {li.text}\n'
                for position, li in enumerate(own_items(tag), start=1)
            )

    structured_text = ''.join(chunks)

    # Nothing recognised: hand the input back untouched.
    if structured_text == '':
        return html_string

    return structured_text

def validate_bill_id(bill_id=''):
    '''Check whether ``bill_id`` is a well-formed ProPublica bill id, e.g. "sres21-118".

    A valid id is a bill-type prefix (hr, s, hjres, sjres, hconres, sconres,
    hres or sres), the bill number, a dash, and a second number (the congress
    in the example above), with nothing before or after.

    Args:
        bill_id: Candidate id. ``None`` and other non-string values are
            rejected rather than raising.

    Returns:
        True when the whole string matches the expected shape, else False.
    '''
    if not isinstance(bill_id, str):
        return False

    # fullmatch instead of match + '$': '$' also matches just before a
    # trailing newline, which would let "hr1-118\n" through.
    pattern = r'(hr|s|hjres|sjres|hconres|sconres|hres|sres)(\d+)-(\d+)'
    return re.fullmatch(pattern, bill_id) is not None

def get_bill_summaries_official(bill_id):
    '''Fetch a bill's summaries from the Congress.gov API.

    Takes a ProPublica bill_id, e.g. "sres21-118".
    https://api.congress.gov/#/bill/bill_summaries

    Args:
        bill_id: Id made of bill type + bill number, a dash, and the congress
            number; it is checked with ``validate_bill_id`` first.

    Returns:
        The decoded JSON body when the API answers with HTTP 200. ``None``
        when ``bill_id`` fails validation or the API answers with any other
        status code (the status code is printed in that case).

    Exceptions raised by ``requests`` (connection errors, timeouts) are not
    caught here.
    '''
    if not validate_bill_id(bill_id):
        return None

    CONGRESS_API_KEY = st.secrets["CONGRESS_API_KEY"]
    # TODO: select a time for textVersion, be it specific or min/max

    # "sres21-118" -> bill "sres21", congress "118" -> type "sres", number "21"
    bill, congress = bill_id.split('-')
    bill_type, bill_n = re.split(r'(\d+)', bill)[:2]

    url = f"https://api.congress.gov/v3/bill/{congress}/{bill_type}/{bill_n}/summaries"
    # Pass the key through `params` so it is URL-encoded, and set a timeout so
    # an unresponsive server cannot block the notebook indefinitely.
    response = requests.get(url, params={"api_key": CONGRESS_API_KEY}, timeout=30)
    if response.status_code != 200:
        print("Error retrieving bill summaries. Status code:", response.status_code)
        return None

    return response.json()

In [None]:
# Load the House voting-positions file. The encoding is given explicitly so
# the JSON is decoded as UTF-8 instead of the platform's default encoding.
with open("../data/house_all_voting_positions.json", 'r', encoding='utf-8') as f:
    house_all_voting_positions = json.load(f)

# RenderJSON(house_all_voting_positions)

In [None]:
# Key of the representative to inspect in the loaded voting positions
# (presumably a Bioguide member id — confirm against the data file).
selected_rep_id = "A000370"

# Use the first entry of this representative's "Passed" list as the sample vote.
rep_positions = house_all_voting_positions[selected_rep_id]
passed_positions = rep_positions["Passed"]
voting_position = passed_positions[0]
# RenderJSON(voting_position)

In [None]:
# Look up the official summaries for the bill behind the sample vote.
vote_bill_id = voting_position['bill_id']
bill_summary = get_bill_summaries_official(vote_bill_id)
# RenderJSON(bill_summary)

# Keep only the summary version(s) whose action date equals the date of the vote.
summary_on_date = list(
    filter(
        lambda entry: entry['actionDate'] == voting_position['date'],
        bill_summary['summaries'],
    )
)
# RenderJSON(summary_on_date)

In [None]:
# Turn the HTML of the first matching summary into readable text and show it.
first_summary_html = summary_on_date[0]['text']
bill_sum_text_pretty = html_to_structured_text(first_summary_html)
print(bill_sum_text_pretty)

# NOTE: work in progress — development was paused at this point.

In [None]:
# Calculate how many tokens the summary is worth
# https://platform.openai.com/docs/guides/embeddings/how-can-i-tell-how-many-tokens-a-string-has-before-i-embed-it
# https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
import tiktoken

In [None]:
# # TODO: SENATE
# with open("house_all_voting_positions_2021-01-03_2023-03-07.json", 'r') as f:
#     house_all_voting_positions = json.load(f)

## OpenAI API
- https://platform.openai.com/docs/libraries/python-bindings
- https://github.com/openai/openai-cookbook/blob/main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb
- [How_to_count_tokens_with_tiktoken.ipynb](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb)

In [None]:
# Configure the openai module with the key stored in Streamlit secrets.
openai.api_key = st.secrets["OPENAI_API_KEY"]

# Retrieve the model listing and print every model id, one per line.
# NOTE(review): `openai.Model` looks like the pre-1.0 openai-python interface —
# confirm the installed package version.
models = openai.Model.list()
# RenderJSON(models)
listed_model_ids = (entry['id'] for entry in models['data'])
for listed_id in listed_model_ids:
    print(listed_id)

In [None]:
# NOTE(review): `model_id` is assigned here but the request below hard-codes
# "gpt-3.5-turbo" — confirm which model is intended.
model_id = 'gpt-3.5-turbo-0301'

# response = openai.Completion.create(model="gpt-3.5-turbo", prompt="Say this is a test", temperature=0, max_tokens=7)
# https://github.com/openai/openai-python
# Smoke test: send one user message and print the content of the first choice.
hello_messages = [{"role": "user", "content": "Hello world!"}]
completion = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=hello_messages,
)
first_choice = completion.choices[0]
print(first_choice.message.content)

In [None]:
# Exploration: a bare expression, so the notebook shows `openai.Model` as the cell output.
openai.Model

In [None]:
# Exploration: show the built-in help text for the `openai` package.
help(openai)