In [53]:
from openai import OpenAI
import os
import pandas as pd
import sqlite3
from dotenv import load_dotenv
from tqdm.notebook import tqdm
from groq import Groq


In [58]:
# Load environment variables (python-dotenv) before the API keys are read below.
load_dotenv()

# SQLite database that get_bill_data() connects to.
db_path = "data/bill_data.db"

openai_model = "gpt-3.5-turbo-0125"
# Completion length cap; query_openai() forwards it as max_tokens.
max_length = 256

# os.getenv returns None when a key is unset; the clients receive that value as-is.
openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
groq_client = Groq(api_key=os.getenv("GROQ_API_KEY"))

groq_model = "mixtral-8x7b-32768"

# System instructions shared by every summarization request.
# The encoding is pinned so the prompt is decoded the same way on every
# platform instead of depending on the locale default.
with open('summarize_prompt.txt', 'r', encoding='utf-8') as file:
    prompt_instructions = file.read()

In [59]:
def create_prompt(instructions, query):
    """Build a two-message chat payload.

    Args:
        instructions: Text placed in the ``system`` message.
        query: Text placed in the ``user`` message.

    Returns:
        A list ``[system_message, user_message]`` of ``{"role", "content"}`` dicts.
    """
    system_message = {"role": "system", "content": instructions}
    user_message = {"role": "user", "content": query}
    return [system_message, user_message]

def query_openai(client, instructions, query):
    """Request a chat completion and return the text of its first choice.

    The request uses the module-level ``openai_model`` and limits the reply
    to ``max_length`` tokens.

    Args:
        client: Object exposing ``chat.completions.create(...)``.
        instructions: System-message text.
        query: User-message text.

    Returns:
        ``choices[0].message.content`` of the response.
    """
    messages = create_prompt(instructions, query)
    request = {
        "model": openai_model,
        "messages": messages,
        "max_tokens": max_length,
    }
    completion = client.chat.completions.create(**request)
    first_choice = completion.choices[0]
    return first_choice.message.content

def query_groq(client, instructions, query):
    """Request a chat completion for ``groq_model`` and return the first choice's text.

    Only the model name and the messages are sent; no token limit is passed.

    Args:
        client: Object exposing ``chat.completions.create(...)``.
        instructions: System-message text.
        query: User-message text.

    Returns:
        ``choices[0].message.content`` of the response.
    """
    messages = create_prompt(instructions, query)
    request = {"model": groq_model, "messages": messages}
    completion = client.chat.completions.create(**request)
    first_choice = completion.choices[0]
    return first_choice.message.content

In [27]:
def get_bill_data(limit=None):
    """Load bills joined with their text from SQLite, one row per bill.

    Reads from the database at the module-level ``db_path``. A version suffix
    is extracted from each ``readable_text`` (the first word immediately
    followed by a colon, e.g. ``EH`` in ``"113 HCON 1 EH: ..."``). For every
    (bill_congress, bill_type, bill_number) the row whose suffix sorts highest
    as a string is kept; on ties the earliest fetched row wins. A bill with no
    extractable suffix (for instance no matching ``bill_text`` row) still
    yields one row, with a missing suffix.

    NOTE(review): string order is used as a proxy for "latest version";
    confirm that it matches the real ordering of version codes.

    Args:
        limit: Optional maximum number of joined rows to fetch. It is applied
            in SQL before de-duplication, so the result can contain fewer
            bills than ``limit``. ``None`` or a non-positive value fetches
            every row.

    Returns:
        pandas.DataFrame with the selected columns plus
        ``bill_version_suffix``, ordered by the three key columns and keeping
        the row labels of the raw query result.
    """
    # NOTE(review): the join does not compare bill_type; presumably
    # bill_number already embeds the type (e.g. 'HCONRES1') -- confirm.
    query = '''
        SELECT
            b.bill_congress,
            b.bill_type,
            b.bill_number,
            b.bill_version,
            b.policy_area,
            b.subjects,
            bt.readable_text
        FROM bills b
        LEFT JOIN bill_text bt ON b.bill_congress = bt.bill_congress
                               AND b.bill_number = bt.bill_number
    '''

    params = None
    if limit is not None and limit > 0:
        # Bind the limit as a parameter rather than formatting it into the SQL.
        query += '\nLIMIT ?'
        params = (int(limit),)

    conn = sqlite3.connect(db_path)
    try:
        df = pd.read_sql_query(query, conn, params=params)
    finally:
        # Release the connection even when the query fails.
        conn.close()

    # Extract bill version suffixes from the readable_text column
    df['bill_version_suffix'] = df['readable_text'].str.extract(r'(\b\w+\b)(?=:)', expand=False)

    bill_keys = ['bill_congress', 'bill_type', 'bill_number']

    # Rank rows inside each bill by suffix, highest first and missing last,
    # then keep the first row per bill. Sorting (instead of looking up a
    # per-group idxmax label) keeps working when a bill has no suffix at all.
    # Rows with a missing key never belonged to any group, so they stay excluded.
    ranked = df.dropna(subset=bill_keys).sort_values(
        bill_keys + ['bill_version_suffix'],
        ascending=[True, True, True, False],
        na_position='last',
    )
    return ranked.drop_duplicates(subset=bill_keys, keep='first')

# Put the pandas display settings touched elsewhere back to their defaults
# so the preview below is rendered with standard wrapping and truncation.
for _option_name in ('display.max_colwidth', 'display.width'):
    pd.reset_option(_option_name)

# Fetch a small sample; the limit applies to the joined SQL rows, so fewer
# bills may remain after get_bill_data() keeps one row per bill.
bill_df = get_bill_data(limit=10)
print(bill_df.head(5))

   bill_congress bill_type bill_number bill_version            policy_area  \
1            113   HCONRES    HCONRES1        3.0.0               Congress   
2            113   HCONRES   HCONRES10        3.0.0              Education   
5            113   HCONRES  HCONRES100        3.0.0               Congress   
7            113   HCONRES  HCONRES101        3.0.0  International Affairs   
8            113   HCONRES  HCONRES102        3.0.0                 Health   

                                            subjects  \
1          Congressional operations and organization   
2  Assault and harassment offenses,Commemorative ...   
5  Commemorative events and holidays,Human rights...   
7  Asia,Congressional oversight,Detention of pers...   
8  Child health,Child safety and welfare,Commemor...   

                                       readable_text bill_version_suffix  
1  113 HCON 1 EH: Regarding consent to assemble o...                  EH  
2  113 HCON 10 IH: Supporting the goals and 

In [57]:
# Register tqdm's progress_apply on pandas objects with a labelled bar.
tqdm.pandas(desc="Generating summaries...")


def _summarize_text(text):
    """Summarize one bill's readable text through the Groq client."""
    return query_groq(groq_client, prompt_instructions, text)


# One request per row; the results are stored in a new 'summary' column.
bill_df['summary'] = bill_df['readable_text'].progress_apply(_summarize_text)
print(bill_df.head())

Generating summaries...:   0%|          | 0/6 [00:00<?, ?it/s]

InternalServerError: <!DOCTYPE html>
<!--[if lt IE 7]> <html class="no-js ie6 oldie" lang="en-US"> <![endif]-->
<!--[if IE 7]>    <html class="no-js ie7 oldie" lang="en-US"> <![endif]-->
<!--[if IE 8]>    <html class="no-js ie8 oldie" lang="en-US"> <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en-US"> <!--<![endif]-->
<head>


<title>api.groq.com | 502: Bad gateway</title>
<meta charset="UTF-8" />
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<meta http-equiv="X-UA-Compatible" content="IE=Edge" />
<meta name="robots" content="noindex, nofollow" />
<meta name="viewport" content="width=device-width,initial-scale=1" />
<link rel="stylesheet" id="cf_styles-css" href="/cdn-cgi/styles/main.css" />


</head>
<body>
<div id="cf-wrapper">
    <div id="cf-error-details" class="p-0">
        <header class="mx-auto pt-10 lg:pt-6 lg:px-8 w-240 lg:w-full mb-8">
            <h1 class="inline-block sm:block sm:mb-2 font-light text-60 lg:text-4xl text-black-dark leading-tight mr-2">
              <span class="inline-block">Bad gateway</span>
              <span class="code-label">Error code 502</span>
            </h1>
            <div>
               Visit <a href="https://www.cloudflare.com/5xx-error-landing?utm_source=errorcode_502&utm_campaign=api.groq.com" target="_blank" rel="noopener noreferrer">cloudflare.com</a> for more information.
            </div>
            <div class="mt-3">2024-03-25 08:31:10 UTC</div>
        </header>
        <div class="my-8 bg-gradient-gray">
            <div class="w-240 lg:w-full mx-auto">
                <div class="clearfix md:px-8">
                  
<div id="cf-browser-status" class=" relative w-1/3 md:w-full py-15 md:p-0 md:py-8 md:text-left md:border-solid md:border-0 md:border-b md:border-gray-400 overflow-hidden float-left md:float-none text-center">
  <div class="relative mb-10 md:m-0">
    
    <span class="cf-icon-browser block md:hidden h-20 bg-center bg-no-repeat"></span>
    <span class="cf-icon-ok w-12 h-12 absolute left-1/2 md:left-auto md:right-0 md:top-0 -ml-6 -bottom-4"></span>
    
  </div>
  <span class="md:block w-full truncate">You</span>
  <h3 class="md:inline-block mt-3 md:mt-0 text-2xl text-gray-600 font-light leading-1.3">
    
    Browser
    
  </h3>
  <span class="leading-1.3 text-2xl text-green-success">Working</span>
</div>

<div id="cf-cloudflare-status" class=" relative w-1/3 md:w-full py-15 md:p-0 md:py-8 md:text-left md:border-solid md:border-0 md:border-b md:border-gray-400 overflow-hidden float-left md:float-none text-center">
  <div class="relative mb-10 md:m-0">
    <a href="https://www.cloudflare.com/5xx-error-landing?utm_source=errorcode_502&utm_campaign=api.groq.com" target="_blank" rel="noopener noreferrer">
    <span class="cf-icon-cloud block md:hidden h-20 bg-center bg-no-repeat"></span>
    <span class="cf-icon-ok w-12 h-12 absolute left-1/2 md:left-auto md:right-0 md:top-0 -ml-6 -bottom-4"></span>
    </a>
  </div>
  <span class="md:block w-full truncate">Boston</span>
  <h3 class="md:inline-block mt-3 md:mt-0 text-2xl text-gray-600 font-light leading-1.3">
    <a href="https://www.cloudflare.com/5xx-error-landing?utm_source=errorcode_502&utm_campaign=api.groq.com" target="_blank" rel="noopener noreferrer">
    Cloudflare
    </a>
  </h3>
  <span class="leading-1.3 text-2xl text-green-success">Working</span>
</div>

<div id="cf-host-status" class="cf-error-source relative w-1/3 md:w-full py-15 md:p-0 md:py-8 md:text-left md:border-solid md:border-0 md:border-b md:border-gray-400 overflow-hidden float-left md:float-none text-center">
  <div class="relative mb-10 md:m-0">
    
    <span class="cf-icon-server block md:hidden h-20 bg-center bg-no-repeat"></span>
    <span class="cf-icon-error w-12 h-12 absolute left-1/2 md:left-auto md:right-0 md:top-0 -ml-6 -bottom-4"></span>
    
  </div>
  <span class="md:block w-full truncate">api.groq.com</span>
  <h3 class="md:inline-block mt-3 md:mt-0 text-2xl text-gray-600 font-light leading-1.3">
    
    Host
    
  </h3>
  <span class="leading-1.3 text-2xl text-red-error">Error</span>
</div>

                </div>
            </div>
        </div>

        <div class="w-240 lg:w-full mx-auto mb-8 lg:px-8">
            <div class="clearfix">
                <div class="w-1/2 md:w-full float-left pr-6 md:pb-10 md:pr-0 leading-relaxed">
                    <h2 class="text-3xl font-normal leading-1.3 mb-4">What happened?</h2>
                    <p>The web server reported a bad gateway error.</p>
                </div>
                <div class="w-1/2 md:w-full float-left leading-relaxed">
                    <h2 class="text-3xl font-normal leading-1.3 mb-4">What can I do?</h2>
                    <p class="mb-6">Please try again in a few minutes.</p>
                </div>
            </div>
        </div>

        <div class="cf-error-footer cf-wrapper w-240 lg:w-full py-10 sm:py-4 sm:px-8 mx-auto text-center sm:text-left border-solid border-0 border-t border-gray-300">
  <p class="text-13">
    <span class="cf-footer-item sm:block sm:mb-1">Cloudflare Ray ID: <strong class="font-semibold">869d8a2e1f548fc9</strong></span>
    <span class="cf-footer-separator sm:hidden">&bull;</span>
    <span id="cf-footer-item-ip" class="cf-footer-item hidden sm:block sm:mb-1">
      Your IP:
      <button type="button" id="cf-footer-ip-reveal" class="cf-footer-ip-reveal-btn">Click to reveal</button>
      <span class="hidden" id="cf-footer-ip">38.42.206.96</span>
      <span class="cf-footer-separator sm:hidden">&bull;</span>
    </span>
    <span class="cf-footer-item sm:block sm:mb-1"><span>Performance &amp; security by</span> <a rel="noopener noreferrer" href="https://www.cloudflare.com/5xx-error-landing?utm_source=errorcode_502&utm_campaign=api.groq.com" id="brand_link" target="_blank">Cloudflare</a></span>
    
  </p>
  <script>(function(){function d(){var b=a.getElementById("cf-footer-item-ip"),c=a.getElementById("cf-footer-ip-reveal");b&&"classList"in b&&(b.classList.remove("hidden"),c.addEventListener("click",function(){c.classList.add("hidden");a.getElementById("cf-footer-ip").classList.remove("hidden")}))}var a=document;document.addEventListener&&a.addEventListener("DOMContentLoaded",d)})();</script>
</div><!-- /.error-footer -->


    </div>
</div>
</body>
</html>

In [46]:
# Print every summary without column truncation. option_context restores the
# previous max_colwidth on exit, even if the print raises (for example when
# the 'summary' column is missing), so the setting never leaks into later cells.
with pd.option_context('display.max_colwidth', None):
    print(bill_df['summary'])

1                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              The purpose of House Concurrent Resolution 1 (H. CON. RES. 1) is to address the issue of consent to assemble outside the seat of government as articulated in the first session of the 113th Congress. This resolution sp