In [2]:
import requests
import pandas as pd
import xmltodict

In [4]:
# Download the bill-status XML for this bill from govinfo's bulk-data endpoint.
# The timeout keeps a stalled connection from hanging the kernel indefinitely.
r = requests.get('https://www.govinfo.gov/bulkdata/BILLSTATUS/118/hr/BILLSTATUS-118hr184.xml', timeout=30)
# Fail here on an HTTP error rather than handing an error page to the XML parser.
r.raise_for_status()

In [13]:
# Parse the raw XML bytes into nested dicts; later cells read the bill under data['billStatus'].
data = xmltodict.parse(r.content)

In [55]:
def enforce_schema(json_data):
    """Reduce a parsed bill-status mapping to the fixed set of fields kept downstream.

    Args:
        json_data: Mapping that holds the bill under the ``"bill"`` key.

    Returns:
        dict: ``{"bill": {...}}`` with a constant key layout. Scalar fields that
        are absent come back as ``None``; repeated sections (committees,
        actions, sponsors, cosponsors, legislative subjects) come back as
        possibly-empty lists under ``"item"``.

    A section that is missing, or present but not a mapping (for example
    ``None`` for an empty XML element), is treated as empty instead of raising
    ``AttributeError`` on the chained ``.get`` calls.
    """
    bill = json_data.get("bill")
    if not isinstance(bill, dict):
        bill = {}

    def section(*keys):
        """Walk ``keys`` down from the bill; return {} as soon as a node is not a mapping."""
        node = bill
        for key in keys:
            node = node.get(key)
            if not isinstance(node, dict):
                return {}
        return node

    def items(*keys):
        """Return the ``item`` children of a section as a list (empty when absent)."""
        return ensure_list(get_item_if_exists(section(*keys)))

    latest_action = section("latestAction")

    conform_data = {
        "bill": {
            "number": bill.get("number"),
            "updateDate": bill.get("updateDate"),
            "type": bill.get("type"),
            "introducedDate": bill.get("introducedDate"),
            "congress": bill.get("congress"),
            "committees": {
                "item": [item_dict(item) for item in items("committees")]
            },
            "actions": {
                "item": [action_dict(action) for action in items("actions")]
            },
            "sponsors": {
                "item": [sponsor_dict(sponsor) for sponsor in items("sponsors")]
            },
            "cosponsors": {
                "count": section("cosponsors").get("count"),
                "item": [cosponsor_dict(cosponsor) for cosponsor in items("cosponsors")]
            },
            "policyArea": {
                "name": section("policyArea").get("name")
            },
            "subjects": {
                "legislativeSubjects": {
                    "item": [subject_dict(subject) for subject in items("subjects", "legislativeSubjects")]
                }
            },
            "title": bill.get("title"),
            "latestAction": {
                "actionDate": latest_action.get("actionDate"),
                "text": latest_action.get("text")
            },
            # ``summary`` may be a single mapping or a list of them; the helper normalises it.
            "most_recent_text": get_most_recent_text(section("summaries").get("summary", {}))
        }
    }

    return conform_data


def item_dict(item):
    """Keep only the name, chamber and type of a committee record.

    Returns an empty dict when ``item`` is not a dict; keys missing from
    ``item`` map to ``None``.
    """
    if not isinstance(item, dict):
        return {}
    return {field: item.get(field) for field in ("name", "chamber", "type")}


def action_dict(action):
    """Keep the date, text, type, code and recorded votes of one action record.

    Args:
        action: One action mapping; anything that is not a dict yields ``{}``.

    Returns:
        dict: The selected fields, with ``recordedVotes`` always a list
        (empty when the action carries no votes).
    """
    if not isinstance(action, dict):
        return {}

    # ``recordedVotes`` can be present but null; ``.get(key, {})`` would then
    # return None and the chained ``.get`` would raise AttributeError.
    votes_node = action.get("recordedVotes")
    raw_votes = votes_node.get("recordedVote", []) if isinstance(votes_node, dict) else []

    return {
        "actionDate": action.get("actionDate"),
        "text": action.get("text"),
        "type": action.get("type"),
        "actionCode": action.get("actionCode"),
        "recordedVotes": [vote_dict(vote) for vote in ensure_list(raw_votes)]
    }


def sponsor_dict(sponsor):
    """Project a sponsor record onto its identifier and name fields.

    Non-dict input produces ``{}``; absent fields are reported as ``None``.
    """
    if not isinstance(sponsor, dict):
        return {}
    wanted = ("bioguideId", "fullName", "firstName", "lastName")
    return dict((field, sponsor.get(field)) for field in wanted)


def cosponsor_dict(cosponsor):
    """Return just the ``bioguideId`` of a cosponsor record, or ``{}`` for non-dict input."""
    return {"bioguideId": cosponsor.get("bioguideId")} if isinstance(cosponsor, dict) else {}


def subject_dict(subject):
    """Return just the ``name`` of a legislative-subject record, or ``{}`` for non-dict input."""
    if not isinstance(subject, dict):
        return {}
    subject_name = subject.get("name")
    return {"name": subject_name}


def vote_dict(vote):
    """Project a recorded-vote record onto its six identifying fields.

    Non-dict input produces ``{}``; absent fields are reported as ``None``.
    """
    if not isinstance(vote, dict):
        return {}
    kept_fields = ("rollNumber", "url", "chamber", "congress", "date", "sessionNumber")
    return {field: vote.get(field) for field in kept_fields}

def get_most_recent_text(item):
    """Return the prettified text of the summary with the latest ``actionDate``.

    Args:
        item: A single summary mapping, a list of them, or an empty value.

    Returns:
        str | None: The first 1000 characters of the newest summary's ``text``,
        run through BeautifulSoup's ``prettify``; ``None`` when no summary has
        both a ``text`` and an ``actionDate``.
    """
    # Neither name is imported anywhere in the visible notebook, so bring them
    # into scope here; this is harmless if they are also imported at the top.
    from datetime import datetime
    from bs4 import BeautifulSoup

    # Skip entries that cannot be ranked or rendered (non-dicts, null text,
    # missing date) instead of failing on them further down.
    candidates = [
        summary for summary in ensure_list(item)
        if isinstance(summary, dict)
        and summary.get('text') is not None
        and summary.get('actionDate')
    ]

    if not candidates:
        return None

    most_recent_summary = max(
        candidates,
        key=lambda summary: datetime.strptime(summary['actionDate'], '%Y-%m-%d'),
    )

    # The cut is applied to the raw markup, so it may land inside a tag;
    # html.parser is left to cope with the truncated fragment.
    most_recent_text = most_recent_summary['text'][:1000]

    return BeautifulSoup(most_recent_text, 'html.parser').prettify()

def ensure_list(item):
    """Normalise ``item`` to a list.

    A list is returned unchanged (same object), any other truthy value is
    wrapped in a one-element list, and a falsy value becomes ``[]``.
    """
    if isinstance(item, list):
        return item
    return [item] if item else []

def get_item_if_exists(input_data):
    """Return ``input_data["item"]`` (default ``[]``), or ``[]`` when ``input_data`` is falsy."""
    if not input_data:
        return []
    return input_data.get("item", [])

In [56]:
# Sponsor records of the downloaded bill, normalised to a list.
test = ensure_list(
    get_item_if_exists(
        data['billStatus'].get("bill", {}).get("sponsors", {})
    )
)

In [57]:
# Show the raw sponsor records (a fresh list copy of the normalised section).
list(ensure_list(get_item_if_exists(data['billStatus'].get("bill", {}).get("sponsors", {}))))

[{'bioguideId': 'M000871',
  'fullName': 'Rep. Mann, Tracey [R-KS-1]',
  'firstName': 'Tracey',
  'lastName': 'Mann',
  'party': 'R',
  'state': 'KS',
  'district': '1',
  'isByRequest': 'N'}]

In [58]:
# Same raw sponsor listing as the previous cell.
[*ensure_list(get_item_if_exists(data['billStatus'].get("bill", {}).get("sponsors", {})))]

[{'bioguideId': 'M000871',
  'fullName': 'Rep. Mann, Tracey [R-KS-1]',
  'firstName': 'Tracey',
  'lastName': 'Mann',
  'party': 'R',
  'state': 'KS',
  'district': '1',
  'isByRequest': 'N'}]

In [59]:
# Sponsor records after projection through sponsor_dict.
list(map(sponsor_dict, ensure_list(get_item_if_exists(data['billStatus'].get("bill", {}).get("sponsors", {})))))

[{'bioguideId': 'M000871',
  'fullName': 'Rep. Mann, Tracey [R-KS-1]',
  'firstName': 'Tracey',
  'lastName': 'Mann'}]

In [44]:
# NOTE(review): the sponsor records shown above use the key 'bioguideId' (lowercase "d"),
# so looking up 'bioguideID' finds nothing -- that is why this cell displays [None].
[sponsor.get('bioguideID') for sponsor in ensure_list(get_item_if_exists(data['billStatus'].get("bill", {}).get("sponsors", {})))]

[None]

In [2]:
def sen_id_dict(item):
    """Flatten one senator record into name, state and identifier fields.

    Args:
        item: One senator mapping; anything that is not a dict yields ``{}``.

    Returns:
        dict: ``first_name`` and ``last_name`` (read from the nested
        ``full_name`` mapping), ``state``, ``bioguideID`` (read from the
        ``bioguide`` key) and ``lisid``. Absent values are ``None``.
    """
    if not isinstance(item, dict):
        return {}

    # ``full_name`` may be missing or null; fall back to an empty mapping so
    # the name lookups return None instead of raising AttributeError.
    full_name = item.get('full_name')
    if not isinstance(full_name, dict):
        full_name = {}

    return {
        'first_name': full_name.get('first_name'),
        'last_name': full_name.get('last_name'),
        'state': item.get('state'),
        'bioguideID': item.get('bioguide'),
        'lisid': item.get('lisid')
    }

In [12]:
# import os
# os.environ.get("congress_api_key")

In [13]:
import json

In [7]:
import os
import warnings

# Query window and paging for the Congress.gov member listing.
start_date  = "2023-01-01T00:00:00Z"
end_date    = "2024-06-30T00:00:00Z"
limit       = 250
offset      = 0

# Credentials must not live in the notebook: read the key from the environment.
# Any key that was ever committed in this cell should be treated as exposed and rotated.
api_key     = os.environ.get("congress_api_key", "")
if not api_key:
    warnings.warn(
        "Environment variable 'congress_api_key' is not set; "
        "requests built from `path` will be sent without an API key."
    )

path = f'https://api.congress.gov/v3/member?format=json&fromDateTime={start_date}&toDateTime={end_date}&offset={offset}&limit={limit}&api_key={api_key}'

In [14]:
# Bound the wait so a stalled connection cannot hang the kernel, and surface HTTP
# errors (e.g. a rejected API key) before trying to decode the body as JSON.
http_response = requests.get(path, timeout=30)
http_response.raise_for_status()
response = http_response.json()

In [15]:
response

{'members': [{'bioguideId': 'F000476',
   'depiction': {'attribution': 'Image courtesy of the Member',
    'imageUrl': 'https://www.congress.gov/img/member/f000476_200.jpg'},
   'district': 10,
   'name': 'Frost, Maxwell',
   'partyName': 'Democratic',
   'state': 'Florida',
   'terms': {'item': [{'chamber': 'House of Representatives',
      'startYear': 2023}]},
   'updateDate': '2024-06-28T18:36:16Z',
   'url': 'https://api.congress.gov/v3/member/F000476?format=json'},
  {'bioguideId': 'F000480',
   'district': 20,
   'name': 'Fong, Vince',
   'partyName': 'Republican',
   'state': 'California',
   'terms': {'item': [{'chamber': 'House of Representatives',
      'startYear': 2024}]},
   'updateDate': '2024-06-27T18:40:14Z',
   'url': 'https://api.congress.gov/v3/member/F000480?format=json'},
  {'bioguideId': 'J000292',
   'depiction': {'attribution': 'Image courtesy of the Member',
    'imageUrl': 'https://www.congress.gov/img/member/j000292_200.jpg'},
   'district': 6,
   'name': 'J

In [51]:
def get_senate_id_lookup():
    """Download the senate.gov senator lookup XML and flatten each senator record.

    Returns:
        list[dict]: One ``sen_id_dict`` result per ``senator`` element.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within the timeout.
    """
    # Bound the wait and fail on HTTP errors rather than parsing an error page.
    response = requests.get('https://www.senate.gov/about/senator-lookup.xml', timeout=30)
    response.raise_for_status()

    sen_dict = xmltodict.parse(response.content)
    senators = sen_dict['senators']['senator']
    # A lone <senator> element would arrive as a single mapping, not a list;
    # wrap it so the loop iterates records rather than dict keys.
    if not isinstance(senators, list):
        senators = [senators]

    return [sen_id_dict(sen) for sen in senators]

In [53]:
# get_senate_id_lookup()

In [54]:
# response = requests.get('https://www.senate.gov/about/senator-lookup.xml')
# sen_dict = xmltodict.parse(response.content)

In [55]:
# sen_df = pd.DataFrame(sen_dict['senators']['senator'])

In [49]:
# The helper defined in this notebook is named sen_id_dict; no `id_dict` is defined here.
# NOTE(review): `sen_dict` is only assigned in a commented-out cell above -- restore it before running.
sen_id_dict(sen_dict['senators']['senator'][0])

{'first_name': 'Joseph',
 'last_name': 'Abbott',
 'state': 'NC',
 'bioguideID': 'A000006',
 'lisid': None}

In [31]:
# NOTE(review): `sen_df` is only created in a commented-out cell above, so this cell
# fails with NameError on a fresh kernel until that cell is restored.
sen_df.publication_name[0].keys()

dict_keys(['first_name', 'middle_name', 'last_name'])

In [32]:
sen_df.head()

Unnamed: 0,full_name,publication_name,party,state,bioguide,service_dates,death_date,lisid,featured_bio,@current
0,"{'first_name': 'Joseph', 'middle_name': 'Carte...","{'first_name': 'Joseph', 'middle_name': 'C.', ...",R,NC,A000006,"{'service_date': {'begin_date': {'@day': '14',...","{'@day': '08', '@month': '10', '@year': '1881'}",,,
1,"{'first_name': 'Hazel', 'middle_name': 'Hempel...","{'first_name': 'Hazel', 'middle_name': 'H.', '...",R,NE,A000010,"{'service_date': {'begin_date': {'@day': '08',...","{'@day': '30', '@month': '07', '@year': '1966'}",,,
2,"{'first_name': 'James', 'middle_name': None, '...","{'first_name': None, 'middle_name': None, 'las...",R,SD,A000009,"{'service_date': {'begin_date': {'@day': '03',...","{'@day': '16', '@month': '05', '@year': '2012'}",S145,,
3,"{'first_name': 'James', 'middle_name': 'George...","{'first_name': 'James', 'middle_name': 'G.', '...",D,SD,A000017,"{'service_date': {'begin_date': {'@day': '03',...","{'@day': '24', '@month': '02', '@year': '2023'}",S002,,
4,"{'first_name': 'Spencer', 'middle_name': None,...","{'first_name': None, 'middle_name': None, 'las...",R,MI,A000355,"{'service_date': {'begin_date': {'@day': '03',...","{'@day': '', '@month': '', '@year': ''}",S238,,
