In [11]:
import yfinance as yf
import pandas as pd
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

In [3]:
from src.utils import convert_dates

In [5]:
from src.database import SessionLocal
from src.models.committee import Committee


In [6]:
with SessionLocal() as session:
    committees = session.query(Committee).all()


In [20]:
df = pd.DataFrame([{
    'id': c.id,
    'name': c.name,
    'subject_matter': c.subject_matter,
    'url': c.url,
    'created_at': c.created_at,
    'updated_at': c.updated_at
} for c in committees])

df.head()


Unnamed: 0,id,name,subject_matter,url,created_at,updated_at
0,330,Aging,to be added later,https://www.capitoltrades.com/committees/spag,2024-12-24 18:14:03.498186+00:00,2024-12-24 18:21:07.653077+00:00
1,331,"Agriculture, Nutrition & Forestry",to be added later,https://www.capitoltrades.com/committees/ssaf,2024-12-24 18:14:03.501257+00:00,2024-12-24 18:21:07.657043+00:00
2,358,Energy & Commerce,to be added later,https://www.capitoltrades.com/committees/hsif,2024-12-24 18:14:03.602298+00:00,2024-12-24 18:21:07.731693+00:00
3,350,Veteran’s Affairs,to be added later,https://www.capitoltrades.com/committees/hsvr,2024-12-24 18:14:03.548081+00:00,2024-12-24 18:21:46.325988+00:00
4,334,"Banking, Housing & Urban Affairs",to be added later,https://www.capitoltrades.com/committees/ssbk,2024-12-24 18:14:03.508977+00:00,2024-12-24 18:21:07.665801+00:00


In [10]:
resp = requests.get('https://www.capitoltrades.com/committees/spag')

In [144]:

def extract_trade_table_with_links(base_url):
    response = requests.get(base_url)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find('table')  # Modify selector if needed
    import pdb;pdb.set_trace()
    title = soup.find('h1').get_text(strip=True)
    df = pd.read_html(str(table))[0]

    detail_links = []
    tickers = []
    currencies = []
    names = []
    parties = []
    chambers = []
    states = []
    issuers = []

    for row in table.find_all('tr')[1:]:  # Skip header
        link_tag = row.find('a', href=True)
        if link_tag:
            full_url = urljoin(base_url, link_tag['href'])
            detail_links.append(full_url)
        else:
            detail_links.append(None)

        issuer_ticker_span = row.find('span', class_='q-field issuer-ticker')
        if issuer_ticker_span:
            issuer_ticker_text = issuer_ticker_span.get_text(strip=True)
            parts = issuer_ticker_text.split(':')
            if len(parts) == 2:
                ticker, currency = parts
            else:
                ticker, currency = None, None
        else:
            ticker, currency = None, None
        party, chamber, state = split_name_string(row.find('div', class_='politician-info').get_text(strip=True))
        
        names.append(row.find('h2', class_='politician-name').get_text(strip=True))
        issuers.append( row.find(class_ = 'issuer-name').get_text(strip=True))
        parties.append(party)
        chambers.append(chamber)    
        states.append(state)
        tickers.append(ticker)
        currencies.append(currency)

    df['party'], df['chamber'], df['state'] = parties, chambers, states
    df['issuer'] = issuers
    df['name'] = names
    df['detail_link'] = detail_links
    df['ticker'] = tickers
    df['currency'] = currencies
    df = convert_dates(df, ['Published', 'Traded'])  # Convert date columns to datetime    

    # Convert all column names to lower case
    df.columns = df.columns.str.lower()

    return df[['name', 'party', 'chamber', 'state', 'issuer', 'ticker', 'currency', 'published', 'traded', 'type', 'size', 'detail_link']]

In [145]:
tradedf = extract_trade_table_with_links('https://www.capitoltrades.com/committees/ssbk')
tradedf.head()

  df = pd.read_html(str(table))[0]


Unnamed: 0,name,party,chamber,state,issuer,ticker,currency,published,traded,type,size,detail_link
0,John Fetterman,Democrat,Senate,PA,IKON OFFICE SOLUTIONS,,,2024-12-03,2024-11-21,buy,1K–15K,https://www.capitoltrades.com/politicians/F000479
1,Bill Hagerty,Republican,Senate,TN,Keysight Technologies Inc,KEYS,US,2024-11-25,2024-10-30,sell,50K–100K,https://www.capitoltrades.com/politicians/H000601
2,Tina Smith,Democrat,Senate,MN,Tactile Systems Technology Inc,TCMD,US,2024-11-21,2024-11-12,buy,50K–100K,https://www.capitoltrades.com/politicians/S001203
3,Mark Warner,Democrat,Senate,VA,CITY OF ALEXANDRIA VIRGINIA,,,2024-11-12,2024-10-08,buy,500K–1M,https://www.capitoltrades.com/politicians/W000805
4,John Fetterman,Democrat,Senate,PA,V.F. Corp,VFC,US,2024-11-12,2024-10-24,buy,1K–15K,https://www.capitoltrades.com/politicians/F000479


In [146]:
tradedf

Unnamed: 0,name,party,chamber,state,issuer,ticker,currency,published,traded,type,size,detail_link
0,John Fetterman,Democrat,Senate,PA,IKON OFFICE SOLUTIONS,,,2024-12-03,2024-11-21,buy,1K–15K,https://www.capitoltrades.com/politicians/F000479
1,Bill Hagerty,Republican,Senate,TN,Keysight Technologies Inc,KEYS,US,2024-11-25,2024-10-30,sell,50K–100K,https://www.capitoltrades.com/politicians/H000601
2,Tina Smith,Democrat,Senate,MN,Tactile Systems Technology Inc,TCMD,US,2024-11-21,2024-11-12,buy,50K–100K,https://www.capitoltrades.com/politicians/S001203
3,Mark Warner,Democrat,Senate,VA,CITY OF ALEXANDRIA VIRGINIA,,,2024-11-12,2024-10-08,buy,500K–1M,https://www.capitoltrades.com/politicians/W000805
4,John Fetterman,Democrat,Senate,PA,V.F. Corp,VFC,US,2024-11-12,2024-10-24,buy,1K–15K,https://www.capitoltrades.com/politicians/F000479
5,John Fetterman,Democrat,Senate,PA,ENSTAR FINANCE LLC,,,2024-11-12,2024-10-17,buy,1K–15K,https://www.capitoltrades.com/politicians/F000479
6,John Fetterman,Democrat,Senate,PA,Kyndryl Holdings Inc,KD,US,2024-11-12,2024-10-24,sell,1K–15K,https://www.capitoltrades.com/politicians/F000479
7,John Fetterman,Democrat,Senate,PA,AUTODESK Inc,ADSK,US,2024-11-12,2024-10-17,sell,1K–15K,https://www.capitoltrades.com/politicians/F000479
8,John Fetterman,Democrat,Senate,PA,Yamana Gold Inc,AUY,US,2024-11-12,2024-10-17,sell,1K–15K,https://www.capitoltrades.com/politicians/F000479
9,John Fetterman,Democrat,Senate,PA,Kyndryl Holdings Inc,KD,US,2024-11-12,2024-10-25,sell,1K–15K,https://www.capitoltrades.com/politicians/F000479


In [148]:
dfs = df.url.map(extract_trade_table_with_links)

  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df[column] = pd.to_datetime(df[column], errors='coerce')
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df[column] = pd.to_datetime(df[column], errors='coerce')
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df[column] = pd.to_datetime(df[column], 

In [152]:
dfs[2]

Unnamed: 0,name,party,chamber,state,issuer,ticker,currency,published,traded,type,size,detail_link
0,Scott Peters,Democrat,House,CA,STEPSTONE TACTICAL GROWTH FUND III LP,,,2024-12-17,2024-11-18,buy,1K–15K,https://www.capitoltrades.com/politicians/P000608
1,Scott Peters,Democrat,House,CA,VISALIA CALIFORNIA,,,2024-12-17,2024-11-07,buy,500K–1M,https://www.capitoltrades.com/politicians/P000608
2,Neal Dunn,Republican,House,FL,Cadence Bank,CADE,US,2024-12-17,2024-11-06,buy,1K–15K,https://www.capitoltrades.com/politicians/D000628
3,Neal Dunn,Republican,House,FL,Capital One Financial Corp,COF,US,2024-12-17,2024-11-06,buy,1K–15K,https://www.capitoltrades.com/politicians/D000628
4,Neal Dunn,Republican,House,FL,JPMorgan Chase & Co,JPM,US,2024-12-17,2024-11-06,buy,1K–15K,https://www.capitoltrades.com/politicians/D000628
5,Neal Dunn,Republican,House,FL,Regions Financial Corp,RF,US,2024-12-17,2024-11-06,buy,1K–15K,https://www.capitoltrades.com/politicians/D000628
6,Neal Dunn,Republican,House,FL,U.S. Bancorp,USB,US,2024-12-17,2024-11-06,buy,1K–15K,https://www.capitoltrades.com/politicians/D000628
7,Scott Peters,Democrat,House,CA,COUNTY OF ORANGE CALIFORNIA,,,2024-11-18,2024-10-15,sell,50K–100K,https://www.capitoltrades.com/politicians/P000608
8,Scott Peters,Democrat,House,CA,PUBLIC UTILITIES COMMISSION OF CITY AND COUNTY...,,,2024-11-18,2024-10-01,sell,500K–1M,https://www.capitoltrades.com/politicians/P000608
9,Rick Allen,Republican,House,GA,US TREASURY BILLS,,,2024-11-15,2024-10-09,buy,100K–250K,https://www.capitoltrades.com/politicians/A000372


In [128]:
df.head()

Unnamed: 0,id,name,subject_matter,url,created_at,updated_at
0,330,Aging,to be added later,https://www.capitoltrades.com/committees/spag,2024-12-24 18:14:03.498186+00:00,2024-12-24 18:21:07.653077+00:00
1,331,"Agriculture, Nutrition & Forestry",to be added later,https://www.capitoltrades.com/committees/ssaf,2024-12-24 18:14:03.501257+00:00,2024-12-24 18:21:07.657043+00:00
2,358,Energy & Commerce,to be added later,https://www.capitoltrades.com/committees/hsif,2024-12-24 18:14:03.602298+00:00,2024-12-24 18:21:07.731693+00:00
3,350,Veteran’s Affairs,to be added later,https://www.capitoltrades.com/committees/hsvr,2024-12-24 18:14:03.548081+00:00,2024-12-24 18:21:46.325988+00:00
4,334,"Banking, Housing & Urban Affairs",to be added later,https://www.capitoltrades.com/committees/ssbk,2024-12-24 18:14:03.508977+00:00,2024-12-24 18:21:07.665801+00:00


In [23]:
url = df.iloc[0]['url']

In [45]:
df_trades

Unnamed: 0,politician,traded issuer,published,traded,type,size,unnamed: 6,detail_link,ticker,currency
0,Rick ScottRepublicanSenateFL,METROPOLITAN WATER RECLAMATION DISTRICT OF GRE...,2024-12-20,2024-12-04,sell,100K–250K,Goto trade detail page.,https://www.capitoltrades.com/politicians/S001217,,
1,Rick ScottRepublicanSenateFL,SOUTH CAROLINA STATE PORTS AUTHORITYN/A,2024-12-20,2024-12-04,sell,500K–1M,Goto trade detail page.,https://www.capitoltrades.com/politicians/S001217,,
2,Rick ScottRepublicanSenateFL,TAMPA FLORIDA SOLID WASTE SYSTEMN/A,2024-12-20,2024-12-04,buy,100K–250K,Goto trade detail page.,https://www.capitoltrades.com/politicians/S001217,,
3,Rick ScottRepublicanSenateFL,TAMPA FLORIDA SOLID WASTE SYSTEMN/A,2024-12-20,2024-12-04,buy,500K–1M,Goto trade detail page.,https://www.capitoltrades.com/politicians/S001217,,
4,Rick ScottRepublicanSenateFL,TAMPA FLORIDA SOLID WASTE SYSTEMN/A,2024-12-20,2024-12-04,buy,250K–500K,Goto trade detail page.,https://www.capitoltrades.com/politicians/S001217,,
5,Rick ScottRepublicanSenateFL,EL PASO WATER & SEWERN/A,2024-12-20,2024-11-21,sell,500K–1M,Goto trade detail page.,https://www.capitoltrades.com/politicians/S001217,,
6,Rick ScottRepublicanSenateFL,ARIZONA BOARD OF REGENTSN/A,2024-12-20,2024-11-21,sell,250K–500K,Goto trade detail page.,https://www.capitoltrades.com/politicians/S001217,,
7,Rick ScottRepublicanSenateFL,ALABAMA HIGHWAY FINANCE AUTHORITYN/A,2024-12-20,2024-11-21,sell,500K–1M,Goto trade detail page.,https://www.capitoltrades.com/politicians/S001217,,
8,Rick ScottRepublicanSenateFL,WASHINGTON KING COUNTYN/A,2024-12-20,2024-11-21,sell,250K–500K,Goto trade detail page.,https://www.capitoltrades.com/politicians/S001217,,
9,Richard BlumenthalDemocratSenateCT,BRAZIL GOVERNMENT BONDSN/A,2024-12-11,2024-11-18,sell,250K–500K,Goto trade detail page.,https://www.capitoltrades.com/politicians/B001277,,


In [26]:
sample_string = 'Rick ScottRepublicanSenateFL'

In [89]:
#check for republican or democrat and split accordingly  also split for state and for senate or house
def split_name_string(s):
    if 'Republican' in s:
        party = 'Republican'
    elif 'Democrat' in s:
        party = 'Democrat'
    else:
        party = None

    if 'Senate' in s:
        chamber = 'Senate'
    elif 'House' in s:
        chamber = 'House'
    else:
        chamber = None

    state = s[-2:]
    return party, chamber, state



In [41]:
r = split_name_string(sample_string)

In [42]:
r

('Rick Scott', 'Republican', 'Senate', 'FL')