# Compiling Senator and House of Representatives Stock Trading Data


In [1]:
import pandas as pd
import yfinance as yf

## Senate Stocks

In [2]:
# Load the cleaned Senate transactions and discard the "Unnamed: 0" column.
senator_trades = pd.read_csv(
    "data/cleaned/all_transactions_senate.csv"
).drop(columns="Unnamed: 0")
senator_trades.head()

Unnamed: 0,transaction_date,owner,ticker,type,amount,comment,senator,transaction_year,transaction_month,transaction_day,amount_lower,amount_upper
0,2021-08-18,Joint,SSYS,Purchase,"$1,001 - $15,000",--,Thomas H Tuberville,2021,8,18,1001,15000
1,2021-08-12,Joint,SSBK,Purchase,"$50,001 - $100,000",--,Thomas H Tuberville,2021,8,12,50001,100000
2,2021-08-23,Spouse,LMRK,Sale (Full),"$1,001 - $15,000",--,Thomas R Carper,2021,8,23,1001,15000
3,2021-08-11,Spouse,WW,Purchase,"$1,001 - $15,000",--,Thomas R Carper,2021,8,11,1001,15000
4,2021-08-04,Spouse,ARE,Sale (Partial),"$1,001 - $15,000",--,Thomas R Carper,2021,8,4,1001,15000


In [3]:
# Download daily prices for every distinct ticker in the Senate trades.
# yfinance treats `end` as exclusive, so one day is added to the latest
# transaction date; otherwise prices for that date are never downloaded and
# any sale on it cannot be priced.
senate_end = pd.Timestamp(senator_trades["transaction_date"].max()) + pd.Timedelta(days=1)
senate_stocks = yf.download(" ".join(senator_trades["ticker"].unique()),
                            start=senator_trades["transaction_date"].min(),
                            end=senate_end.strftime("%Y-%m-%d"))
# Keep only the closing prices (still addressed as senate_stocks["Close"]).
# Selecting the field, rather than dropping a fixed list of the others, does
# not raise KeyError when one of those other fields is absent from the download.
senate_stocks = senate_stocks[["Close"]]
senate_stocks

[*********************100%***********************]  909 of 909 completed

128 Failed downloads:
- UA-C: No data found, symbol may be delisted
- DGNR: No data found, symbol may be delisted
- XON: No data found, symbol may be delisted
- IRET: No data found, symbol may be delisted
- Q: No data found for this date range, symbol may be delisted
- AVP: No data found, symbol may be delisted
- MXIM: No data found, symbol may be delisted
- MIK: No data found, symbol may be delisted
- BUNT-RP: No data found, symbol may be delisted
- IPHI: No data found, symbol may be delisted
- CTRL: No data found, symbol may be delisted
- VSI: No data found, symbol may be delisted
- DPS: No data found for this date range, symbol may be delisted
- NBL: No data found, symbol may be delisted
- TLLP: No data found for this date range, symbol may be delisted
- WPX: No data found, symbol may be delisted
- PLKI: No data found for this date range, symbol may be delisted
- ESV: No data found, symbol may be delisted
- LV

Unnamed: 0_level_0,Close,Close,Close,Close,Close,Close,Close,Close,Close,Close,...,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume
Unnamed: 0_level_1,0QZI.IL,3V64.TI,A,AA,AAGIY,AAL,AAN,AAON,AAPL,AAT,...,YUMC,ZAYO,ZBH,ZIOP,ZM,ZNGA,ZNGA.SW,ZTS,^MWE,^RGP
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2012-09-13,,,27.453505,23.140890,14.930000,11.250000,,8.617778,24.392143,27.580000,...,,,1377800.0,518500.0,,13112100.0,,,,
2012-09-14,,,28.419170,23.645519,15.150000,10.530000,,9.088889,24.688572,27.860001,...,,,1864800.0,502500.0,,31370600.0,,,,
2012-09-17,,,28.347639,23.020741,14.950000,10.560000,,8.871111,24.992144,28.000000,...,,,746200.0,568900.0,,11066300.0,,,,
2012-09-18,,,28.290415,22.804470,15.000000,10.310000,,9.075556,25.068214,27.879999,...,,,1231800.0,855600.0,,9374100.0,,,,
2012-09-19,,,28.526466,22.756411,14.850000,10.970000,,9.000000,25.075001,27.500000,...,,,1628200.0,569500.0,,18435400.0,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-08-17,361.5,,160.910004,41.610001,49.689999,19.340000,27.799999,68.070000,150.190002,37.799999,...,3633300.0,,1970000.0,2891200.0,1991500.0,19603700.0,,1080500.0,,
2021-08-18,357.5,,163.020004,41.490002,49.709999,19.139999,28.230000,67.040001,146.360001,37.630001,...,2815500.0,,1299400.0,1427000.0,2092700.0,11997100.0,,1089300.0,,
2021-08-19,355.5,,167.669998,36.919998,50.500000,18.730000,28.299999,68.419998,146.699997,37.570000,...,1992100.0,,1166400.0,4221300.0,2476000.0,21796400.0,,1126200.0,,
2021-08-20,357.5,,168.130005,37.709999,49.740002,18.500000,28.110001,68.940002,148.190002,37.869999,...,1542400.0,,1019100.0,3428000.0,2354400.0,20518000.0,,1194000.0,,


In [4]:
# Pair each sale with the same senator's most recent purchase of the same
# ticker and record the relative change in closing price between the two dates.
#
# Each output record is the purchase row plus:
#   sell_date - transaction date of the sale
#   weight    - midpoint of the sale's reported amount range
#   gain      - (sell - buy) / buy using closing prices; NaN if either price is NaN
senate_close = senate_stocks["Close"]  # sliced once instead of once per row
purchase_fields = list(senator_trades.columns)

trades = []
for senator in senator_trades["senator"].unique():
    data = senator_trades[senator_trades["senator"] == senator]
    data = data.sort_values("transaction_date", ascending=True)

    # Most recent purchase per ticker for this senator.
    open_positions = {}

    for row in data.itertuples():
        if row.type == "Purchase":
            open_positions[row.ticker] = {
                field: getattr(row, field) for field in purchase_fields
            }
            continue

        if row.type not in ("Sale (Full)", "Sale (Partial)"):
            continue

        # A sale is only usable when a prior purchase is known and a price
        # column exists for the ticker.
        if row.ticker not in open_positions or row.ticker not in senate_close:
            continue

        purchase = open_positions[row.ticker]
        try:
            buy = senate_close[row.ticker].loc[purchase["transaction_date"]]
            sell = senate_close[row.ticker].loc[row.transaction_date]
        except KeyError as e:
            # No price row for one of the two dates; report it and skip the sale.
            print(e)
            continue

        # Append a *copy* of the purchase record. Appending the stored dict
        # itself would let a later sale of the same position overwrite the
        # sell_date/weight/gain of records already placed in `trades`.
        trades.append({
            **purchase,
            "sell_date": row.transaction_date,
            "weight": (row.amount_lower + row.amount_upper) / 2,
            "gain": (sell - buy) / buy,
        })

        # Only a full sale closes the position; partial sales leave it open.
        if row.type == "Sale (Full)":
            open_positions.pop(row.ticker)

senator_trades = pd.DataFrame(trades)

'2020-02-22'
'2020-02-22'
'2019-01-19'


In [5]:
# Persist the matched Senate trades; the frame's index is written as the first column.
senator_trades.to_csv(path_or_buf="data/cleaned/senator_trades.csv", index=True)

## House of Representatives Stocks

In [6]:
# Load the cleaned House transactions and discard the "Unnamed: 0" column.
house_trades = pd.read_csv(
    "data/cleaned/all_transactions_house.csv"
).drop(columns="Unnamed: 0")
house_trades

Unnamed: 0,transaction_date,owner,ticker,type,amount,representative,district,cap_gains_over_200_usd,transaction_year,transaction_month,transaction_day,amount_lower,amount_upper
0,2021-09-27,joint,BP,purchase,"$1,001 - $15,000",Hon. Virginia Foxx,NC05,False,2021,9,27,1001,15000
1,2021-09-13,joint,XOM,purchase,"$1,001 - $15,000",Hon. Virginia Foxx,NC05,False,2021,9,13,1001,15000
2,2021-09-10,joint,ILPT,purchase,"$15,001 - $50,000",Hon. Virginia Foxx,NC05,False,2021,9,10,15001,50000
3,2021-09-28,joint,PM,purchase,"$15,001 - $50,000",Hon. Virginia Foxx,NC05,False,2021,9,28,15001,50000
4,2021-09-17,self,BLK,sale_partial,"$1,001 - $15,000",Hon. Alan S. Lowenthal,CA47,False,2021,9,17,1001,15000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6778,2020-03-18,self,COST,purchase,"$1,001 - $15,000",Hon. Ed Perlmutter,CO07,False,2020,3,18,1001,15000
6779,2020-04-22,self,FB,sale_full,"$1,001 - $15,000",Hon. Ed Perlmutter,CO07,False,2020,4,22,1001,15000
6780,2020-04-22,self,KMI,sale_full,"$1,001 - $15,000",Hon. Ed Perlmutter,CO07,False,2020,4,22,1001,15000
6781,2020-04-09,--,SWK,sale_partial,"$1,001 - $15,000",Hon. Ed Perlmutter,CO07,False,2020,4,9,1001,15000


In [7]:
# Download daily prices for every distinct ticker in the House trades.
# yfinance treats `end` as exclusive, so one day is added to the latest
# transaction date; otherwise prices for that date are never downloaded and
# any sale on it cannot be priced.
house_end = pd.Timestamp(house_trades["transaction_date"].max()) + pd.Timedelta(days=1)
house_stocks = yf.download(" ".join(house_trades["ticker"].unique()),
                           start=house_trades["transaction_date"].min(),
                           end=house_end.strftime("%Y-%m-%d"))
# Keep only the closing prices (still addressed as house_stocks["Close"]).
# Selecting the field, rather than dropping a fixed list of the others, does
# not raise KeyError when one of those other fields is absent from the download.
house_stocks = house_stocks[["Close"]]
house_stocks

[*********************100%***********************]  1560 of 1560 completed

107 Failed downloads:
- BRK.A: No data found, symbol may be delisted
- FSKR: No data found, symbol may be delisted
- GEAGY: No data found, symbol may be delisted
- MSBHY: No data found, symbol may be delisted
- CMD: No data found, symbol may be delisted
- QCHR: No data found, symbol may be delisted
- SNOXX: None
- TOT: No data found, symbol may be delisted
- QTS: No data found, symbol may be delisted
- APC: No data found, symbol may be delisted
- TFDXX: None
- BOA: None
- ZOOM: No data found, symbol may be delisted
- GWPH: No data found, symbol may be delisted
- RF$A: No data found, symbol may be delisted
- WLTL: No data found, symbol may be delisted
- BAC$K: No data found, symbol may be delisted
- PFPT: No data found, symbol may be delisted
- NBLX: No data found, symbol may be delisted
- APPL: None
- CCXX: No data found, symbol may be delisted
- WYND: No data found, symbol may be delisted
- PS: No data found, 

Unnamed: 0_level_0,Close,Close,Close,Close,Close,Close,Close,Close,Close,Close,...,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume
Unnamed: 0_level_1,35G.SG,AA,AAGIY,AAIGF,AAL,AAN,AAPL,AAVMY,AB,ABALX,...,YUMC,Z,ZBH,ZBRA,ZEN,ZION,ZM,ZNGA,ZOOM,ZTS
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2018-12-27,22.889999,27.160000,32.520000,8.01,32.040001,,39.037498,,26.059999,24.750000,...,994100.0,2511000.0,1578900.0,504500.0,1119800.0,2534200.0,,14644000.0,,2244700.0
2018-12-28,22.990000,26.600000,32.930000,8.24,31.830000,,39.057499,,26.820000,24.780001,...,1199900.0,1541600.0,1860000.0,344800.0,953800.0,2558600.0,,7666400.0,,1797300.0
2018-12-29,,,,,,,,,,,...,,,,,,,,,,
2018-12-30,,,,,,,,,,,...,,,,,,,,,,
2018-12-31,,26.580000,32.880001,8.28,32.110001,,39.435001,,27.320000,24.900000,...,795700.0,1882400.0,1389700.0,409100.0,921100.0,2575600.0,,10814800.0,,1485200.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-09-26,,,,,,,,,,,...,,,,,,,,,,
2021-09-27,41.680000,51.560001,44.799999,11.34,21.469999,28.850000,145.369995,14.09,51.669998,33.040001,...,3840900.0,4560200.0,1579400.0,196200.0,1268900.0,1362900.0,3084100.0,21299400.0,,1964000.0
2021-09-28,40.459999,50.980000,44.139999,10.96,21.430000,28.110001,141.910004,14.02,49.730000,32.590000,...,3749800.0,4710400.0,1615900.0,234300.0,1191200.0,1128400.0,5212900.0,13919200.0,,1957300.0
2021-09-29,41.419998,50.580002,45.360001,11.50,21.010000,28.490000,142.830002,14.13,49.799999,32.630001,...,2948400.0,3959300.0,913900.0,331900.0,1690400.0,1425200.0,4316300.0,20252700.0,,1430400.0


In [8]:
# Pair each sale with the same representative's most recent purchase of the
# same ticker and record the relative change in closing price between the two
# dates.
#
# Each output record is the purchase row plus:
#   sell_date - transaction date of the sale
#   weight    - midpoint of the sale's reported amount range
#   gain      - (sell - buy) / buy using closing prices; NaN if either price is NaN
house_close = house_stocks["Close"]  # sliced once instead of once per row
purchase_fields = list(house_trades.columns)

trades = []
for representative in house_trades["representative"].unique():
    data = house_trades[house_trades["representative"] == representative]
    data = data.sort_values("transaction_date", ascending=True)

    # Most recent purchase per ticker for this representative.
    open_positions = {}

    for row in data.itertuples():
        if row.type == "purchase":
            open_positions[row.ticker] = {
                field: getattr(row, field) for field in purchase_fields
            }
            continue

        if row.type not in ("sale_full", "sale_partial"):
            continue

        # A sale is only usable when a prior purchase is known and a price
        # column exists for the ticker.
        if row.ticker not in open_positions or row.ticker not in house_close:
            continue

        purchase = open_positions[row.ticker]
        try:
            buy = house_close[row.ticker].loc[purchase["transaction_date"]]
            sell = house_close[row.ticker].loc[row.transaction_date]
        except KeyError as e:
            # No price row for one of the two dates; report it and skip the sale.
            print(e)
            continue

        # Append a *copy* of the purchase record. Appending the stored dict
        # itself would let a later sale of the same position overwrite the
        # sell_date/weight/gain of records already placed in `trades`.
        trades.append({
            **purchase,
            "sell_date": row.transaction_date,
            "weight": (row.amount_lower + row.amount_upper) / 2,
            "gain": (sell - buy) / buy,
        })

        # Only a full sale closes the position; partial sales leave it open.
        if row.type == "sale_full":
            open_positions.pop(row.ticker)

house_trades = pd.DataFrame(trades)

In [10]:
# Persist the matched House trades; the frame's index is written as the first column.
house_trades.to_csv(path_or_buf="data/cleaned/house_trades.csv", index=True)

In [None]:
# Download prices for the "spy" ticker over the window spanned by the matched
# trades: earliest purchase date through latest sale date, across both chambers.
spy_start = min(senator_trades["transaction_date"].min(),
                house_trades["transaction_date"].min())
spy_last_sale = max(senator_trades["sell_date"].max(),
                    house_trades["sell_date"].max())
# yfinance treats `end` as exclusive; extend by one day so the latest sale
# date itself is part of the downloaded range.
spy_end = (pd.Timestamp(spy_last_sale) + pd.Timedelta(days=1)).strftime("%Y-%m-%d")

# start/end are passed by keyword so the call does not depend on the
# positional order of yf.download's parameters.
spy = yf.download("spy", start=spy_start, end=spy_end)
spy.to_csv("data/cleaned/spy.csv")