# Compiling Senate and House of Representatives Stock Trading Data


In [12]:
import pandas as pd
import yfinance as yf

## Senate Stocks

In [13]:
# Load the cleaned Senate transactions and discard the index column that was
# saved into the CSV, then preview the first rows.
senator_trades = (
    pd.read_csv("data/cleaned/all_transactions_senate.csv")
    .drop(columns=["Unnamed: 0"])
)
senator_trades.head()

Unnamed: 0,transaction_date,owner,ticker,type,amount,comment,senator,transaction_year,transaction_month,transaction_day,amount_lower,amount_upper
0,2021-08-18,Joint,SSYS,Purchase,"$1,001 - $15,000",--,Thomas H Tuberville,2021,8,18,1001,15000
1,2021-08-12,Joint,SSBK,Purchase,"$50,001 - $100,000",--,Thomas H Tuberville,2021,8,12,50001,100000
2,2021-08-23,Spouse,LMRK,Sale (Full),"$1,001 - $15,000",--,Thomas R Carper,2021,8,23,1001,15000
3,2021-08-11,Spouse,WW,Purchase,"$1,001 - $15,000",--,Thomas R Carper,2021,8,11,1001,15000
4,2021-08-04,Spouse,ARE,Sale (Partial),"$1,001 - $15,000",--,Thomas R Carper,2021,8,4,1001,15000


In [14]:
# Daily closing prices for every ticker that appears in the Senate disclosures.
# yfinance treats `end` as exclusive, so the window is extended by one day;
# otherwise trades made on the latest transaction date have no price row.
senate_start = senator_trades["transaction_date"].min()
senate_end = pd.Timestamp(senator_trades["transaction_date"].max()) + pd.Timedelta(days=1)
senate_stocks = yf.download(" ".join(senator_trades["ticker"].unique()),
                            start=senate_start,
                            end=senate_end.strftime("%Y-%m-%d"))
# Keep only the "Close" column group. Selecting it (rather than dropping a
# hard-coded list of the other groups) does not fail when yfinance omits a
# group such as "Adj Close".
senate_stocks = senate_stocks[["Close"]]
senate_stocks

[*********************100%***********************]  909 of 909 completed

131 Failed downloads:
- CYBX: None
- PCLN: None
- CMCSK: None
- ABCO: None
- GAS: None
- VSM: No data found, symbol may be delisted
- WGP: No data found, symbol may be delisted
- KORS: None
- MXIM: No data found, symbol may be delisted
- BPL: No data found, symbol may be delisted
- MFRM: None
- AMSG: None
- XON: No data found, symbol may be delisted
- MSTY.PA: No data found, symbol may be delisted
- ESV: No data found, symbol may be delisted
- TMK: No data found, symbol may be delisted
- WTR: No data found, symbol may be delisted
- DD-PA: No data found, symbol may be delisted
- AXLL: None
- CLC: None
- CTL: No data found, symbol may be delisted
- QLIK: None
- LB: No data found, symbol may be delisted
- CTAA: No data found, symbol may be delisted
- RTN: No data found, symbol may be delisted
- LLTC: None
- CRZO: No data found, symbol may be delisted
- RDSA: No data found, symbol may be delisted
- CBS: No data found

Unnamed: 0_level_0,Close,Close,Close,Close,Close,Close,Close,Close,Close,Close,Close,Close,Close,Close,Close,Close,Close,Close,Close,Close,Close
Unnamed: 0_level_1,0QZI.IL,3V64.TI,A,AA,AAGIY,AAL,AAN,AAON,AAPL,AAT,...,YUMC,ZAYO,ZBH,ZIOP,ZM,ZNGA,ZNGA.SW,ZTS,^MWE,^RGP
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2012-09-13,,,27.453505,23.140890,14.930000,11.250000,,8.617778,24.392143,27.580000,...,,,66.629997,5.45,,2.96,,,,
2012-09-14,,,28.419170,23.645519,15.150000,10.530000,,9.088889,24.688572,27.860001,...,,,66.900002,5.53,,3.18,,,,
2012-09-17,,,28.347639,23.020741,14.950000,10.560000,,8.871111,24.992144,28.000000,...,,,66.930000,5.69,,3.08,,,,
2012-09-18,,,28.290415,22.804470,15.000000,10.310000,,9.075556,25.068214,27.879999,...,,,67.190002,5.77,,3.08,,,,
2012-09-19,,,28.526466,22.756411,14.850000,10.970000,,9.000000,25.075001,27.500000,...,,,67.139999,5.71,,3.20,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-08-17,361.5,,160.910004,41.610001,49.689999,19.340000,27.799999,68.070000,150.190002,37.799999,...,60.279999,,147.860001,1.68,345.540009,8.21,,206.600006,,
2021-08-18,357.5,,163.020004,41.490002,49.709999,19.139999,28.230000,67.040001,146.360001,37.630001,...,59.130001,,144.380005,1.66,340.929993,8.20,,203.330002,,
2021-08-19,355.5,,167.669998,36.919998,50.500000,18.730000,28.299999,68.419998,146.699997,37.570000,...,58.959999,,144.119995,1.44,332.380005,8.37,,206.440002,,
2021-08-20,357.5,,168.130005,37.709999,49.740002,18.500000,28.110001,68.940002,148.190002,37.869999,...,59.169998,,145.460007,1.54,336.859985,8.51,,207.050003,,


In [15]:
# Pair each purchase with the same senator's later sales of that ticker and
# record the fractional change in closing price between the two dates.
# Each matched sale becomes one row: the purchase's fields plus
#   sell_date - transaction date of the sale
#   weight    - midpoint of the sale's disclosed amount range
#   gain      - (sell close - buy close) / buy close
close_prices = senate_stocks["Close"]  # hoisted: one lookup instead of one per row
trade_columns = list(senator_trades.columns)

trades = []
for senator in senator_trades["senator"].unique():
    data = senator_trades[senator_trades["senator"] == senator]
    # Stable sort so same-day transactions keep their order from the file.
    data = data.sort_values("transaction_date", ascending=True, kind="mergesort")

    # Latest purchase per ticker; a repeat purchase replaces the earlier one.
    open_positions = {}

    for row in data.itertuples(index=False):
        if row.type == "Purchase":
            open_positions[row.ticker] = dict(zip(trade_columns, row))
            continue
        if row.type not in ("Sale (Full)", "Sale (Partial)"):
            continue
        # A sale is only usable with a recorded purchase and downloaded prices.
        if row.ticker not in open_positions or row.ticker not in close_prices:
            continue

        position = open_positions[row.ticker]
        try:
            prices = close_prices[row.ticker]
            buy = prices.loc[position["transaction_date"]]
            sell = prices.loc[row.transaction_date]
        except KeyError as e:
            # No price row for one of the dates (for example a non-trading
            # day); the purchase stays open for a later sale.
            print(f"{senator} {row.ticker}: no closing price for {e}")
            continue

        # Build a fresh record per sale. Appending the stored purchase dict
        # itself would let a later sale of the same ticker overwrite rows
        # that were already appended for earlier partial sales.
        trades.append({
            **position,
            "sell_date": row.transaction_date,
            "weight": (row.amount_lower + row.amount_upper) / 2,
            "gain": (sell - buy) / buy,
        })
        # Only a full sale closes the position; partial sales leave it open.
        if row.type == "Sale (Full)":
            del open_positions[row.ticker]

senator_trades = pd.DataFrame(trades)

[*********************103%************************]  938 of 909 completed'2020-02-22'
'2020-02-22'
[**********************105%************************]  956 of 909 completed'2019-01-19'
[**********************106%*************************]  959 of 909 completed

In [16]:
# Persist the matched Senate buy/sell pairs. The default index=True is kept,
# so the row index is written as the first column of the file.
senator_trades.to_csv(path_or_buf="data/cleaned/senator_trades.csv")

## House of Representatives Stocks

In [17]:
# Load the cleaned House transactions and discard the index column that was
# saved into the CSV, then display the frame.
house_trades = (
    pd.read_csv("data/cleaned/all_transactions_house.csv")
    .drop(columns=["Unnamed: 0"])
)
house_trades

Unnamed: 0,transaction_date,owner,ticker,type,amount,representative,district,cap_gains_over_200_usd,transaction_year,transaction_month,transaction_day,amount_lower,amount_upper
0,2021-09-27,joint,BP,purchase,"$1,001 - $15,000",Hon. Virginia Foxx,NC05,False,2021,9,27,1001,15000
1,2021-09-13,joint,XOM,purchase,"$1,001 - $15,000",Hon. Virginia Foxx,NC05,False,2021,9,13,1001,15000
2,2021-09-10,joint,ILPT,purchase,"$15,001 - $50,000",Hon. Virginia Foxx,NC05,False,2021,9,10,15001,50000
3,2021-09-28,joint,PM,purchase,"$15,001 - $50,000",Hon. Virginia Foxx,NC05,False,2021,9,28,15001,50000
4,2021-09-17,self,BLK,sale_partial,"$1,001 - $15,000",Hon. Alan S. Lowenthal,CA47,False,2021,9,17,1001,15000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6778,2020-03-18,self,COST,purchase,"$1,001 - $15,000",Hon. Ed Perlmutter,CO07,False,2020,3,18,1001,15000
6779,2020-04-22,self,FB,sale_full,"$1,001 - $15,000",Hon. Ed Perlmutter,CO07,False,2020,4,22,1001,15000
6780,2020-04-22,self,KMI,sale_full,"$1,001 - $15,000",Hon. Ed Perlmutter,CO07,False,2020,4,22,1001,15000
6781,2020-04-09,--,SWK,sale_partial,"$1,001 - $15,000",Hon. Ed Perlmutter,CO07,False,2020,4,9,1001,15000


In [18]:
# Daily closing prices for every ticker that appears in the House disclosures.
# yfinance treats `end` as exclusive, so the window is extended by one day;
# otherwise trades made on the latest transaction date have no price row.
house_start = house_trades["transaction_date"].min()
house_end = pd.Timestamp(house_trades["transaction_date"].max()) + pd.Timedelta(days=1)
house_stocks = yf.download(" ".join(house_trades["ticker"].unique()),
                           start=house_start,
                           end=house_end.strftime("%Y-%m-%d"))
# Keep only the "Close" column group. Selecting it (rather than dropping a
# hard-coded list of the other groups) does not fail when yfinance omits a
# group such as "Adj Close".
house_stocks = house_stocks[["Close"]]
house_stocks

[*********************100%***********************]  1560 of 1560 completed

160 Failed downloads:
- MXIM: No data found, symbol may be delisted
- BPL: No data found, symbol may be delisted
- MFRM: None
- AMSG: None
- XON: No data found, symbol may be delisted
- MSTY.PA: No data found, symbol may be delisted
- ESV: No data found, symbol may be delisted
- TMK: No data found, symbol may be delisted
- WTR: No data found, symbol may be delisted
- DD-PA: No data found, symbol may be delisted
- CLC: None
- AXLL: None
- CTL: No data found, symbol may be delisted
- QLIK: None
- LB: No data found, symbol may be delisted
- CTAA: No data found, symbol may be delisted
- RTN: No data found, symbol may be delisted
- CRZO: No data found, symbol may be delisted
- LLTC: None
- RDSA: No data found, symbol may be delisted
- CBS: No data found, symbol may be delisted
- ^RGP: None
- EMC: None
- CELG: No data found, symbol may be delisted
- BCR: None
- BUNT-RP: No data found, symbol may be delisted
- 

Unnamed: 0_level_0,Close,Close,Close,Close,Close,Close,Close,Close,Close,Close,Close,Close,Close,Close,Close,Close,Close,Close,Close,Close,Close
Unnamed: 0_level_1,0QZI.IL,35G.SG,3V64.TI,A,AAGIY,AAIGF,AAL,AAN,AAON,AAPL,...,ZBRA,ZEN,ZION,ZIOP,ZM,ZNGA,ZNGA.SW,ZOOM,^MWE,^RGP
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2012-09-13,,,,27.453505,,,,,8.617778,,...,,,,5.45,,,,,,
2012-09-14,,,,28.419170,,,,,9.088889,,...,,,,5.53,,,,,,
2012-09-17,,,,28.347639,,,,,8.871111,,...,,,,5.69,,,,,,
2012-09-18,,,,28.290415,,,,,9.075556,,...,,,,5.77,,,,,,
2012-09-19,,,,28.526466,,,,,9.000000,,...,,,,5.71,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-09-26,,,,,,,,,,,...,,,,,,,,,,
2021-09-27,,41.680000,,,44.799999,11.34,21.469999,28.850000,,145.369995,...,548.580017,121.330002,62.889999,,272.880005,7.56,,,,
2021-09-28,,40.459999,,,44.139999,10.96,21.430000,28.110001,,141.910004,...,531.179993,118.120003,62.740002,,261.890015,7.47,,,,
2021-09-29,,41.419998,,,45.360001,11.50,21.010000,28.490000,,142.830002,...,525.669983,116.269997,63.740002,,257.410004,7.55,,,,


In [19]:
# Pair each purchase with the same representative's later sales of that ticker
# and record the fractional change in closing price between the two dates.
# Each matched sale becomes one row: the purchase's fields plus
#   sell_date - transaction date of the sale
#   weight    - midpoint of the sale's disclosed amount range
#   gain      - (sell close - buy close) / buy close
close_prices = house_stocks["Close"]  # hoisted: one lookup instead of one per row
trade_columns = list(house_trades.columns)

trades = []
for representative in house_trades["representative"].unique():
    data = house_trades[house_trades["representative"] == representative]
    # Stable sort so same-day transactions keep their order from the file.
    data = data.sort_values("transaction_date", ascending=True, kind="mergesort")

    # Latest purchase per ticker; a repeat purchase replaces the earlier one.
    open_positions = {}

    for row in data.itertuples(index=False):
        if row.type == "purchase":
            open_positions[row.ticker] = dict(zip(trade_columns, row))
            continue
        if row.type not in ("sale_full", "sale_partial"):
            continue
        # A sale is only usable with a recorded purchase and downloaded prices.
        if row.ticker not in open_positions or row.ticker not in close_prices:
            continue

        position = open_positions[row.ticker]
        try:
            prices = close_prices[row.ticker]
            buy = prices.loc[position["transaction_date"]]
            sell = prices.loc[row.transaction_date]
        except KeyError as e:
            # No price row for one of the dates (for example a non-trading
            # day); the purchase stays open for a later sale.
            print(f"{representative} {row.ticker}: no closing price for {e}")
            continue

        # Build a fresh record per sale. Appending the stored purchase dict
        # itself would let a later sale of the same ticker overwrite rows
        # that were already appended for earlier partial sales.
        trades.append({
            **position,
            "sell_date": row.transaction_date,
            "weight": (row.amount_lower + row.amount_upper) / 2,
            "gain": (sell - buy) / buy,
        })
        # Only a full sale closes the position; partial sales leave it open.
        if row.type == "sale_full":
            del open_positions[row.ticker]

house_trades = pd.DataFrame(trades)

[************************112%**************************]  1751 of 1560 completed

In [20]:
# Persist the matched House buy/sell pairs. The default index=True is kept,
# so the row index is written as the first column of the file.
house_trades.to_csv(path_or_buf="data/cleaned/house_trades.csv")