In [None]:
# %% Import packages
import datetime
import pandas as pd
import numpy as np

In [None]:
# Define parameters
# Width of each aggregation window used by the biweekly summaries (two weeks).
time_delta = datetime.timedelta(weeks=2)
# First day of the first aggregation window.
start_date = datetime.date(2022, 10, 15)

In [None]:
# %% Define functions

def format_date(date_string):
    """Convert a two-line "<day> <month>" / "<year>" date cell to an ISO date string.

    Parameters
    ----------
    date_string : str
        Text whose first line is the day number followed by a month
        abbreviation (for example "15 Oct") and whose second line is the
        four-digit year. Any further lines are ignored.

    Returns
    -------
    str
        The date formatted as "YYYY-MM-DD".

    Raises
    ------
    KeyError
        If the month abbreviation is not recognised.
    IndexError
        If the year line or the month token is missing.
    ValueError
        If the day or year is not an integer, or the date does not exist.
    """
    # Both "Sep" and "Sept" are accepted: the original table only knew "Sept",
    # so the standard three-letter abbreviation raised a KeyError.
    months_encoded = {"Jan": 1, "Feb": 2, "Mar": 3, "Apr": 4,
                      "May": 5, "Jun": 6, "Jul": 7, "Aug": 8,
                      "Sep": 9, "Sept": 9, "Oct": 10, "Nov": 11, "Dec": 12}

    date_list = date_string.split("\n")
    # Split on any run of whitespace so repeated spaces do not yield empty tokens.
    day_and_month = date_list[0].split()

    year = int(date_list[1])
    month = months_encoded[day_and_month[1]]
    day = int(day_and_month[0])
    return str(datetime.date(year=year, month=month, day=day))

def create_date(date_string):
    """Build a ``datetime.date`` from a "YYYY-MM-DD" style string.

    The string is split on "-" and the first three pieces are read as the
    year, month and day (in that order); each piece is converted with ``int``.
    """
    fields = date_string.split("-")
    year, month, day = int(fields[0]), int(fields[1]), int(fields[2])
    return datetime.date(year, month, day)

def calculate_lag(trade, publish):
    """Return the number of days from the trade date to the publication date.

    Both arguments are "YYYY-MM-DD" strings parsed with ``create_date``. The
    result is negative when ``publish`` is earlier than ``trade``.
    """
    # Proleptic Gregorian ordinals differ by exactly the number of days
    # between the two dates.
    trade_ordinal = create_date(trade).toordinal()
    publish_ordinal = create_date(publish).toordinal()
    return publish_ordinal - trade_ordinal

In [None]:
# %% Load data

imported_data = pd.read_csv("capitoltrades.csv")

# One list per output column; the key order fixes the column order of
# processed_data_DF.
processed_data = {column: [] for column in (
    "Politician", "Chamber", "Party", "State", "Owner",
    "Traded", "Published", "Ticker", "Size", "Price")}

for _, row in imported_data.iterrows():
    issuer = row["Traded Issuer"]
    # Keep only issuers carrying a ":US" ticker suffix. An empty CSV cell is
    # read as float NaN, for which the `in` test would raise TypeError, so
    # non-string issuers are skipped as well.
    if not isinstance(issuer, str) or ":US" not in issuer:
        continue

    # Politician cell: name on the first line, affiliation text on the second.
    politician_data = row["Politician"].split("\n")
    affiliation = politician_data[1]
    processed_data["Politician"].append(politician_data[0])
    processed_data["Chamber"].append("House" if "House" in affiliation else "Senate")

    # Previously every non-Democrat was recorded as "Republican", which
    # mislabels independents.
    # NOTE(review): assumes Republicans are spelled "Republican" in the
    # affiliation text -- confirm against the CSV.
    if "Democrat" in affiliation:
        party = "Democrat"
    elif "Republican" in affiliation:
        party = "Republican"
    else:
        party = "Other"
    processed_data["Party"].append(party)

    # The state code is taken as the last two characters of the affiliation.
    processed_data["State"].append(affiliation[-2:])
    processed_data["Owner"].append(row["Owner"])

    # Dates are normalised to ISO "YYYY-MM-DD" strings.
    processed_data["Traded"].append(format_date(row["Traded"]))
    processed_data["Published"].append(format_date(row["Published"]))

    # Trade information: the ticker sits on the issuer's second line.
    processed_data["Ticker"].append(issuer.split("\n")[1].replace(":US", ""))
    processed_data["Size"].append(row["Size"])
    # str() keeps a missing price as the string 'nan' (as before) and also
    # tolerates a Price column that pandas parsed as numeric.
    processed_data["Price"].append(str(row["Price"]).replace("$", ""))

processed_data_DF = pd.DataFrame(processed_data)

In [None]:
# %% Save when sorting by trade date

processed_data_DF_sorted = processed_data_DF.sort_values(by="Traded")
processed_data_DF_sorted.to_csv("congress_by_trade_date.csv", index=False)

start_temp = start_date
end_temp = start_date + time_delta
# Dates are ISO "YYYY-MM-DD" strings, so the string maximum is also the
# latest calendar date.
end_date = create_date(max(max(processed_data_DF_sorted["Traded"]),
                           max(processed_data_DF_sorted["Published"])))

# One list per output column. The unfinished "Size" column was removed: its
# `append()` call had no argument and raised TypeError on the first ticker,
# so this cell could never complete. The columns now match the
# declared-date summary.
# TODO: decide how trade sizes should be aggregated before adding "Size" back.
biweekly_data = {column: [] for column in (
    "start_date", "end_date", "ticker", "lag", "n_trades", "Democrat", "Senate")}

traded = processed_data_DF_sorted["Traded"]

# Only windows ending strictly before end_date are summarised, so trades in
# the final (possibly partial) window are left out, as in the original loop.
# NOTE(review): confirm that skipping the last window is intended.
while end_temp < end_date:
    window_start, window_end = str(start_temp), str(end_temp)

    # Half-open window [start, end): a trade made exactly on a boundary date
    # belongs to the window that starts that day. The previous strict
    # comparisons on both sides dropped those trades from every window.
    in_window = processed_data_DF_sorted[(traded >= window_start) & (traded < window_end)]

    # groupby iterates tickers in sorted order, like np.unique did.
    for tick, ticker_temp in in_window.groupby("Ticker"):
        n_trades = len(ticker_temp)
        all_lags = [calculate_lag(trade_day, publish_day)
                    for trade_day, publish_day
                    in zip(ticker_temp["Traded"], ticker_temp["Published"])]

        biweekly_data["start_date"].append(window_start)
        biweekly_data["end_date"].append(window_end)
        biweekly_data["ticker"].append(tick)
        # Mean disclosure lag in days for this ticker within the window.
        biweekly_data["lag"].append(sum(all_lags) / n_trades)
        biweekly_data["n_trades"].append(n_trades)
        # Share of the window's trades made by Democrats / by Senators.
        biweekly_data["Democrat"].append((ticker_temp["Party"] == "Democrat").sum() / n_trades)
        biweekly_data["Senate"].append((ticker_temp["Chamber"] == "Senate").sum() / n_trades)

    start_temp = end_temp
    end_temp = start_temp + time_delta


pd.DataFrame(biweekly_data).to_csv("congress_by_trade_date.biweekly.csv", index=False)

In [None]:
# %% Save when sorting by declaration date

processed_data_DF_sorted = processed_data_DF.sort_values(by="Published")
processed_data_DF_sorted.to_csv("congress_by_declared_date.csv", index=False)

start_temp = start_date
end_temp = start_date + time_delta
# Dates are ISO "YYYY-MM-DD" strings, so the string maximum is also the
# latest calendar date.
end_date = create_date(max(max(processed_data_DF_sorted["Traded"]),
                           max(processed_data_DF_sorted["Published"])))

# One list per output column; the key order fixes the CSV column order.
biweekly_data = {column: [] for column in (
    "start_date", "end_date", "ticker", "lag", "n_trades", "Democrat", "Senate")}

published = processed_data_DF_sorted["Published"]

# Only windows ending strictly before end_date are summarised, so rows in
# the final (possibly partial) window are left out, as in the original loop.
# NOTE(review): confirm that skipping the last window is intended.
while end_temp < end_date:
    window_start, window_end = str(start_temp), str(end_temp)

    # Half-open window [start, end): a disclosure published exactly on a
    # boundary date belongs to the window that starts that day. The previous
    # strict comparisons on both sides dropped those rows from every window.
    in_window = processed_data_DF_sorted[(published >= window_start) & (published < window_end)]

    # groupby iterates tickers in sorted order, like np.unique did.
    for tick, ticker_temp in in_window.groupby("Ticker"):
        n_trades = len(ticker_temp)
        all_lags = [calculate_lag(trade_day, publish_day)
                    for trade_day, publish_day
                    in zip(ticker_temp["Traded"], ticker_temp["Published"])]

        biweekly_data["start_date"].append(window_start)
        biweekly_data["end_date"].append(window_end)
        biweekly_data["ticker"].append(tick)
        # Mean disclosure lag in days for this ticker within the window.
        biweekly_data["lag"].append(sum(all_lags) / n_trades)
        biweekly_data["n_trades"].append(n_trades)
        # Share of the window's trades made by Democrats / by Senators.
        biweekly_data["Democrat"].append((ticker_temp["Party"] == "Democrat").sum() / n_trades)
        biweekly_data["Senate"].append((ticker_temp["Chamber"] == "Senate").sum() / n_trades)

    start_temp = end_temp
    end_temp = start_temp + time_delta


pd.DataFrame(biweekly_data).to_csv("congress_by_declared_date.biweekly.csv", index=False)