# Variation of Earnings Beat or Earnings Miss Predictor

Nodes: a stock/ticker at a specific point in time (MM/YYYY). Node features: price, EPS, etc.

Edges: there are two kinds -- temporal edges and edges within the same time plane (i.e. between nodes that share the same time, e.g. 04/2021). Within a plane, two stock nodes are linked (an edge is formed) if they are owned by the same investor. In principle, an edge is given a larger weight if multiple investors buy/own both of the connected stocks and/or the purchase amount is large.


In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [5]:
# Load the April 2021 politician trading-activity export and preview its first rows.
# `file_path` and `df` are notebook-level names that later cells rebind.
file_path = './data/politician-insider-trading-activity-apr-2021.csv'
df = pd.read_csv(file_path)
df.head()

Unnamed: 0,Transaction Date,First Name,Last Name,Title,Symbol,Name,Transaction Total,Transaction,Note Text
0,2021-04-30,Diana,Harshbarger,US Congressman,AEE,Ameren Corp,24002.0,Sell,"By Director And Spouse. Traded $3,003 To $45,000"
1,2021-04-30,Kathy,Manning,US Congressman,DIS,Walt Disney Co,8000.0,Sell,"Traded $1,001 to $15,000"
2,2021-04-30,Kathy,Manning,US Congressman,V,Visa Inc,8000.0,Sell,"Traded $1,001 to $15,000"
3,2021-04-30,Kathy,Manning,US Congressman,VTRS,Viatris Inc,8000.0,Sell,"Traded $1,001 to $15,000"
4,2021-04-30,Kathy,Manning,US Congressman,TJX,TJX Companies,8000.0,Sell,"Traded $1,001 to $15,000"


In [16]:
# Lower-case month abbreviations and year strings, in calendar order.
# They are interpolated into the per-month CSV file names
# (politician-insider-trading-activity-{month}-{year}.csv).
MONTHS = 'jan feb mar apr may jun jul aug sep oct nov dec'.split()
YEARS = [str(year_number) for year_number in (2021, 2022)]

In [19]:
# Sanity-check every monthly export: read each file in chronological order
# (all months of each year in YEARS) and print a preview of its first rows.
# Iterates the lists directly instead of indexing through range(len(...)).
# Note: `file_path` and `df` are rebound on every iteration, so after the loop
# they refer to the last (year, month) combination that was loaded.
for year in YEARS:
    for month in MONTHS:
        file_path = f'./data/politician-insider-trading-activity-{month}-{year}.csv'
        df = pd.read_csv(file_path)
        print(year, month)
        # print() is needed here: only a cell's last bare expression is auto-displayed.
        print(df.head())

2021 jan
  Transaction Date First Name    Last Name           Title Symbol  \
0       2021-01-29      Diana  Harshbarger  US Congressman   TMUS   
1       2021-01-29     Thomas       Suozzi  US Congressman     BA   
2       2021-01-29      Marie       Newman  US Congressman   TWTR   
3       2021-01-29      Marie       Newman  US Congressman     BA   
4       2021-01-29      Marie       Newman  US Congressman   MRNA   

             Name  Transaction Total Transaction  \
0     T-Mobile US             8000.0         Buy   
1  Boeing Company            32500.0        Sell   
2     Twitter Inc            16001.0        Sell   
3  Boeing Company            32500.0        Sell   
4     Moderna Inc            81002.0        Sell   

                                 Note Text  
0                Traded $1,001 to $15,000   
1               Traded $15,001 to $50,000   
2    By Spouse. Traded $2,002 to $30,000 .  
3    By Spouse. Traded $15,001 To $50,000   
4  By Spouse. Traded $32,004 to $130,0

In [7]:
# Company lists
# NOTE(review): every symbol below is lowercase, while the Symbol column of the trading
# CSVs is uppercase (e.g. 'AAPL') -- normalize case before comparing against that column.
# Presumably the Dow Jones Industrial Average constituents -- TODO confirm membership/date.
dow_list = ['axp', 'amgn', 'aapl', 'ba', 'cat', 'cvx', 'csco', 'ko', 'dow', 'gs', 'hd', 'hon', 'ibm', 'intc', 'jnj', 'jpm', 'mcd', 'mmm', 'mrk', 'msft', 'nke', 'pg', 'crm', 'trv', 'utx', 'vz', 'v', 'wba', 'wmt', 'dis']
# Ticker symbols intended to represent the S&P 500 companies.
# NOTE(review): this list holds far fewer than 500 symbols, 'crm' appears twice, and
# entries such as 'duke' and 'microsoft' look like company names rather than tickers -- TODO verify.
sp500_list = ['a', 'aapl', 'abbv', 'abt', 'acn', 'adbe', 'aig', 'all', 'amgn', 'amzn', 'axp', 'ba', 'bac', 'biib', 'bk', 'bkng', 'blk', 'bmy', 'brk.b', 'bsx', 'cat', 'chtr', 'cl', 'cmcsa', 'cof', 'cop', 'cost', 'crm', 'csco', 'cvx', 'dhr', 'dis', 'dow', 'duke', 'emr', 'exc', 'f', 'fb', 'fdx', 'gd', 'ge', 'gild', 'gm', 'goog', 'googl', 'gs', 'hd', 'hon', 'ibm', 'intc', 'jnj', 'jpm', 'kmi', 'ko', 'lly', 'lmt', 'low', 'ma', 'mcd', 'mdlz', 'met', 'microsoft', 'mmm', 'mo', 'mrk', 'ms', 'msft', 'nflx', 'nke', 'nvda', 'orcl', 'pep', 'pfe', 'pg', 'pm', 'pypl', 'qcom', 'crm', 'sbux', 'slb', 'so', 'spg', 't', 'tgt', 'tmo', 'tpr', 'trv', 'tsla', 'txn', 'unh', 'unp', 'ups', 'usb', 'v', 'vz', 'wba', 'wfc', 'wmt', 'xom']
# Ticker symbols intended to represent the NASDAQ 100 companies.
# NOTE(review): 'vrtx' appears twice, and entries such as 'liberty' and 'sirius' look like
# company names rather than tickers -- TODO verify.
nasdaq100_list = ['aapl', 'adbe', 'adi', 'adp', 'adsk', 'alxn', 'amat', 'amd', 'amgn', 'amzn', 'asml', 'atvi', 'avgo', 'bidu', 'biib', 'bkng', 'bmrn', 'bngo', 'cdns', 'celg', 'cern', 'chkp', 'chtr', 'cmcsa', 'cost', 'csco', 'csct', 'ctas', 'ctrp', 'ctsh', 'ctxs', 'dltr', 'ea', 'ebay', 'esrx', 'fast', 'fb', 'fisv', 'fox', 'foxa', 'gild', 'goog', 'googl', 'has', 'hsic', 'idxx', 'ilmn', 'incy', 'intc', 'intu', 'isrg', 'jd', 'khc', 'klac', 'lbtya', 'lbtym', 'liberty', 'lrcx', 'mar', 'mat', 'mchp', 'mdlz', 'meli', 'mnst', 'msft', 'mu', 'mxim', 'myl', 'nclh', 'nflx', 'ntap', 'ntes', 'nvda', 'nxpi', 'orly', 'payx', 'pcar', 'pypl', 'qcom', 'regn', 'rost', 'sbux', 'sirius', 'swks', 'symc', 'tmus', 'trip', 'tsco', 'tsem', 'txn', 'ulta', 'vrtx', 'vrsn', 'vrtx', 'wdc', 'wday', 'wltw', 'wynn', 'xlnx', 'xray', 'zm']

In [24]:
# All politician transactions reported for January 2023.
# Nothing is filtered here: the frame contains both Buy and Sell rows.
# (Plain string literal: the path has no placeholders, so no f-string is needed.)
file_path = './data/politician-insider-trading-activity-jan-2023.csv'
df = pd.read_csv(file_path)
df.head()

Unnamed: 0,Transaction Date,First Name,Last Name,Title,Symbol,Name,Transaction Total,Transaction,Note Text
0,2023-01-31,Daniel,Goldman,US Congressman,ZTS,Zoetis Inc Cl,8000.0,Buy,"Traded $1,001 to $15,000 ."
1,2023-01-31,Daniel,Goldman,US Congressman,WPM,Wheaton Precio,8000.0,Sell,"Traded $1,001 to $15,000 ."
2,2023-01-31,Daniel,Goldman,US Congressman,VTRS,Viatris Inc,8000.0,Buy,"Traded $1,001 to $15,000 ."
3,2023-01-31,Daniel,Goldman,US Congressman,VZ,Verizon Commun,8000.0,Buy,"Traded $1,001 to $15,000 ."
4,2023-01-31,Daniel,Goldman,US Congressman,VFC,V.F. Corp,75000.0,Sell,"Traded $50,001 to $100,000 ."


In [33]:
def get_edge_data(company_symbols, file_path='./data/politician-insider-trading-activity-jan-2023.csv'):
    """Print every trade in a trading-activity CSV whose ticker is in ``company_symbols``.

    One line is printed per matching row, in file order:
    ``<first name> <last name> <ticker> <Buy|Sell> <amount>``.

    The rows are read positionally; this assumes the raw column order
    Transaction Date, First Name, Last Name, Title, Symbol, Name,
    Transaction Total, Transaction, Note Text -- TODO confirm for every export.

    Args:
        company_symbols: Container of ticker symbols to keep. Matching is exact
            and case-sensitive against the file's Symbol column.
        file_path: CSV file to scan. Defaults to the January 2023 export.

    Returns:
        None. The function only prints.

    TODO: build the actual edges (link two stocks when they are traded/owned by
    the same congressman/senator). Still undecided whether the result should be a
    list of [company idx, company idx] pairs or a
    len(company_symbols) x len(company_symbols) adjacency matrix.
    """
    import csv  # local import so this cell does not depend on the notebook's import cells

    # Highest column index read below (the Transaction column).
    last_needed_col = 7

    # newline='' lets the csv module handle line endings and quoted newlines itself.
    with open(file_path, newline='') as f:
        # csv.reader honours quoting, so a quoted field that contains a comma
        # (e.g. a company name or the note text) cannot shift the columns the
        # way a plain str.split(',') does.
        for row in csv.reader(f):
            # Skip blank or truncated rows explicitly instead of hiding every
            # possible error behind a bare except.
            if len(row) <= last_needed_col:
                continue
            first_name = row[1]
            last_name = row[2]
            ticker = row[4]
            amt = row[6]
            action = row[7]  # Buy or Sell
            # The header row is dropped naturally unless 'Symbol' is itself requested.
            if ticker in company_symbols:
                print(first_name + ' ' + last_name, ticker, action, amt)

In [34]:
# Tickers of interest, written in upper case like the Symbol column of the trading CSV.
company_symbols = ['AAPL', 'META', 'TSLA', 'GOOGL', 'NVDA', 'UBER', 'ABNB', 'JNJ', 'F']
# Prints one line per matching trade (name, ticker, Buy/Sell, amount).
get_edge_data(company_symbols)

Daniel Goldman TSLA Sell 175000
Daniel Goldman JNJ Sell 175000
Daniel Goldman F Buy 32500
Daniel Goldman AAPL Sell 32500
Daniel Goldman GOOGL Buy 75000
Daniel Goldman ABNB Buy 32500
Joshua Gottheimer META Buy 8000
Kevin Hern JNJ Sell 8000
Joshua Gottheimer TSLA Buy 8000
Joshua Gottheimer META Buy 8000
Joshua Gottheimer AAPL Sell 8000
Thomas Kean JNJ Sell 32500
Diana Harshbarger META Buy 32002
Rohit Khanna F Buy 8000
Joshua Gottheimer AAPL Sell 8000
William Keating ABNB Buy 8000
Joshua Gottheimer AAPL Sell 8000
Rohit Khanna F Sell 8000
Rohit Khanna TSLA Sell 8000
Rohit Khanna META Sell 8000
Joshua Gottheimer JNJ Sell 32500
Joshua Gottheimer AAPL Buy 8000
Rohit Khanna AAPL Buy 32500
Rohit Khanna GOOGL Buy 48502
Rohit Khanna ABNB Sell 8000
Rohit Khanna META Sell 32500
Rohit Khanna AAPL Sell 75000
Rohit Khanna NVDA Buy 8000
Rohit Khanna TSLA Buy 8000


In [None]:
def get_edge_data(qtr_indices):
    """Collect the directed [source idx, target idx] pairs whose edge weight passes the threshold.

    Reads three notebook-level names that must exist before this is called:
    ``company_symbols`` (sequence of ticker strings), ``edge_grp`` (mapping from
    an edge label ``'<TICKER>_<TICKER>'`` to an indexable whose first element is
    the edge weight) and ``edge_similarity_threshold``.

    NOTE: this notebook already defines a function named ``get_edge_data`` in an
    earlier cell; running this cell replaces that definition.

    Args:
        qtr_indices: Currently unused -- TODO confirm what it is meant to select.

    Returns:
        A list of ``[cidx, eidx]`` index pairs into ``company_symbols``. A pair is
        listed once when the weight stored under either label ordering is
        strictly greater than ``edge_similarity_threshold``.
    """
    edges_buf = []
    for cidx, ticker in enumerate(company_symbols):
        for eidx, other in enumerate(company_symbols):
            # The ordering of the edge labels is unpredictable, so look the pair
            # up in both directions -- but record the edge only once, even if
            # both labels exist (or both orderings are the same self-pair label).
            labels = (ticker + '_' + other, other + '_' + ticker)
            if any(
                label in edge_grp.keys() and edge_grp[label][0] > edge_similarity_threshold
                for label in labels
            ):
                edges_buf.append([cidx, eidx])
    # Hand the result back to the caller; previously the list was built and discarded.
    return edges_buf

In [12]:
import torch
import torch.nn as nn
import torch_geometric.nn as gnn
import torch_geometric.data as gdata
import numpy as np
import h5py
import os
import matplotlib.pyplot as plt
import seaborn as sns
import requests
import datetime as dt
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import time
from tqdm.notebook import tqdm
from sklearn.metrics import confusion_matrix, classification_report
import networkx as nx
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

  from .autonotebook import tqdm as notebook_tqdm
