In [38]:
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
from dotenv import load_dotenv
import os


In [39]:
load_dotenv()


hostname=os.getenv("hostname")
database=os.getenv("database")
username=os.getenv("username")
pwd=os.getenv("pwd")
port_id=os.getenv("port_id")
host = os.getenv("host")




conn_string = f"postgresql://{username}:{pwd}@{host}:{port_id}/{database}"
db =  create_engine(url=conn_string)

transactions = pd.read_sql("SELECT * FROM transactions", db)

transactions.head()

Unnamed: 0,transaction_date,owner,ticker,asset_description,asset_type,type,amount,comment,senator,ptr_link,disclosure_date,id
0,10/28/2022,Joint,MSFT,Microsoft Corporation - Common Stock Option Ty...,Stock Option,Sale (Full),"$1,001 - $15,000",--,Thomas H Tuberville,https://efdsearch.senate.gov/search/view/ptr/b...,11/10/2022,0
1,10/31/2022,Joint,CLF,Cleveland-Cliffs Inc. Common Stock,Stock,Purchase,"$15,001 - $50,000",--,Thomas H Tuberville,https://efdsearch.senate.gov/search/view/ptr/b...,11/10/2022,1
2,10/28/2022,Joint,MSFT,Microsoft Corporation - Common Stock Option Ty...,Stock Option,Sale (Full),"$1,001 - $15,000",--,Thomas H Tuberville,https://efdsearch.senate.gov/search/view/ptr/b...,11/10/2022,2
3,10/28/2022,Joint,MSFT,Microsoft Corporation - Common Stock Option Ty...,Stock Option,Sale (Full),"$1,001 - $15,000",--,Thomas H Tuberville,https://efdsearch.senate.gov/search/view/ptr/b...,11/10/2022,3
4,10/24/2022,Joint,MSFT,Microsoft Corporation - Common Stock,Stock,Purchase,"$15,001 - $50,000",--,Thomas H Tuberville,https://efdsearch.senate.gov/search/view/ptr/b...,11/10/2022,4


In [40]:
allSenators_transactions = transactions.senator.unique()
allSenators_transactions

array(['Thomas H Tuberville', 'Thomas R Carper', 'Daniel S Sullivan',
       'Rick Scott', 'John Boozman', 'A. Mitchell Mcconnell, Jr.',
       'Susan M Collins', 'William F Hagerty, Iv', 'Gary C Peters',
       'Mark R Warner', 'Shelley M Capito', 'Debra S Fischer',
       'Lindsey Graham', 'John W Hickenlooper', 'Tina Smith',
       'John R Thune', 'Christopher A Coons', 'Ron L Wyden',
       'Patrick J Toomey', 'Jerry Moran,', 'Jacklyn S Rosen', 'Roy Blunt',
       'Cynthia M Lummis', 'Sheldon Whitehouse', 'Roger W Marshall',
       'John Hoeven', 'Rand Paul', 'Angus S King, Jr.',
       'Michael F Bennet', 'Mike Rounds', 'Richard M Burr',
       'Ladda Tammy Duckworth', 'James M Inhofe', 'Pat Roberts',
       'William Cassidy', 'Richard Blumenthal', 'David A Perdue , Jr',
       'Kelly Loeffler', 'Timothy M Kaine', 'Jeanne Shaheen',
       'Ron Johnson', 'Roger F Wicker', 'Dianne Feinstein',
       'Lamar Alexander', 'John N Kennedy', 'Rafael E Cruz',
       'Thomas Udall', 'John F

In [41]:
len(allSenators)

75

In [42]:
senatorInfo = pd.read_csv("./CSV Files/SenatorInfo Initial.csv", 
encoding='utf-8', 
on_bad_lines='skip')
senatorInfo.head()

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6
0,,Senator's Name,State,Party,Class,Office Room*,Phone
1,,"Bennet, Michael F.",Colorado,Democratic,III,SR-261,4-5852
2,,"Blumenthal, Richard",Connecticut,Democratic,III,SH-706,4-2823
3,,"Blunt, Roy",Missouri,Republican,III,SR-260,4-5721
4,,"Boozman, John",Arkansas,Republican,III,SH-141,4-4843


In [43]:
#drop first col 
senatorInfo = senatorInfo.iloc[:, 1:]

newHeader = senatorInfo.iloc[0]
newHeader

Unnamed: 1    Senator's Name
Unnamed: 2             State
Unnamed: 3             Party
Unnamed: 4             Class
Unnamed: 5      Office Room*
Unnamed: 6             Phone
Name: 0, dtype: object

In [44]:
senatorInfo = senatorInfo[1:]
senatorInfo.columns = newHeader
senatorInfo.head()

Unnamed: 0,Senator's Name,State,Party,Class,Office Room*,Phone
1,"Bennet, Michael F.",Colorado,Democratic,III,SR-261,4-5852
2,"Blumenthal, Richard",Connecticut,Democratic,III,SH-706,4-2823
3,"Blunt, Roy",Missouri,Republican,III,SR-260,4-5721
4,"Boozman, John",Arkansas,Republican,III,SH-141,4-4843
5,"Burr, Richard",North Carolina,Republican,III,SR-217,4-3154


In [45]:
senatorInfo.drop(["Office Room*", "Phone"], axis=1, inplace=True)
senatorInfo.rename(columns={"Senator's Name":"Name"}, inplace=True)
senatorInfo.head()

Unnamed: 0,Name,State,Party,Class
1,"Bennet, Michael F.",Colorado,Democratic,III
2,"Blumenthal, Richard",Connecticut,Democratic,III
3,"Blunt, Roy",Missouri,Republican,III
4,"Boozman, John",Arkansas,Republican,III
5,"Burr, Richard",North Carolina,Republican,III


In [46]:
senatorInfo['transactionsMatchName'] = senatorInfo['Name'].apply(lambda row: row in allSenators_transactions)
senatorInfo[senatorInfo['transactionsMatchName']==False]

Unnamed: 0,Name,State,Party,Class,transactionsMatchName
1,"Bennet, Michael F.",Colorado,Democratic,III,False
2,"Blumenthal, Richard",Connecticut,Democratic,III,False
3,"Blunt, Roy",Missouri,Republican,III,False
4,"Boozman, John",Arkansas,Republican,III,False
5,"Burr, Richard",North Carolina,Republican,III,False
...,...,...,...,...,...
96,"Stabenow, Debbie",Michigan,Democratic,I,False
97,"Tester, Jon",Montana,Democratic,I,False
98,"Warren, Elizabeth",Massachusetts,Democratic,I,False
99,"Whitehouse, Sheldon",Rhode Island,Democratic,I,False


None of these names match our transactions table, we need to create a name id for each senator

# Create Senator Id For Matching

In [50]:
idList = [100+i for i in range(len(allSenators_transactions))]
senatorIdMap = dict(zip(allSenators_transactions, idList))
transactions['senatorId'] = transactions['senator'].apply(lambda row: senatorIdMap[row])
transactions.head(2)

Unnamed: 0,transaction_date,owner,ticker,asset_description,asset_type,type,amount,comment,senator,ptr_link,disclosure_date,id,senatorId
0,10/28/2022,Joint,MSFT,Microsoft Corporation - Common Stock Option Ty...,Stock Option,Sale (Full),"$1,001 - $15,000",--,Thomas H Tuberville,https://efdsearch.senate.gov/search/view/ptr/b...,11/10/2022,0,100
1,10/31/2022,Joint,CLF,Cleveland-Cliffs Inc. Common Stock,Stock,Purchase,"$15,001 - $50,000",--,Thomas H Tuberville,https://efdsearch.senate.gov/search/view/ptr/b...,11/10/2022,1,100


In [53]:
# allSenators_transactions
idTable = transactions[['senator', 'senatorId']]
idTable['lastName'] = idTable['senator'].apply(lambda row: row.split(" ")[-1])
idTable.head()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  idTable['lastName'] = idTable['senator'].apply(lambda row: row.split(" ")[-1])


Unnamed: 0,senator,senatorId,lastName
0,Thomas H Tuberville,100,Tuberville
1,Thomas H Tuberville,100,Tuberville
2,Thomas H Tuberville,100,Tuberville
3,Thomas H Tuberville,100,Tuberville
4,Thomas H Tuberville,100,Tuberville


In [60]:
# senatorIdMap[['senator', 'senatorId']]['senatorId'].unique()
idTable = idTable.drop_duplicates()

idTable

Unnamed: 0,senator,senatorId,lastName
0,Thomas H Tuberville,100,Tuberville
61,Thomas R Carper,101,Carper
103,Daniel S Sullivan,102,Sullivan
105,Rick Scott,103,Scott
120,John Boozman,104,Boozman
...,...,...,...
9560,Michael B Enzi,170,Enzi
9563,Rafael Edward (Ted) Cruz,171,Cruz
9613,Elizabeth Warren,172,Warren
9629,Richard C Shelby,173,Shelby


In [66]:
lastNameList = idTable['lastName'].to_list()
senatorInfo['nameFoundInIDTable'] = senatorInfo['Name'].apply(lambda row: row in lastNameList)

In [70]:
senatorInfo.head()

Unnamed: 0,Name,State,Party,Class,transactionsMatchName,nameFoundInIDTable
1,"Bennet, Michael F.",Colorado,Democratic,III,False,False
2,"Blumenthal, Richard",Connecticut,Democratic,III,False,False
3,"Blunt, Roy",Missouri,Republican,III,False,False
4,"Boozman, John",Arkansas,Republican,III,False,False
5,"Burr, Richard",North Carolina,Republican,III,False,False


In [72]:
idTable.to_csv("idTable.csv")

# Handle Differnet Names for the Same Senator

In [74]:
idTable = pd.read_csv("idTable.csv")
idTable

Unnamed: 0.1,Unnamed: 0,senator,senatorId,lastName
0,0,Thomas H Tuberville,100,Tuberville
1,61,Thomas R Carper,101,Carper
2,103,Daniel S Sullivan,102,Sullivan
3,105,Rick Scott,103,Scott
4,120,John Boozman,104,Boozman
...,...,...,...,...
70,9560,Michael B Enzi,170,Enzi
71,9563,Rafael Edward (Ted) Cruz,171,Cruz
72,9613,Elizabeth Warren,172,Warren
73,9629,Richard C Shelby,173,Shelby


In [84]:
idTable= pd.read_csv("./CSV Files/idTable Final.csv")
idTable.head()

Unnamed: 0,row,senator,senatorId,party
0,1,Thomas H Tuberville,100,Republican
1,2,Thomas R Carper,101,Democratic
2,3,Daniel S Sullivan,102,Republican
3,4,Rick Scott,103,Republican
4,5,John Boozman,104,Republican


In [87]:
newIdList = idTable['senatorId'].to_list()
seantorNames = idTable['senator'].to_list()

idMap_new = dict(zip(seantorNames, newIdList))
transactions['senatorId'] = transactions['senator'].apply(lambda row: idMap_new[row])

transactions.head(1)

Unnamed: 0,transaction_date,owner,ticker,asset_description,asset_type,type,amount,comment,senator,ptr_link,disclosure_date,id,senatorId
0,10/28/2022,Joint,MSFT,Microsoft Corporation - Common Stock Option Ty...,Stock Option,Sale (Full),"$1,001 - $15,000",--,Thomas H Tuberville,https://efdsearch.senate.gov/search/view/ptr/b...,11/10/2022,0,100


In [88]:
transactions[transactions['senatorId'].isnull()]

Unnamed: 0,transaction_date,owner,ticker,asset_description,asset_type,type,amount,comment,senator,ptr_link,disclosure_date,id,senatorId


# Post new Transactions table and Senator Info table to DB

In [90]:
def postDataFrameDB(frame , dbTableName):
    try:
        conn_string = f"postgresql://{username}:{pwd}@{host}:{port_id}/{database}"
        db =  create_engine(url=conn_string)
        conn=db.connect()

        frame.to_sql(dbTableName, con=conn, if_exists='replace', index=False)
        print("posted successfully")

    except Exception as err:
        print(err)


postDataFrameDB(transactions , 'transactions') 

posted successfully


In [91]:
postDataFrameDB(idTable , 'senator_info') 

posted successfully
