In [1]:
from contrans import contrans
import numpy as np
import pandas as pd
from dotenv import load_dotenv
import os
import json
import requests
import psycopg
import sqlite3
from plotly import express as px
from sqlalchemy import create_engine
# Load variables from a local .env file (if one exists) into the process environment
# before reading them. Without this call os.getenv() only sees variables that were
# already exported in the shell that started the kernel. `load_dotenv` is the name
# imported above; the bare module name `dotenv` is not in scope.
load_dotenv()
congresskey = os.getenv('congresskey')
newskey = os.getenv('newskey')
postgres_password = os.getenv('POSTGRES_PASSWORD')

In [2]:
# Instantiate the project helper; the cells below call its database and API methods through `ct`.
ct = contrans()

In [4]:
# Open the Postgres connection using the password read from the environment.
# `engine` is what the later pd.read_sql* cells pass as `con=`.
server, engine = ct.connect_to_postgres(postgres_password)

In [6]:
# Connect to the MongoDB instance on localhost. The returned object is used below like a
# collection: count_documents() is called on it and it is passed to ct.query_mongo().
bills = ct.connect_to_mongo(host= 'localhost')


In [7]:
# An empty filter matches every document, so this is the total number of stored documents.
bills.count_documents({})

16968

In [14]:
# Fetch every document (empty filter) but keep only the sponsor and the bill text,
# leaving out Mongo's _id field.
# NOTE(review): the saved output shows only a bill_text column — confirm that
# 'bill.sponsor' is the right field path.
bill_fields = {'bill.sponsor': 1, 'bill_text': 1, '_id': 0}
billdf = ct.query_mongo(bills, {}, bill_fields)

In [15]:
# Display the query result (pandas truncates the middle rows of a long frame).
billdf

Unnamed: 0,bill_text
0,\n[Congressional Bills 118th Congress]\n[From ...
1,\n[Congressional Bills 118th Congress]\n[From ...
2,\n[Congressional Bills 118th Congress]\n[From ...
3,\n[Congressional Bills 118th Congress]\n[From ...
4,\n[Congressional Bills 118th Congress]\n[From ...
...,...
16963,\n[Congressional Bills 118th Congress]\n[From ...
16964,\n[Congressional Bills 118th Congress]\n[From ...
16965,\n[Congressional Bills 118th Congress]\n[From ...
16966,\n[Congressional Bills 118th Congress]\n[From ...


In [None]:
# Number of bill texts per sponsor. The column has to be selected with square brackets:
# a GroupBy object is not callable, so `groupby(...)('bill_text')` raised a TypeError.
# NOTE(review): .count() yields integers, while a later cell vectorizes a 'bill_text'
# column as text — confirm whether the texts should be joined per sponsor instead.
billdf = billdf.groupby(['bill.sponsor'])['bill_text'].count()

In [None]:
# Make the sponsor identifier the index; a later cell reads billdf.index[t] as the sponsor id.
# NOTE(review): this needs a 'sponsor' column, but the cells above only reference
# 'bill.sponsor' — confirm the column name.
billdf = billdf.set_index('sponsor')

In [None]:
from sklearn.feature_extraction.text import TfidfTransformer, TfidfVectorizer

# Build TF-IDF weights over the bill texts:
#   stop_words='english' drops common English function words,
#   max_df=.8 ignores terms that occur in more than 80% of the documents,
#   ngram_range=(1, 3) scores single words, bigrams and trigrams.
tfIdfVectorizer = TfidfVectorizer(stop_words='english',
                                  max_df=.8,
                                  ngram_range=(1, 3))
# The frame holding the texts is `billdf` (there is no `bill_df` in this notebook).
# Rows of the resulting sparse matrix follow the row order of billdf.
tfIdf = tfIdfVectorizer.fit_transform(billdf['bill_text'])

In [None]:
# For each row of the TF-IDF matrix keep its 10 highest-weighted terms, tag them with the
# matching billdf index label, and stack everything into one long table.
feature_names = tfIdfVectorizer.get_feature_names_out()  # identical for every row; fetch once
n_rows = tfIdf.shape[0]
top_terms = []
for t in range(n_rows):
    print(f'Now working on {t} of {n_rows}', end="\r")
    df = pd.DataFrame(tfIdf[t].T.todense(), index=feature_names, columns=["TF-IDF"])
    df = df.sort_values('TF-IDF', ascending=False).reset_index().head(10)
    df = df.rename({'index':'word', 'TF-IDF':'tf_idf'}, axis=1)
    df['sponsor_id'] = billdf.index[t]
    top_terms.append(df)
# Concatenate once at the end: concatenating inside the loop re-copies every earlier
# row on each pass. An empty matrix still yields an empty DataFrame, as before.
charwords = pd.concat(top_terms, ignore_index=True) if top_terms else pd.DataFrame()

In [None]:
# Presumably draws the ideology plot for the member whose bioguide id is 'N000188'
# (the same id is looked up in the members table further down) — confirm in contrans.
ct.plot_ideology('N000188')

In [None]:
# Pull the per-member ideology fields from the members table.
# `self` only exists inside the contrans class; in the notebook the instance is `ct`,
# and the password is passed explicitly as in the earlier connection cell.
server, engine = ct.connect_to_postgres(postgres_password)
myquery = '''
SELECT bioguideid, district, name, partyname, state, nominate_dim1
FROM members
'''
ideo = pd.read_sql_query(myquery, con=engine)

In [None]:
# Bioguide id of the member to look up.
b='N000188'

# One-row profile for that member; the age is computed relative to the fixed year 2024.
# NOTE(review): the id is interpolated straight into the SQL text — fine for this
# hard-coded constant, but switch to bound parameters if `b` ever comes from user input.
# NOTE(review): Postgres folds unquoted aliases to lower case, so the columns
# presumably come back as name/party/state/district/age — confirm.
myquery = f'''
SELECT name AS Name,
        partyname AS Party,
        state AS State,
        district AS District,
        CAST((2024 - born) AS INT) AS Age
    FROM members
    WHERE bioguideid='{b}'
'''
members = pd.read_sql_query(myquery, con=engine)

In [None]:
# Transpose so the member's fields are listed down the rows.
members.T

In [None]:
# Build a display label of the form "First Last (P, ST-district)".
# NOTE(review): the `members` query in this notebook does not select firstname, lastname
# or partyletter, and `members` is reassigned further down — confirm which frame this
# cell was meant to run against.
members['firstname'] + ' '+ members['lastname'] + ' (' + members['partyletter'] + ', ' + members['state'] + '-' + members['district'] + ')'

In [None]:
# Load the ideology fields for every row of the members table into `ideo`, using the
# existing `engine`. The SELECT is the same one used in the earlier ideology cell.
myquery = '''
SELECT bioguideid, district, name, partyname, state, nominate_dim1
FROM members
'''
ideo= pd.read_sql(myquery, con=engine)

In [None]:
# Interactive Plotly histogram of the nominate_dim1 column (nbins=50).
fig = px.histogram(ideo, x="nominate_dim1", nbins=50, title='Distribution of Nominate Dim 1')
fig.show()


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.cm as cm
import numpy as np

# Create a figure and axis
fig, ax = plt.subplots(figsize=(8, 6))

# 'coolwarm' runs blue -> red, so negative scores are blue and positive scores are red
# (the reversed 'coolwarm_r' did the opposite). plt.get_cmap is used because
# cm.get_cmap is no longer available in recent matplotlib releases.
cmap = plt.get_cmap('coolwarm')
norm = plt.Normalize(vmin=-1, vmax=1)

# Draw the density curve of the "nominate_dim1" value (missing scores dropped)
scores = ideo['nominate_dim1'].dropna()
sns.kdeplot(scores, ax=ax, color='black', linewidth=1)

# A one-dimensional kdeplot has no colour mapping or colorbar of its own, so shade the
# area under the curve slice by slice, colouring each slice by its x position.
xs, ys = ax.lines[0].get_data()
for x0, x1, y0, y1 in zip(xs[:-1], xs[1:], ys[:-1], ys[1:]):
    ax.fill_between([x0, x1], [y0, y1], color=cmap(norm((x0 + x1) / 2)), linewidth=0)

# Build the colorbar from the same colormap and normalisation used for the shading
sm = cm.ScalarMappable(norm=norm, cmap=cmap)
sm.set_array([])  # older matplotlib versions require an array before drawing a colorbar
cbar = fig.colorbar(sm, ax=ax, label='Nominate Dim 1')
cbar.set_ticks([-1, 0, 1])
cbar.set_ticklabels(['Negative', 'Zero', 'Positive'])

# Set the title and labels
ax.set_title('Distribution of Nominate Dim 1')
ax.set_xlabel('Nominate Dim 1')
ax.set_ylabel('Density')

# Show the plot
plt.show()


In [None]:
# Total number of documents in the bills collection (an empty filter matches everything).
# NOTE(review): `mongo_bills` is only assigned in a cell further down
# (mongo_contrans['bills']); on a fresh kernel that cell has to run first.
mongo_bills.count_documents({})

In [None]:
# Query with an empty filter and an empty projection, i.e. no restriction on rows or fields.
ct.query_mongo(mongo_bills, {},{})

In [None]:
# Every document, keeping only bill_text (Mongo's _id is excluded).
total_data = ct.query_mongo(mongo_bills, {},{'bill_text':1,
                                 '_id':0})

In [None]:
# Titles of the bills whose bill.cosponsors.count is greater than 10 ($gt).
ct.query_mongo(mongo_bills, {'bill.cosponsors.count':{'$gt':10}},{'bill.title':1,'_id':0})

In [None]:
# Presumably a text search over the bill_text field for the given phrase, returning the
# title and text of each match — confirm against contrans.query_mongo_searchengine.
# NOTE(review): 'historicql' looks like a typo for 'historical' unless the misspelling
# is a deliberate test of fuzzy matching.
ct.query_mongo_searchengine(mongo_bills, 'bill_text','historicql ships', columns={'bill.title':1,
                                                                                  'bill_text':1,
                                                                                  '_id':0})

In [None]:
#creates a new empty contrans database
# Rebinds `engine` for the table-building cells below.
# NOTE(review): create_contrans=True presumably replaces any existing contrans
# database — confirm before re-running. The password here is read from
# ct.POSTGRES_PASSWORD, not from the notebook-level postgres_password.
dbserver, engine = ct.connect_to_postgres(ct.POSTGRES_PASSWORD, create_contrans=True)

In [None]:
members = ct.get_bioguideIDs() # pulls members data from the Congress API
members = ct.make_cand_table(members) # joins the contributions ID with the Congress API data
terms, members = ct.terms_df(members) # separates the terms (non-atomic) data from members
ideology = ct.get_ideology() # gets ideology data from Voteview
ct.make_members_df(members,ideology, engine=engine) # joins members and ideology, uploads to Postgres

In [None]:
# Hand the terms data split off by ct.terms_df() to the helper, together with the engine
# (presumably uploads a terms table to Postgres — confirm in contrans).
ct.make_terms_df(terms, engine=engine)

In [None]:
# Fetch the votes data, then pass it with the engine to the helper
# (presumably uploads a votes table to Postgres — confirm in contrans).
votes = ct.get_votes()
ct.make_votes_df(votes, engine)

In [None]:
import pymongo

# Root credentials for the local MongoDB server are read from the environment.
MONGO_INITDB_ROOT_USERNAME = os.getenv('MONGO_INITDB_ROOT_USERNAME')
MONGO_INITDB_ROOT_PASSWORD = os.getenv('MONGO_INITDB_ROOT_PASSWORD')
# Pass the credentials as keyword arguments instead of formatting them into the URI:
# inside a URI, characters such as '@', ':' or '/' in a username or password must be
# percent-encoded, otherwise the connection string is rejected or misparsed.
myclient = pymongo.MongoClient("mongodb://localhost:27017/",
                               username=MONGO_INITDB_ROOT_USERNAME,
                               password=MONGO_INITDB_ROOT_PASSWORD)

In [None]:
# Not the last expression in the cell, so the returned list of names is not displayed.
myclient.list_database_names()

# myclient is the connection to the Mongo server

# get a handle to the 'contrans' database
mongo_contrans= myclient['contrans']

# get a handle to the 'bills' collection in that database, where the records will go

mongo_bills = mongo_contrans['bills']

In [None]:
# Fetch the sponsored-legislation list for the first id in `bioguideids`, then pull the
# bill data behind each entry's 'url'.
# NOTE(review): `bioguideids` is not assigned in any visible cell — confirm where it comes from.
bill_list = ct.get_sponsoredlegislation(bioguideids[0])
# Single-bill example; `onebill` is not used by the visible cells.
onebill = ct.get_billdata(bill_list[0]['url'])

allbills = [ct.get_billdata(x['url']) for x in bill_list]
allbills

In [None]:
# Display the fetched bill records again.
allbills

In [None]:
# Write all fetched bill records into the bills collection in one call.
# NOTE(review): re-running this cell attempts to insert the same records again —
# confirm whether a guard against duplicates is needed.
mongo_bills.insert_many(allbills)

In [None]:
# List the databases on the Mongo server after the insert.
myclient.list_database_names()

In [None]:
# Display the current Postgres engine object.
engine