In [1]:
%pip install boto3

Collecting boto3
  Downloading boto3-1.34.23-py3-none-any.whl (139 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.3/139.3 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting s3transfer<0.11.0,>=0.10.0
  Downloading s3transfer-0.10.0-py3-none-any.whl (82 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m82.1/82.1 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting botocore<1.35.0,>=1.34.23
  Downloading botocore-1.34.23-py3-none-any.whl (11.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.9/11.9 MB[0m [31m21.7 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Installing collected packages: botocore, s3transfer, boto3
Successfully installed boto3-1.34.23 botocore-1.34.23 s3transfer-0.10.0
Note: you may need to restart the kernel to use updated packages.


In [4]:
import boto3
from botocore.exceptions import NoCredentialsError

def upload_to_s3(bucket_name, file_name, object_name=None):
    """
    Upload a local file to an S3 bucket.

    :param bucket_name: Bucket to upload to
    :param file_name: Path of the local file to upload
    :param object_name: S3 object key. If not specified then file_name is used
    :return: The ``s3://<bucket>/<key>`` URI of the uploaded object on success,
        or False when the local file does not exist or no AWS credentials
        are available
    """
    # If S3 object_name was not specified, use file_name
    if object_name is None:
        object_name = file_name

    s3_client = boto3.client('s3')
    try:
        # upload_file() has no useful return value, so nothing is captured here.
        s3_client.upload_file(file_name, bucket_name, object_name)
    except FileNotFoundError:
        print("The file was not found")
        return False
    except NoCredentialsError:
        print("Credentials not available")
        return False

    return f"s3://{bucket_name}/{object_name}"


In [6]:
# Upload the local PDF under the key "bill_text_2"; the returned value is the cell output.
upload_to_s3(
    bucket_name="ddp-bills",
    file_name="bill_text.pdf",
    object_name="bill_text_2",
)

's3://ddp-bills/bill_text_2'

In [11]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

def download_pdf(pdf_url, local_path="bill_text.pdf", timeout=30):
    """
    Download a PDF over HTTP and save it to a local file.

    :param pdf_url: Absolute URL of the PDF to fetch.
    :param local_path: Destination path; an existing file is overwritten.
    :param timeout: Seconds to wait for the server before giving up. Without
        a timeout, requests can block indefinitely on an unresponsive host.
    :return: local_path, once the file has been written.
    :raises Exception: If the server answers with any status other than 200.
    """
    response = requests.get(pdf_url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"Failed to download PDF from {pdf_url}")

    with open(local_path, 'wb') as file:
        file.write(response.content)
    return local_path

def fetch_bill_details(bill_page_url):
    """
    Fetches details of a bill from the Florida Senate Bill page and downloads its PDF.

    Any element that cannot be found on the page leaves its entry as an empty
    string instead of raising.

    :param bill_page_url: URL of the specific bill page (absolute, or relative
        to https://www.flsenate.gov).
    :return: A dictionary with keys "title", "description" and "pdf_path"
        (local path of the downloaded PDF). All values are empty strings when
        the page does not return HTTP 200.
    """
    base_url = 'https://www.flsenate.gov'
    # A timeout keeps the call from hanging forever on an unresponsive server.
    response = requests.get(urljoin(base_url, bill_page_url), timeout=30)

    bill_details = {
        "title": "",
        "description": "",
        "pdf_path": "",  # local path of the downloaded PDF, not its URL
    }

    if response.status_code != 200:
        return bill_details

    soup = BeautifulSoup(response.content, 'html.parser')

    # Extract the bill title: the first <h2> after the prev/next navigation div.
    # The div may be absent (layout change, error page), so guard before chaining.
    nav_div = soup.find('div', id='prevNextBillNav')
    bill_title_tag = nav_div.find_next('h2') if nav_div is not None else None
    if bill_title_tag:
        bill_details["title"] = bill_title_tag.get_text(strip=True)

    # Extract the bill description
    bill_description_tag = soup.find('p', class_='width80')
    if bill_description_tag:
        bill_details["description"] = bill_description_tag.get_text(strip=True)

    # Extract the bill PDF link and download it (skip anchors without an href)
    bill_pdf_link = soup.find('a', class_='lnk_BillTextPDF')
    pdf_href = bill_pdf_link.get('href') if bill_pdf_link is not None else None
    if pdf_href:
        pdf_url = urljoin(base_url, pdf_href)
        bill_details["pdf_path"] = download_pdf(pdf_url)

    return bill_details

# Scrape one bill page; the bill PDF is downloaded as a side effect.
bill_details = fetch_bill_details(
    "https://www.flsenate.gov/Session/Bill/2023/23/ByCategory/?Tab=BillText"
)


In [14]:
import json
import boto3

def upload_to_s3(bucket_name, bill_details):
    """
    Uploads bill details and the associated PDF to an S3 bucket.

    The PDF is stored under ``pdfs/<file name>`` and the details under
    ``bill_details/<file stem>.json``. The input dictionary is not modified;
    the uploaded JSON is a copy whose "pdf_path" points at the S3 object.

    :param bucket_name: The name of the S3 bucket.
    :param bill_details: A dictionary containing the bill's details, including the local PDF path.
    :return: A dictionary with "pdf_s3_path" and "json_s3_path" (both ``s3://`` URIs).
    :raises FileNotFoundError: If "pdf_path" is empty or the file does not exist.
    """
    import os  # local import: this cell only imports json and boto3

    pdf_path = bill_details["pdf_path"]
    if not pdf_path:
        # An empty path means no PDF was downloaded for this bill; fail with a clear message.
        raise FileNotFoundError("bill_details['pdf_path'] is empty; no PDF to upload")

    s3_client = boto3.client('s3')

    # Upload the PDF file
    pdf_name = os.path.basename(pdf_path)
    pdf_key = f"pdfs/{pdf_name}"
    with open(pdf_path, 'rb') as pdf_file:
        s3_client.upload_fileobj(pdf_file, bucket_name, pdf_key)

    # Prepare and upload the bill details as a JSON file
    json_details = bill_details.copy()
    json_details["pdf_path"] = f"s3://{bucket_name}/{pdf_key}"  # Update the PDF path to the S3 location
    # Swap only the final extension; str.replace would rewrite every ".pdf" in the name.
    json_key = f"bill_details/{os.path.splitext(pdf_name)[0]}.json"
    s3_client.put_object(Body=json.dumps(json_details), Bucket=bucket_name, Key=json_key)

    return {
        "pdf_s3_path": json_details["pdf_path"],
        "json_s3_path": f"s3://{bucket_name}/{json_key}"
    }

# Example usage: scrape a single bill page, then push its PDF and JSON details to S3.
bill_page_url = "https://www.flsenate.gov/Session/Bill/2023/23/ByCategory/?Tab=BillText"
bucket_name = 'ddp-bills'

bill_details = fetch_bill_details(bill_page_url)
s3_paths = upload_to_s3(bucket_name=bucket_name, bill_details=bill_details)

# Show where both objects ended up.
print(s3_paths)


{'pdf_s3_path': 's3://ddp-bills/pdfs/bill_text.pdf', 'json_s3_path': 's3://ddp-bills/bill_details/bill_text.json'}


In [15]:
import json
import boto3

def upload_to_s3(bucket_name, bill_details):
    """
    Uploads bill details and the associated PDF to an S3 bucket with public read access.

    The PDF is stored under ``pdfs/<file name>`` and the details under
    ``bill_details/<file stem>.json``; both objects are written with the
    ``public-read`` canned ACL. The input dictionary is not modified.

    :param bucket_name: The name of the S3 bucket.
    :param bill_details: A dictionary containing the bill's details, including the local PDF path.
    :return: A dictionary with "pdf_s3_path" and "json_s3_path" (both ``s3://`` URIs).
    :raises FileNotFoundError: If "pdf_path" is empty or the file does not exist.
    """
    import os  # local import: this cell only imports json and boto3

    pdf_path = bill_details["pdf_path"]
    if not pdf_path:
        # An empty path means no PDF was downloaded for this bill; fail with a clear message.
        raise FileNotFoundError("bill_details['pdf_path'] is empty; no PDF to upload")

    s3_client = boto3.client('s3')

    # Upload the PDF file with public read access
    # NOTE(review): presumably requires ACLs to be enabled on the bucket - confirm.
    pdf_name = os.path.basename(pdf_path)
    pdf_key = f"pdfs/{pdf_name}"
    with open(pdf_path, 'rb') as pdf_file:
        s3_client.upload_fileobj(
            pdf_file,
            bucket_name,
            pdf_key,
            ExtraArgs={'ACL': 'public-read'}  # Set ACL to 'public-read'
        )

    # Prepare and upload the bill details as a JSON file with public read access
    json_details = bill_details.copy()
    json_details["pdf_path"] = f"s3://{bucket_name}/{pdf_key}"  # Update the PDF path to the S3 location
    # Swap only the final extension; str.replace would rewrite every ".pdf" in the name.
    json_key = f"bill_details/{os.path.splitext(pdf_name)[0]}.json"
    s3_client.put_object(
        Body=json.dumps(json_details),
        Bucket=bucket_name,
        Key=json_key,
        ACL='public-read'  # Set ACL to 'public-read'
    )

    return {
        "pdf_s3_path": json_details["pdf_path"],
        "json_s3_path": f"s3://{bucket_name}/{json_key}"
    }

# Example usage: scrape a single bill page, then publish its PDF and JSON details to S3.
bill_page_url = "https://www.flsenate.gov/Session/Bill/2023/23/ByCategory/?Tab=BillText"
bucket_name = 'ddp-bills'

bill_details = fetch_bill_details(bill_page_url)
s3_paths = upload_to_s3(bucket_name=bucket_name, bill_details=bill_details)

# Show where both objects ended up.
print(s3_paths)


{'pdf_s3_path': 's3://ddp-bills/pdfs/bill_text.pdf', 'json_s3_path': 's3://ddp-bills/bill_details/bill_text.json'}


In [31]:
from sqlalchemy import create_engine, Column, Integer, String
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

# Define the SQLAlchemy model for the 'bill' table
Base = declarative_base()  # declarative base class that the ORM model below inherits from
class Bill(Base):
    """ORM model mapped to the ``bill`` table."""
    __tablename__ = 'bill'

    # Integer primary key.
    id = Column(Integer, primary_key=True)
    # String column; presumably the government-assigned bill identifier - TODO confirm.
    govId = Column(String)
    # String column; callers in this notebook store the S3 location of the bill's uploaded file here.
    billTextPath = Column(String)

def connect_to_db(host, database, user, password, port=3306):
    """
    Establishes a connection to the MariaDB database and returns a session.

    :param host: Database server host name.
    :param database: Name of the database (schema) to use.
    :param user: Database user name.
    :param password: Database password; may contain URL-reserved characters.
    :param port: TCP port of the server (default 3306).
    :return: A SQLAlchemy session bound to the engine, or None if the engine
        could not be created or the server could not be reached.
    """
    from urllib.parse import quote  # local import: not among this cell's imports

    try:
        # Percent-encode the credentials so characters such as '@', '/', ':' or '%'
        # cannot be mistaken for URL delimiters when SQLAlchemy parses the URL.
        db_url = (
            f"mysql+mysqlconnector://{quote(user, safe='')}:{quote(password, safe='')}"
            f"@{host}:{port}/{database}"
        )
        engine = create_engine(db_url, echo=True)  # Set echo to True for debugging

        # create_engine() is lazy: open and release one connection so that bad
        # credentials or an unreachable host are reported here, not on first use.
        with engine.connect():
            pass

        # Create a session
        Session = sessionmaker(bind=engine)
        session = Session()

        print("Connected to MariaDB database")
        return session
    except Exception as e:
        print(f"Error: {e}")
        return None

def insert_bill(session, govId, billTextPath):
    """
    Inserts a new bill into the 'bill' table using SQLAlchemy.

    :param session: SQLAlchemy session used for the insert.
    :param govId: Value for the govId column.
    :param billTextPath: Value for the billTextPath column (the S3 URL here).
    :return: The primary key of the new row, or None if the insert failed.
    """
    try:
        # Create a new Bill instance with the S3 URL
        new_bill = Bill(govId=govId, billTextPath=billTextPath)

        # Add the new_bill to the session and commit it
        session.add(new_bill)
        session.commit()

        print("Bill inserted successfully.")
        return new_bill.id
    except Exception as e:
        print(f"Error: {e}")
        # After a failed flush/commit the session refuses further work until
        # it is rolled back, so reset it before handing control back.
        if session is not None:
            try:
                session.rollback()
            except Exception as rollback_error:
                print(f"Rollback failed: {rollback_error}")
        return None

# Database connection details (including the default port 3306)
import os
from getpass import getpass

db_host = 'ddp-api.czqcac8oivov.us-east-1.rds.amazonaws.com'
db_name = 'digital_democracy'
db_user = 'DataWithAlex'
# Never store the password in the notebook: read it from the DDP_DB_PASSWORD
# environment variable, falling back to an interactive prompt when it is unset.
db_password = os.environ.get('DDP_DB_PASSWORD') or getpass('Database password: ')
db_port = 3306  # Specify the port if needed, otherwise omit this line

# Connect to the database
session = connect_to_db(db_host, db_name, db_user, db_password, db_port)

if session:
    try:
        # Example bill details
        bill_page_url = "https://www.flsenate.gov/Session/Bill/2023/23/ByCategory/?Tab=BillText"
        bill_details = fetch_bill_details(bill_page_url)

        # Construct the S3 URL for billTextPath
        s3_url = f"https://ddp-bills.s3.amazonaws.com/bill_details/{bill_details['pdf_path'].split('/')[-1].replace('.pdf', '.json')}"

        # Insert the bill into the database with the S3 URL as billTextPath
        bill_id = insert_bill(session, "test", s3_url)
    finally:
        # Close the database session even if scraping or the insert raised
        session.close()
        print("Database session closed.")
else:
    print("Database connection not established.")

Connected to MariaDB database
2024-01-22 16:12:56,405 INFO sqlalchemy.engine.Engine SELECT DATABASE()
2024-01-22 16:12:56,407 INFO sqlalchemy.engine.Engine [raw sql] {}
2024-01-22 16:12:56,677 INFO sqlalchemy.engine.Engine SELECT @@sql_mode
2024-01-22 16:12:56,678 INFO sqlalchemy.engine.Engine [raw sql] {}
2024-01-22 16:12:56,827 INFO sqlalchemy.engine.Engine SELECT @@lower_case_table_names
2024-01-22 16:12:56,828 INFO sqlalchemy.engine.Engine [raw sql] {}
2024-01-22 16:12:56,916 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-01-22 16:12:57,013 INFO sqlalchemy.engine.Engine INSERT INTO bill (`govId`, `billTextPath`) VALUES (%(govId)s, %(billTextPath)s)
2024-01-22 16:12:57,013 INFO sqlalchemy.engine.Engine [generated in 0.09185s] {'govId': 'test', 'billTextPath': 'https://ddp-bills.s3.amazonaws.com/bill_details/bill_text.json'}
2024-01-22 16:12:57,060 INFO sqlalchemy.engine.Engine COMMIT
Bill inserted successfully.
2024-01-22 16:12:57,159 INFO sqlalchemy.engine.Engine BEGIN (implic

In [16]:
%pip install mysql.connector

Collecting mysql.connector
  Downloading mysql-connector-2.2.9.tar.gz (11.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.9/11.9 MB[0m [31m30.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
[?25hBuilding wheels for collected packages: mysql.connector
  Building wheel for mysql.connector (setup.py) ... [?25ldone
[?25h  Created wheel for mysql.connector: filename=mysql_connector-2.2.9-cp310-cp310-macosx_10_9_x86_64.whl size=247958 sha256=09d3cc40e3ae7df11c11acb33783c40e7e89ea02f6d861990034e032fefac7b5
  Stored in directory: /Users/alexsciuto/Library/Caches/pip/wheels/3e/7f/a3/928a40d1ffd01dd16057f308042377afc505d56145375adf40
Successfully built mysql.connector
Installing collected packages: mysql.connector
Successfully installed mysql.connector-2.2.9
Note: you may need to restart the kernel to use updated packages.


In [None]:
# Scratch cell: a bare string literal holding the HTTPS URL of the bill-details JSON object.
"https://ddp-bills.s3.amazonaws.com/bill_details/bill_text.json"

In [18]:
%pip install mysql-connector-python


Collecting mysql-connector-python
  Downloading mysql_connector_python-8.3.0-py2.py3-none-any.whl (557 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m557.9/557.9 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: mysql-connector-python
Successfully installed mysql-connector-python-8.3.0
Note: you may need to restart the kernel to use updated packages.


In [21]:
%pip install sqlalchemy

Note: you may need to restart the kernel to use updated packages.


In [29]:
from sqlalchemy import create_engine, Column, Integer, String
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

# Define the SQLAlchemy model for the 'bill' table
Base = declarative_base()  # declarative base class that the ORM model below inherits from
class Bill(Base):
    """ORM model mapped to the ``bill`` table."""
    __tablename__ = 'bill'

    # Integer primary key.
    id = Column(Integer, primary_key=True)
    # String column; presumably the government-assigned bill identifier - TODO confirm.
    govId = Column(String)
    # String column; callers in this notebook store the S3 location of the bill's uploaded file here.
    billTextPath = Column(String)

def connect_to_db(host, database, user, password, port=3306):
    """
    Establishes a connection to the MariaDB database and returns a session.

    :param host: Database server host name.
    :param database: Name of the database (schema) to use.
    :param user: Database user name.
    :param password: Database password; may contain URL-reserved characters.
    :param port: TCP port of the server (default 3306).
    :return: A SQLAlchemy session bound to the engine, or None if the engine
        could not be created or the server could not be reached.
    """
    from urllib.parse import quote  # local import: not among this cell's imports

    try:
        # Percent-encode the credentials so characters such as '@', '/', ':' or '%'
        # cannot be mistaken for URL delimiters when SQLAlchemy parses the URL.
        db_url = (
            f"mysql+mysqlconnector://{quote(user, safe='')}:{quote(password, safe='')}"
            f"@{host}:{port}/{database}"
        )
        engine = create_engine(db_url, echo=True)  # Set echo to True for debugging

        # create_engine() is lazy: open and release one connection so that bad
        # credentials or an unreachable host are reported here, not on first use.
        with engine.connect():
            pass

        # Create a session
        Session = sessionmaker(bind=engine)
        session = Session()

        print("Connected to MariaDB database")
        return session
    except Exception as e:
        print(f"Error: {e}")
        return None

def insert_bill(session, govId, billTextPath):
    """
    Inserts a new bill into the 'bill' table using SQLAlchemy.

    :param session: SQLAlchemy session used for the insert.
    :param govId: Value for the govId column.
    :param billTextPath: Value for the billTextPath column.
    :return: The primary key of the new row, or None if the insert failed.
    """
    try:
        # Create a new Bill instance
        new_bill = Bill(govId=govId, billTextPath=billTextPath)

        # Add the new_bill to the session and commit it
        session.add(new_bill)
        session.commit()

        print("Bill inserted successfully.")
        return new_bill.id
    except Exception as e:
        print(f"Error: {e}")
        # After a failed flush/commit the session refuses further work until
        # it is rolled back, so reset it before handing control back.
        if session is not None:
            try:
                session.rollback()
            except Exception as rollback_error:
                print(f"Rollback failed: {rollback_error}")
        return None

# Database connection details (including the default port 3306)
import os
from getpass import getpass

db_host = 'ddp-api.czqcac8oivov.us-east-1.rds.amazonaws.com'
db_name = 'digital_democracy'
db_user = 'DataWithAlex'
# Never store the password in the notebook: read it from the DDP_DB_PASSWORD
# environment variable, falling back to an interactive prompt when it is unset.
db_password = os.environ.get('DDP_DB_PASSWORD') or getpass('Database password: ')
db_port = 3306  # Specify the port if needed, otherwise omit this line

# Connect to the database
session = connect_to_db(db_host, db_name, db_user, db_password, db_port)

if session:
    try:
        # Example bill details
        bill_page_url = "https://www.flsenate.gov/Session/Bill/2023/23/ByCategory/?Tab=BillText"
        bill_details = fetch_bill_details(bill_page_url)
        s3_paths = upload_to_s3('ddp-bills', bill_details)

        # Insert the bill into the database
        bill_id = insert_bill(session, "test", s3_paths["pdf_s3_path"])
    finally:
        # Close the database session even if scraping, upload or insert raised
        session.close()
        print("Database session closed.")
else:
    print("Database connection not established.")


Connected to MariaDB database
2024-01-22 15:50:18,075 INFO sqlalchemy.engine.Engine SELECT DATABASE()
2024-01-22 15:50:18,076 INFO sqlalchemy.engine.Engine [raw sql] {}
2024-01-22 15:50:18,367 INFO sqlalchemy.engine.Engine SELECT @@sql_mode
2024-01-22 15:50:18,368 INFO sqlalchemy.engine.Engine [raw sql] {}
2024-01-22 15:50:18,507 INFO sqlalchemy.engine.Engine SELECT @@lower_case_table_names
2024-01-22 15:50:18,508 INFO sqlalchemy.engine.Engine [raw sql] {}
2024-01-22 15:50:18,603 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-01-22 15:50:18,705 INFO sqlalchemy.engine.Engine INSERT INTO bill (`govId`, `billTextPath`) VALUES (%(govId)s, %(billTextPath)s)
2024-01-22 15:50:18,706 INFO sqlalchemy.engine.Engine [generated in 0.09975s] {'govId': 'test', 'billTextPath': 's3://ddp-bills/pdfs/bill_text.pdf'}
2024-01-22 15:50:18,761 INFO sqlalchemy.engine.Engine COMMIT
Bill inserted successfully.
2024-01-22 15:50:18,856 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-01-22 15:50:18,950 I

In [32]:
from sqlalchemy import create_engine, Column, Integer, String
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

# Define the SQLAlchemy model for the 'bill' table
Base = declarative_base()  # declarative base class that the ORM model below inherits from
class Bill(Base):
    """ORM model mapped to the ``bill`` table."""
    __tablename__ = 'bill'

    # Integer primary key.
    id = Column(Integer, primary_key=True)
    # String column; presumably the government-assigned bill identifier - TODO confirm.
    govId = Column(String)
    # String column; callers in this notebook store the S3 location of the bill's uploaded file here.
    billTextPath = Column(String)

def connect_to_db(host, database, user, password, port=3306):
    """
    Establishes a connection to the MariaDB database and returns a session.

    :param host: Database server host name.
    :param database: Name of the database (schema) to use.
    :param user: Database user name.
    :param password: Database password; may contain URL-reserved characters.
    :param port: TCP port of the server (default 3306).
    :return: A SQLAlchemy session bound to the engine, or None if the engine
        could not be created or the server could not be reached.
    """
    from urllib.parse import quote  # local import: not among this cell's imports

    try:
        # Percent-encode the credentials so characters such as '@', '/', ':' or '%'
        # cannot be mistaken for URL delimiters when SQLAlchemy parses the URL.
        db_url = (
            f"mysql+mysqlconnector://{quote(user, safe='')}:{quote(password, safe='')}"
            f"@{host}:{port}/{database}"
        )
        engine = create_engine(db_url, echo=True)  # Set echo to True for debugging

        # create_engine() is lazy: open and release one connection so that bad
        # credentials or an unreachable host are reported here, not on first use.
        with engine.connect():
            pass

        # Create a session
        Session = sessionmaker(bind=engine)
        session = Session()

        print("Connected to MariaDB database")
        return session
    except Exception as e:
        print(f"Error: {e}")
        return None

def insert_bill(session, govId, billTextPath):
    """
    Inserts a new bill into the 'bill' table using SQLAlchemy.

    :param session: SQLAlchemy session used for the insert.
    :param govId: Value for the govId column.
    :param billTextPath: Value for the billTextPath column (the S3 URL here).
    :return: The primary key of the new row, or None if the insert failed.
    """
    try:
        # Create a new Bill instance with the S3 URL
        new_bill = Bill(govId=govId, billTextPath=billTextPath)

        # Add the new_bill to the session and commit it
        session.add(new_bill)
        session.commit()

        print("Bill inserted successfully.")
        return new_bill.id
    except Exception as e:
        print(f"Error: {e}")
        # After a failed flush/commit the session refuses further work until
        # it is rolled back, so reset it before handing control back.
        if session is not None:
            try:
                session.rollback()
            except Exception as rollback_error:
                print(f"Rollback failed: {rollback_error}")
        return None

# Database connection details (including the default port 3306)
import os
from getpass import getpass

db_host = 'ddp-api.czqcac8oivov.us-east-1.rds.amazonaws.com'
db_name = 'digital_democracy'
db_user = 'DataWithAlex'
# Never store the password in the notebook: read it from the DDP_DB_PASSWORD
# environment variable, falling back to an interactive prompt when it is unset.
db_password = os.environ.get('DDP_DB_PASSWORD') or getpass('Database password: ')
db_port = 3306  # Specify the port if needed, otherwise omit this line

# Connect to the database
session = connect_to_db(db_host, db_name, db_user, db_password, db_port)

if session:
    try:
        # Example bill details
        bill_page_url = "https://www.flsenate.gov/Session/Bill/2023/23/ByCategory/?Tab=BillText"
        bill_details = fetch_bill_details(bill_page_url)

        # Construct the S3 URL for billTextPath
        s3_url = f"https://ddp-bills.s3.amazonaws.com/bill_details/{bill_details['pdf_path'].split('/')[-1].replace('.pdf', '.json')}"

        # Insert the bill into the database with the S3 URL as billTextPath
        bill_id = insert_bill(session, "test", s3_url)
    finally:
        # Close the database session even if scraping or the insert raised
        session.close()
        print("Database session closed.")
else:
    print("Database connection not established.")


Connected to MariaDB database
2024-01-22 16:20:18,625 INFO sqlalchemy.engine.Engine SELECT DATABASE()
2024-01-22 16:20:18,626 INFO sqlalchemy.engine.Engine [raw sql] {}
2024-01-22 16:20:18,898 INFO sqlalchemy.engine.Engine SELECT @@sql_mode
2024-01-22 16:20:18,899 INFO sqlalchemy.engine.Engine [raw sql] {}
2024-01-22 16:20:19,032 INFO sqlalchemy.engine.Engine SELECT @@lower_case_table_names
2024-01-22 16:20:19,033 INFO sqlalchemy.engine.Engine [raw sql] {}
2024-01-22 16:20:19,119 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-01-22 16:20:19,212 INFO sqlalchemy.engine.Engine INSERT INTO bill (`govId`, `billTextPath`) VALUES (%(govId)s, %(billTextPath)s)
2024-01-22 16:20:19,213 INFO sqlalchemy.engine.Engine [generated in 0.09140s] {'govId': 'test', 'billTextPath': 'https://ddp-bills.s3.amazonaws.com/bill_details/bill_text.json'}
2024-01-22 16:20:19,260 INFO sqlalchemy.engine.Engine COMMIT
Bill inserted successfully.
2024-01-22 16:20:19,353 INFO sqlalchemy.engine.Engine BEGIN (implic

In [23]:
from sqlalchemy import create_engine, Column, Integer, String
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

# Define the SQLAlchemy model for the 'bill' table
Base = declarative_base()  # declarative base class that the ORM model below inherits from
class Bill(Base):
    """ORM model mapped to the ``bill`` table."""
    __tablename__ = 'bill'

    # Integer primary key.
    id = Column(Integer, primary_key=True)
    # String column; presumably the government-assigned bill identifier - TODO confirm.
    govId = Column(String)
    # String column; callers in this notebook store the S3 location of the bill's uploaded file here.
    billTextPath = Column(String)

def connect_to_db(host, database, user, password):
    """
    Builds a SQLAlchemy engine for the MariaDB database and returns a session.

    No connection is opened here: the engine connects lazily on first use, so
    bad credentials or an unreachable host surface on the first query, not in
    this function.

    :param host: Database server host name.
    :param database: Name of the database (schema) to use.
    :param user: Database user name.
    :param password: Database password; may contain URL-reserved characters.
    :return: A SQLAlchemy session bound to the engine, or None if the engine
        or session could not be created.
    """
    from urllib.parse import quote  # local import: not among this cell's imports

    try:
        # Percent-encode the credentials so characters such as '@', '/', ':' or '%'
        # cannot be mistaken for URL delimiters when SQLAlchemy parses the URL.
        db_url = (
            f"mysql+mysqlconnector://{quote(user, safe='')}:{quote(password, safe='')}"
            f"@{host}/{database}"
        )
        engine = create_engine(db_url, echo=True)  # Set echo to True for debugging

        # Create a session
        Session = sessionmaker(bind=engine)
        session = Session()

        print("Connected to MariaDB database")
        return session
    except Exception as e:
        print(f"Error: {e}")
        return None

def insert_bill(session, govId, billTextPath):
    """
    Inserts a new bill into the 'bill' table using SQLAlchemy.

    :param session: SQLAlchemy session used for the insert.
    :param govId: Value for the govId column.
    :param billTextPath: Value for the billTextPath column.
    :return: The primary key of the new row, or None if the insert failed.
    """
    try:
        # Create a new Bill instance
        new_bill = Bill(govId=govId, billTextPath=billTextPath)

        # Add the new_bill to the session and commit it
        session.add(new_bill)
        session.commit()

        print("Bill inserted successfully.")
        return new_bill.id
    except Exception as e:
        print(f"Error: {e}")
        # After a failed flush/commit the session refuses further work until
        # it is rolled back, so reset it before handing control back.
        if session is not None:
            try:
                session.rollback()
            except Exception as rollback_error:
                print(f"Rollback failed: {rollback_error}")
        return None

# Database connection details
import os
from getpass import getpass

db_host = 'ddp-api.czqcac8oivov.us-east-1.rds.amazonaws.com'
db_name = 'digital_democracy'
db_user = 'DataWithAlex'
# Never store the password in the notebook: read it from the DDP_DB_PASSWORD
# environment variable, falling back to an interactive prompt when it is unset.
db_password = os.environ.get('DDP_DB_PASSWORD') or getpass('Database password: ')

# Connect to the database
session = connect_to_db(db_host, db_name, db_user, db_password)

# connect_to_db returns None on failure; guard so the cell does not crash on None.close()
if session:
    try:
        # Example bill details
        bill_page_url = "https://www.flsenate.gov/Session/Bill/2023/23/ByCategory/?Tab=BillText"
        bill_details = fetch_bill_details(bill_page_url)
        s3_paths = upload_to_s3('ddp-bills', bill_details)

        # Insert the bill into the database
        bill_id = insert_bill(session, "test-2", s3_paths["pdf_s3_path"])
    finally:
        # Close the database session even if scraping, upload or insert raised
        session.close()
        print("Database session closed.")
else:
    print("Database connection not established.")


Connected to MariaDB database
(Background on this error at: https://sqlalche.me/e/14/rvf5)
Database session closed.


In [17]:
import mysql.connector

def insert_bill_data(gov_id, s3_path):
    """
    Insert one row into the ``bill`` table through mysql.connector.

    The cursor and the connection are always closed, even when the insert or
    the commit fails; the original exception then propagates to the caller.

    :param gov_id: Value for the govId column.
    :param s3_path: Value for the billTextPath column.
    :return: None
    """
    # NOTE: host/user/password below are placeholders and must be replaced
    # with real connection settings before this function can be used.
    conn = mysql.connector.connect(
        host='hostname',
        database='digital_democracy',
        user='username',
        password='password'
    )
    try:
        cursor = conn.cursor()
        try:
            # Parameterized statement: the driver escapes both values.
            insert_sql = "INSERT INTO bill (govId, billTextPath) VALUES (%s, %s)"
            bill_data = (gov_id, s3_path)

            cursor.execute(insert_sql, bill_data)
            conn.commit()
        finally:
            cursor.close()
    finally:
        conn.close()


In [None]:
import boto3

# Destination: bucket name and object key (the key plays the role of a file name)
bucket_name = 'your-bucket-name'
object_key = 'my-text-data.txt'

# Payload to store as the object body
text_data = 'This is the text I want to store in S3'

# S3 client used for the upload
s3 = boto3.client('s3')

# Write the text to S3; the service response is the cell output
s3.put_object(
    Body=text_data,
    Bucket=bucket_name,
    Key=object_key,
)
