In [5]:
import fitz  # PyMuPDF
import os

# Load the source PDF. It is left open on purpose: the splitting step further
# down still copies pages out of it and closes it when finished.
pdf_path = "chapter-7-taxation-2024.pdf"
pdf = fitz.open(pdf_path)


def _is_article_label(token):
    """Return True when ``token`` is digits optionally followed by letters.

    Accepts labels such as "7", "12", "7A" and "12A". Checking only the first
    character for a digit would reject multi-digit labels with a letter suffix
    (e.g. "12A"), so the trailing letters are peeled off first.
    """
    end = len(token)
    while end > 0 and token[end - 1].isalpha():
        end -= 1
    # What remains must be non-empty and all digits ("".isdigit() is False).
    return token[:end].isdigit()


# For every page that carries an ARTICLE heading, record the page index and label.
article_starts = []  # 0-based page index where each ARTICLE begins
article_names = []  # label that follows "ARTICLE" on the heading line (same order)
for i, page in enumerate(pdf):
    text = page.get_text("text")
    for line in text.split("\n"):
        words = line.split()
        # A heading line starts with the bare word "ARTICLE" followed by its label.
        if len(words) > 1 and words[0] == "ARTICLE" and _is_article_label(words[1]):
            article_starts.append(i)
            article_names.append(words[1])
            break  # only the first heading found on a page marks a start

# Split the source PDF into one file per ARTICLE. An ARTICLE spans from its
# start page up to the page before the next ARTICLE starts; the last ARTICLE
# runs to the end of the document.
num_articles = len(article_starts)
output_dir = "articles output"  # Output directory with spaces
os.makedirs(output_dir, exist_ok=True)  # Create directory if it doesn't exist

try:
    for i in range(num_articles):
        start_page = article_starts[i]
        end_page = article_starts[i + 1] if i < num_articles - 1 else len(pdf)

        article_pdf = fitz.open()  # new, empty PDF document
        try:
            # end_page is exclusive here, so the last page copied is end_page - 1.
            article_pdf.insert_pdf(pdf, from_page=start_page, to_page=end_page - 1)

            # File name is "ARTICLE <label>.pdf" (with a space, not an underscore).
            article_name = f"ARTICLE {article_names[i]}"
            article_path = os.path.join(output_dir, f"{article_name}.pdf")
            article_pdf.save(article_path)
        finally:
            # Release the per-article document even if copying or saving failed.
            article_pdf.close()
finally:
    # Always release the source PDF so a failed run does not leave the file
    # handle open in the kernel.
    pdf.close()

print(f"PDFs created for {num_articles} articles in '{output_dir}' with names based on ARTICLE number (with spaces).")


PDFs created for 59 articles in 'articles output' with names based on ARTICLE number (with spaces).


In [6]:
import fitz  # PyMuPDF
import pandas as pd  # For handling CSV
import os

# Load the PDF file
pdf_path = "chapter-7-taxation-2024.pdf"
pdf = fitz.open(pdf_path)


def _is_article_label(token):
    """Return True when ``token`` is digits optionally followed by letters.

    Accepts labels such as "7", "12", "7A" and "12A". Checking only the first
    character for a digit would reject multi-digit labels with a letter suffix
    (e.g. "12A"), so the trailing letters are peeled off first.
    """
    end = len(token)
    while end > 0 and token[end - 1].isalpha():
        end -= 1
    # What remains must be non-empty and all digits ("".isdigit() is False).
    return token[:end].isdigit()


# One record per page that carries an ARTICLE heading: its label and short title.
data = []

try:
    for page in pdf:
        lines = page.get_text("text").split("\n")
        for j, line in enumerate(lines):
            words = line.split()
            # A heading line starts with the bare word "ARTICLE" followed by its label.
            if len(words) > 1 and words[0] == "ARTICLE" and _is_article_label(words[1]):
                # The short title is taken from the line right after the heading;
                # fall back to a placeholder when the heading is the page's last line.
                short_title = lines[j + 1].strip() if j + 1 < len(lines) else "No Title"
                data.append({
                    "ARTICLE Number": words[1],
                    "Short Title": short_title
                })
                break  # only the first heading found on a page is recorded
finally:
    # Release the document handle whether or not the scan succeeded.
    pdf.close()

# Build the table; naming the columns keeps the CSV header even if nothing matched.
df = pd.DataFrame(data, columns=["ARTICLE Number", "Short Title"])

# Output directory
output_dir = "output_tables"
os.makedirs(output_dir, exist_ok=True)

# Save the table as a CSV file
csv_path = os.path.join(output_dir, "article_titles.csv")
df.to_csv(csv_path, index=False)

print(f"Table with ARTICLE numbers and short titles saved to '{csv_path}'")


Table with ARTICLE numbers and short titles saved to 'output_tables\article_titles.csv'


In [7]:
!pip install fredapi

Collecting fredapi
  Downloading fredapi-0.5.2-py3-none-any.whl.metadata (5.0 kB)
Downloading fredapi-0.5.2-py3-none-any.whl (11 kB)
Installing collected packages: fredapi
Successfully installed fredapi-0.5.2




In [11]:
from fredapi import Fred
import os

# Load environment variables (python-dotenv) before the API key is read
from dotenv import find_dotenv, load_dotenv
load_dotenv(find_dotenv())

# FRED API key, taken from the environment rather than hardcoded
FRED_API_KEY = os.environ.get("FRED_API_KEY")

# FRED client authenticated with that key
fred = Fred(api_key=FRED_API_KEY)

# Series to download (example: daily crude oil prices)
series_id = "DCOILWTICO"  # West Texas Intermediate (WTI) Crude Oil Prices
oil_data = fred.get_series(series_id)  # full history for the series

# Show a heading followed by the first few observations
print("Historical Crude Oil Prices:", oil_data.head(), sep="\n")

# Persist the complete series to CSV
oil_data.to_csv("crude_oil_prices.csv")

Historical Crude Oil Prices:
1986-01-02    25.56
1986-01-03    26.00
1986-01-06    26.53
1986-01-07    25.85
1986-01-08    25.87
dtype: float64
