In [None]:
# Requires pdfplumber. Install it once into the kernel's environment with: %pip install pdfplumber

In [1]:
import zipfile
import pdfplumber
import os
from io import BytesIO

def extract_zip(uploaded_zip_path, extract_to="temp_pdfs"):
    """Extract a zip archive and list every PDF found under the target folder.

    Args:
        uploaded_zip_path: Path of the zip archive to extract.
        extract_to: Directory to extract into; it is created if missing.

    Returns:
        A sorted list of paths to all files under ``extract_to`` whose name
        ends in ".pdf" (case-insensitive). The list is empty if none exist.

    Note:
        The whole ``extract_to`` tree is scanned after extraction, so PDFs
        already present in that folder (e.g. from an earlier run with a
        different archive) are returned as well.
    """
    os.makedirs(extract_to, exist_ok=True)  # Ensure extraction folder exists

    with zipfile.ZipFile(uploaded_zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_to)

    # os.walk yields directory entries in a filesystem-dependent order, so the
    # paths are sorted to make the result reproducible across machines; code
    # that indexes the returned list by position relies on a stable order.
    pdf_files = sorted(
        os.path.join(root, file)
        for root, _, files in os.walk(extract_to)
        for file in files
        if file.lower().endswith(".pdf")
    )

    # Debugging: Print extracted files
    if not pdf_files:
        print("No PDFs found in the extracted zip folder.")
    else:
        print(f"Extracted {len(pdf_files)} PDFs: {pdf_files}")

    return pdf_files

def pdf_to_markdown_string(pdf_path):
    """Render the text of one PDF file as a Markdown string.

    The result starts with a level-1 heading that names the file (base name
    only), followed by the extracted text of each page in order. Pages for
    which no text is extracted are skipped; every kept page is followed by a
    blank line.
    """
    heading = f"# Extracted Content from {os.path.basename(pdf_path)}\n\n"

    # Pull the text of every page while the PDF is still open.
    with pdfplumber.open(pdf_path) as document:
        page_texts = [page.extract_text() for page in document.pages]

    # Drop empty/None pages and terminate each remaining page with a blank line.
    body = "".join(page_text + "\n\n" for page_text in page_texts if page_text)
    return heading + body

def process_zipped_pdfs(zip_path):
    """Unpack a zip archive of PDFs and convert each PDF found to Markdown.

    Returns a list with one Markdown string per PDF, in the same order as the
    paths returned by ``extract_zip``.
    """
    markdown_strings = []
    for pdf_path in extract_zip(zip_path):
        markdown_strings.append(pdf_to_markdown_string(pdf_path))
    return markdown_strings

# Example usage
# NOTE: this is a machine-specific absolute path; point it at your own copy of
# the archive before running the notebook.
zip_file_path = "C:/Users/tomas/Downloads/Legal Tactics Book-20250308T231558Z-001.zip"  # Replace with your zip file path
markdown_list = process_zipped_pdfs(zip_file_path)

# Print a one-line summary per document (list index, heading line, length)
# instead of dumping the full text of every PDF into the cell output. The
# index shown here is the position used to look the document up later.
for doc_index, doc_text in enumerate(markdown_list):
    print(f"[{doc_index}] {doc_text.split(chr(10), 1)[0]} ({len(doc_text)} chars)")

Extracted 18 PDFs: ['temp_pdfs\\Legal Tactics Book\\03 Security Deposits 2025.pdf', 'temp_pdfs\\Legal Tactics Book\\04 Tenancies 2025.pdf', 'temp_pdfs\\Legal Tactics Book\\05 Rent 2025 v2.pdf', 'temp_pdfs\\Legal Tactics Book\\06 Utilities 2025_0.pdf', 'temp_pdfs\\Legal Tactics Book\\17 Condos_2025_chapter.pdf', 'temp_pdfs\\Legal Tactics Book\\lt1-chapter-1-moving-in.pdf', 'temp_pdfs\\Legal Tactics Book\\lt1-chapter-10-getting-organized.pdf', 'temp_pdfs\\Legal Tactics Book\\lt1-chapter-11-moving-out.pdf', 'temp_pdfs\\Legal Tactics Book\\lt1-chapter-12-evictions.pdf', 'temp_pdfs\\Legal Tactics Book\\lt1-chapter-13-taking-landlord-to-court.pdf', 'temp_pdfs\\Legal Tactics Book\\lt1-chapter-14-using-court-system.pdf', 'temp_pdfs\\Legal Tactics Book\\lt1-chapter-15-rooming-houses.pdf', 'temp_pdfs\\Legal Tactics Book\\lt1-chapter-16-mobile-homes.pdf', 'temp_pdfs\\Legal Tactics Book\\lt1-chapter-18-foreclosures.pdf', 'temp_pdfs\\Legal Tactics Book\\lt1-chapter-2-tenant-screening.pdf', 'temp_pd

In [2]:
# Each entry is (chapter title, index into markdown_list, marker string).
# The chapter body is everything after the first occurrence of the marker in
# the corresponding document. The markers look like trailing table-of-contents
# entries of each PDF — presumably chosen so the table of contents is cut off;
# verify against the extracted text if a PDF is replaced.
# The indices depend on the order of markdown_list, i.e. on the order in which
# the PDFs were returned when the archive was processed.
CHAPTER_MARKERS = [
    ('Security Deposits', 0, "Private Programs"),
    ("Tenancy Types", 1, "Other Types of Non-Traditional Housing .. 12"),
    ('Rent', 2, "Refuse and Move"),
    ("Utilities", 3, "Special Rules for Tenants Living in\nCondominiums ............................................ 30"),
    ('Condominium Control', 4, "Protections for Tenants Living in\nCondos ...................................................... 22"),
    ('Before You Move In', 5, "What If the Landlord Backs Out ............................................... 15"),
    ('Getting Organized', 6, "How to Run a Good Meeting ................................................... 228"),
    ("Moving Out", 7, "What to Do Before Moving Out ............................................... 243"),
    ('Evictions', 8, "Eviction Timeline .................................................................. 283"),
    ('When to take landlord to court', 9, "Failure to Provide Locks"),
    ('Using the Court System', 10, "Negotiating a Good Settlement .............................................. 346"),
    ('Rooming Houses', 11, "Eviction Hearing"),
    ("Mobile Homes", 12, "After Judgment"),
    ('Tenants and Foreclosure', 13, "Organizations that Help Tenants with Foreclosure ................. 428"),
    ("Tenant Screening", 14, "Background Checking Fees ...................................................... 34"),
    ("Discrimination", 15, "File a Complaint"),
    ("Getting Repairs Made", 16, "Moving Expenses and Other Help"),
    ("Lead Poisoning", 17, "Financial Assistance for Deleading ......................................... 193"),
]


def _text_after_marker(text, marker, chapter):
    """Return the part of ``text`` that follows the first occurrence of ``marker``.

    Raises:
        ValueError: if ``marker`` does not occur in ``text``; the message names
            the chapter and marker so the broken entry is easy to locate.
    """
    _, found, remainder = text.partition(marker)
    if not found:
        raise ValueError(
            f"Marker {marker!r} not found in the document for chapter {chapter!r}"
        )
    return remainder


# Map chapter title -> chapter text with everything up to the marker removed.
chaps = {}
for chapter_title, doc_index, marker in CHAPTER_MARKERS:
    if doc_index >= len(markdown_list):
        raise ValueError(
            f"Chapter {chapter_title!r} expects document index {doc_index}, "
            f"but only {len(markdown_list)} documents were loaded"
        )
    chaps[chapter_title] = _text_after_marker(
        markdown_list[doc_index], marker, chapter_title
    )

