# Executive Orders Scraping

## 0- Preparation

In [1]:
# Import Python Packages
import os
import requests
import re
import json
import pandas as pd


## 1- Retrieving Data via API

In [2]:
# Endpoint of the Federal Register API that is queried for documents
base_url = 'https://www.federalregister.gov/api/v1/documents.json'

# Query parameters, assembled key by key in the order they are sent
params = {}

# Fields requested for every returned document
params["fields[]"] = [
    "title",
    "toc_subject",
    "disposition_notes",             # notes attached to the document
    "document_number",
    "executive_order_number",
    "pdf_url",
    "raw_text_url",                  # URL used to download the full body text
    "presidential_document_number",
    "president",
    "publication_date",
    "signing_date",
    "citation",                      # stored later as the EO citation
]

# Page size and ordering
params["per_page"] = 200  # max 1000
params["order"] = "newest"

# Filters selecting the documents of interest
params["conditions[agencies][]"] = "executive-office-of-the-president"
params["conditions[type][]"] = "PRESDOCU"
params["conditions[presidential_document_type][]"] = "executive_order"
# Left empty; presumably this applies no restriction by president -- TODO confirm
params["conditions[president][]"] = ""

# Page to request first
params["page"] = 1


In [3]:
# Directory to save downloaded executive orders (the current working directory)
download_dir = os.getcwd()

# Make sure the directory exists; exist_ok=True turns this into a no-op when it
# is already there and avoids a separate check-then-create step.
os.makedirs(download_dir, exist_ok=True)


In [4]:
def sanitize_filename(filename):
    """Return `filename` stripped of invalid characters and cut to 100 characters.

    The characters removed are: backslash, forward slash, asterisk, question
    mark, colon, double quote, the two angle brackets and the pipe. Truncation
    is applied after the removal.
    """
    forbidden = '\\/*?:"<>|'
    kept = [char for char in filename if char not in forbidden]
    return ''.join(kept)[:100]

def fetch_full_text(text_url, timeout=30):
    """Download the full text found at a document's raw-text URL.

    Args:
        text_url: URL requested with an HTTP GET.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot block the caller forever.

    Returns:
        The response body as a string when the server answers with status
        200. None when the request fails or any other status is returned;
        a failure message is printed in both of those cases.
    """
    try:
        response = requests.get(text_url, timeout=timeout)
    except requests.RequestException as error:
        # One unreachable document is reported and skipped rather than
        # raising out of the caller's loop.
        print(f'Failed to fetch full text from {text_url}: {error}')
        return None

    if response.status_code == 200:
        return response.text

    print(f'Failed to fetch full text from {text_url}')
    return None
    

In [5]:
# List to hold all the data (one dictionary per executive order)
documents_data = []
page_number = 1  # Page currently being requested; also shown in progress messages

# Seconds to wait for the API to answer before giving up instead of hanging the cell
request_timeout = 60

# Pagination loop for API requests
while True:
    print(f"Fetching data from page {page_number}...")

    # Send a copy of the shared query with this iteration's page number.
    # `params` itself is never modified, so re-running this cell starts
    # again from page 1 instead of from wherever the previous run stopped.
    page_params = {**params, "page": page_number}
    response = requests.get(base_url, params=page_params, timeout=request_timeout)

    # Raise on HTTP errors so an error payload is not mistaken for
    # "no more results" and the scrape does not end silently incomplete.
    response.raise_for_status()
    data = response.json()

    # Break if there are no more results
    if not data.get('results'):
        print("No more results. Exiting.")
        break

    # Loop through each document and organize data
    for i, document in enumerate(data['results'], start=1):
        title = document['title']
        raw_text_url = document.get('raw_text_url')

        # Fetch the full body text from the raw text URL; documents without
        # a URL get a placeholder string instead.
        full_text = fetch_full_text(raw_text_url) if raw_text_url else "No full text available"

        # Organize data into a dictionary
        document_info = {
            "title": title,
            "president": document.get('president'),
            "publication_date": document.get('publication_date'),
            "signing_date": document.get('signing_date'),
            "citation": document.get('citation'),
            "document_number": document.get('document_number'),
            "executive_order_number": document.get('executive_order_number'),
            "pdf_url": document.get('pdf_url'),
            "toc_subject": document.get('toc_subject'),
            "disposition_notes": document.get('disposition_notes'),
            "full_text": full_text
        }

        # Add to list of documents
        documents_data.append(document_info)

        # Print progress update for each document
        print(f"Processed document {i} on page {page_number}: {title}")

    # Move to the next page
    page_number += 1

# Convert list of dictionaries to a pandas DataFrame
df = pd.DataFrame(documents_data)
      

Fetching data from page 1...
Processed document 1 on page 1: Combating Emerging Firearms Threats and Improving School-Based Active-Shooter Drills
Processed document 2 on page 1: Investing in America and Investing in American Workers
Processed document 3 on page 1: Establishing an Emergency Board To Investigate a Dispute Between New Jersey Transit Rail Operations and Its Locomotive Engineers Represented by the Brotherhood of Locomotive Engineers and Trainmen
Processed document 4 on page 1: White House Initiative on Advancing Educational Equity, Excellence, and Economic Opportunity Through Hispanic-Serving Institutions
Processed document 5 on page 1: White House Council on Supply Chain Resilience
Processed document 6 on page 1: COVID-19 and Public Health Preparedness and Response
Processed document 7 on page 1: Recognizing and Honoring Women's History
Processed document 8 on page 1: Advancing Women's Health Research and Innovation
Processed document 9 on page 1: Scaling and Expanding the

Processed document 69 on page 1: Imposing Sanctions on Foreign Persons Involved in the Global Illicit Drug Trade
Processed document 70 on page 1: Transforming Federal Customer Experience and Service Delivery To Rebuild Trust in Government
Processed document 71 on page 1: Catalyzing Clean Energy Industries and Jobs Through Federal Sustainability
Processed document 72 on page 1: The National Space Council
Processed document 73 on page 1: Nondisplacement of Qualified Workers Under Service Contracts
Processed document 74 on page 1: Termination of Emergency With Respect to the Situation in Burundi
Processed document 75 on page 1: Improving Public Safety and Criminal Justice for Native Americans and Addressing the Crisis of Missing or Murdered Indigenous People
Processed document 76 on page 1: Implementation of the Infrastructure Investment and Jobs Act
Processed document 77 on page 1: Designation To Exercise Authority Over the National Defense Stockpile
Processed document 78 on page 1: Whit

Processed document 142 on page 1: Ensuring a Lawful and Accurate Enumeration and Apportionment Pursuant to the Decennial Census
Processed document 143 on page 1: Advancing Racial Equity and Support for Underserved Communities Through the Federal Government
Processed document 144 on page 1: Taking Additional Steps To Address the National Emergency With Respect to Significant Malicious Cyber- Enabled Activities
Processed document 145 on page 1: Revocation of Executive Order 13770
Processed document 146 on page 1: Care of Veterans With Service in Uzbekistan
Processed document 147 on page 1: Protecting the United States From Certain Unmanned Aircraft Systems
Processed document 148 on page 1: Protecting Americans From Overcriminalization Through Regulatory Reform
Processed document 149 on page 1: Ensuring Democratic Accountability in Agency Rulemaking
Processed document 150 on page 1: Building the National Garden of American Heroes
Processed document 151 on page 1: Protecting Law Enforcemen

Processed document 15 on page 2: Encouraging International Support for the Recovery and Use of Space Resources
Processed document 16 on page 2: Establishing the Committee for the Assessment of Foreign Participation in the United States Telecommunications Services Sector
Processed document 17 on page 2: National Emergency Authority To Order the Selected Reserve and Certain Members of the Individual Ready Reserve of the Armed Forces to Active Duty
Processed document 18 on page 2: Delegating Additional Authority Under the Defense Production Act With Respect to Health and Medical Resources To Respond to the Spread of COVID-19
Processed document 19 on page 2: Preventing Hoarding of Health and Medical Resources To Respond to the Spread of COVID-19
Processed document 20 on page 2: Prioritizing and Allocating Health and Medical Resources to Respond to the Spread of COVID-19
Processed document 21 on page 2: Establishment of the Interagency Committee on Trade in Automotive Goods Under Section 20

Processed document 88 on page 2: Affording Congress an Opportunity To Address Family Separation
Processed document 89 on page 2: Ocean Policy To Advance the Economic, Security, and Environmental Interests of the United States
Processed document 90 on page 2: Promoting Accountability and Streamlining Removal Procedures Consistent With Merit System Principles
Processed document 91 on page 2: Exemption From Executive Order 13658 for Recreational Services on Federal Lands
Processed document 92 on page 2: Ensuring Transparency, Accountability, and Efficiency in Taxpayer-Funded Union Time Use
Processed document 93 on page 2: Developing Efficient, Effective, and Cost- Reducing Approaches To Federal Sector Collective Bargaining
Processed document 94 on page 2: Prohibiting Certain Additional Transactions With Respect to Venezuela
Processed document 95 on page 2: Efficient Federal Operations
Processed document 96 on page 2: Enhancing the Effectiveness of Agency Chief Information Officers
Process

Processed document 167 on page 2: Providing an Order of Succession Within the Department of Justice
Processed document 168 on page 2: Recognizing Positive Actions by the Government of Sudan and Providing for the Revocation of Certain Sudan-Related Sanctions
Processed document 169 on page 2: Exclusions From the Federal Labor-Management Relations Program
Processed document 170 on page 2: Designating the World Organisation for Animal Health as a Public International Organization Entitled To Enjoy Certain Privileges, Exemptions, and Immunities
Processed document 171 on page 2: Amending Executive Order 11016 To Update Eligibility Criteria for Award of the Purple Heart
Processed document 172 on page 2: Taking Additional Steps to Address the National Emergency With Respect to Significant Malicious Cyber- Enabled Activities
Processed document 173 on page 2: Adjustments of Certain Rates of Pay
Processed document 174 on page 2: Providing an Order of Succession Within the Department of Labor
Proc

Processed document 34 on page 3: Hostage Recovery Activities
Processed document 35 on page 3: Amendment to Executive Order 11155, Awards for Special Capability in Career and Technical Education
Processed document 36 on page 3: 2015 Amendments to the Manual for Courts-Martial, United States
Processed document 37 on page 3: Termination of Emergency With Respect to the Risk of Nuclear Proliferation Created by the Accumulation of a Large Volume of Weapons-Usable Fissile Material in the Territory of the Russian Federation
Processed document 38 on page 3: Blocking the Property of Certain Persons Engaging in Significant Malicious Cyber-Enabled Activities
Processed document 39 on page 3: Planning for Federal Sustainability in the Next Decade
Processed document 40 on page 3: Blocking Property and Suspending Entry of Certain Persons Contributing to the Situation in Venezuela
Processed document 41 on page 3: Promoting Private Sector Cybersecurity Information Sharing
Processed document 42 on page 

Processed document 113 on page 3: Improving Access to Mental Health Services for Veterans, Service Members, and Military Families
Processed document 114 on page 3: Accelerating Investment in Industrial Energy Efficiency
Processed document 115 on page 3: Preventing and Responding to Violence Against Women and Girls Globally
Processed document 116 on page 3: Authorizing Additional Sanctions With Respect to Iran
Processed document 117 on page 3: White House Initiative on Educational Excellence for African Americans
Processed document 118 on page 3: Taking Additional Steps to Address the National Emergency With Respect to Somalia
Processed document 119 on page 3: Blocking Property of Persons Threatening the Peace, Security, or Stability of Burma
Processed document 120 on page 3: Assignment of National Security and Emergency Preparedness Communications Functions
Processed document 121 on page 3: Blocking Property of the Government of the Russian Federation Relating to the Disposition of Hig

Processed document 184 on page 3: Correction
Processed document 185 on page 3: Correction
Processed document 186 on page 3: Correction
Processed document 187 on page 3: Correction
Processed document 188 on page 3: Correction
Processed document 189 on page 3: White House Initiative on Educational Excellence for Hispanics
Processed document 190 on page 3: Establishing the Gulf Coast Ecosystem Restoration Task Force
Processed document 191 on page 3: Blocking Property of Certain Persons With Respect to Serious Human Rights Abuses by the Government of Iran and Taking Certain Other Actions
Processed document 192 on page 3: 2010 Amendments to the Manual for Courts-Martial, United States
Processed document 193 on page 3: Blocking Property of Certain Persons With Respect to North Korea
Processed document 194 on page 3: Establishment of Pakistan and Afghanistan Support Office
Processed document 195 on page 3: Classified National Security Information Program for State, Local, Tribal, and Private 

Processed document 69 on page 4: Amendments To Executive Order 9397 Relating To Federal Agency Use of Social Security Numbers
Processed document 70 on page 4: Settlement of Claims Against Libya
Processed document 71 on page 4: Facilitation of a Presidential Transition
Processed document 72 on page 4: Further Amendments To Executive Orders 12139 And 12949 In Light of the Foreign Intelligence Surveillance Act of 1978 Amendments Act of 2008
Processed document 73 on page 4: Amendments to Executive Order 12962
Processed document 74 on page 4: To Authorize Certain Noncompetitive Appointments in the Civil Service for Spouses of Certain Members of the Armed Forces
Processed document 75 on page 4: Executive Branch Responsibilities With Respect To Orders of Succession
Processed document 76 on page 4: Further Amendments To Executive Order 13285, President's Council On Service And Civic Participation
Processed document 77 on page 4: Further Amendments to Executive Order 12333, United States Intell

Processed document 139 on page 4: Amending Executive Order 13381, As Amended, To Extend its Duration by One Year
Processed document 141 on page 4: Protecting the Property Rights of the American People
Processed document 142 on page 4: Blocking Property of Certain Persons Undermining Democratic Processes or Institutions in Belarus
Processed document 143 on page 4: Task Force on New Americans
Processed document 144 on page 4: Amendments to Executive Orders 11030, 13279, 13339, 13381, and 13389, and Revocation of Executive Order 13011
Processed document 145 on page 4: Strengthening Federal Efforts To Protect Against Identity Theft
Processed document 146 on page 4: Responsibilities of Federal Departments and Agencies With Respect to Volunteer Community Service
Processed document 147 on page 4: Blocking Property of Persons in Connection With the Conflict in Sudan's Darfur Region
Processed document 148 on page 4: Blocking Property of Additional Persons in Connection With the National Emergen

Processed document 6 on page 5: Further Amendment to Executive Order 11023, Providing for the Performance by the Secretary of Commerce of Certain Functions Relating to the National Oceanic and Atmospheric Administration
Processed document 7 on page 5: Establishment of Great Lakes Interagency Task Force and Promotion of a Regional Collaboration of National Significance for the Great Lakes
Processed document 8 on page 5: Increasing Economic Opportunity and Business Participation of Asian Americans and Pacific Islanders
Processed document 9 on page 5: Blocking Property of Certain Persons and Prohibiting the Export of Certain Goods to Syria
Processed document 10 on page 5: Issuance of Permits With Respect to Certain Energy-Related Facilities and Land Transportation Crossings on the International Boundaries of the United States
Processed document 11 on page 5: American Indian and Alaska Native Education
Processed document 12 on page 5: Incentives for the Use of Health Information Technology

Processed document 77 on page 5: Establishment of the Corporate Fraud Task Force
Processed document 78 on page 5: Tribal Colleges and Universities
Processed document 79 on page 5: Expedited Naturalization of Aliens and Noncitizen Nationals Serving in an Active-Duty Status During the War on Terrorism
Processed document 80 on page 5: Termination of Emergency With Respect to the Taliban and Amendment of Executive Order 13224 of September 23, 2001
Processed document 81 on page 5: Establishing a Transition Planning Office for the Department of Homeland Security Within the Office of Management and Budget
Processed document 82 on page 5: Activities To Promote Personal Fitness
Processed document 83 on page 5: President's Council on Physical Fitness and Sports
Processed document 84 on page 5: Amendment to Executive Order 13180, Air Traffic Performance-Based Organization
Processed document 85 on page 5: President's New Freedom Commission on Mental Health
Processed document 86 on page 5: 2002 Ame

Processed document 147 on page 5: Revocation of Executive Order and Presidential Memorandum Concerning Labor-Management Partnerships
Processed document 148 on page 5: Preservation of Open Competition and Government Neutrality Towards Government Contractors'Labor Relations on Federal and Federally Funded Construction Projects
Processed document 149 on page 5: Notification of Employee Rights Concerning Payment of Union Dues or Fees
Processed document 150 on page 5: President's Information Technology Advisory Committee, Further Amendment to Executive Order 13035, as Amended
Processed document 151 on page 5: Establishment of White House Office of Faith- Based and Community Initiatives
Processed document 152 on page 5: Agency Responsibilities With Respect to Faith- Based and Community Initiatives
Processed document 153 on page 5: Governmentwide Accountability for Merit System Principles; Workforce Information
Processed document 154 on page 5: Final Northwestern Hawaiian Islands Coral Reef E

Processed document 21 on page 6: Blocking Property and Prohibiting Transactions With the Taliban
Processed document 22 on page 6: Implementation of the Chemical Weapons Convention and the Chemical Weapons Convention Implementation Act
Processed document 23 on page 6: Amendment to Executive Order 13073, Year 2000 Conversion
Processed document 24 on page 6: Prohibition of Acquisition of Products Produced by Forced or Indentured Child Labor
Processed document 25 on page 6: Increasing Participation of Asian Americans and Pacific Islanders in Federal Programs
Processed document 26 on page 6: Amending the Civil Service Rules Relating To Federal Employees With Psychiatric Disabilities
Processed document 27 on page 6: Greening the Government Through Efficient Energy Management
Processed document 28 on page 6: Interagency Task Force on the Economic Development of the Southwest Border
Processed document 29 on page 6: Blocking Property of the Governments of the Federal Republic of Yugoslavia (Ser

Processed document 94 on page 6: Further Amendment to Executive Order 13017, Advisory Commission on Consumer Protection and Quality in the Health Care Industry
Processed document 95 on page 6: Coordination of United States Government International Exchanges and Training Programs
Processed document 96 on page 6: Eligibility of Certain Overseas Employees for Noncompetitive Appointments
Processed document 97 on page 6: Adding Members to and Extending the President's Council on Sustainable Development
Processed document 98 on page 6: Hong Kong Economic and Trade Offices
Processed document 99 on page 6: Internal Revenue Service Management Board
Processed document 100 on page 6: President's Advisory Board on Race
Processed document 101 on page 6: Organization for the Prohibition of Chemical Weapons
Processed document 102 on page 6: Improving Administrative Management in the Executive Branch
Processed document 103 on page 6: Prohibiting New Investment in Burma
Processed document 104 on page 6

Processed document 160 on page 6: Adjustments of Rates of Pay and Allowances for the Uniformed Services, Amendment to Executive Order No. 12984
Processed document 161 on page 6: Economy and Efficiency in Government Procurement Through Compliance With Certain Immigration and Naturalization Act Provisions
Processed document 162 on page 6: Civil Justice Reform
Processed document 163 on page 6: Amendment to Executive Order No. 12964
Processed document 164 on page 6: International Union for Conservation of Nature and Natural Resources
Processed document 165 on page 6: Establishing the Armed Forces Service Medal
Processed document 166 on page 6: Adjustments of Certain Rates of Pay and Allowances
Processed document 167 on page 6: Amendment to Executive Order No. 12871
Processed document 168 on page 6: Ordering the Selected Reserve of the Armed Forces to Active Duty
Processed document 169 on page 6: Administration of Export Controls
Processed document 170 on page 6: Further Amendment to Execut

Processed document 43 on page 7: Order of Succession of Officers To Act as Secretary of the Army
Processed document 44 on page 7: Amending Executive Order No. 12882
Processed document 45 on page 7: Coordinating Geographic Data Acquisition and Access: The National Spatial Data Infrastructure
Processed document 46 on page 7: Trade and Environment Policy Advisory Committee
Processed document 47 on page 7: Commission for Environmental Cooperation, Commission for Labor Cooperation, Border Environment Cooperation Commission, and North American Development Bank
Processed document 48 on page 7: Nuclear Cooperation With EURATOM
Processed document 49 on page 7: Energy Efficiency and Water Conservation at Federal Facilities
Processed document 50 on page 7: Identification of Trade Expansion Priorities
Processed document 51 on page 7: Establishing an Emergency Board To Investigate a Dispute Between The Long Island Rail Road and Certain of Its Employees Represented by the United Transportation Union

## 2- Exporting Data

In [7]:
# Write every collected record to eos_raw.json inside the download directory
json_file = os.path.join(download_dir, 'eos_raw.json')
with open(json_file, mode='w', encoding='utf-8') as output_handle:
    # ensure_ascii=False writes non-ASCII characters as-is rather than as
    # \u escapes; indent=4 pretty-prints the file.
    json.dump(
        documents_data,
        output_handle,
        ensure_ascii=False,
        indent=4,
    )
print('Data exported')


Data exported
