## Extraction of BIS Entity List changes, 2017 to 2024

In [3]:
import pandas as pd
import os
import requests
import datetime
from bs4 import BeautifulSoup
import zipfile

In [4]:
# Fetch a single snapshot (2024-01-31) of Supplement No. 4 to Part 744 to
# confirm that the eCFR renderer endpoint responds before looping over dates.
url = 'https://www.ecfr.gov/api/renderer/v1/content/enhanced/2024-01-31/title-15?subtitle=B&chapter=VII&subchapter=C&part=744&appendix=Supplement%20No.%204%20to%20Part%20744'
# requests never times out by default; bound the wait so a stalled
# connection cannot hang the kernel.
response = requests.get(url, timeout=60)
response

<Response [200]>

In [5]:
# Parse the raw bytes of the response with Python's built-in HTML parser,
# then display the parsed document as the cell output.
soup = BeautifulSoup(
    markup=response.content,
    features='html.parser',
)
soup

<div class="appendix" id="Supplement-No.-4-to-Part-744">
<h4 data-hierarchy-metadata='{"path":"/on/2024-01-31/title-15/part-744/appendix-Supplement No. 4 to Part 744","citation":"Supplement No. 4 to Part 744, Title 15"}'>Supplement No. 4 to Part 744—Entity List
</h4>
<p>This Supplement lists certain entities subject to license requirements for specified items under this parts 744 and 746 of the EAR. License requirements for these entities include exports, reexports, and transfers (in-country) unless otherwise stated. A license is required, to the extent specified on the Entity List, to export, reexport, or transfer (in-country) any item subject to the EAR when an entity that is listed on the Entity List is a party to the transaction as described in § 748.5(c) through (f) of the EAR. See <a class="cfr external" href="/on/2024-01-31/title-15/section-744.11">§ 744.11</a> for licensing requirements in the context of a “standards-related activity”. This list of entities is revised and updat

In [6]:
# Build one eCFR renderer URL per year-end snapshot (31 December) between
# 2017 and 2024. The dates are yearly, not monthly.

# Use an explicit YearEnd offset instead of the 'Y' string alias: the alias is
# deprecated since pandas 2.2 (renamed 'YE'), whereas the offset object works
# on both older and newer pandas versions.
date_list = pd.date_range(
    start='20170316',
    end='20241231',
    freq=pd.offsets.YearEnd(),
).strftime('%Y-%m-%d')
print(date_list)

# A comprehension keeps the loop variable local, so the `url` defined in the
# earlier single-snapshot cell is not overwritten.
websites = [
    f"https://www.ecfr.gov/api/renderer/v1/content/enhanced/{snapshot_date}/title-15?subtitle=B&chapter=VII&subchapter=C&part=744&appendix=Supplement%20No.%204%20to%20Part%20744"
    for snapshot_date in date_list
]
print(websites)

Index(['2017-12-31', '2018-12-31', '2019-12-31', '2020-12-31', '2021-12-31',
       '2022-12-31', '2023-12-31', '2024-12-31'],
      dtype='object')
['https://www.ecfr.gov/api/renderer/v1/content/enhanced/2017-12-31/title-15?subtitle=B&chapter=VII&subchapter=C&part=744&appendix=Supplement%20No.%204%20to%20Part%20744', 'https://www.ecfr.gov/api/renderer/v1/content/enhanced/2018-12-31/title-15?subtitle=B&chapter=VII&subchapter=C&part=744&appendix=Supplement%20No.%204%20to%20Part%20744', 'https://www.ecfr.gov/api/renderer/v1/content/enhanced/2019-12-31/title-15?subtitle=B&chapter=VII&subchapter=C&part=744&appendix=Supplement%20No.%204%20to%20Part%20744', 'https://www.ecfr.gov/api/renderer/v1/content/enhanced/2020-12-31/title-15?subtitle=B&chapter=VII&subchapter=C&part=744&appendix=Supplement%20No.%204%20to%20Part%20744', 'https://www.ecfr.gov/api/renderer/v1/content/enhanced/2021-12-31/title-15?subtitle=B&chapter=VII&subchapter=C&part=744&appendix=Supplement%20No.%204%20to%20Part%20744', 

In [7]:
# Directory that receives one text file per downloaded snapshot.
folder_name = "bis"
os.makedirs(folder_name, exist_ok=True)

# Download each snapshot URL, keep the parsed document in memory (bis_list)
# and write its HTML to '<folder_name>/<i>.txt', where i is the 1-based
# position of the URL in `websites`. Snapshots that cannot be fetched are
# skipped, so their index is simply missing from the folder.
bis_list = []
for i, site in enumerate(websites, start=1):
    try:
        # requests never times out by default; bound the wait so one stalled
        # connection cannot hang the whole run.
        response = requests.get(site, timeout=60)
    except requests.RequestException as exc:
        # Treat a network failure like a bad status: report it and move on
        # instead of aborting the remaining downloads.
        print(f'Skipping {site} ({exc})')
        continue
    if response.status_code != 200:
        print(f'Skipping {site}')
        continue

    soup = BeautifulSoup(response.content, 'html.parser')
    bis_list.append(soup)

    # Save the soup content as a text file inside the 'bis' folder
    filename = os.path.join(folder_name, f"{i}.txt")
    with open(filename, "w", encoding="utf-8") as file:
        file.write(str(soup))

    print(f'Saved {filename} and moving on')


# Bundle everything under `folder_name` into a single deflate-compressed
# archive; entries are stored with paths relative to that folder.
zip_filename = "bis.zip"
with zipfile.ZipFile(zip_filename, mode="w", compression=zipfile.ZIP_DEFLATED) as archive:
    for dirpath, _dirnames, filenames in os.walk(folder_name):
        for entry_name in filenames:
            source_path = os.path.join(dirpath, entry_name)
            archive_path = os.path.relpath(source_path, start=folder_name)
            archive.write(source_path, arcname=archive_path)

print(f"Zipped folder saved as {zip_filename}")


Saved bis/1.txt and moving on
Saved bis/2.txt and moving on
Saved bis/3.txt and moving on
Saved bis/4.txt and moving on
Saved bis/5.txt and moving on
Saved bis/6.txt and moving on
Saved bis/7.txt and moving on
Saved bis/8.txt and moving on
Zipped folder saved as bis.zip
