In [2]:
import pandas as pd
# keep_default_na=False: pandas' default NA list contains the literal string "NA",
# so an ISO code such as "NA" (Namibia), if present in the file, would otherwise be
# loaded as NaN instead of text.
df = pd.read_csv(
    "/content/Interpol_Countries_with_Codes_Corrected.csv",
    keep_default_na=False,
)

In [13]:
# Country-code column of the lookup table; the scraping cell iterates over it.
lis = df.loc[:, "Country Code"]
# Number of codes (displayed as the cell result).
lis.shape[0]

193

In [14]:
import requests
import math
import pandas as pd
import time
import os

# === Setup ===
BASE_URL = "https://ws-public.interpol.int/notices/v1/red"
HEADERS = {"User-Agent": "Mozilla/5.0 (X11; Linux x86_64)"}
COUNTRY_FILE = "/content/Interpol_Countries_with_Codes_Corrected.csv"

# === Delay Configuration ===
PAGE_DELAY = 1.5   # seconds slept after each results page has been processed
RETRY_DELAY = 3    # seconds slept between attempts of a failing request
MAX_RETRIES = 3    # total attempts per request (not "extra" retries)

# === Load country code mappings ===
# keep_default_na=False: with default parsing pandas converts the literal string
# "NA" to NaN, which would drop a country whose code is "NA" (e.g. Namibia) from
# the mapping below.
country_df = pd.read_csv(COUNTRY_FILE, keep_default_na=False)
code_to_country = dict(
    zip(
        country_df['Country Code'].str.strip(),
        country_df['Country'].str.strip(),
    )
)

# === Country Codes Input ===
# `lis` is the "Country Code" column extracted in the previous cell.
selected_country_codes = lis

# === Request Functions ===
def get_notices_page(page: int, country_code: str):
    """Fetch one page of red-notice search results for a nationality.

    Args:
        page: 1-based page number to request.
        country_code: Nationality code placed in the ``nationality`` query parameter.

    Returns:
        The decoded JSON body on an HTTP 200 response whose Content-Type contains
        ``application/json``; ``None`` once ``MAX_RETRIES`` attempts have failed.
    """
    url = f"{BASE_URL}?nationality={country_code}&page={page}&resultPerPage=20"
    for attempt in range(1, MAX_RETRIES + 1):
        try:
            response = requests.get(url, headers=HEADERS, timeout=10)
            if response.status_code == 200 and "application/json" in response.headers.get("Content-Type", ""):
                return response.json()
            print(f"⚠️ Status {response.status_code} for {url}")
        except Exception as e:
            # Best effort: network errors and undecodable bodies are logged and retried.
            print(f"🔁 Retry {attempt}/{MAX_RETRIES} - Error fetching page {page}: {e}")
        # Back off after *any* failed attempt (bad status as well as exceptions),
        # but do not waste time sleeping once no attempts remain.
        if attempt < MAX_RETRIES:
            time.sleep(RETRY_DELAY)
    return None

def get_self_data(self_link: str):
    """Fetch the detail document behind a notice's ``self`` link.

    Args:
        self_link: URL to request.

    Returns:
        The decoded JSON body on an HTTP 200 response whose Content-Type contains
        ``application/json``; an empty dict once ``MAX_RETRIES`` attempts have failed.
    """
    for attempt in range(1, MAX_RETRIES + 1):
        try:
            response = requests.get(self_link, headers=HEADERS, timeout=10)
            if response.status_code == 200 and "application/json" in response.headers.get("Content-Type", ""):
                return response.json()
            # Surface bad statuses instead of retrying silently.
            print(f"⚠️ Status {response.status_code} for {self_link}")
        except Exception as e:
            # Best effort: network errors and undecodable bodies are logged and retried.
            print(f"🔁 Retry {attempt}/{MAX_RETRIES} - Error fetching self data: {e}")
        # Back off after any failed attempt, except when no attempts remain.
        if attempt < MAX_RETRIES:
            time.sleep(RETRY_DELAY)
    return {}

# === Extract + Transform ===
def extract_combined_data(notice: dict, detail: dict):
    """Flatten a search-result notice and its detail document into one record.

    Args:
        notice: One entry from a results page. Link URLs are read from its
            ``_links`` mapping (``thumbnail``, ``images``, ``self``).
        detail: The document fetched from the notice's ``self`` link; may be
            an empty dict when that fetch failed.

    Returns:
        A flat dict with one key per output column. Missing fields are ``None``.
        List fields (``nationalities``, ``languages_spoken_ids``) are joined with
        ``", "``. Country ids are translated through ``code_to_country`` and fall
        back to the raw id when unknown. Only the first arrest warrant is used.
    """
    def get_country_name(code):
        return code_to_country.get(code, code)

    def join_or_none(values):
        return ", ".join(values) if values else None

    # Tolerate a notice without `_links`, or with a null link entry, the same
    # way every other missing field is tolerated.
    links = notice.get("_links") or {}

    def link_href(rel):
        return (links.get(rel) or {}).get("href", None)

    warrants = detail.get("arrest_warrants") or [{}]
    arrest_warrant = warrants[0] or {}

    return {
        "entity_id": notice.get("entity_id", None),
        "forename": notice.get("forename", None),
        "name": notice.get("name", None),
        "date_of_birth": notice.get("date_of_birth", None),
        "nationalities": join_or_none(notice.get("nationalities")),
        "thumbnail": link_href("thumbnail"),
        "images": link_href("images"),
        "self_link": link_href("self"),

        # Self-link fields
        "sex_id": detail.get("sex_id", None),
        "place_of_birth": detail.get("place_of_birth", None),
        "country_of_birth": get_country_name(detail.get("country_of_birth_id", None)),
        "eyes_colors_id": detail.get("eyes_colors_id", None),
        "hairs_id": detail.get("hairs_id", None),
        "height": detail.get("height", None),
        "weight": detail.get("weight", None),
        "languages_spoken_ids": join_or_none(detail.get("languages_spoken_ids")),
        "distinguishing_marks": detail.get("distinguishing_marks", None),
        "arrest_charge": arrest_warrant.get("charge", None),
        "arrest_issuing_country": get_country_name(arrest_warrant.get("issuing_country_id", None)),
        "arrest_charge_translation": arrest_warrant.get("charge_translation", None)
    }

# === Loop through Selected Country Codes ===
# === Loop through Selected Country Codes ===
# For every country code: discover the notice count, walk all result pages,
# fetch each notice's detail document, checkpoint after every page and write
# one combined CSV per country into a folder named after the code.
for country_code in selected_country_codes:
    # A pandas column yields NaN (a float) for blank cells and, under default
    # read_csv NA parsing, for the literal code "NA"; a non-string here would
    # crash os.path.join below, so skip it loudly instead.
    if not isinstance(country_code, str) or not country_code.strip():
        print(f"⚠️ Skipping invalid country code: {country_code!r}")
        continue
    # Stray whitespace would otherwise leak into the URL and the folder name.
    country_code = country_code.strip()

    print(f"\n==================== {country_code} ====================")
    OUTPUT_DIR = os.path.join(os.getcwd(), country_code)
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    CHECKPOINT_FILE = os.path.join(OUTPUT_DIR, f"{country_code}_checkpoint.csv")
    FINAL_OUTPUT_FILE = os.path.join(OUTPUT_DIR, f"{country_code}_combined_data.csv")

    all_data = []

    # Step 1: Get first page
    first_page = get_notices_page(1, country_code)
    if not first_page:
        print(f"❌ Failed to fetch first page for {country_code}. Skipping...")
        continue

    total_notices = first_page.get("total", 0)
    if total_notices == 0:
        print(f"⚠️ No notices found for {country_code}. Skipping...")
        continue

    # 20 matches the resultPerPage value requested by get_notices_page.
    total_pages = math.ceil(total_notices / 20)
    print(f"🔍 {total_notices} notices, {total_pages} pages for {country_code}")

    # Step 2: Scrape all pages
    for page in range(1, total_pages + 1):
        print(f"📄 Fetching page {page}/{total_pages} for {country_code}")
        # Page 1 is already in hand from Step 1; do not download it a second time.
        page_data = first_page if page == 1 else get_notices_page(page, country_code)
        if not page_data:
            print(f"⚠️ Skipping page {page}")
            continue

        notices = page_data.get("_embedded", {}).get("notices", [])
        for notice in notices:
            self_link = notice["_links"]["self"]["href"]
            detail = get_self_data(self_link)
            record = extract_combined_data(notice, detail)
            all_data.append(record)

        # Save checkpoint after each page
        pd.DataFrame(all_data).to_csv(CHECKPOINT_FILE, index=False)
        print(f"💾 Checkpoint saved (Page {page})")
        time.sleep(PAGE_DELAY)

    # Step 3: Final save
    pd.DataFrame(all_data).to_csv(FINAL_OUTPUT_FILE, index=False)
    print(f"✅ Final saved at: {FINAL_OUTPUT_FILE}")



🔍 18 notices, 1 pages for DZ
📄 Fetching page 1/1 for DZ
💾 Checkpoint saved (Page 1)
✅ Final saved at: /content/DZ/DZ_combined_data.csv

🔍 7 notices, 1 pages for AO
📄 Fetching page 1/1 for AO
💾 Checkpoint saved (Page 1)
✅ Final saved at: /content/AO/AO_combined_data.csv

⚠️ No notices found for BJ. Skipping...

⚠️ No notices found for BW. Skipping...

⚠️ No notices found for BF. Skipping...

🔍 2 notices, 1 pages for BI
📄 Fetching page 1/1 for BI
💾 Checkpoint saved (Page 1)
✅ Final saved at: /content/BI/BI_combined_data.csv

⚠️ No notices found for CV. Skipping...

🔍 2 notices, 1 pages for CM
📄 Fetching page 1/1 for CM
💾 Checkpoint saved (Page 1)
✅ Final saved at: /content/CM/CM_combined_data.csv

🔍 4 notices, 1 pages for CF
📄 Fetching page 1/1 for CF
💾 Checkpoint saved (Page 1)
✅ Final saved at: /content/CF/CF_combined_data.csv

⚠️ No notices found for TD. Skipping...

⚠️ No notices found for KM. Skipping...

⚠️ No notices found for CG. Skipping...

🔍 1 notices, 1 pages for CD
📄 Fetchi

In [15]:
import os
import shutil
from google.colab import files

# Collect every per-country output folder under /content into ALL_COUNTRIES,
# zip that folder and hand the archive to the browser for download.
content_root = "/content"

# Step 1: Identify 2-letter country folders
country_folders = [
    f for f in os.listdir(content_root)
    if os.path.isdir(os.path.join(content_root, f)) and len(f) == 2 and f.isalpha()
]

# Step 2: Create parent folder
parent_folder = "ALL_COUNTRIES"
parent_path = os.path.join(content_root, parent_folder)
os.makedirs(parent_path, exist_ok=True)

# Step 3: Move each 2-letter folder into parent folder
for folder in country_folders:
    src = os.path.join(content_root, folder)
    dst = os.path.join(parent_path, folder)
    if os.path.isdir(dst):
        # Re-run case: shutil.move() into an existing directory would nest the
        # folder (ALL_COUNTRIES/XX/XX). Merge instead, letting fresh files
        # overwrite older ones of the same name.
        # NOTE(review): assumes country folders hold only plain files, as the
        # scraping cell writes them - confirm if sub-folders are ever added.
        for entry in os.listdir(src):
            os.replace(os.path.join(src, entry), os.path.join(dst, entry))
        os.rmdir(src)
    else:
        shutil.move(src, dst)

# Step 4: Zip the parent folder
zip_path = shutil.make_archive(parent_path, 'zip', parent_path)
print(f"📦 Zipped archive created at: {zip_path}")

# Step 5: Download the zip file
files.download(zip_path)


📦 Zipped archive created at: /content/ALL_COUNTRIES.zip


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>