In [14]:
import requests
import zipfile
import os
from scipy.io import arff
import pandas as pd

In [2]:
# We inspect the source web page to get the url for direct downloading.
download_url = (
    "https://archive.ics.uci.edu/static/public/365/polish+companies+bankruptcy+data.zip"
)
# A timeout keeps the cell from hanging forever if the server stops responding.
response = requests.get(download_url, timeout=60)
zip_file_path = "downloaded_file.zip"

# Fail loudly instead of printing and carrying on: the cells below read
# zip_file_path, and continuing after a failed download would either crash
# there with an unrelated-looking error or silently reuse a stale archive
# left over from an earlier run.
if response.status_code != 200:
    raise RuntimeError(
        f"Failed to download the file (HTTP {response.status_code}): {download_url}"
    )

# Persist the raw response body as the local ZIP archive.
with open(zip_file_path, "wb") as file:
    file.write(response.content)

In [15]:
def extract_arff_from_zip(zip_path, arff_filename, extraction_path="."):
    """
    Pull a single named member (expected to be an ARFF file) out of a ZIP
    archive, leaving every other member untouched.

    Parameters:
        zip_path (str): Location of the ZIP archive on disk.
        arff_filename (str): Name of the member to extract from the archive.
        extraction_path (str): Directory the member is extracted into.
            Defaults to the current directory.
    Returns:
        str: ``extraction_path`` joined with ``arff_filename``.
    """
    # The context manager closes the archive even if extraction fails.
    with zipfile.ZipFile(zip_path, mode="r") as archive:
        archive.extract(member=arff_filename, path=extraction_path)

    # The archive is already closed here; only the path is built.
    return os.path.join(extraction_path, arff_filename)


def convert_arff_to_csv(arff_path, csv_path):
    """
    Converts an ARFF file to a CSV file.

    The file is loaded with ``scipy.io.arff.loadarff``, wrapped in a
    DataFrame and written to ``csv_path`` without the index column. Values
    held as ``bytes`` in object-dtype columns are decoded as UTF-8 first, so
    the CSV contains plain text instead of ``b'...'`` representations.

    An ARFF file with a header but no data rows produces a CSV containing
    only the header line.

    Parameters:
        arff_path (str): Path to the ARFF file.
        csv_path (str): Path where the CSV file will be saved.
    """
    # Load ARFF file; the metadata half of the result is not needed here.
    data, _ = arff.loadarff(arff_path)
    df = pd.DataFrame(data)

    # Convert bytes values to string (if needed). Decoding value by value,
    # rather than probing row 0 of each column, keeps this working when the
    # frame has no rows (where looking up label 0 raises KeyError) and leaves
    # any non-bytes values in an object column untouched.
    for col in df.select_dtypes(include=[object]).columns:
        df[col] = df[col].map(
            lambda value: value.decode("utf-8") if isinstance(value, bytes) else value
        )

    # Save to CSV
    df.to_csv(csv_path, index=False)
    print(f"CSV file has been saved to {csv_path}")

In [17]:
# Convert the archive members named 1year.arff through 5year.arff, one CSV each.
for i in (1, 2, 3, 4, 5):
    # Member name inside the archive and the matching CSV target.
    arff_filename, csv_path = f"{i}year.arff", f"{i}year.csv"

    # Extract the member into the working directory, then convert it.
    extracted_arff_path = extract_arff_from_zip(zip_file_path, arff_filename)
    convert_arff_to_csv(extracted_arff_path, csv_path)

CSV file has been saved to 1year.csv
CSV file has been saved to 2year.csv
CSV file has been saved to 3year.csv
CSV file has been saved to 4year.csv
CSV file has been saved to 5year.csv
