In [1]:
import os
import rdata
import pandas as pd
import urllib.request
import warnings
warnings.filterwarnings("ignore")

In [4]:
# Constantes
DATA_FOLDER = "data"
CSV_OUTPUT_FOLDER = "csv_output"
RDA_FILES = [
    "Barcelona_POIS.rda", "Barcelona_Polygons.rda", "Barcelona_Sale.rda",
    "Madrid_POIS.rda", "Madrid_Polygons.rda", "Madrid_Sale.rda",
    "Valencia_POIS.rda", "Valencia_Polygons.rda", "Valencia_Sale.rda",
    "properties_by_district.rda"
]
BASE_URL = "https://github.com/paezha/idealista18/raw/master/data/"


def download_rdata():
    """
    Download .rda files from the specified repository, skipping files that already exist.
    """
    os.makedirs(DATA_FOLDER, exist_ok=True)

    for file_name in RDA_FILES:
        file_path = os.path.join(DATA_FOLDER, file_name)
        if not os.path.isfile(file_path):
            urllib.request.urlretrieve(BASE_URL + file_name, file_path)
            print(f"Downloaded: {file_name}")
        else:
            print(f"File already exists: {file_name}")

def read_rda_file(file_path):
    """
    Read an .rda file and return its converted data as a dictionary.
    """
    parsed_data = rdata.parser.parse_file(file_path)
    return rdata.conversion.convert(parsed_data, default_encoding="utf-8")

def convert_rda_to_csv(input_folder, output_folder):
    """
    Convert all .rda files in the input folder to CSV format.
    """
    os.makedirs(output_folder, exist_ok=True)

    for file_name in os.listdir(input_folder):
        if file_name.endswith(".rda"):
            file_path = os.path.join(input_folder, file_name)
            converted_data = read_rda_file(file_path)

            for obj_name, data in converted_data.items():
                if isinstance(data, pd.DataFrame):
                    output_csv_path = os.path.join(output_folder, f"{obj_name}.csv")
                    data.to_csv(output_csv_path, index=False)
                    print(f"Saved to CSV: {output_csv_path}")
                else:
                    print(f"Skipped non-DataFrame object: {obj_name}")





In [5]:
if __name__ == "__main__":
    # Download .rda files
    download_rdata()

    # Convert .rda files to CSV
    print("\nConverting .rda files to CSV...\n")
    convert_rda_to_csv(DATA_FOLDER, CSV_OUTPUT_FOLDER)

Downloaded: Barcelona_POIS.rda
Downloaded: Barcelona_Polygons.rda
Downloaded: Barcelona_Sale.rda
Downloaded: Madrid_POIS.rda
Downloaded: Madrid_Polygons.rda
Downloaded: Madrid_Sale.rda
Downloaded: Valencia_POIS.rda
Downloaded: Valencia_Polygons.rda
Downloaded: Valencia_Sale.rda
Downloaded: properties_by_district.rda

Converting .rda files to CSV...

Skipped non-DataFrame object: Barcelona_POIS
Saved to CSV: csv_output\Barcelona_Polygons.csv
Saved to CSV: csv_output\Barcelona_Sale.csv
Skipped non-DataFrame object: Madrid_POIS
Saved to CSV: csv_output\Madrid_Polygons.csv
Saved to CSV: csv_output\Madrid_Sale.csv
Saved to CSV: csv_output\properties_by_district.csv
Skipped non-DataFrame object: Valencia_POIS
Saved to CSV: csv_output\Valencia_Polygons.csv
Saved to CSV: csv_output\Valencia_Sale.csv
