In [1]:
import re
from datetime import datetime
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd

from DataProcessor import *

### Rent

In [2]:
# Numeric stamp identifying which rent scrape to process
# (presumably YYYYMMDD -- confirm against the scraper's file naming).
date = 20240118

# Input (raw scrape) and output (processed) CSV locations for that stamp.
# f-strings format the int directly, so no explicit str() is needed.
raw_input_path = f"data/immo24/rent/raw/data_{date}.csv"
processed_output_path = f"data/immo24/rent/processed/data_{date}.csv"

In [3]:
# Columns kept in the processed rent CSV, listed in output order.
selected_columns = [
    "url", "company", "type", "title",
    "std_address", "area", "n_room", "n_bedroom", "n_washroom",
    "cold_rent", "price_per_m2", "extra_cost", "heater_cost",
    "warm_price", "deposit", "garage_price",
    "from_date", "year_of_house", "year_of_energy",
]

In [4]:
# Load the raw rent scrape; the derived columns are appended to this same frame.
df = pd.read_csv(raw_input_path)

# Raw columns carried over unchanged under a normalised name:
# (new column, raw column).
passthrough_fields = [
    ("url", "URL"),
    ("company", "Company"),
    ("type", "Typ"),
    ("title", "Title"),
]
for new_name, raw_name in passthrough_fields:
    df[new_name] = df[raw_name].copy()

# Raw columns run element-wise through a parsing helper
# (helpers presumably come from the `DataProcessor` star import):
# (new column, raw column, helper).
# NOTE(review): the deposit header appears to contain a soft hyphen (U+00AD);
# keep that literal byte-identical to the raw CSV header.
parsed_fields = [
    ("std_address", "Address", process_address),
    ("area", "Wohnfläche ca.", process_area),
    ("n_room", "Zimmer", process_room),
    ("n_bedroom", "Schlafzimmer", process_room),
    ("n_washroom", "Badezimmer", process_room),
    ("cold_rent", "Kaltmiete", find_price),
    ("price_per_m2", "Preis/m²", find_price),
    ("extra_cost", "Nebenkosten", find_price),
    ("heater_cost", "Heizkosten", find_price),
    ("warm_price", "Gesamtmiete", find_price),
    ("deposit", "Kaution o. Genossenschafts­anteile", find_price),
    ("garage_price", "Miete für Garage/Stellplatz", find_price),
    ("from_date", "Bezugsfrei ab", find_date),
    ("year_of_house", "Baujahr", find_year),
    ("year_of_energy", "Baujahr laut Energieausweis", find_year),
]
for new_name, raw_name, parser in parsed_fields:
    df[new_name] = df[raw_name].apply(parser)

In [5]:
# Persist only the curated columns (in `selected_columns` order), without the
# pandas index. `to_csv` does not create missing parent directories and would
# raise OSError on a fresh checkout, so make sure the output folder exists.
Path(processed_output_path).parent.mkdir(parents=True, exist_ok=True)
df[selected_columns].to_csv(processed_output_path, index=False)

### Buy

In [2]:
# Numeric stamp identifying which buy scrape to process
# (presumably YYYYMMDD -- confirm against the scraper's file naming).
date = 20240106

# Input (raw scrape) and output (processed) CSV locations for that stamp.
# f-strings format the int directly, so no explicit str() is needed.
raw_input_path = f"data/immo24/buy/raw/data_{date}.csv"
processed_output_path = f"data/immo24/buy/processed/data_{date}.csv"

In [3]:
# Columns kept in the processed buy CSV, listed in output order.
selected_columns = [
    "url", "company", "type", "title",
    "energy_level", "energy_consumption_kWh/(m²*a)",
    "std_address", "area", "n_room", "n_bedroom", "n_washroom",
    "buy_price", "price_per_m2", "provision",
    "rent_price", "maintenance_costs", "garage_price",
    "from_date", "year_of_house", "year_of_energy",
]

In [4]:
# Load the raw buy scrape from the path built in the configuration cell.
df = pd.read_csv(filepath_or_buffer=raw_input_path)

In [5]:
# Raw columns carried over unchanged under a normalised name:
# (new column, raw column).
# NOTE(review): the energy-class header appears to contain soft hyphens
# (U+00AD); keep that literal byte-identical to the raw CSV header.
passthrough_fields = [
    ("url", "URL"),
    ("company", "Company"),
    ("type", "Typ"),
    ("title", "Title"),
    ("energy_level", "Energie­effizienz­klasse"),
]
for new_name, raw_name in passthrough_fields:
    df[new_name] = df[raw_name].copy()

# Raw columns run element-wise through a parsing helper
# (helpers presumably come from the `DataProcessor` star import):
# (new column, raw column, helper).
# NOTE(review): the energy-consumption header also appears to contain soft
# hyphens (U+00AD); do not retype it.
parsed_fields = [
    ("energy_consumption_kWh/(m²*a)", "End­energie­verbrauch", find_price),
    ("std_address", "Address", process_address),
    ("area", "Wohnfläche ca.", process_area),
    ("n_room", "Zimmer", process_room),
    ("n_bedroom", "Schlafzimmer", process_room),
    ("n_washroom", "Badezimmer", process_room),
    ("buy_price", "Kaufpreis", find_price),
    ("price_per_m2", "Preis/m²", find_price),
    ("provision", "Provision für Käufer", find_price),
    ("rent_price", "Mieteinnahmen pro Monat", find_price),
    ("maintenance_costs", "Hausgeld", find_price),
    ("garage_price", "Garage/ Stellplatz-Kaufpreis", find_price),
    ("from_date", "Bezugsfrei ab", find_date),
    ("year_of_house", "Baujahr", find_year),
    ("year_of_energy", "Baujahr laut Energieausweis", find_year),
]
for new_name, raw_name, parser in parsed_fields:
    df[new_name] = df[raw_name].apply(parser)

In [6]:
# Persist only the curated columns (in `selected_columns` order), without the
# pandas index. `to_csv` does not create missing parent directories and would
# raise OSError on a fresh checkout, so make sure the output folder exists.
Path(processed_output_path).parent.mkdir(parents=True, exist_ok=True)
df[selected_columns].to_csv(processed_output_path, index=False)