In [2]:
import pandas as pd
import numpy as np

In [14]:
# Read the two apartment listing datasets (MyRealty and Bars) into DataFrames.
# Both paths are relative, so the cell expects a `../data` directory next to
# the notebook's working directory.
myrealty_csv_path = "../data/myrealty_apartments.csv"
bars_csv_path = "../data/bars_apartments.csv"

myrealty_df = pd.read_csv(myrealty_csv_path)
bars_df = pd.read_csv(bars_csv_path)

In [15]:
import ast


def _parse_facilities(raw):
    """Convert one raw `facilities` cell into a Python object (normally a list).

    Parameters
    ----------
    raw : str or missing value
        The cell content as read from the CSV: a string holding a Python
        literal such as "['Elevator', 'Garage']", or NaN/None when the
        field was empty.

    Returns
    -------
    list
        The parsed literal, or an empty list when the cell is missing.

    Raises
    ------
    ValueError, SyntaxError
        Propagated from `ast.literal_eval` when the text is not a valid literal.
    """
    # `ast.literal_eval` only accepts strings/AST nodes; a NaN from an empty CSV
    # field would raise ValueError and abort the whole cell, so treat a missing
    # value as "no facilities listed".
    if not isinstance(raw, str) and pd.isna(raw):
        return []
    # literal_eval safely evaluates the text as a Python literal (no code execution).
    return ast.literal_eval(raw)


# The `facilities` columns contain string representations of lists, so convert
# every cell into an actual list (one list of facility labels per listing).
facilities_myrealty_lists = myrealty_df['facilities'].apply(_parse_facilities)
facilities_bars_lists = bars_df['facilities'].apply(_parse_facilities)

# Combine the per-listing lists from both datasets into one list of lists.
combined_facilities = facilities_myrealty_lists.tolist() + facilities_bars_lists.tolist()

# Flatten the combined list and drop duplicates.
# NOTE(review): labels are kept exactly as they appear in the data; the displayed
# result contains 'Grating ' with a trailing space and the near-duplicates
# 'Air conditioning' / 'Air-conditioner', so the sources likely need label
# normalisation before being compared — confirm with the data owner.
unique_facilities = {facility for sublist in combined_facilities for facility in sublist}

# `sorted` accepts any iterable and returns a new list, so no intermediate list is needed.
unique_facilities_list = sorted(unique_facilities)

unique_facilities_list

['Air conditioning',
 'Air-conditioner',
 'Balcony',
 'Basement',
 'Bilateral',
 'Building security',
 'Building service',
 'Built-in furniture',
 'Central heating',
 'Close to the bus station',
 'Closed balcony',
 'Electricity',
 'Elevator',
 'Equipment',
 'Euro windows',
 'Exclusive design',
 'Fireplace',
 'First line',
 'Fridge',
 'Front balcony',
 'Furniture',
 'Garage',
 'Gas',
 'Grating ',
 'Heated floor',
 'Heating',
 'Heating system',
 'High first floor',
 'Hot water',
 'Internet',
 'Iron door',
 'Irrigation',
 'Jacuzzi',
 'Kitchen furniture',
 'Kitchen stove',
 'Laminate flooring',
 'Loggia',
 'Mansard',
 'Open balcony',
 'Panorama',
 'Park',
 'Parking',
 'Parquet',
 'Playground',
 'Roadside',
 'Security system',
 'Sewerage, Canalization',
 'Storage room',
 'Sunny',
 'Superstructure',
 'TV',
 'Terrace',
 'Tile',
 'Transport availability',
 'View',
 'Wardrobe',
 'Washing machine',
 'Water',
 'water 24/7']

In [16]:
# Collapse each dataset's per-listing facility lists into a single set of
# distinct labels (the union of all the lists in the Series).
unique_facilities_myrealty = set().union(*facilities_myrealty_lists)
unique_facilities_bars = set().union(*facilities_bars_lists)

# Sorted label lists, one per source.
myrealty_sorted = sorted(unique_facilities_myrealty)
bars_sorted = sorted(unique_facilities_bars)

# Final dictionary: source name -> sorted list of that source's unique facilities.
facilities_dict = {"myrealty": myrealty_sorted, "bars": bars_sorted}

facilities_dict

{'myrealty': ['Air-conditioner',
  'Balcony',
  'Basement',
  'Bilateral',
  'Central heating',
  'Close to the bus station',
  'Electricity',
  'Elevator',
  'Equipment',
  'Euro windows',
  'Furniture',
  'Garage',
  'Gas',
  'Grating ',
  'Heated floor',
  'Heating',
  'High first floor',
  'Hot water',
  'Internet',
  'Iron door',
  'Irrigation',
  'Laminate flooring',
  'Loggia',
  'Open balcony',
  'Park',
  'Parking',
  'Parquet',
  'Playground',
  'Roadside',
  'Security system',
  'Sewerage, Canalization',
  'Storage room',
  'Sunny',
  'Tile',
  'View',
  'Water',
  'water 24/7'],
 'bars': ['Air conditioning',
  'Building security',
  'Building service',
  'Built-in furniture',
  'Closed balcony',
  'Elevator',
  'Exclusive design',
  'Fireplace',
  'First line',
  'Fridge',
  'Front balcony',
  'Furniture',
  'Garage',
  'Heating system',
  'Jacuzzi',
  'Kitchen furniture',
  'Kitchen stove',
  'Mansard',
  'Open balcony',
  'Panorama',
  'Parking',
  'Superstructure',
  'TV

In [17]:
# Compare the two sources' facility label sets:
#   - labels present in both datasets,
#   - labels that only MyRealty uses,
#   - labels that only Bars uses.
facilities_intersection = unique_facilities_myrealty & unique_facilities_bars
facilities_difference_myrealty = unique_facilities_myrealty - unique_facilities_bars
facilities_difference_bars = unique_facilities_bars - unique_facilities_myrealty

# Store each comparison result as a sorted list so the display is stable and readable.
facilities_comparison_dict = {
    "intersection": sorted(facilities_intersection),
    "difference_myrealty": sorted(facilities_difference_myrealty),
    "difference_bars": sorted(facilities_difference_bars),
}

facilities_comparison_dict

{'intersection': ['Elevator',
  'Furniture',
  'Garage',
  'Open balcony',
  'Parking'],
 'difference_myrealty': ['Air-conditioner',
  'Balcony',
  'Basement',
  'Bilateral',
  'Central heating',
  'Close to the bus station',
  'Electricity',
  'Equipment',
  'Euro windows',
  'Gas',
  'Grating ',
  'Heated floor',
  'Heating',
  'High first floor',
  'Hot water',
  'Internet',
  'Iron door',
  'Irrigation',
  'Laminate flooring',
  'Loggia',
  'Park',
  'Parquet',
  'Playground',
  'Roadside',
  'Security system',
  'Sewerage, Canalization',
  'Storage room',
  'Sunny',
  'Tile',
  'View',
  'Water',
  'water 24/7'],
 'difference_bars': ['Air conditioning',
  'Building security',
  'Building service',
  'Built-in furniture',
  'Closed balcony',
  'Exclusive design',
  'Fireplace',
  'First line',
  'Fridge',
  'Front balcony',
  'Heating system',
  'Jacuzzi',
  'Kitchen furniture',
  'Kitchen stove',
  'Mansard',
  'Panorama',
  'Superstructure',
  'TV',
  'Terrace',
  'Transport avai