In [10]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

In [32]:
# Load the semicolon-separated apartment listings, using the ID column as the
# row index, then preview the first ten rows.
df = pd.read_csv(
    "./data/apartments.csv",
    sep=";",
    index_col="ID",
)
df.head(10)

Unnamed: 0_level_0,DISTANCE,STOP_COUNT,COST,FITNESS,INTERNET,PETS_ALLOWED,ADDITIONAL_INFO
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,4,1,48000,23,3,Да,Подземный паркинг
2,14,2,25000,24,1,Нет,Панорама с видом на парк Сосновка.
3,24,1,45000,8,2,Да,Внутренний двор с игровыми и спортивными зонам...
4,22,1,25000,16,1,Да,
5,7,11,54000,29,4,Да,"Внутренний двор с ландшафтным озеленением, игр..."
6,5,8,28000,29,0,Да,
7,4,5,26000,26,1,Да,
8,26,11,25000,22,2,Да,Парк отдыха со специально оборудованными зонам...
9,28,8,27000,23,4,Да,
10,19,12,23000,12,4,Да,Рядом с домом расположены Суздальские озера и ...


In [33]:
def linear_normalization(x):
    """Min-max scale ``x`` so its minimum maps to 0 and its maximum to 1.

    Parameters
    ----------
    x : pandas.Series or numpy.ndarray (anything with ``.min()``/``.max()``
        and element-wise arithmetic)
        Values to rescale.

    Returns
    -------
    Same container type as ``x``, holding ``(x - min) / (max - min)``.
    When every value is identical (range of zero) the result is all zeros
    instead of the NaNs that a 0/0 division would produce.
    """
    lowest = x.min()
    value_range = x.max() - lowest
    # A constant input has no spread to scale by; 0/0 would silently yield NaN
    # and propagate into every score built on top of it. Only the scalar case
    # is special-cased so non-scalar ranges keep their original behavior.
    if np.ndim(value_range) == 0 and value_range == 0:
        return (x - lowest) * 0.0
    return (x - lowest) / value_range

def exponential_normalization(x):
    """Return ``1 - exp(1 - x / x.min())`` element-wise.

    The smallest element maps to 0 and larger elements approach 1.
    ``x.min()`` is used as the divisor, so a minimum of zero makes the
    ratio undefined.
    """
    smallest = x.min()
    ratio_to_smallest = x / smallest
    return 1 - np.exp(1 - ratio_to_smallest)

In [37]:

# Artem's story: keep only listings whose INTERNET value is non-zero and rank them by
#   Norm(STOP_COUNT) + Norm(DISTANCE) + Norm(COST) + 0.5 * Norm(FITNESS) - 0.2 * Norm(INTERNET)
# where Norm is exponential_normalization. The ascending sort puts the lowest SCORE first.
first_df = df.loc[df["INTERNET"].ne(0)].copy()
for col in ("STOP_COUNT", "COST", "DISTANCE", "FITNESS", "INTERNET"):
    first_df[f"NORM_{col}"] = exponential_normalization(first_df[col])

# Weighted sum of the normalized criteria; INTERNET carries a negative weight.
first_df["SCORE"] = (
    first_df["NORM_STOP_COUNT"]
    .add(first_df["NORM_COST"])
    .add(first_df["NORM_DISTANCE"])
    .add(first_df["NORM_FITNESS"].mul(0.5))
    .add(first_df["NORM_INTERNET"].mul(-0.2))
)
first_df = first_df.sort_values("SCORE")
print(first_df[['DISTANCE', 'STOP_COUNT', 'COST', 'FITNESS', 'INTERNET', 'SCORE']].head())
first_df.head()


     DISTANCE  STOP_COUNT   COST  FITNESS  INTERNET     SCORE
ID                                                           
63         14           1  22000        5         3  1.091300
74          5           1  33000       21         3  1.247692
1           4           1  48000       23         3  1.333446
178        25           1  22000        9         2  1.351758
141         4           3  23000        4         1  1.380711


Unnamed: 0_level_0,DISTANCE,STOP_COUNT,COST,FITNESS,INTERNET,PETS_ALLOWED,ADDITIONAL_INFO,NORM_STOP_COUNT,NORM_COST,NORM_DISTANCE,NORM_FITNESS,NORM_INTERNET,SCORE
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
63,14,1,22000,5,3,Да,Исторический центр Санкт-Петербурга.,0.0,0.046503,0.974438,0.486583,0.864665,1.0913
74,5,1,33000,21,3,Нет,"Для юных жителей будет обустроена велодорожка,...",0.0,0.435282,0.486583,0.997521,0.864665,1.247692
1,4,1,48000,23,3,Да,Подземный паркинг,0.0,0.723547,0.283469,0.998727,0.864665,1.333446
178,25,1,22000,9,2,Да,,0.0,0.046503,0.999347,0.864665,0.632121,1.351758
141,4,3,23000,4,1,Да,Тематические зоны во дворе: всепогодные игровы...,0.864665,0.090844,0.283469,0.283469,0.0,1.380711


In [36]:
# Anna's story: start from listings whose INTERNET value is non-zero
# and whose PETS_ALLOWED flag is "Да".
second_df = df.loc[df["INTERNET"].ne(0) & df["PETS_ALLOWED"].eq("Да")].copy()

def has_dog_walking_option(text) -> int:
    """Return 1 when ``text`` describes a dog-walking option, otherwise 0.

    The check is case-insensitive substring matching: the text must contain
    at least one dog-related stem ('собак', 'питом', 'четвероног') together
    with either 'площадк' or 'выгул'. Missing values (as detected by
    ``pd.isna``) yield 0.
    """
    if pd.isna(text):
        return 0
    lowered = text.lower()
    mentions_dog = any(stem in lowered for stem in {'собак', 'питом', 'четвероног'})
    mentions_area_or_walk = 'площадк' in lowered or 'выгул' in lowered
    return int(mentions_area_or_walk and mentions_dog)

# Flag each listing (1/0) by running has_dog_walking_option over its free-text description.
second_df["DOG_WALKING"] = second_df["ADDITIONAL_INFO"].apply(has_dog_walking_option)

# Add a NORM_<column> companion for every numeric criterion via linear_normalization.
for col in ('STOP_COUNT', 'DISTANCE', 'COST', 'FITNESS', "INTERNET"):
    second_df[f"NORM_{col}"] = linear_normalization(second_df[col])

# SCORE = Norm(STOP_COUNT) + Norm(DISTANCE) + Norm(COST)
#         - 0.2 * Norm(INTERNET) - 0.2 * DOG_WALKING
# NORM_FITNESS is computed above but carries no weight in this score.
second_df["SCORE"] = (
    second_df["NORM_STOP_COUNT"]
    .add(second_df["NORM_DISTANCE"])
    .add(second_df["NORM_COST"])
    .sub(second_df["NORM_INTERNET"].mul(0.2))
    .sub(second_df["DOG_WALKING"].mul(0.2))
)
# Ascending sort puts the lowest SCORE first.
second_df = second_df.sort_values("SCORE")
print(second_df[['DISTANCE', 'STOP_COUNT', 'COST', 'FITNESS', 'INTERNET', 'DOG_WALKING', 'SCORE']].head())
    

     DISTANCE  STOP_COUNT   COST  FITNESS  INTERNET  DOG_WALKING     SCORE
ID                                                                        
141         4           3  23000        4         1            0  0.272911
63         14           1  22000        5         3            0  0.316059
11         13           2  21000       11         3            0  0.342191
38          8           1  29000       25         1            0  0.402834
23         11           1  25000       20         1            0  0.412955
