<h3>Importing Libraries</h3>

In [336]:
import pandas as pd
import re
from scipy.spatial import distance

In [337]:
locations = pd.read_csv("restoran_data.csv", encoding = "utf-8")

In [338]:
locations.head(1)

Unnamed: 0,Name,Type,Address,Cuisine type,Price,Extra,Reservation,Number of reviews,Working Hours,Average Price,Min Price,Max Price
0,Gold,cafe,"мкр. Жетысу 2, 11","паназиатская, американская, европейская, восто...",1500–3500 тенге на человека,"танцпол, Wi-Fi, спортивные трансляции",0,No reviews,"['Уточняйте, пожалуйста, часы работы по телефо...",2500.0,1500.0,3500.0


In [339]:
locations = locations.rename({'Cuisine type': 'Cuisine_type'}, axis=1)
locations = locations.rename({'Working Hours': 'Working_Hours'}, axis=1)
locations = locations.rename({'Average Price': 'Average_Price'}, axis=1)

<h3>Defining Target Location to look for simmilar Locations from dataset </h3>

In [340]:
cuisine_target = ["европейская", "казахская"]
extra_target = ["танцпол", "караоке"]
price_class = 6

<h2></h2>

In [341]:
def intersection(cell_string, lst2): #to find simmilar properties between target and rows in dataset
    lst = re.split('[^а-я]', cell_string)
    lst1 = ' '.join(lst).split() 
    lst3 = list(set(lst1).intersection(lst2))
    return lst3 
  


<h3>Simmilarity of row having same properties as target</h3>
$$\frac{A\cap B}{B}$$

In [342]:

locations['cuisine_simmilarity'] = locations.apply(lambda row: len(intersection(row.Cuisine_type, cuisine_target))/len(cuisine_target), axis=1)
locations['extra_simmilarity'] = locations.apply(lambda row: len(intersection(row.Extra, extra_target))/len(extra_target), axis=1)


In [343]:
locations['common_cuisine_types'] = locations.apply(lambda row: len(intersection(row.Cuisine_type, cuisine_target)), axis=1)
locations['common_extra_types'] = locations.apply(lambda row: len(intersection(row.Extra, extra_target)), axis=1)


<h3>Grouping price ranges into classes</h3>

In [344]:
def class_price(avg_price):
    if avg_price < 700:
        return 1
    elif avg_price < 1500:
        return 2
    elif avg_price < 2500:
        return 3
    elif avg_price < 5000:
        return 4
    elif avg_price < 10000:
        return 5
    else:
        return 6

In [345]:
locations['price_class'] = locations.apply(lambda row: class_price(row.Average_Price), axis=1)

<h2></h2>

<p>In mathematics, <b>the Euclidean distance</b> between two points in Euclidean space is a number, the length of a line segment between the two points.</p>

<img src="euclid-distance.png">

In [346]:
def calculate_euclidian(vec_1_cuisine, cev_1_extra, vec_1_price, vec_2_cuisine, cev_2_extra, vec_2_price,):
    dst = distance.euclidean([float(vec_1_cuisine),float(cev_1_extra),float(vec_1_price)], [float(vec_2_cuisine),float(cev_2_extra),float(vec_2_price)])
    return dst

In [352]:
locations['euclidian_distance'] = locations.apply(lambda row: calculate_euclidian(row.common_cuisine_types, row.common_extra_types,
                                                                                 row.price_class,
                                                                                 len(cuisine_target), len(extra_target),
                                                                                 class_price(price_class)), axis=1)


In [353]:
locations.head(5)

Unnamed: 0,Name,Type,Address,Cuisine_type,Price,Extra,Reservation,Number of reviews,Working_Hours,Average_Price,Min Price,Max Price,cuisine_simmilarity,extra_simmilarity,common_cuisine_types,common_extra_types,price_class,euclidian_distance
0,Gold,cafe,"мкр. Жетысу 2, 11","паназиатская, американская, европейская, восто...",1500–3500 тенге на человека,"танцпол, Wi-Fi, спортивные трансляции",0,No reviews,"['Уточняйте, пожалуйста, часы работы по телефо...",2500.0,1500.0,3500.0,0.5,0.5,1,1,4,3.316625
1,Пельмешка на Байтурсынова,cafe,"ул. Байтурсынова, 78 А","европейская, восточная",1500–3000 тенге на человека,"европейская, итальянская",1,No reviews,['10:00–23:00 (пн–сб)'],2250.0,1500.0,3000.0,0.5,0.0,1,0,3,3.0
2,Хомяк,cafe,"бульвар Бухар Жырау, 27/5","европейская, итальянская",3000–7000 тенге на человека,"Wi-Fi, детское кафе",0,5 отзывов,"['Уточняйте, пожалуйста, часы работы по телефо...",5000.0,3000.0,7000.0,0.5,0.0,1,0,5,4.582576
3,Пельмешка на Абылай хана,cafe,"пр. Абылай хана, 18, уг. пр. Райымбека","европейская, восточная",1500–3000 тенге на человека,"Wi-Fi, халяль",1,No reviews,"['09:00–23:00, без выходных']",2250.0,1500.0,3000.0,0.5,0.0,1,0,3,3.0
4,Gardizi,cafe,"мкр. Кокжиек, 53/1","европейская, грузинская",3000–5000 тенге на человека,"кальян, танцпол, Wi-Fi",0,6 отзывов,"['Уточняйте, пожалуйста, часы работы по телефо...",4000.0,3000.0,5000.0,0.5,0.5,1,1,4,3.316625


<h2></h2>

<p>In the end, according ro euclidian distance between points, we get simmilar offers to target location</p>

In [349]:
filtered_table = locations.sort_values(by='euclidian_distance', ascending=True)

In [350]:
filtered_table.head(5)

Unnamed: 0,Name,Type,Address,Cuisine_type,Price,Extra,Reservation,Number of reviews,Working_Hours,Average_Price,Min Price,Max Price,cuisine_simmilarity,extra_simmilarity,common_cuisine_types,common_extra_types,price_class,euclidian_distance
919,GulVar,restaurant,"ул. Казыбек би, 43","европейская, кавказская, казахская, японская",15000–25000 тенге на человека,"Wi-Fi, кальян, караоке, танцпол, камин, VIP-за...",1,No reviews,Не указано,20000.0,15000.0,25000.0,1.0,1.0,2,2,6,0.0
707,Вахтангури,restaurant,"ул. Гоголя, 157, уг. ул. Досмухамедова","европейская, казахская, японская, грузинская",7000–8000 тенге на человека,"Wi-Fi, караоке, VIP-зал, танцпол, шоу-программа",1,1 отзыв,"['12:00–22:00, без выходных']",7500.0,7000.0,8000.0,1.0,1.0,2,2,5,1.0
897,Пугасов,restaurant,"ул. Абдуллиных, 70","азербайджанская, казахская, европейская",5000–10000 тенге на человека,"Wi-Fi, кальян, большой ТВ-экран, танцпол, ками...",1,No reviews,Не указано,7500.0,5000.0,10000.0,1.0,1.0,2,2,5,1.0
992,Bellagio,restaurant,"ущ. Бутаковка, ул. Горная, 197","восточная, европейская, итальянская",20000–25000 тенге на человека,"Wi-Fi, кальян, большой ТВ-экран, танцпол, VIP-...",1,No reviews,Не указано,22500.0,20000.0,25000.0,0.5,1.0,1,2,6,1.0
693,Ресторанно-гостиничный комплекс «Grand Family ...,restaurant,"4-й микрорайон, 10/2","европейская, восточная, казахская",5000–7000 тенге на человека,"Wi-Fi, танцпол, проектор, шоу-программа, VIP-з...",1,No reviews,"['10:00–02:00, без выходных']",6000.0,5000.0,7000.0,1.0,1.0,2,2,5,1.0
