In [1]:
import numpy as np
from faker import Faker
from dotenv import load_dotenv
import os
load_dotenv();

In [2]:
faker = Faker(seed=666)
np.random.seed(666)

In [3]:
keywords = [
    'archeology',
    'museums',
    'music',
    'art',
    'cinema',
    'countryside',
    'tracking',
    'rafting',
    'history',
    'literature',
    'sport',
    'food',
    'wine',
    'beer'
]

In [4]:
def generate_guide():
    guide = {}
    guide['gender'] = np.random.choice(['male', 'female'], size=1).item()
    guide['name'] = (faker.first_name_male() if guide['gender'] == 'male' else faker.first_name_female()) + ' ' + faker.last_name()
    guide['birth_date'] = faker.date_of_birth()
    guide['now_available'] = bool(np.random.binomial(1, 0.3))
    guide['languages_spoken'] = list(np.random.choice(['english', 'italian', 'french', 'spanish', 'deutsche', 'dutch', 'bulgarian', 'chinese'], size=1+np.random.poisson(lam=1, size=1).item(), replace=False))
    guide['price'] = int(max(10, np.abs(np.random.normal(30, 5))))
    guide['education'] = np.random.choice(['elementary', 'high-school', 'bachelor', 'master', 'phd'], size=1).item()
    guide['biography'] = faker.profile()['job']
    guide['keywords'] = list(np.random.choice(keywords, size=np.random.poisson(2, size=1).item()))
    guide['current_location'] = {
        "lat": np.random.normal(40.3524, 0.01),
        "lon": np.random.normal(18.1732, 0.01)
    }

     # Generate previous tours
    guide['previous_tours'] = []
    for _ in range(np.random.poisson(lam=2, size=1).item()):
        tour = {
            "altitude": np.random.uniform(0, 3000),
            "distance": np.random.uniform(1, 50),
            "days": np.random.randint(1, 15),
            "tour_keywords": list(np.random.choice(keywords, size=np.random.poisson(lam=5, size=1).item())),
            "places": []
        }
        # Generate places for each tour
        for _ in range(np.random.poisson(lam=3, size=1).item()):
            place = {
                "name": faker.city(),
                "location": {
                    "lat": np.random.normal(40.3524, 0.01),
                    "lon": np.random.normal(18.1732, 0.01)
                }
            }
            tour['places'].append(place)
        guide['previous_tours'].append(tour)
    
    return guide

In [11]:
generate_guide()

{'gender': 'male',
 'name': 'Gabriel Peters',
 'birth_date': datetime.date(1921, 1, 15),
 'now_available': True,
 'languages_spoken': ['deutsche', 'english', 'dutch'],
 'price': 33,
 'education': 'high-school',
 'biography': 'Conservator, museum/gallery',
 'keywords': ['art', 'sport'],
 'current_location': {'lat': 40.34760978200165, 'lon': 18.17571400902332},
 'previous_tours': [{'altitude': 94.0067340810441,
   'distance': 15.93521361947813,
   'days': 13,
   'tour_keywords': ['music', 'beer', 'museums', 'wine'],
   'places': [{'name': 'South Joshua',
     'location': {'lat': 40.35323077221597, 'lon': 18.184725867957663}},
    {'name': 'Kevinmouth',
     'location': {'lat': 40.34336601554098, 'lon': 18.17505628367389}},
    {'name': 'Timothyfort',
     'location': {'lat': 40.35445235459901, 'lon': 18.177999986449194}}]},
  {'altitude': 2064.312768335438,
   'distance': 5.6577154467612845,
   'days': 6,
   'tour_keywords': ['wine', 'history'],
   'places': [{'name': 'Lake Judy',
     '

In [6]:
from datetime import datetime
from elasticsearch import Elasticsearch

client = Elasticsearch(os.getenv('HOST'), api_key=os.getenv('API_KEY'), verify_certs=False)

client.ssl_context = False

In [13]:
# Feed data
client.index(index='guides_tours', body=generate_guide())



ObjectApiResponse({'_index': 'guides_tours', '_id': 'GfkukY8BPN_o98AbfO3D', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1})

In [None]:
N = 200

for i in range(1, N):
    client.index(index='guides_tours', body=generate_guide())
    print(f"Generated {i}/{N} guides")

In [15]:
user = {
    "location": {
        "lat": np.random.normal(40.3524, 0.01),
        "lon": np.random.normal(18.1732, 0.01)
        },
    "keywords": list(np.random.choice(keywords, size=np.random.poisson(2, size=1).item())),
    "languages": ["italian"]
}

user

{'location': {'lat': 40.35008188053676, 'lon': 18.182207781023273},
 'keywords': ['cinema'],
 'languages': ['italian']}

In [7]:
query = client.search(index='guides_tours', body={
  "query": {
    "function_score": {
      "query": {
        "bool": {
          "must": [
            {
              "terms": {
                "languages_spoken": ["bulgarian"]
              }
            },
            {
              "term": {
                "now_available": True
              }
            }
          ],
          "should": [
            {
              "nested": {
                "path": "previous_tours",
                "query": {
                  "bool": {
                    "should": [
                      {
                        "match": {
                          "previous_tours.tour_keywords": {
                            "query": "wine museums",
                            "operator": "or"
                          }
                        }
                      },
                      {
                        "nested": {
                          "path": "previous_tours.places",
                          "query": {
                            "bool": {
                              "should": [
                                {
                                  "match": {
                                    "previous_tours.places.name": {
                                      "query": "colosseum",
                                      "operator": "or"
                                    }
                                  }
                                },
                                {
                                  "geo_distance": {
                                    "distance": "50km",
                                    "previous_tours.places.location": {
                                      "lat": 41.8902,
                                      "lon": 18.160
                                    }
                                  }
                                }
                              ]
                            }
                          },
                        }
                      }
                    ]
                  }
                },
              }
            }
          ]
        }
      },
      "script_score": {
        "script": {
          "source": 
          """
            // Age scorer
            def target_age = params.target_age;
            Instant instant = Instant.ofEpochMilli(new Date().getTime());
            ZonedDateTime birth = doc['birth_date'].value;
            ZonedDateTime now = ZonedDateTime.ofInstant(instant, ZoneId.of('Z'));
            def doc_age = ChronoUnit.YEARS.between(birth, now);
            def age_score = Math.exp(-Math.pow(target_age - doc_age, 2) / 150);
            
            // Price scorer
            def price_score = Math.pow(2, -doc['price'].value / params.avg_price);
            
            // Education scorer
            def user_education = params.user_education;
            def education_scores_map = [
              "elementary": 0,
              "high-school": 0.2,
              "bachelor": 0.5,
              "master": 0.9,
              "phd": 1
            ];
            def guide_education = doc['education'].value;
            def education_score = education_scores_map[guide_education];
            def education_diff = education_scores_map[user_education] - education_score;
            def adjusted_education_score = Math.max(0, education_score + education_diff);
            
            // Distance scorer
            double target_lat = params.target_lat;
            double target_lon = params.target_lon;
            double targetLatRad = Math.toRadians(target_lat);
            double targetLonRad = Math.toRadians(target_lon);
            double docLat = doc['current_location'].lat;
            double docLon = doc['current_location'].lon;
            double docLatRad = Math.toRadians(docLat);
            double docLonRad = Math.toRadians(docLon);
            double earthRadius = 6371.0;
            double dLat = docLatRad - targetLatRad;
            double dLon = docLonRad - targetLonRad;
            double a = Math.sin(dLat / 2.0) * Math.sin(dLat / 2.0) +
                       Math.cos(targetLatRad) * Math.cos(docLatRad) *
                       Math.sin(dLon / 2.0) * Math.sin(dLon / 2.0);
            double c = 2.0 * Math.atan2(Math.sqrt(a), Math.sqrt(1.0 - a));
            double distance = earthRadius * c;
            double distance_score = 1 / (1 + distance);
            
            return 0.1 * age_score
                   + 0.3 * price_score
                   + 0.1 * adjusted_education_score
                   + 0.3 * distance_score
                   + 2 * _score;
          """,
          "params": {
            "target_age": 17,
            "avg_price": 31,
            "target_lat": 41.462,
            "target_lon": 18.161,
            "user_education": "phd"
          },
        },
      },
    }
  }
})

first = query.body['hits']['hits'][0]

In [8]:
first

{'_index': 'guides_tours',
 '_id': 'Fysqpo8BbN2u3vHcZ4zn',
 '_score': 39.800674,
 '_source': {'gender': 'male',
  'name': 'Timothy Bray',
  'birth_date': '1969-11-12',
  'now_available': True,
  'languages_spoken': ['bulgarian', 'italian', 'deutsche'],
  'price': 32,
  'education': 'phd',
  'biography': 'Psychiatrist',
  'keywords': ['archeology'],
  'current_location': {'lat': 40.351701701073104, 'lon': 18.170940778106704},
  'previous_tours': [{'altitude': 2229.691281092674,
    'distance': 48.88302583851392,
    'days': 4,
    'tour_keywords': ['museums',
     'food',
     'wine',
     'food',
     'wine',
     'museums',
     'sport'],
    'places': [{'name': 'South Reginabury',
      'location': {'lat': 40.35218512086396, 'lon': 18.166981926712992}}]},
   {'altitude': 859.2702495236679,
    'distance': 24.394761147692133,
    'days': 12,
    'tour_keywords': ['music', 'literature', 'beer', 'tracking', 'history'],
    'places': [{'name': 'New David',
      'location': {'lat': 40.33

In [9]:
source = first['_source']

source['previous_tours']

first

{'_index': 'guides_tours',
 '_id': 'Fysqpo8BbN2u3vHcZ4zn',
 '_score': 39.800674,
 '_source': {'gender': 'male',
  'name': 'Timothy Bray',
  'birth_date': '1969-11-12',
  'now_available': True,
  'languages_spoken': ['bulgarian', 'italian', 'deutsche'],
  'price': 32,
  'education': 'phd',
  'biography': 'Psychiatrist',
  'keywords': ['archeology'],
  'current_location': {'lat': 40.351701701073104, 'lon': 18.170940778106704},
  'previous_tours': [{'altitude': 2229.691281092674,
    'distance': 48.88302583851392,
    'days': 4,
    'tour_keywords': ['museums',
     'food',
     'wine',
     'food',
     'wine',
     'museums',
     'sport'],
    'places': [{'name': 'South Reginabury',
      'location': {'lat': 40.35218512086396, 'lon': 18.166981926712992}}]},
   {'altitude': 859.2702495236679,
    'distance': 24.394761147692133,
    'days': 12,
    'tour_keywords': ['music', 'literature', 'beer', 'tracking', 'history'],
    'places': [{'name': 'New David',
      'location': {'lat': 40.33