### TypeSense Client Connector

In [1]:
import typesense

client = typesense.Client({
  'nodes': [{
    'host': 'localhost', 
    'port': '8108',     
    'protocol': 'http'   
  }],
  'api_key': 'xyz',
  'connection_timeout_seconds': 2
})

### Defining and Create TypeSense Schema

In [2]:
products_schema = {
  'name': 'products',
  'fields': [
    {'name': 'title', 'type': 'string' },
    {'name': 'authors', 'type': 'string[]', 'facet': True },
    {'name': 'image_url', 'type': 'string' },
    {'name': 'publication_year', 'type': 'int32', 'facet': True },
    {'name': 'ratings_count', 'type': 'int32' },
    {'name': 'average_rating', 'type': 'float' }
  ],
  'default_sorting_field': 'ratings_count'
}

# Check if the collection exists
if 'products' in [col['name'] for col in client.collections.retrieve()]:
    client.collections['products'].delete()


client.collections.create(products_schema)

{'created_at': 1721805504,
 'default_sorting_field': 'ratings_count',
 'enable_nested_fields': False,
 'fields': [{'facet': False,
   'index': True,
   'infix': False,
   'locale': '',
   'name': 'title',
   'optional': False,
   'sort': False,
   'stem': False,
   'type': 'string'},
  {'facet': True,
   'index': True,
   'infix': False,
   'locale': '',
   'name': 'authors',
   'optional': False,
   'sort': False,
   'stem': False,
   'type': 'string[]'},
  {'facet': False,
   'index': True,
   'infix': False,
   'locale': '',
   'name': 'image_url',
   'optional': False,
   'sort': False,
   'stem': False,
   'type': 'string'},
  {'facet': True,
   'index': True,
   'infix': False,
   'locale': '',
   'name': 'publication_year',
   'optional': False,
   'sort': True,
   'stem': False,
   'type': 'int32'},
  {'facet': False,
   'index': True,
   'infix': False,
   'locale': '',
   'name': 'ratings_count',
   'optional': False,
   'sort': True,
   'stem': False,
   'type': 'int32'},
  

### Indexing Data to Typesense

In [3]:
import json
import typesense
from tqdm import tqdm

# Count all jsonl and make it as progress
total_lines = sum(1 for line in open('./books.jsonl'))
with open('./books.jsonl') as infile:
    for json_line in tqdm(infile, total=total_lines, desc="Processing JSONL"):
        book_document = json.loads(json_line)
        client.collections['products'].documents.create(book_document)

Processing JSONL: 100%|██████████| 9979/9979 [09:52<00:00, 16.85it/s]


### Search Data using Typesense

In [13]:
search_parameters = {
  'q'         : 'harry potter',
  'query_by'  : 'title',
  'sort_by'   : 'ratings_count:desc'
}
client.collections['products'].documents.search(search_parameters)

{'facet_counts': [],
 'found': 17,
 'hits': [{'document': {'authors': ['J.K. Rowling', ' Mary GrandPré'],
    'authors_facet': ['J.K. Rowling', ' Mary GrandPré'],
    'average_rating': 4.44,
    'id': '2',
    'image_url': 'https://images.gr-assets.com/books/1474154022m/3.jpg',
    'publication_year': 1997,
    'publication_year_facet': '1997',
    'ratings_count': 4602479,
    'title': "Harry Potter and the Philosopher's Stone"},
   'highlight': {'title': {'matched_tokens': ['Harry', 'Potter'],
     'snippet': "<mark>Harry</mark> <mark>Potter</mark> and the Philosopher's Stone"}},
   'highlights': [{'field': 'title',
     'matched_tokens': ['Harry', 'Potter'],
     'snippet': "<mark>Harry</mark> <mark>Potter</mark> and the Philosopher's Stone"}],
   'text_match': 1157451471441100921,
   'text_match_info': {'best_field_score': '2211897868288',
    'best_field_weight': 15,
    'fields_matched': 1,
    'num_tokens_dropped': 0,
    'score': '1157451471441100921',
    'tokens_matched': 2,


In [14]:
search_parameters = {
  'q'         : 'harry potter',
  'query_by'  : 'title',
  'filter_by' : 'publication_year:<1998',
  'sort_by'   : 'publication_year:desc'
}
client.collections['products'].documents.search(search_parameters)

{'facet_counts': [],
 'found': 1,
 'hits': [{'document': {'authors': ['J.K. Rowling', ' Mary GrandPré'],
    'authors_facet': ['J.K. Rowling', ' Mary GrandPré'],
    'average_rating': 4.44,
    'id': '2',
    'image_url': 'https://images.gr-assets.com/books/1474154022m/3.jpg',
    'publication_year': 1997,
    'publication_year_facet': '1997',
    'ratings_count': 4602479,
    'title': "Harry Potter and the Philosopher's Stone"},
   'highlight': {'title': {'matched_tokens': ['Harry', 'Potter'],
     'snippet': "<mark>Harry</mark> <mark>Potter</mark> and the Philosopher's Stone"}},
   'highlights': [{'field': 'title',
     'matched_tokens': ['Harry', 'Potter'],
     'snippet': "<mark>Harry</mark> <mark>Potter</mark> and the Philosopher's Stone"}],
   'text_match': 1157451471441100921,
   'text_match_info': {'best_field_score': '2211897868288',
    'best_field_weight': 15,
    'fields_matched': 1,
    'num_tokens_dropped': 0,
    'score': '1157451471441100921',
    'tokens_matched': 2,
 

#### Typo Search

In [15]:
search_parameters = {
  'q'         : 'experyment',
  'query_by'  : 'title',
  'facet_by'  : 'authors',
  'sort_by'   : 'average_rating:desc'
}
client.collections['products'].documents.search(search_parameters)

{'facet_counts': [{'counts': [{'count': 1,
     'highlighted': ' Käthe Mazur',
     'value': ' Käthe Mazur'},
    {'count': 1, 'highlighted': 'Mahatma Gandhi', 'value': 'Mahatma Gandhi'},
    {'count': 1, 'highlighted': 'Gretchen Rubin', 'value': 'Gretchen Rubin'},
    {'count': 1,
     'highlighted': 'James Patterson',
     'value': 'James Patterson'}],
   'field_name': 'authors',
   'sampled': False,
   'stats': {'total_values': 4}}],
 'found': 3,
 'hits': [{'document': {'authors': ['James Patterson'],
    'authors_facet': ['James Patterson'],
    'average_rating': 4.08,
    'id': '569',
    'image_url': 'https://images.gr-assets.com/books/1339277875m/13152.jpg',
    'publication_year': 2005,
    'publication_year_facet': '2005',
    'ratings_count': 172302,
    'title': 'The Angel Experiment'},
   'highlight': {'title': {'matched_tokens': ['Experiment'],
     'snippet': 'The Angel <mark>Experiment</mark>'}},
   'highlights': [{'field': 'title',
     'matched_tokens': ['Experiment'],