In [45]:
import os
from whoosh.index import create_in
from whoosh.fields import Schema, TEXT, ID, NUMERIC, NGRAMWORDS
from whoosh.analysis import StemmingAnalyzer
import sys
import json
 
def createSearchableData(data_path='final_data/test.json',
                         index_dir='final_data/indexdir'):
    '''
    Build a fresh Whoosh index from a JSON file of song records.

    Schema definition (every field is stored):
      author, song_name  - TEXT
      angry_score, happy_score, relaxed_score, sad_score - NUMERIC
      lyrics             - NGRAMWORDS (2..10 character n-grams taken from
                           the start of each word)
      id                 - ID

    Parameters:
      data_path - path of the JSON file to index. The file must hold an
                  iterable of objects, each providing every schema field
                  as a key.
      index_dir - directory that will hold the index. It is created when
                  missing; an existing index in it is replaced.

    Raises whatever open()/json.load() raise for a missing or malformed
    data file, and KeyError when a record lacks one of the schema fields.
    '''
    # Load the records first: create_in() replaces any existing index, so a
    # missing or malformed data file must fail before the old index is touched.
    with open(data_path, 'r', encoding='utf-8') as file:
        res = json.load(file)

    schema = Schema(author=TEXT(stored=True),
                    angry_score=NUMERIC(stored=True),
                    happy_score=NUMERIC(stored=True),
                    relaxed_score=NUMERIC(stored=True),
                    sad_score=NUMERIC(stored=True),
                    lyrics=NGRAMWORDS(minsize=2, maxsize=10, stored=True, field_boost=1.0, tokenizer=None, at='start', queryor=False, sortable=False),
                    song_name=TEXT(stored=True),
                    id=ID(stored=True))

    # create_in() expects the target directory to exist already.
    os.makedirs(index_dir, exist_ok=True)
    ix = create_in(index_dir, schema)

    # Used as a context manager the writer commits on success and cancels
    # (releasing the index write lock) if adding a document raises.
    with ix.writer() as writer:
        for x in res:
            writer.add_document(author=x['author'],
                                angry_score=x['angry_score'],
                                happy_score=x['happy_score'],
                                relaxed_score=x['relaxed_score'],
                                sad_score=x['sad_score'],
                                lyrics=x['lyrics'],
                                song_name=x['song_name'],
                                id=x['id'])

In [46]:
# Build the on-disk index under final_data/indexdir; the search cell below opens it.
createSearchableData()

In [48]:
from whoosh.qparser import QueryParser
from whoosh import scoring
from whoosh.index import open_dir
 
    
def search(term, num, index_dir="final_data/indexdir"):
    """
    Print the top hits for a query against the "lyrics" field.

    Parameters:
      term      - query string, parsed with QueryParser for the "lyrics" field.
      num       - maximum number of hits to score and print; None asks
                  Whoosh for every match.
      index_dir - directory of the index to open.

    Hits are ranked with the scoring.Frequency weighting and printed one
    per line. Nothing is returned.
    """
    ix = open_dir(index_dir)

    with ix.searcher(weighting=scoring.Frequency) as searcher:
        query = QueryParser("lyrics", ix.schema).parse(term)
        results = searcher.search(query, limit=num)
        # Iterating the Results object yields exactly the scored hits (at most
        # `num`). This avoids len(results), which may re-run the query without
        # a limit just to count matches, and it also works when num is None.
        for hit in results:
            print(hit)

In [53]:
# Print at most 2 hits whose lyrics match the query 'donky'.
# NOTE(review): 'donky' may be a deliberate misspelling to probe n-gram matching — confirm.
search('donky', 2)