### Import packages

In [1]:
from math import radians, cos, sin, asin, sqrt
import sys, os, lucene, threading, time 
from java.nio.file import Paths
from org.apache.lucene import analysis, document, index, queryparser, search, store
from org.apache.lucene.store import SimpleFSDirectory 
from org.apache.lucene.index import FieldInfo, IndexWriter, IndexWriterConfig ,DirectoryReader,IndexReader,Term
from org.apache.lucene.search import \
    BooleanClause, BooleanQuery, Explanation, PhraseQuery, TermQuery, ScoreMode
from org.apache.lucene.util import Version
from org.apache.lucene.search.similarities import \
ClassicSimilarity,BooleanSimilarity, LMDirichletSimilarity,BM25Similarity,TFIDFSimilarity,LMJelinekMercerSimilarity
from org.apache.lucene.queryparser.classic import QueryParser,MultiFieldQueryParser
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.pylucene.queryparser.classic import \
    PythonQueryParser, PythonMultiFieldQueryParser
from org.apache.lucene.document import LatLonPoint,FloatPoint,IntPoint

### Lucene initialization and index loading

In [2]:
# Initialise the PyLucene runtime before any Lucene class is used; the returned
# environment object is displayed as this cell's output.
lucene.initVM()

<jcc.JCCEnv at 0x10e7ad318>

In [3]:
# Location of the Lucene index, relative to the notebook's working directory.
PATH = './data1/index'
index_path = Paths.get(PATH)
directory = SimpleFSDirectory(index_path)

### Build the Searcher

In [4]:
# Open a reader on the index directory and wrap it in the searcher that the
# query cells in this notebook run against.
ireader = DirectoryReader.open(directory)
isearcher = search.IndexSearcher(ireader)

### Query

---

#### Boolean Query

##### Single Field Boolean Query

In [5]:
# Query: the "categories" field must contain both "food" and "beer".
b1 = BooleanQuery.Builder()
for keyword in ("food", "beer"):
    # Every clause is MUST, so the terms are AND-ed together.
    b1.add(TermQuery(Term("categories", keyword)), BooleanClause.Occur.MUST)
bq1 = b1.build()

In [6]:
# Run the single-field boolean query and keep the 10 best-scoring hits.
b1_top_docs = isearcher.search(bq1, 10)
b1_hits = b1_top_docs.scoreDocs

In [7]:
# Print one tab-separated summary line per hit, each followed by a divider.
divider = '-'*100
for hit in b1_hits:
    hitDoc = isearcher.doc(hit.doc)
    columns = [
        'Business_id: '+hitDoc['business_id'],
        'Name: '+hitDoc['name'],
        'Address: '+hitDoc['address'],
        'Categories: '+hitDoc['categories'],
    ]
    print('\t'.join(columns))
    print(divider)

Business_id: 7lZBRWIBam0oAwFFOBvBhA	Name: Belmont Station	Address: 4500 SE Stark St	Categories: Nightlife, Bars, Beer Bar, Beer, Wine & Spirits, Beer Gardens, Food
----------------------------------------------------------------------------------------------------
Business_id: 3eGvOTVpmJ6B_dKwfGgXNw	Name: Hop City Craft Beer and Wine	Address: 99 Krog St NE	Categories: Beer Gardens, Food, Beer Bar, Beer, Wine & Spirits, Bars, Nightlife
----------------------------------------------------------------------------------------------------
Business_id: _eMpuJiJkBkG-j-I8CGOJg	Name: Imperial Bottle Shop & Taproom	Address: 2006 NE Alberta St	Categories: Beer Gardens, Nightlife, Beer, Wine & Spirits, Food, Bars, Beer Bar
----------------------------------------------------------------------------------------------------
Business_id: 4QrP2MbpEC6LLUD0L_OVLw	Name: Scout Beer Garden	Address: 3201 SW Moody Ave	Categories: Food, Beer Gardens, Beer, Wine & Spirits, Food Trucks, Nightlife
--------------

##### Multi-Field Boolean Query

In [60]:
# Query: city is "portland" AND categories contains both "korean" and "barbeque".
b2 = BooleanQuery.Builder()
required_terms = [
    ("city", "portland"),
    ("categories", "korean"),
    ("categories", "barbeque"),
]
for field_name, term_text in required_terms:
    # Every clause is MUST, so a document has to satisfy all three terms.
    b2.add(TermQuery(Term(field_name, term_text)), BooleanClause.Occur.MUST)
bq2 = b2.build()

In [61]:
# Run the multi-field boolean query and keep the 10 best-scoring hits.
b2_top_docs = isearcher.search(bq2, 10)
b2_hits = b2_top_docs.scoreDocs

In [62]:
# Print one tab-separated summary line per hit, each followed by a divider.
divider = '-'*100
for hit in b2_hits:
    hitDoc = isearcher.doc(hit.doc)
    columns = [
        'Business_id: '+hitDoc['business_id'],
        'City: '+ hitDoc['city'],
        'Name: '+hitDoc['name'],
        'Address: '+hitDoc['address'],
        'Categories: '+hitDoc['categories'],
    ]
    print('\t'.join(columns))
    print(divider)

Business_id: 5Vrg_1JXv0B6pvkdLVxvnw	City: Portland	Name: Kkoki Korean BBQ - Portland	Address: 8001 SE Powell Blvd, Ste O	Categories: Barbeque, Korean, Restaurants
----------------------------------------------------------------------------------------------------
Business_id: TfkE5yyo7VOjJWn7-TsERA	City: Portland	Name: K-Town Korean BBQ	Address: 5450 SE 82nd Ave	Categories: Korean, Restaurants, Barbeque
----------------------------------------------------------------------------------------------------
Business_id: iis2N7u4j8L18wNCz8GGJg	City: Portland	Name: Toji Korean Grill House	Address: 4615 SE Hawthorne Blvd	Categories: Vietnamese, Barbeque, Korean, Restaurants
----------------------------------------------------------------------------------------------------
Business_id: IEoxvVxtMpqHDyom4Ad6Tw	City: Portland	Name: Sokongdong Tofu & BBQ	Address: 2850 SE 82nd Ave, Ste 11	Categories: Barbeque, Asian Fusion, Korean, Restaurants
-------------------------------------------------------

---

#### Ranking Query

In [11]:
# Show the ranking model the searcher is currently using. No similarity has
# been set in this notebook, so this is the default: BM25 with k1 = 1.2 and
# b = 0.75 (see the cell output).
isearcher.getSimilarity()

<Similarity: BM25(k1=1.2,b=0.75)>

##### Single Field Ranking Query

In [57]:
# Free-text query, parsed against the 'review' field with a StandardAnalyzer.
review_query_text = 'chinese food with good service'
review_analyzer = StandardAnalyzer()
r1 = QueryParser('review', review_analyzer)
rk1 = r1.parse(review_query_text)

In [58]:
# Run the single-field ranking query and keep the 10 best-scoring hits.
rk1_top_docs = isearcher.search(rk1, 10)
rk1_hits = rk1_top_docs.scoreDocs

In [59]:
# Print one tab-separated line per hit, ending with its relevance score
# (two decimals), each followed by a divider.
divider = '-'*100
for hit in rk1_hits:
    hitDoc = isearcher.doc(hit.doc)
    columns = [
        'Business_id: '+hitDoc['business_id'],
        'Name:'+hitDoc['name'],
        'Categories:'+hitDoc['categories'],
        'Stars: '+hitDoc['stars'],
        'Score:%.2f' % hit.score,
    ]
    print('\t'.join(columns))
    print(divider)
    

Business_id: wLHLYa0DmjARl7tE2GSZdw	Name:Great Taste Chinese Restaurant	Categories:Chinese, Restaurants	Stars: 4.0	Score:6.45
----------------------------------------------------------------------------------------------------
Business_id: tHcTQimgBc0Bj1JyuAPU3w	Name:3-6-9 Chinese Restaurant	Categories:Chinese, Restaurants	Stars: 3.5	Score:6.42
----------------------------------------------------------------------------------------------------
Business_id: rLOIp4WVoSV_sJFMpXJEVA	Name:Green House Chinese Restaurants	Categories:Hong Kong Style Cafe, Cantonese, Chinese, Restaurants	Stars: 4.0	Score:6.40
----------------------------------------------------------------------------------------------------
Business_id: Hpw3-9IUXpN1yBpweeFmag	Name:East Chinatown Restaurant	Categories:Seafood, Restaurants, Soup, Chinese, Cantonese	Stars: 3.0	Score:6.35
----------------------------------------------------------------------------------------------------
Business_id: ZrQ1UB8GEQde1eHJJuz8Ug	Name:Ch

##### Multi-Field Ranking Query

In [15]:
# Query text 'portland salons with wifi', searched across four fields.
search_fields = ['city','attributes','categories','review']
# One flag per field, in the same order as search_fields: a match in 'city'
# is required, matches in the other three fields are optional.
field_flags = [BooleanClause.Occur.MUST] + [BooleanClause.Occur.SHOULD]*3

multiFiled_query = PythonMultiFieldQueryParser(search_fields, StandardAnalyzer())
multiFiled_query.setDefaultOperator(QueryParser.Operator.AND)
# NOTE(review): this four-argument parse(...) looks like Lucene's static
# MultiFieldQueryParser.parse overload, which builds its own per-field parsers.
# If so, the AND default operator set above is not applied here - confirm.
mrq = multiFiled_query.parse(
    'portland salons with wifi',
    search_fields,
    field_flags,
    StandardAnalyzer(),
)

In [16]:
# Run the multi-field ranking query, keep the 10 best-scoring hits and print
# one tab-separated line per hit, each followed by a divider.
mrq_top_docs = isearcher.search(mrq, 10)
mrq_hits = mrq_top_docs.scoreDocs
divider = '-'*100
for hit in mrq_hits:
    hitDoc = isearcher.doc(hit.doc)
    columns = [
        'Business_id: '+hitDoc['business_id'],
        'City: '+ hitDoc['city'],
        'Name:'+hitDoc['name'],
        'Categories:'+hitDoc['categories'],
        'Stars: '+hitDoc['stars'],
        'Score:%.2f' % hit.score,
    ]
    print('\t'.join(columns))
    print(divider)
    

Business_id: 63sw2U3K_CgimD8claSkAg	City: Portland	Name:Moda Studios-Ceanna Lee	Categories:Beauty & Spas, Men's Hair Salons, Hair Salons, Hair Stylists	Stars: 4.5	Score:7.83
----------------------------------------------------------------------------------------------------
Business_id: fFoJtbo7K_7-NjJubxD4SA	City: Portland	Name:Luxury Nails & Foot Massage	Categories:Hair Removal, Waxing, Massage, Beauty & Spas, Day Spas, Nail Salons	Stars: 4.0	Score:7.59
----------------------------------------------------------------------------------------------------
Business_id: y_xHlaTePE7qnf2AO1-OrA	City: Portland	Name:Color Treats	Categories:Waxing, Nail Salons, Massage, Hair Removal, Beauty & Spas, Nail Technicians	Stars: 5.0	Score:6.95
----------------------------------------------------------------------------------------------------
Business_id: LmZvaw7W4UMhpsFJWUzy3w	City: Portland	Name:Silver Cloud Inn Portland	Categories:Hotels, Event Planning & Services, Hotels & Travel	Stars: 4.0	Score

---

#### Spatial Query

In [17]:
# Function to calculate the distance between two points.
def geodistance(lng1,lat1,lng2,lat2):
    """Return the great-circle distance between two points, in kilometres.

    The distance is computed with the haversine formula on a sphere of
    radius 6371 km.

    Args:
        lng1: Longitude of the first point, in degrees.
        lat1: Latitude of the first point, in degrees.
        lng2: Longitude of the second point, in degrees.
        lat2: Latitude of the second point, in degrees.
            Each argument is passed through ``float()``, so numbers and
            numeric strings are both accepted.

    Returns:
        The distance in kilometres, rounded to 3 decimal places.

    Raises:
        ValueError, TypeError: if a coordinate cannot be converted by
            ``float()``.
    """
    lng1, lat1, lng2, lat2 = map(radians, [float(lng1), float(lat1), float(lng2), float(lat2)])
    dlon = lng2 - lng1
    dlat = lat2 - lat1
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    # For (near-)antipodal points rounding error can push `a` a hair above 1,
    # which would make asin() raise ValueError. `a` is a sum of non-negative
    # terms, so only the upper bound needs clamping.
    a = min(1.0, a)
    distance_m = 2*asin(sqrt(a))*6371*1000  # metres
    return round(distance_m/1000, 3)

In [19]:
# Query: businesses within `radius` metres of `cur_location`. The category
# keywords are optional (SHOULD) clauses next to a mandatory (MUST) distance
# clause, so they influence ranking but do not exclude documents.

#spatial information
cur_location = ( 45.588906,-122.593331)  # (latitude, longitude) in degrees
radius = 5000.  # search radius in metres
geo_q = LatLonPoint.newDistanceQuery(
    'location',
    cur_location[0],
    cur_location[1],
    radius,  # use the variable instead of repeating the literal 5000.
)

#keyword information
key_q = BooleanQuery.Builder()
key_q.add(TermQuery(Term("categories", "chinese")), BooleanClause.Occur.SHOULD)
# NOTE(review): the displayed category values read "Restaurants"; if the index
# does not stem tokens, the singular term "restaurant" may never match - confirm.
key_q.add(TermQuery(Term("categories", "restaurant")), BooleanClause.Occur.SHOULD)
key_q.add(geo_q,BooleanClause.Occur.MUST)
geo_key1 = key_q.build()

In [20]:
# Run the spatial + keyword query, keep the 10 best-scoring hits and print one
# tab-separated line per hit with its distance from cur_location.
# NOTE(review): `b1_hits` is also the name used for the first boolean query's
# hits; this assignment overwrites that earlier result.
geo_top_docs = isearcher.search(geo_key1, 10)
b1_hits = geo_top_docs.scoreDocs
divider = '-'*100
for hit in b1_hits:
    hitDoc = isearcher.doc(hit.doc)

    # geodistance expects (lng, lat) pairs; cur_location is stored as (lat, lng).
    distance_km = geodistance(hitDoc['long'], hitDoc['lat'], cur_location[1], cur_location[0])
    columns = [
        'City:'+hitDoc['city'],
        'Name:'+hitDoc['name'],
        'Address:'+hitDoc['address'],
        'Categories:'+hitDoc['categories'],
        'Distance:'+str(distance_km)+ 'km',
    ]
    print('\t'.join(columns))
    print(divider)
          

City:Vancouver	Name:Lucky Garden Restaurant	Address:10204 NE Mill Plain Blvd	Categories:Chinese, Restaurants	Distance:4.092km
----------------------------------------------------------------------------------------------------
City:Vancouver	Name:Chopsticks Restaurant	Address:7601 E Mill Plain Blvd	Categories:Chinese, Restaurants	Distance:3.966km
----------------------------------------------------------------------------------------------------
City:Vancouver	Name:Ming's Restaurant	Address:11909 SE Mill Plain Blvd	Categories:Chinese, Restaurants	Distance:4.842km
----------------------------------------------------------------------------------------------------
City:Portland	Name:China Wok	Address:6033 NE Win Sivers Dr, Ste D	Categories:Chinese, Restaurants	Distance:4.928km
----------------------------------------------------------------------------------------------------
City:Vancouver	Name:Fa Fa Gourmet Chinese Restaurant	Address:11712 NE 4th Plain Rd	Categories:Chinese, Restaurant

---

#### Numerical Range Query

In [55]:
# Query: categories contains "bar", city is "portland", and the star rating
# lies between 4.5 and 5.0 (both bounds inclusive).
range_q = FloatPoint.newRangeQuery('stars',4.5,5.)
rb = BooleanQuery.Builder()
rb.add(TermQuery(Term("categories", "bar")), BooleanClause.Occur.MUST)
rb.add(TermQuery(Term("city", "portland")), BooleanClause.Occur.MUST)
# The rating range has to be a mandatory clause. As SHOULD it only raised the
# score of highly rated bars, so lower-rated ones could still be returned.
rb.add(range_q,BooleanClause.Occur.MUST)
rbq = rb.build()

In [56]:
# Run the range + keyword query, keep the 10 best-scoring hits and print one
# tab-separated line per hit, each followed by a divider.
rbq_top_docs = isearcher.search(rbq, 10)
rbq_hits = rbq_top_docs.scoreDocs
divider = '-'*100
for hit in rbq_hits:
    hitDoc = isearcher.doc(hit.doc)
    columns = [
        'City: '+hitDoc['city'],
        'Name: '+hitDoc['name'],
        'Star: '+hitDoc['stars'],
        'Categories:'+hitDoc['categories'],
    ]
    print('\t'.join(columns))
    print(divider)

City: Portland	Name: Beer O'Clock	Star: 4.5	Categories:Bars, Beer Bar, Nightlife
----------------------------------------------------------------------------------------------------
City: Portland	Name: Lombard House	Star: 5.0	Categories:Pubs, Beer Bar, Bars, Nightlife
----------------------------------------------------------------------------------------------------
City: Portland	Name: Barrio	Star: 5.0	Categories:Wine Bars, Bars, Beer Bar, Nightlife
----------------------------------------------------------------------------------------------------
City: Portland	Name: Mayfly Taproom And Bottle Shop	Star: 5.0	Categories:Beer Bar, Nightlife, Bars, Breweries, Food
----------------------------------------------------------------------------------------------------
City: Portland	Name: BeerQuest Walking Tours	Star: 4.5	Categories:Breweries, Walking Tours, Beer Tours, Bars, Pubs, Nightlife, Hotels & Travel, Beer Bar, Tours, Food, Bar Crawl
------------------------------------------------