In [4]:
#Import Modules
import os
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD

In [9]:
#Load Database (pickle_file)
# Folder (relative to the notebook's working directory) that holds the *.pkl
# database files. Built with os.path.join instead of a hard-coded backslash so
# the path is valid on Windows, macOS and Linux, and so no "\p" escape sequence
# (invalid in a normal string literal) is involved.
pickle_folder = os.path.join(".", "pickle_file")

In [10]:
#Load all pickle files and merge into single database
# Each *.pkl file in pickle_folder is unpickled and its items are appended to
# one flat list, `database`, which the search functions read as a global.
# NOTE(security): pickle.load can execute arbitrary code while loading, so
# only point pickle_folder at files you produced yourself.
database = []
# os.listdir returns names in arbitrary order; sorting keeps the order of
# `database` (and therefore tie-breaking in the search results) reproducible.
for pickle_file in sorted(os.listdir(pickle_folder)):
    if pickle_file.endswith('.pkl'):
        with open(os.path.join(pickle_folder, pickle_file), "rb") as f:
            database.extend(pickle.load(f))

In [11]:
#Simple Search
def simple_search(query, num_results=5):
    """Return the first entries whose processed text contains `query`.

    Performs a case-insensitive substring match of `query` against the
    'Processed Text' field of every entry in the global `database`, in
    database order.

    Parameters
    ----------
    query : str
        Text to look for. An empty or whitespace-only query returns no
        results (as a substring it would otherwise match every entry).
    num_results : int or None, default 5
        Maximum number of entries to return. Zero or a negative value
        returns an empty list; None returns every match.

    Returns
    -------
    list of dict
        Matching database entries, at most `num_results` of them.
    """
    needle = query.lower()
    if not needle.strip():
        return []
    if num_results is not None and num_results <= 0:
        return []

    results = []
    for entry in database:
        if needle in entry['Processed Text'].lower():
            results.append(entry)
            # Stop scanning as soon as enough hits have been collected.
            if len(results) == num_results:
                break
    return results

In [12]:
#TF-IDF Search
def tfidf_search(query, num_results=5):
    """Rank database entries against `query` by TF-IDF similarity.

    A TfidfVectorizer is fitted on the 'Processed Text' of every entry in the
    global `database`, the lower-cased query is mapped into the same
    vocabulary, and entries are ordered by the dot product of their TF-IDF
    vector with the query vector (with the vectorizer's default L2
    normalisation this is the cosine similarity).

    Parameters
    ----------
    query : str
        Search text. Only lower-casing is applied to it here.
        NOTE(review): the sample outputs suggest 'Processed Text' is stemmed
        and stop-word filtered; if so, queries should go through the same
        preprocessing to match reliably - confirm against the pipeline that
        produced the pickle files.
    num_results : int, default 5
        Maximum number of entries to return. Zero or a negative value returns
        an empty list.

    Returns
    -------
    list of dict
        Up to `num_results` entries, best match first. Entries that share no
        vocabulary term with the query (score 0) are never returned, so the
        list may be shorter than `num_results` or empty.

    Notes
    -----
    The vectorizer is refitted on every call, so each query costs a full pass
    over the database.
    """
    if num_results <= 0:
        # Without this guard, argsort()[-0:] would select *every* row.
        return []
    if not database:
        # fit_transform raises on an empty corpus.
        return []

    documents = [entry['Processed Text'] for entry in database]
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform(documents)
    query_vec = vectorizer.transform([query.lower()])

    # `@` is always a matrix product, for sparse matrices and sparse arrays alike.
    scores = (tfidf_matrix @ query_vec.T).toarray().ravel()

    # Highest score first; the stable sort keeps ties in database order.
    ranked = (-scores).argsort(kind="stable")[:num_results]
    return [database[i] for i in ranked if scores[i] > 0]

In [13]:
#SVD Search
def svd_search(query, num_results=5, n_components=100, random_state=0):
    """Rank database entries against `query` in a truncated-SVD (LSA) space.

    A TfidfVectorizer is fitted on the 'Processed Text' of every entry in the
    global `database`, the TF-IDF matrix is reduced with TruncatedSVD, the
    lower-cased query is projected into the same reduced space, and entries
    are ordered by the dot product of their reduced vector with the query's.

    Parameters
    ----------
    query : str
        Search text. Only lower-casing is applied to it here.
    num_results : int, default 5
        Maximum number of entries to return. Zero or a negative value returns
        an empty list.
    n_components : int, default 100
        Requested number of SVD components. It is capped at the dimensions of
        the TF-IDF matrix so that small corpora or vocabularies do not make
        TruncatedSVD fail.
    random_state : int or None, default 0
        Seed passed to TruncatedSVD so repeated calls give the same ranking.
        Pass None for an unseeded decomposition.

    Returns
    -------
    list of dict
        Up to `num_results` entries, best match first. Empty when the
        database is empty or no query term is in the fitted vocabulary.

    Notes
    -----
    Both the vectorizer and the SVD are refitted on every call, so each query
    costs a full decomposition of the database.
    """
    if num_results <= 0:
        # Without this guard, argsort()[-0:] would select *every* row.
        return []
    if not database:
        # fit_transform raises on an empty corpus.
        return []

    documents = [entry['Processed Text'] for entry in database]
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform(documents)

    query_tfidf = vectorizer.transform([query.lower()])
    if query_tfidf.nnz == 0:
        # No query term is in the vocabulary: every score would be 0 and the
        # "top" entries would be arbitrary.
        return []

    # The rank of the matrix cannot exceed either of its dimensions.
    k = max(1, min(n_components, *tfidf_matrix.shape))
    svd = TruncatedSVD(n_components=k, random_state=random_state)
    svd_matrix = svd.fit_transform(tfidf_matrix)
    query_vec = svd.transform(query_tfidf)

    scores = (svd_matrix @ query_vec.T).ravel()

    # Highest score first; the stable sort keeps ties in database order.
    ranked = (-scores).argsort(kind="stable")[:num_results]
    return [database[i] for i in ranked]

In [27]:
#Testing

In [23]:
# Smoke test: run the substring search for one keyword and print the raw
# result entries (default limit of 5). `query` is set in this cell so the
# cell can be run on its own.
query = "shop"
print("Simple Search:", simple_search(query))

Simple Search: [{'TEXT': ' Sonny was the only one happy there. His wife and daughters always seemed washed with dread and exhaustion. You would probably go past the small shop owned by Miss Jeanne and think the bottles of dinner mints and sweet plums dull.', 'NR': 38, 'Book ID': 'A Map to the Door of No Return - Dionne Brand', 'Processed Text': 'sonni wa one happi wife daughter alway wash dread exhaust would probabl past small shop miss think bottl dinner mint sweet plum dull'}, {'TEXT': ' Being in the Diaspora braces itself in virtuosity or despair. 9 One has this sense as one observes bodies in the Diaspora, virtuosity or despair, on the brink of both. A body pushing a grocery cart through the city housing at Lawrence and Bathurst in Toronto, her laundry, her shopping all contained there, dressed as if on her way to a party, gold chain around her neck, lipstick — as if moving with all her possessions.', 'NR': 122, 'Book ID': 'A Map to the Door of No Return - Dionne Brand', 'Processed

In [24]:
# Smoke test: run the TF-IDF ranked search for the same keyword and print the
# raw result entries (default limit of 5). `query` is set in this cell so the
# cell can be run on its own.
query = "shop"
print("TF-IDF Search:", tfidf_search(query))

TF-IDF Search: [{'TEXT': 'Early on, we did everything we could to avoid exhibiting work in a white cube and everything it stood for. We showed in shop windows, homes, shopping centers, cafes, gardens. But always, the work became about the space itself, or the context, rather than the ideas we wished to explore.', 'NR': 623, 'Book ID': 'Speculative Everything by Anthony Dunne', 'Processed Text': 'earli everyth could avoid work white cube everyth stood shop window home shop center garden alway work space context rather idea wish explor'}, {'TEXT': ' I remember another shebeen — a rum shop, it’s called there — in a village in Dominica. One night some friends and I drive through country darkness, stopped where the car could go no further, and climbed a hill, bumping into tree stumps, arriving at a rum shop.', 'NR': 443, 'Book ID': 'A Map to the Door of No Return - Dionne Brand', 'Processed Text': 'rememb anoth shebeen rum shop villag one night friend drive countri dark stop car could hill 

In [26]:
# Smoke test: run the SVD-based search for the same keyword and print the raw
# result entries (default limit of 5). `query` is set in this cell so the
# cell can be run on its own.
query = "shop"
print("SVD Search:", svd_search(query))

SVD Search: [{'TEXT': ' Figure 11.1 Van den Broek and Bakema, Het Dorp, Arnhem, Netherlands, 1963–65, aerial perspective view from the southwest (‘Het Dorp: A Village’) Photo: Collection Het Nieuwe Instituut/BROX Figure 11.2 Het Dorp, (a) (above) view of shops from plaza; (b) (below) residents in road among low housing units near the top of the site Photos: author Several things are striking about the village Bakema and the Dutch public built.', 'NR': 1786, 'Book ID': 'Architecture and the Body by Kim Sexton', 'Processed Text': 'figur van den het dorp aerial perspect view southwest het dorp villag photo collect het figur het dorp view shop plaza resid road among low hous unit near top site photo author sever thing strike villag dutch public built'}, {'TEXT': '1 Van den Broek and Bakema, Het Dorp, Arnhem, Netherlands, 1963–65 11.2 Het Dorp, view of shops from plaza; residents in road among low housing units near the top of the site 11.3 Het Dorp, sectional elevations of building phase 4