In [14]:
# Export search_tools.ipynb as a plain Python script (search_tools.py) using nbconvert.
!jupyter nbconvert --to script search_tools.ipynb

[NbConvertApp] Converting notebook search_tools.ipynb to script
[NbConvertApp] Writing 1038 bytes to search_tools.py


In [13]:
# Export search_word.ipynb as a plain Python script (search_word.py) using nbconvert.
!jupyter nbconvert --to script search_word.ipynb

[NbConvertApp] Converting notebook search_word.ipynb to script
[NbConvertApp] Writing 3008 bytes to search_word.py


In [3]:
# NOTE(review): in the recorded run this import raises ImportError
# ("cannot import name 'AstRenderer' from 'mistune.renderers'"), and no other
# visible cell uses mistune. Presumably an environment check left over from
# debugging - confirm whether this cell is still needed.
import mistune

ImportError: cannot import name 'AstRenderer' from 'mistune.renderers' (C:\Users\Patrick\anaconda3\envs\urbandesignenv\Lib\site-packages\mistune\renderers\__init__.py)

In [2]:
#Import Modules
import os
import pickle
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics.pairwise import cosine_similarity

In [3]:
#Load Database (pickle_file)
# Folder holding the *.pkl database files, resolved in this order:
#   1. the PICKLE_FOLDER environment variable, when set and non-empty;
#   2. the OneDrive copy, when that directory exists on this machine;
#   3. a "pickle_file" folder relative to the working directory.
# This keeps the notebook runnable on machines that do not have the
# absolute OneDrive path.
_onedrive_pickle_folder = os.path.join(
    "/Users/patrick/Library/CloudStorage/OneDrive-Personal/Documents",
    "007-Study Life",
    "001-Urban Design",
    "RC11_Resources",
    "Pickle_File"
)
_local_pickle_folder = os.path.join(".", "pickle_file")
pickle_folder = os.environ.get("PICKLE_FOLDER") or (
    _onedrive_pickle_folder
    if os.path.isdir(_onedrive_pickle_folder)
    else _local_pickle_folder
)

In [4]:
#Load all pickle files and merge into single database
# SECURITY: pickle.load can execute arbitrary code while deserialising, so
# pickle_folder must only ever contain files produced by this project.
database = []
# os.listdir returns names in arbitrary order; sorting keeps the order of
# `database` (and therefore the ranking of tied search scores) reproducible
# across runs and machines.
for pickle_file in sorted(os.listdir(pickle_folder)):
    if pickle_file.endswith('.pkl'):
        with open(os.path.join(pickle_folder, pickle_file), "rb") as f:
            # Each file is expected to unpickle to an iterable of entries
            # (dicts with TEXT/LINE/BOOK/KEYWORD keys) - TODO confirm.
            database.extend(pickle.load(f))

In [5]:
#Simple Search
def simple_search(query, num_results=5, entries=None):
    """Print and return entries whose KEYWORD contains ``query`` as a substring.

    The match is case-insensitive and requires the whole query to appear
    contiguously inside the entry's ``'KEYWORD'`` string, so a multi-word
    query only matches when the words occur in exactly that order.

    Parameters
    ----------
    query : str
        Text to look for.
    num_results : int, default 5
        Maximum number of matches to keep (applied as a slice, in corpus order).
    entries : sequence of dict, optional
        Corpus to search. Each entry must provide the keys ``'TEXT'``,
        ``'LINE'``, ``'BOOK'`` and ``'KEYWORD'``. Defaults to the notebook's
        module-level ``database``.

    Returns
    -------
    list of dict
        The matching entries, in the order they appear in the corpus.
    """
    if entries is None:
        # Fall back to the corpus built by the loading cell.
        entries = database

    needle = query.lower()
    results = [entry for entry in entries if needle in entry['KEYWORD'].lower()]
    results = results[:num_results]

    for result in results:
        print(f"TEXT: {result['TEXT']}\nLINE: {result['LINE']}\nBOOK: {result['BOOK']}\nKEYWORD: {result['KEYWORD']}\n")

    return results

In [6]:
#TF-IDF Search
def tfidf_search(query, num_results=5, entries=None):
    """Rank entries by TF-IDF cosine similarity between ``query`` and KEYWORD.

    A ``TfidfVectorizer(min_df=2)`` is fitted on the ``'KEYWORD'`` strings of
    the corpus on every call, the query is projected into the same space, and
    the ``num_results`` highest-scoring entries are printed and returned.

    Parameters
    ----------
    query : str
        Search text; lower-cased before vectorising.
    num_results : int, default 5
        Number of top-scoring entries to return. ``0`` returns no entries.
    entries : sequence of dict, optional
        Corpus to search. Each entry must provide the keys ``'TEXT'``,
        ``'LINE'``, ``'BOOK'`` and ``'KEYWORD'``. Defaults to the notebook's
        module-level ``database``.

    Returns
    -------
    tuple
        ``(results, vectorizer, tfidf_matrix)``: the ranked entries (best
        first), the fitted vectorizer and the corpus TF-IDF matrix.
    """
    if entries is None:
        # Fall back to the corpus built by the loading cell.
        entries = database

    documents = [entry['KEYWORD'] for entry in entries]
    vectorizer = TfidfVectorizer(min_df=2)
    tfidf_matrix = vectorizer.fit_transform(documents)
    query_vec = vectorizer.transform([query.lower()])
    scores = cosine_similarity(query_vec, tfidf_matrix).flatten()
    # Reverse to descending order first, then truncate. Slicing with
    # `[-num_results:]` would return the whole corpus for num_results == 0,
    # because `[-0:]` is the same slice as `[0:]`.
    top_indices = scores.argsort()[::-1][:num_results]

    results = [entries[i] for i in top_indices]
    for result in results:
        print(f"TEXT: {result['TEXT']}\nLINE: {result['LINE']}\nBOOK: {result['BOOK']}\nKEYWORD: {result['KEYWORD']}\n")

    return results, vectorizer, tfidf_matrix

In [7]:
#SVD Search
def svd_search(query, num_results=5, entries=None, n_components=100, random_state=0):
    """Rank entries by cosine similarity in a TruncatedSVD (latent) space.

    The ``'KEYWORD'`` strings are vectorised with ``TfidfVectorizer(min_df=2)``,
    reduced with ``TruncatedSVD``, and compared with the query projected into
    the same latent space. The best entries are printed, followed by the ten
    vocabulary terms that carry the most weight for the query in that space.

    Parameters
    ----------
    query : str
        Search text; lower-cased before vectorising.
    num_results : int, default 5
        Number of top-scoring entries to return. ``0`` returns no entries.
    entries : sequence of dict, optional
        Corpus to search. Each entry must provide the keys ``'TEXT'``,
        ``'LINE'``, ``'BOOK'`` and ``'KEYWORD'``. Defaults to the notebook's
        module-level ``database``.
    n_components : int, default 100
        Requested number of latent dimensions; capped below the vocabulary
        size so small corpora do not make TruncatedSVD fail.
    random_state : int or None, default 0
        Seed for TruncatedSVD's randomized solver, so repeated searches give
        the same ranking.

    Returns
    -------
    tuple
        ``(results, top_terms)``: the ranked entries (best first) and a list
        of up to ten term strings.
    """
    if entries is None:
        # Fall back to the corpus built by the loading cell.
        entries = database

    documents = [entry['KEYWORD'] for entry in entries]
    vectorizer = TfidfVectorizer(min_df=2)
    tfidf_matrix = vectorizer.fit_transform(documents)

    # TruncatedSVD needs fewer components than there are vocabulary terms.
    n_features = tfidf_matrix.shape[1]
    n_components = max(1, min(n_components, n_features - 1))
    svd = TruncatedSVD(n_components=n_components, random_state=random_state)
    svd_matrix = svd.fit_transform(tfidf_matrix)

    query_vec = svd.transform(vectorizer.transform([query.lower()]))
    scores = cosine_similarity(query_vec, svd_matrix).flatten()
    # Reverse to descending order first, then truncate. Slicing with
    # `[-num_results:]` would return the whole corpus for num_results == 0.
    top_indices = scores.argsort()[::-1][:num_results]

    # svd.components_ is 2-D (n_components x n_features), so it cannot be
    # arg-sorted directly into a flat list of terms. Map the query's latent
    # vector back into term space and keep the ten heaviest terms.
    term_weights = (query_vec @ svd.components_).ravel()
    feature_names = vectorizer.get_feature_names_out()
    top_terms = [str(feature_names[idx]) for idx in term_weights.argsort()[::-1][:10]]

    results = [entries[i] for i in top_indices]
    for result in results:
        print(f"TEXT: {result['TEXT']}\nLINE: {result['LINE']}\nBOOK: {result['BOOK']}\nKEYWORD: {result['KEYWORD']}\n")

    print("Top Keywords:\n")
    for terms in top_terms:
        print(terms)

    return results, top_terms

In [8]:
#Testing
# Shared query string passed to each of the three search functions below.
query = "trend shop"

In [9]:
# Substring search: the whole query must appear contiguously in an entry's
# KEYWORD string. The recorded run found no such entry and returned [].
simple_search(query)

[]

In [10]:
# tfidf_search already prints every hit; the print() below additionally dumps
# the raw list of result entries.
tfidf_results, vectorizer, tfidf_matrix = tfidf_search(query)
print("TF-IDF Search:", tfidf_results)

TEXT: ·-•• future insight:social trends, drivers of change, environmental trends, economic trends, political trends- -------------·----------------------------------------------------------------------d!forestry worker perceptions of GM plant products
LINE: 174
BOOK: 017 - Philippe Morel VERB Nature Scan
KEYWORD: futur insight social trend driver chang environment trend econom trend polit trend forestri worker percept plant product

TEXT:  The first classification of these trends was proposed.
LINE: 1843
BOOK: Bernard Feltz Self-organisation and Emergence in Life Sciences
KEYWORD: first classif trend wa

TEXT: • •102 AN APPROACH TO CYBERNETiCSin the same way that the existence of AI must have been. So the man or computer manipulating 0 should. in facl, reward an evoiUlionary trend AI. At ... rather than a particular system. In order to do this, the trend must be recognized, which is much the same as recognising the similarity criteria of Chapter 5. But
LINE: 543
BOOK: Gordon Pask Appro

In [16]:
# Latent-space (TruncatedSVD) search; prints the hits followed by a
# "Top Keywords" list and returns both.
svd_results, top_terms = svd_search(query)

TEXT:  They had to purchase their supplies: shops and warehouses were constructed. Streets were pushed into the dunes behind the old village; the built-up area spilled south toward the mission and north toward Washerwoman’s Lagoon.
LINE: 1220
BOOK: The Age of Gold - H. W. Brands
KEYWORD: purchas suppli shop warehous street dune behind old villag built area south toward mission north toward washerwoman lagoon

TEXT:  He too defers to the siren as he bends double toward her, but he also engages our gaze as he simultaneously hunches forward and looks back over his shoulder down toward the street. Figure 5.4 Maison du Grand Ecuyer, (a) (above) vielle player; (b) (below) bagpipe player Photos: (a): author; (b): Pierre Blanc The deferent postures of both these musicians suggest that they are either serenading the siren in tribute, or, on the contrary, suggesting an antidote to evil, promoting to passersby the innocent distractions that were popular in both court and street settings.
LINE: 94