In [9]:
# Example code for NLP processing and LLM encoding
import spacy
from transformers import BertTokenizer, BertModel

# Load spaCy for basic NLP tasks
nlp = spacy.load("en_core_web_sm")

In [20]:
# Specify the model name
model_name = "bert-base-uncased"

# Download and save the tokenizer
tokenizer = BertTokenizer.from_pretrained(model_name)

# Download and save the model weights
bert_model = BertModel.from_pretrained(model_name)

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

In [55]:
import spacy

# Load a more comprehensive language model (e.g., en_core_web_lg)
nlp = spacy.load('en_core_web_lg')

# User input
user_input = "I want to explore scuba diving in the Great Barrier Reef organized by a local club."

# NLP processing
doc = nlp(user_input)

# Extract entities including verbs, nouns, and organizations
entities = [(ent.text, ent.label_) for ent in doc.ents] + [(token.text, token.pos_) for token in doc if token.pos_ in ["VERB", "NOUN", "ORG"]]

# Print entities
print(entities)


Installing collected packages: en-core-web-lg
Successfully installed en-core-web-lg-3.7.1
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_lg')
[('the Great Barrier Reef', 'EVENT'), ('want', 'VERB'), ('explore', 'VERB'), ('scuba', 'NOUN'), ('diving', 'NOUN'), ('organized', 'VERB'), ('club', 'NOUN')]


In [56]:
# Sentiment analysis (example using TextBlob)
from textblob import TextBlob
sentiment = TextBlob(user_input).sentiment.polarity

# LLM encoding
tokens = tokenizer(user_input, return_tensors="pt")
encoded_output = bert_model(**tokens)
embedding = encoded_output.last_hidden_state.mean(dim=1).squeeze().detach().numpy()

In [60]:
# ... (Previous code)

# Continue with semantic matching, dynamic query expansion, and database querying...

# Assume you have a database of tour packages with relevant information
package = [
    {
        "name": "Quest in Osaka town in the port town",
        "experience_and_labour_shortage_issue": ["Experience of labor shortage issues","Experience feeding farmed fish by local fishermen",
            "Experience casting a net for inshore fishing by local fishermen",
            "Experience helping with fish preparation and sorting",
            "Experience planting coral reefs while scuba diving",
            "Experience helping pick up trash at the beach"],
        "Natural activity experiences": [
            "Nature activity experience",
            "Canoe experience on the river",
            "Surfing experience in the sea",
            "Free diving fishing experience in the sea",
            "Experience seeing the starry night sky",
            "Limestone cave exploration experience"],
        "Local life experience":[
            "Experience staying in an old folk house",
            "Experience a roundtable discussion with local residents",
            "Experience helping with a town cleaning event with local residents",
            "Tour experience to learn about the history of the town",
            "Experience a dinner party with people of the same generation in town",
            "Experience a free day without interacting with the townspeople",
            "Experience cutting and eating the fish you caught yourself on the spot"]
        "other information" :[
            "X town population is 350 people, average annual income 4.2 million yen",
            "Local industries are fishing and tourism, making it difficult to find a new job",
            "The number of immigrants who love the sea has been increasing in recent years.",
            "Schools are closed and the educational environment and level are low.",
            "The hospital is located about 40 minutes by car from town.",
            "Traditional events are sea festivals held in summer",
            "Most of the residents cannot speak English, so only Japanese is available.",
            "Because it is a seaside town, seafood is more appealing than vegetables.",
            "The internet environment is equipped with Wi-Fi.",
            "Skills gained through experience include knowledge about fishing and practical use of machinery.",
            "Other skills gained"],
    },
    {
        "name": "Quest in Hakone town in the mountains",
        "experience_and_labour_shortage_issue": [
            "Experience of labor shortage issues",
            "Experience feeding farmed fish by local fishermen",
            "Experience casting a net for inshore fishing by local fishermen",
            "Experience helping with fish preparation and sorting",
            "Experience planting coral reefs while scuba diving",
            "Experience helping pick up trash at the beach"
        ],
        "Natural_activity_experiences": [
            "Nature activity experience",
            "Canoe experience on the river",
            "Surfing experience in the sea",
            "Free diving fishing experience in the sea",
            "Experience seeing the starry night sky",
            "Limestone cave exploration experience"
        ],
        "Local_life_experience": [
            "Experience staying in an old folk house",
            "Experience a roundtable discussion with local residents",
            "Experience helping with a town cleaning event with local residents",
            "Tour experience to learn about the history of the town",
            "Experience a dinner party with people of the same generation in town",
            "Experience a free day without interacting with the townspeople",
            "Experience cutting and eating the fish you caught yourself on the spot"
        ],
        "other_information": [
            "Y town population is 1500 people, average annual income 3.6 million yen",
            "The local industry is forestry, and it is difficult to find a new job.",
            "Although it is a closed environment, immigrants are welcomed.",
            "Traditional event is the Bon Odori festival held at the shrine in the summer",
            "Most of the residents cannot speak English, so only Japanese is available."
        ]
    },
    {
        "name": "Quest in Tsurui town in rural area",
        "experiences": [
            "Organic farming tour experience",
            "Harvesting experience at an orchard",
            "Experience planting rice in a rice field",
            "Experience distributing vegetables to residents from vegetable farmers",
            "Experience helping move fertilizer and agricultural equipment",
            "Experience setting traps to prevent animal damage",
            "Tea party experience with a view of the countryside",
            "Yoga/meditation experience on the field",
            "Horseback riding experience",
            "Lake hiking experience",
            "Barbecue experience in the great outdoors",
            "Experience staying in an old folk house",
            "Experience participating in a meeting of the town festival executive committee",
            "Experience making rice with luxurious ingredients only available at farmers",
            "Experience a roundtable discussion with local residents",
            "Experience helping with a town cleaning event with local residents",
            "Experience a dinner party with people of the same generation in town",
            "Experience a free day without interacting with the townspeople",
        ],
        "other_info": [
            "The population of Tsurui Town is 4,200 people, and the average annual income is 5 million yen.",
            "Local industries are agriculture and tourism, and there are many restaurants and companies in the town, making it easy to find a new job.",
            "The number of immigrants has been increasing in recent years, and the town is welcoming them.",
            "There is also support in the form of immigration subsidies.",
            "Traditional event is the Bon Odori festival held at the shrine in the summer",
            "There is a guide who can speak English",
            "Tsurui Town, a rural area, is known as one of the towns in Japan with delicious food because of its abundance of ingredients.",
            "Internet environment can be used anywhere in town",
            "Because it is a rural area, it is low above sea level. There is a risk of being submerged during heavy rain.",
            "Skills gained through experience are agricultural knowledge",
        ],
    }
]


def calculate_similarity(user_entities, user_input_tokens, package_entities):
    # Calculate similarity scores based on user input and package description
    description_similarity = calculate_similarity(user_entities, package_entities)
    
    # Consider experiences for additional relevance
    experiences_similarity = calculate_similarity(user_input_tokens, package_entities)
    
    # Combine similarity scores based on different aspects (you can adjust weights)
    total_similarity = 0.7 * description_similarity + 0.3 * experiences_similarity
    return total_similarity

# Dynamic Query Expansion
# Use more sophisticated methods for dynamic query expansion (e.g., word embeddings or synonyms API)
# For simplicity, I'm leaving this as a placeholder
def expand_query(user_input):
    return user_input + " Synonym1 Synonym2"

expanded_query = expand_query(user_input)

# Database Querying and Ranking
# Rank packages based on the enhanced similarity score
ranked_packages = []

for package in tour_packages:
    package_entities = nlp(" ".join(package["experiences"] + package["other_info"])).ents
    similarity_score = calculate_similarity(entities, nlp(user_input), package_entities)
    ranked_packages.append({"name": package["name"], "score": similarity_score})

# Sort packages based on the similarity score in descending order
ranked_packages = sorted(ranked_packages, key=lambda x: x["score"], reverse=True)

# Present the top suggestions to the user
top_suggestions = [package["name"] for package in ranked_packages[:3]]
print("Top Tour Package Suggestions:", top_suggestions)

# User Feedback Loop
# Assuming the user provides feedback (positive or negative)
user_feedback = "Positive"
# Update the model or ranking based on feedback (not implemented in detail here)
# Feedback can be used to adjust weights, improve recommendations, etc.

# Continue refining and enhancing the system based on user interactions and feedback.

TypeError: calculate_similarity() missing 1 required positional argument: 'package_entities'

In [61]:
pip install pymongo

Note: you may need to restart the kernel to use updated packages.Collecting pymongo
  Downloading pymongo-4.6.1-cp310-cp310-win_amd64.whl.metadata (22 kB)
Collecting dnspython<3.0.0,>=1.16.0 (from pymongo)
  Downloading dnspython-2.5.0-py3-none-any.whl.metadata (5.8 kB)
Downloading pymongo-4.6.1-cp310-cp310-win_amd64.whl (472 kB)
   -------------------------------------- 472.7/472.7 kB 477.3 kB/s eta 0:00:00
Downloading dnspython-2.5.0-py3-none-any.whl (305 kB)
   ---------------------------------------- 305.4/305.4 kB 1.1 MB/s eta 0:00:00
Installing collected packages: dnspython, pymongo
Successfully installed dnspython-2.5.0 pymongo-4.6.1




[notice] A new release of pip is available: 23.3.1 -> 24.0
[notice] To update, run: C:\Users\adhna\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [72]:
import pymongo

client = pymongo.MongoClient("mongodb+srv://adhnansyed47:0wc8DPD6ZJIOyEiL@cluster0.blovmsa.mongodb.net/?retryWrites=true&w=majority")
db = client.sample_airbnb
collection = db.listingsAndReviews

In [76]:
items=collection.find().limit(5)
for item in items:
    print(item)

{'_id': '10006546', 'listing_url': 'https://www.airbnb.com/rooms/10006546', 'name': 'Ribeira Charming Duplex', 'summary': 'Fantastic duplex apartment with three bedrooms, located in the historic area of Porto, Ribeira (Cube) - UNESCO World Heritage Site. Centenary building fully rehabilitated, without losing their original character.', 'space': 'Privileged views of the Douro River and Ribeira square, our apartment offers the perfect conditions to discover the history and the charm of Porto. Apartment comfortable, charming, romantic and cozy in the heart of Ribeira. Within walking distance of all the most emblematic places of the city of Porto. The apartment is fully equipped to host 8 people, with cooker, oven, washing machine, dishwasher, microwave, coffee machine (Nespresso) and kettle. The apartment is located in a very typical area of the city that allows to cross with the most picturesque population of the city, welcoming, genuine and happy people that fills the streets with his o

In [83]:
hf_token = "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"
embedding_url = "https://api-inference.huggingface.co/pipeline/feature-extraction/sentence-transformers/all-MiniLM-L6-v2"

def generate_embedding(text: str) -> list[float]:

  response = requests.post(
    embedding_url,
    headers={"Authorization": f"Bearer {hf_token}"},
    json={"inputs": text})

  if response.status_code != 200:
    raise ValueError(f"Request failed with status code {response.status_code}: {response.text}")

  return response.json()

In [84]:
print(generate_embedding("aqui is cute"))

[-0.07806715369224548, -0.02445826679468155, 0.04289402440190315, 0.016511421650648117, -0.07045572996139526, 0.008923823945224285, 0.16619059443473816, -0.06125100329518318, 0.08013831824064255, 0.06315667182207108, 0.016574889421463013, -0.07001124322414398, 0.011025178246200085, 0.028087275102734566, -0.0322481207549572, 0.013886924833059311, 0.03624740242958069, -0.04330555349588394, 0.028637537732720375, -0.05636255070567131, -0.07401272654533386, 0.05483758822083473, 0.044280074536800385, 0.044367242604494095, -0.02951008453965187, 0.004079396370798349, 0.015859773382544518, 0.03805127739906311, 0.041639696806669235, -0.09861283749341965, -0.04752468690276146, 0.07272642850875854, 0.09280548989772797, -0.044940926134586334, -0.04762234166264534, 0.02823309600353241, 0.02996356599032879, 0.03435249626636505, -0.02085895836353302, 0.003047858364880085, 0.021668076515197754, -0.032519321888685226, 0.02422153577208519, 0.0041951267048716545, -0.02489510178565979, -0.03941572085022926

In [104]:
for doc in collection.find({'description':{"$exists": True}}).limit(100):
    doc['description_embedding_hf'] = generate_embedding(doc['description'])
    collection.replace_one({'_id': doc['_id']}, doc)



In [112]:
query = "i want hotel with peace atmosphere"

results = collection.aggregate([
  {"$vectorSearch": {
    "queryVector": generate_embedding(query),
    "path": "description_embedding_hf",
    "numCandidates": 100,
    "limit": 4
      ,
    "index": "descriptionSemanticSearch",
      }}
]);

for document in results:
    print(f' Name: {document["name"]},\nhotel des: {document["description"]}\n')

 Name: ,
hotel des: We have 2 cozy rooms available from january 12 to february 6 in a very centered location Hisarüstü, Sarıyer. Easy  access to places like beşiktaş, taksim, mecidiyeköy, istinye, bebek, etiler with just one bus or metro. Balcony with a Bosphorus view:)

 Name: GOLF ROYAL RESIDENCE SUİTES(2+1)-2,
hotel des: A BIG BED ROOM WITH A BIG SALOON INCLUDING A NICE BALAKON TO HAVE SOME FRESH AIR . OUR RESIDENCE SITUATED AT THE CENTRE OF THE IMPORTANT MARKETS SUCH AS NİŞANTAŞİ,OSMANBEY AND TAKSIM SQUARE,

 Name: Studio Apartment w/ Private Rooftop,
hotel des: Our light, cozy studio is in the heart of Sheung Wan and comes with private rooftop with BBQ! A comfortable double bed, kitchen, bathroom, living area with extendable sofa, WIFI and washer. 5 min walk from Central and lots of restaurants next door.

 Name: Big Private BR Steps to Barnard Columbia & Subways,
hotel des: This listing is for a large, comfy private BR in a quiet, sun-filled apartment on  a high floor in a safe, 

In [113]:
import pymongo
import openai

# Set your OpenAI API key
openai.api_key = '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'

client = pymongo.MongoClient("mongodb+srv://adhnansyed47:0wc8DPD6ZJIOyEiL@cluster0.blovmsa.mongodb.net/?retryWrites=true&w=majority")
db = client.sample_mflix
collection = db.embedded_movies

def generate_embedding(text: str) -> list[float]:

    response = openai.Embedding.create(
        model="text-embedding-ada-002", 
        input=text
    )
    return response['data'][0]['embedding']

query = "imaginary characters from outer space at war"

results = collection.aggregate([
  {"$vectorSearch": {
    "queryVector": generate_embedding(query),
    "path": "plot_embedding",
    "numCandidates": 100,
    "limit": 4,
    "index": "PlotSemanticSearch",
      }}
]);

for document in results:
    print(f'Movie Name: {document["title"]},\nMovie Plot: {document["plot"]}\n')

RateLimitError: You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.

In [92]:
results

<pymongo.command_cursor.CommandCursor at 0x252b0986770>