# Unzip the dataset

In [1]:
!unzip /content/archive.zip

Archive:  /content/archive.zip
  inflating: Temples/Alexandria/Alexandria_Opera_House/0.jpg  
  inflating: Temples/Alexandria/Alexandria_Opera_House/1.jpg  
  inflating: Temples/Alexandria/Alexandria_Opera_House/10.jpg  
  inflating: Temples/Alexandria/Alexandria_Opera_House/11.jpg  
  inflating: Temples/Alexandria/Alexandria_Opera_House/12.jpg  
  inflating: Temples/Alexandria/Alexandria_Opera_House/13.jpg  
  inflating: Temples/Alexandria/Alexandria_Opera_House/14.jpg  
  inflating: Temples/Alexandria/Alexandria_Opera_House/15.jpg  
  inflating: Temples/Alexandria/Alexandria_Opera_House/16.jpg  
  inflating: Temples/Alexandria/Alexandria_Opera_House/17.jpg  
  inflating: Temples/Alexandria/Alexandria_Opera_House/18.jpg  
  inflating: Temples/Alexandria/Alexandria_Opera_House/19.jpg  
  inflating: Temples/Alexandria/Alexandria_Opera_House/2.jpg  
  inflating: Temples/Alexandria/Alexandria_Opera_House/20.jpg  
  inflating: Temples/Alexandria/Alexandria_Opera_House/21.jpg  
  inflating:

# Trial

In [2]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import ast

# Load the dataset
file_path = "WanderWise_with_CrowdScores.xlsx"
df = pd.read_excel(file_path)

# Parse CrowdScores column from string to dictionary.
# Cells that are already non-string (e.g. empty cells read as NaN) are left as they are.
df["CrowdScores"] = df["CrowdScores"].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)

# USER INPUT SECTION
user_input = {
    "location": "Luxor",
    "budget": 800,
    "mood": {
        "Family": 0.7,
        "Art": 0.3,
        "Sports": 0.1,
        "History": 0.8,
        "Entertainment": 0.4,
        "Adventure": 0.6
    }
}

# A time slot is treated as acceptable when its crowd score is strictly below this value.
CROWD_THRESHOLD = 0.7

# STEP 1: Filter by Location (case-insensitive exact match)
in_location = df["Location"].str.lower() == user_input["location"].lower()

# STEP 2: Filter by Budget (the total budget is split evenly over the three item types)
per_item_budget = user_input["budget"] / 3
within_budget = df["Price"] <= per_item_budget

# .copy() makes filtered_df an independent frame, so adding a column below does not
# write into a slice of df (which triggers pandas' SettingWithCopyWarning).
filtered_df = df[in_location & within_budget].copy()

# STEP 3: Compute Mood Similarity
mood_cols = ["Family", "Art", "Sports", "History", "Entertainment", "Adventure"]
# Build the user vector in mood_cols order instead of relying on the dict's insertion
# order, so each user weight is compared against the matching dataset column.
user_mood = np.array([[user_input["mood"][col] for col in mood_cols]])
if filtered_df.empty:
    # cosine_similarity raises on a zero-row matrix; an empty column keeps the pipeline running.
    filtered_df["MoodSimilarity"] = np.array([], dtype=float)
else:
    item_moods = filtered_df[mood_cols].values
    filtered_df["MoodSimilarity"] = cosine_similarity(item_moods, user_mood).flatten()

# STEP 4: Filter by Crowd Scores
def is_available(crowd_scores):
    """Return True when at least one time slot has a crowd score below CROWD_THRESHOLD.

    Rows whose crowd data is not a dict (for example an empty spreadsheet cell) are
    reported as unavailable instead of raising AttributeError.
    """
    if not isinstance(crowd_scores, dict):
        return False
    return any(score < CROWD_THRESHOLD for score in crowd_scores.values())

# astype(bool) keeps the mask boolean even when filtered_df has no rows.
filtered_df = filtered_df[filtered_df["CrowdScores"].apply(is_available).astype(bool)]

# STEP 5: Recommend Top for Each Type
recommendations = []
for t in ["do", "eat", "stay"]:
    subset = filtered_df[filtered_df["Type"].str.lower() == t]
    if not subset.empty:
        top_item = subset.sort_values("MoodSimilarity", ascending=False).iloc[0]
        recommendations.append(top_item)

# STEP 6: Display Results
display_cols = ["Type", "Trip_Title", "Location", "Price", "Rating", "MoodSimilarity", "CrowdScores"]
if recommendations:
    result_df = pd.DataFrame(recommendations)[display_cols].reset_index(drop=True)
else:
    # Nothing matched: show an empty table with the expected headers instead of a KeyError.
    result_df = pd.DataFrame(columns=display_cols)
result_df


Unnamed: 0,Type,Trip_Title,Location,Price,Rating,MoodSimilarity,CrowdScores
0,do,Mummification Museum,Luxor,0,4.3,0.966051,"{'8-10': 0.47, '10-12': 0.87, '12-14': 0.87, '..."
1,eat,Arabia cafe,Luxor,0,4.0,0.848554,"{'8-10': 0.42, '10-12': 0.42, '12-14': 0.82, '..."
2,stay,Sofitel Winter Palace Luxor,Luxor,0,4.8,0.874273,"{'8-10': 0.5, '10-12': 0.5, '12-14': 0.5, '14-..."


# Deployment Using Gradio

In [None]:
!pip install transformers gradio --quiet  

import os
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import ast
import gradio as gr
from PIL import Image
import torch
from transformers import CLIPProcessor, CLIPModel

In [4]:
# Read the spreadsheet a single time; the cells below reuse this frame.
df = pd.read_excel("/content/WanderWise_with_CrowdScores.xlsx")
df = df.assign(
    # String cells are evaluated back into Python literals; anything else passes through.
    CrowdScores=df["CrowdScores"].map(
        lambda raw: raw if not isinstance(raw, str) else ast.literal_eval(raw)
    ),
    # Lower-cased copy of Location, computed up front so lookups need not lower-case every time.
    Location_lower=df["Location"].str.lower(),
)

# Dataset columns that are compared against the user's mood vector, in this order.
mood_cols = [
    "Family",
    "Art",
    "Sports",
    "History",
    "Entertainment",
    "Adventure",
]


In [5]:
# Fetch the CLIP weights and the matching preprocessor a single time.
CLIP_CHECKPOINT = "openai/clip-vit-base-patch32"
clip_processor = CLIPProcessor.from_pretrained(CLIP_CHECKPOINT)
# eval() returns the module itself, so the model is switched to inference mode on assignment.
clip_model = CLIPModel.from_pretrained(CLIP_CHECKPOINT).eval()

def encode_images_recursively(image_folder):
    """Embed every image found under ``image_folder`` with the module-level CLIP model.

    The folder is walked recursively. Files whose names end in .jpg, .jpeg or .png
    (case-insensitive) are opened, converted to RGB, passed through
    ``clip_processor`` / ``clip_model.get_image_features`` and L2-normalised.
    Files that cannot be opened or decoded are reported with ``print`` and skipped.

    Directories and files are visited in sorted order so that the returned
    embeddings/paths have the same order on every run and every filesystem.

    Args:
        image_folder: Root directory to search.

    Returns:
        A tuple ``(embeddings, paths)``: the per-image embeddings concatenated along
        dim 0, and the list of file paths in the same order.

    Raises:
        RuntimeError: If no image could be encoded.
    """
    valid_exts = ('.jpg', '.jpeg', '.png')
    image_features = []
    image_paths = []

    for root, dirs, files in os.walk(image_folder):
        # Sorting dirs in place controls the order in which os.walk descends (top-down walk).
        dirs.sort()
        for img_name in sorted(files):
            if not img_name.lower().endswith(valid_exts):
                continue
            img_path = os.path.join(root, img_name)
            try:
                # The context manager releases the file handle even for multi-frame
                # files or when decoding fails part-way; convert() returns an
                # independent in-memory image that stays usable afterwards.
                with Image.open(img_path) as raw_image:
                    image = raw_image.convert("RGB")
            except Exception as e:
                print(f"Skipping {img_path} due to error: {e}")
                continue
            inputs = clip_processor(images=image, return_tensors="pt")
            with torch.no_grad():
                img_emb = clip_model.get_image_features(**inputs)
                # Unit-length vectors let a plain dot product act as cosine similarity later.
                img_emb = img_emb / img_emb.norm(dim=-1, keepdim=True)
            image_features.append(img_emb)
            image_paths.append(img_path)

    if not image_features:
        raise RuntimeError("No valid images found in dataset folder!")
    return torch.cat(image_features), image_paths

# Only run once: this walks image_folder and runs one CLIP forward pass per image.
# The two results are module-level names that recommend() reads later:
#   image_embeddings - the normalised embeddings, one row per encoded image
#   image_paths      - the file path for each row, in the same order
image_folder = "/content/Temples"
image_embeddings, image_paths = encode_images_recursively(image_folder)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/605M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/605M [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


preprocessor_config.json:   0%|          | 0.00/316 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/592 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/389 [00:00<?, ?B/s]

In [None]:
def label_crowd(crowd_scores):
    """Turn a slot -> score mapping into a slot -> display label mapping.

    Each label is the slot name followed by a coloured indicator:
    scores below 0.3 are Low, scores below 0.7 are Medium, everything else is High.
    The order of the slots is kept.
    """
    def _tag(slot_name, level):
        # Guard clauses, checked from the lowest band upwards.
        if level < 0.3:
            return f"{slot_name}: 🟢 Low"
        if level < 0.7:
            return f"{slot_name}: 🟡 Medium"
        return f"{slot_name}: 🔴 High"

    return {slot_name: _tag(slot_name, level) for slot_name, level in crowd_scores.items()}

def is_time_slot_ok(crowd_scores, start, end, threshold=0.7):
    """Report whether any slot starting inside the window has an acceptable crowd level.

    A slot key is expected to look like ``"10-12"`` (start hour, end hour). A slot is
    considered when its start hour satisfies ``start <= hour < end``; it is acceptable
    when its score is strictly below ``threshold``.

    Args:
        crowd_scores: Mapping of slot key to crowd score. Anything that is not a dict
            (for example a NaN from an empty spreadsheet cell) yields False.
        start: First hour of the preferred window (inclusive).
        end: Last hour of the preferred window (exclusive).
        threshold: Scores below this value count as acceptable. Defaults to 0.7.

    Returns:
        True if at least one considered slot is acceptable, otherwise False.
    """
    if not isinstance(crowd_scores, dict):
        return False
    for slot, score in crowd_scores.items():
        try:
            # Only the start hour decides membership; the end hour is parsed to validate the key.
            slot_start, _slot_end = map(int, slot.split('-'))
        except (AttributeError, ValueError):
            # Key is not of the "H1-H2" form: ignore this slot rather than fail the whole row.
            continue
        if start <= slot_start < end and score < threshold:
            return True
    return False

def retrieve_similar_images(query_image, image_embeddings, image_paths, top_k=5):
    """Rank indexed images against ``query_image`` and return the best matches.

    The query is embedded with the module-level CLIP processor/model and scaled to
    unit length, then scored against every row of ``image_embeddings`` with a matrix
    product. At most ``top_k`` results are returned (fewer if there are fewer paths),
    best first, as ``(path, score)`` tuples.
    """
    encoded_query = clip_processor(images=query_image, return_tensors="pt")
    with torch.no_grad():
        query_vec = clip_model.get_image_features(**encoded_query)
        query_vec = query_vec / query_vec.norm(dim=-1, keepdim=True)

    # One score per indexed image; row 0 because there is a single query.
    scores = torch.matmul(query_vec, image_embeddings.T)[0]
    how_many = min(top_k, len(image_paths))
    best = scores.topk(how_many)

    matches = []
    for value, index in zip(best.values, best.indices):
        matches.append((image_paths[index], value.item()))
    return matches

In [None]:
def _build_recommendation_card(item_type, item):
    """Render one recommended dataset row as an HTML card."""
    import html  # stdlib; local import keeps this cell self-contained

    crowd_labels = label_crowd(item["CrowdScores"])
    formatted_crowd = "<br>".join(html.escape(text) for text in crowd_labels.values())
    # Text taken from the spreadsheet is escaped so characters such as & or < display literally.
    return f"""
            <div style='flex: 1; min-width: 280px; max-width: 32%;
                        padding: 15px; margin: 10px; border: 1px solid #ccc;
                        border-radius: 10px; box-shadow: 1px 1px 5px rgba(0,0,0,0.1);'>
                <h3>{html.escape(item_type.capitalize())}</h3>
                <b>{html.escape(str(item['Trip_Title']))}</b><br>
                📍 <i>{html.escape(str(item['Location']))}</i><br>
                💰 <b>Price:</b> {item['Price']} EGP &nbsp;&nbsp; ⭐ <b>Rating:</b> {item['Rating']}<br>
                🎯 <b>Mood Match:</b> {round(item['MoodSimilarity'], 2)}<br>
                🕒 <b>Crowd Forecast:</b><br>{formatted_crowd}
            </div>
            """


def _select_visual_matches(image_input, image_folder_name, max_results=3):
    """Return up to ``max_results`` (path, score) matches, at most one per image folder."""
    folder_query = (image_folder_name or "").strip().lower()
    if folder_query:
        # Case-insensitive substring match on the stored path, like the location filter.
        keep = [i for i, p in enumerate(image_paths) if folder_query in p.lower()]
        if not keep:
            return []
        # Index the embedding matrix once instead of stacking rows one by one.
        candidates = retrieve_similar_images(
            image_input, image_embeddings[keep], [image_paths[i] for i in keep], top_k=20
        )
    else:
        candidates = retrieve_similar_images(image_input, image_embeddings, image_paths, top_k=50)

    seen_folders = set()
    unique_matches = []
    for path, score in candidates:
        folder_name = os.path.basename(os.path.dirname(path))
        if folder_name in seen_folders:
            continue
        seen_folders.add(folder_name)
        unique_matches.append((path, score))
        if len(unique_matches) >= max_results:
            break
    return unique_matches


def recommend(location, budget, f1, a1, s1, h1, e1, adv1, time_start, time_end, selected_types, image_input, image_folder_name):
    """Build the recommendation HTML and the gallery of visually similar images.

    Args:
        location: Place name; matched case-insensitively (after trimming) against
            the ``Location_lower`` column of the module-level ``df``.
        budget: Total budget. It is divided by three to get the per-item price cap.
        f1, a1, s1, h1, e1, adv1: User mood weights, in the same order as ``mood_cols``.
        time_start, time_end: Preferred window passed to ``is_time_slot_ok``.
        selected_types: Item types (values of the ``Type`` column, lower-case) to recommend.
        image_input: Optional query image for visual matching; ``None`` skips that step.
        image_folder_name: Optional text; when given, only indexed images whose path
            contains it are searched.

    Returns:
        ``(html, images)`` where ``html`` is the rendered plan (or a warning message)
        and ``images`` is a list of ``(PIL image, label)`` pairs for the gallery.
    """
    import html  # stdlib; local import keeps this cell self-contained

    if budget is None:
        # An empty number field arrives as None; float(None) would raise.
        return "⚠️ Please enter your total budget.", []

    user_mood = np.array([[f1, a1, s1, h1, e1, adv1]])
    per_item_budget = float(budget) / 3
    wanted_location = (location or "").strip().lower()
    filtered = df[df["Location_lower"] == wanted_location].copy()

    if filtered.empty:
        return "⚠️ No matching items for your location.", []

    item_moods = filtered[mood_cols].values
    filtered["MoodSimilarity"] = cosine_similarity(item_moods, user_mood).flatten()
    filtered = filtered[filtered["CrowdScores"].apply(lambda cs: is_time_slot_ok(cs, time_start, time_end))]

    if filtered.empty:
        return "⚠️ No activities available in your preferred time window.", []

    # Both masks are the same for every requested type, so compute them once.
    type_lower = filtered["Type"].str.lower()
    affordable = filtered["Price"] <= per_item_budget

    results = []
    for t in selected_types or []:
        sub = filtered[(type_lower == t) & affordable].sort_values("MoodSimilarity", ascending=False).head(2)
        for _, item in sub.iterrows():
            results.append(_build_recommendation_card(t, item))

    if not results:
        # Say why the plan is empty instead of showing only the rating widget.
        results.append("<p>⚠️ Nothing fits your budget for the selected categories.</p>")

    # Image similarity section with uniqueness
    images_to_display = []
    if image_input is not None:
        try:
            for path, score in _select_visual_matches(image_input, image_folder_name):
                # load() reads the pixels; leaving the with-block then closes the file
                # while the image stays usable for the gallery.
                with Image.open(path) as img_pil:
                    img_pil.load()
                label = f"{os.path.basename(os.path.dirname(path))} — Score: {score:.2f}"
                images_to_display.append((img_pil, label))

        except Exception as e:
            results.append(f"<p style='color:red;'>❌ Error processing uploaded image: {html.escape(str(e))}</p>")

    html_output = f"""
    <div style='display: flex; flex-wrap: wrap; justify-content: center; gap: 15px;'>
        {''.join(results)}
    </div>
    <div style='text-align:center; margin-top: 20px;'>
        <label for='rate'>⭐ Rate the Recommendation:</label><br>
        <input id='rate' type='range' min='1' max='5' step='1' value='3' onchange="document.getElementById('rate-output').innerHTML = 'You rated this plan <b>' + this.value + ' star' + (this.value == '1' ? '' : 's') + '</b>.'">
        <p id='rate-output'></p>
    </div>
    """
    return html_output, images_to_display

# Input widgets, listed in the same order as recommend()'s parameters.
plan_inputs = [
    gr.Textbox(label="Location", placeholder="e.g., Luxor"),
    gr.Number(label="Total Budget (EGP)", placeholder=800),
    # Six mood sliders that differ only in their label.
    *[
        gr.Slider(0, 1, value=0.5, label=mood_name)
        for mood_name in ("Family", "Art", "Sports", "History", "Entertainment", "Adventure")
    ],
    gr.Slider(8, 18, step=2, value=10, label="Preferred Start Time"),
    gr.Slider(10, 22, step=2, value=18, label="Preferred End Time"),
    gr.CheckboxGroup(
        ["do", "eat", "stay"],
        value=["do", "eat", "stay"],
        label="What would you like recommendations for?",
    ),
    gr.Image(type="pil", label="📷 Upload an Image for Visual Matching (Optional)"),
    gr.Textbox(label="Find Similar Places"),
]

# Output widgets: the HTML plan first, then the image gallery, matching recommend()'s return order.
plan_outputs = [
    gr.HTML(label="🗺️ Your Personalized Plan"),
    gr.Gallery(label="📸 Visually Similar Images", columns=5, height="auto"),
]

demo = gr.Interface(
    fn=recommend,
    inputs=plan_inputs,
    outputs=plan_outputs,
    title="🌍 WanderWise: Smart Tourism Recommender",
    description="Plan your day in Egypt based on mood, budget, timing, and even an image you upload!",
)
demo.launch()


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://ad70677820dcf85a70.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


