In [None]:
# Pinned to the version this notebook was last run with so a fresh kernel reproduces it.
%pip install -q python-Levenshtein==0.26.1

Collecting python-Levenshtein
  Downloading python_Levenshtein-0.26.1-py3-none-any.whl.metadata (3.7 kB)
Collecting Levenshtein==0.26.1 (from python-Levenshtein)
  Downloading levenshtein-0.26.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.2 kB)
Collecting rapidfuzz<4.0.0,>=3.9.0 (from Levenshtein==0.26.1->python-Levenshtein)
  Downloading rapidfuzz-3.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Downloading python_Levenshtein-0.26.1-py3-none-any.whl (9.4 kB)
Downloading levenshtein-0.26.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (162 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m162.7/162.7 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading rapidfuzz-3.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m38.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages:

In [None]:
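# NOTE(review): a UUID-like value (apparently an API key) was pasted here and has been redacted; keep credentials out of the notebook and rotate that key.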
import html
import json
import os

import Levenshtein
import requests
from google.colab import files

# GraphQL endpoint that every request in this notebook is posted to.
url = "https://graphql.canopyapi.co/"

# Headers sent with every request. The API key is read from the
# CANOPY_API_KEY environment variable instead of being written into the
# notebook; when the variable is unset or empty, no key header is sent.
headers = {
    "Content-Type": "application/json",
}
_api_key = os.environ.get("CANOPY_API_KEY")
if _api_key:
    headers["API-KEY"] = _api_key

# GraphQL document posted by fetch_amazon_products. It declares one required
# string variable, $searchKeyWord, passes it as the search term together with
# `domain: CA` (presumably the Canadian marketplace -- confirm against the API
# schema), and requests title, brand, url, isNew, the display price, rating
# and mainImageUrl for each search result.
query = """
    query amazonProduct($searchKeyWord: String!) {
        amazonProductSearchResults(
            input: {
                searchTerm: $searchKeyWord,
                domain: CA
            }) {
            productResults {
                results {
                    title
                    brand
                    url
                    isNew
                    price {
                        display
                    }
                    rating
                    mainImageUrl
                }
            }
        }
    }
"""

def fetch_amazon_products(search_keyword, timeout=10):
    """Search Amazon products through the GraphQL endpoint.

    Posts the module-level ``query`` to ``url`` with ``headers`` and the
    keyword bound to the ``$searchKeyWord`` variable.

    Args:
        search_keyword: Text to search for.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        list: The ``results`` entries of the response. An empty list is
        returned (after printing a message) when the request fails, the
        status is not 200, or the body is not JSON; it is also returned when
        the expected keys are missing or null.
    """
    payload = {"query": query, "variables": {"searchKeyWord": search_keyword}}

    try:
        response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    except requests.RequestException as exc:
        # Same "report and return nothing" policy as the non-200 branch below.
        print(f"Request failed: {exc}")
        return []

    if response.status_code != 200:
        print(f"Error {response.status_code}: {response.text}")
        return []

    try:
        data = response.json()
    except ValueError as exc:
        print(f"Error: response body is not valid JSON ({exc})")
        return []

    # GraphQL servers can answer 200 and still report failures in "errors".
    if isinstance(data, dict) and data.get("errors"):
        print(f"GraphQL errors: {data['errors']}")

    # Walk the nested response defensively: a key may be absent *or* present
    # with a null value, and dict.get's default only covers the first case.
    node = data
    for key in ("data", "amazonProductSearchResults", "productResults", "results"):
        node = node.get(key) if isinstance(node, dict) else None

    return node if isinstance(node, list) else []

#Similarity search


def calculate_similarity(title, search_keyword):
    """Return a 0..1 similarity score between a product title and the keyword.

    The score is ``1 - distance / longest`` where ``distance`` is the
    Levenshtein edit distance between the lower-cased strings and ``longest``
    is the length of the longer lower-cased string. 1.0 means the strings are
    equal ignoring case.

    Args:
        title: Product title; ``None`` is treated as an empty string.
        search_keyword: Search phrase; ``None`` is treated as an empty string.

    Returns:
        float: Similarity in the range [0.0, 1.0].
    """
    normalized_title = (title or "").lower()
    normalized_keyword = (search_keyword or "").lower()

    # Measure lengths on the same (lower-cased) text the distance is computed
    # on, so the ratio cannot leave the 0..1 range.
    longest = max(len(normalized_title), len(normalized_keyword))
    if longest == 0:
        # Two empty strings are identical; this also avoids dividing by zero.
        return 1.0
    if not normalized_title or not normalized_keyword:
        # The distance to an empty string is the other string's full length.
        return 0.0

    distance = Levenshtein.distance(normalized_title, normalized_keyword)
    return 1 - distance / longest

def filter_relevant_results(results, search_keyword, max_results=10, threshold=0.4):
    """Rank products by title similarity to the keyword and keep the best ones.

    Each product is scored with ``calculate_similarity`` on its ``title`` and
    the list is sorted from most to least similar.

    Args:
        results: Iterable of product dicts; ``None`` is treated as empty.
        search_keyword: Phrase the titles are compared against.
        max_results: Maximum number of products to return.
        threshold: Minimum score for a product to count as relevant.

    Returns:
        list: At most ``max_results`` product dicts, best match first.

    NOTE(review): because the fallback below returns the top ``max_results``
    whenever too few products reach ``threshold``, the threshold does not
    change the output for any non-negative ``max_results``. Kept as-is since
    the fallback is documented as intentional.
    """
    scored_results = [
        # `or ""` covers a title that is present but null, which
        # dict.get's default would not replace.
        (product, calculate_similarity(product.get("title") or "", search_keyword))
        for product in (results or [])
    ]

    # Best match first.
    scored_results.sort(key=lambda pair: pair[1], reverse=True)

    relevant_results = [product for product, score in scored_results if score >= threshold]

    # Too few products reached the threshold: fall back to the top-ranked ones.
    if len(relevant_results) < max_results:
        relevant_results = [product for product, _score in scored_results[:max_results]]

    return relevant_results[:max_results]

def _html_text(value, default):
    """Return ``value`` as HTML-escaped text, or ``default`` when it is ``None``."""
    if value is None:
        value = default
    return html.escape(str(value), quote=True)


def generate_html(results):
    """Render the products as an HTML page containing one table.

    Every product value is HTML-escaped before it is inserted, because the
    data comes from an external API and must not be able to inject markup.
    Fields that are missing or null fall back to a placeholder.

    Args:
        results: List of product dicts with optional ``mainImageUrl``,
            ``title``, ``brand``, ``price`` (dict with ``display``),
            ``rating`` and ``url`` keys.

    Returns:
        str: The HTML document, or a single ``<p>`` element when ``results``
        is empty.
    """
    if not results:
        return "<p>No relevant products found.</p>"

    page_head = """
    <html>
    <head>
        <title>Amazon Product Search Results</title>
        <style>
            table { width: 100%; border-collapse: collapse; }
            th, td { border: 1px solid #ddd; padding: 8px; text-align: left; }
            th { background-color: #f4f4f4; }
            img { width: 100px; height: auto; }
        </style>
    </head>
    <body>
        <h2>Amazon Product Search Results</h2>
        <table>
            <tr>
                <th>Image</th>
                <th>Title</th>
                <th>Brand</th>
                <th>Price</th>
                <th>Rating</th>
                <th>Link</th>
            </tr>
    """

    rows = []
    for product in results:
        # `price` may be absent, null, or a dict whose `display` is null.
        price = product.get("price")
        price_display = price.get("display") if isinstance(price, dict) else None

        image_url = _html_text(product.get("mainImageUrl"), "#")
        title = _html_text(product.get("title"), "N/A")
        brand = _html_text(product.get("brand"), "N/A")
        price_text = _html_text(price_display, "Not Available")
        rating = _html_text(product.get("rating"), "N/A")
        link = _html_text(product.get("url"), "#")

        rows.append(f"""
            <tr>
                <td><img src="{image_url}" alt="Product Image"></td>
                <td>{title}</td>
                <td>{brand}</td>
                <td>{price_text}</td>
                <td>{rating}</td>
                <td><a href="{link}" target="_blank">View Product</a></td>
            </tr>
        """)

    page_tail = """
        </table>
    </body>
    </html>
    """
    # Join once instead of growing a string inside the loop.
    return page_head + "".join(rows) + page_tail

def generate_json(results):
    """Serialize the filtered results to a JSON string indented by four spaces."""
    serialized = json.dumps(results, indent=4)
    return serialized

# Example usage: search, rank, then export the results
search_keyword = "Columbia shoes"
max_suggestions = 10  # upper bound on the number of products kept

all_results = fetch_amazon_products(search_keyword)
filtered_results = filter_relevant_results(
    all_results,
    search_keyword,
    max_results=max_suggestions,
)

# Render both export formats from the same filtered list
html_output = generate_html(filtered_results)
json_output = generate_json(filtered_results)

# Target file names for the two exports
html_filename = "amazon_results.html"
json_filename = "amazon_results.json"

# Write every export to disk first ...
exports = ((html_filename, html_output), (json_filename, json_output))
for export_name, export_text in exports:
    with open(export_name, "w", encoding="utf-8") as file:
        file.write(export_text)

# ... then ask Colab to download each file, in the same order
for export_name, export_text in exports:
    files.download(export_name)

print(f"Download started: {html_filename} and {json_filename} with max {max_suggestions} results")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Download started: amazon_results.html and amazon_results.json with max 10 results
