In [5]:
!pip install pandas scikit-learn numpy





[notice] A new release of pip is available: 24.2 -> 24.3.1
[notice] To update, run: C:\Users\prann\AppData\Local\Programs\Python\Python312\python.exe -m pip install --upgrade pip


In [7]:
################LOADING JSON DATA############
import json

# NOTE(review): absolute, machine-specific Windows path. The notebook only runs
# as-is on a machine that has the dataset at this exact location; point this at
# your local copy of the metadata sample before running.
file_path = "C:\\Users\\prann\\OneDrive\\Desktop\\UPES\\Amazon-Product-Recommendation-System-master\\sourceFiles\\meta_All_Beauty_sample.json"

def load_json(file_path):
    """Load a JSON Lines file (one JSON document per line) into a list.

    Args:
        file_path: Path of the file to read.

    Returns:
        list: The parsed objects, in file order. Blank lines are ignored.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Decode explicitly as UTF-8 ("utf-8-sig" also tolerates a leading BOM)
    # instead of relying on the platform default encoding, which on Windows
    # is typically a legacy code page that mangles or rejects non-ASCII text.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        # Skip blank lines (e.g. an extra empty line at the end of the file);
        # json.loads would raise on them.
        return [json.loads(line) for line in file if line.strip()]

# Parse the whole metadata file into memory; `data` holds the list returned by
# load_json (one entry per line of the file).
data = load_json(file_path)
print("Data loaded successfully.")


Data loaded successfully.


In [8]:
#################PREPROCESS DATA###########
import pandas as pd

# Column order of the frame produced by preprocess_data.
_PROCESSED_COLUMNS = ['asin', 'title', 'brand', 'rank', 'price', 'description', 'image_url']


def _first_or_default(values, default):
    """Return the first element of a list-like field, or `default` if it is empty or missing."""
    return values[0] if values else default


def _parse_rank(raw):
    """Parse a rank such as '1,234 in Beauty' into an int; None means the rank is missing."""
    if raw is None:
        return None
    # Keep only the text before the first "in", then strip thousands separators.
    return int(str(raw).split('in')[0].replace(',', '').strip())


def _parse_price(raw):
    """Parse a price such as '$1,234.50' into a float; None means the price is missing."""
    if raw is None:
        return None
    return float(str(raw).replace('$', '').replace(',', '').strip())


def preprocess_data(data):
    """Extract relevant fields, clean the data, and handle missing values.

    Args:
        data: Iterable of product dictionaries. The keys read are 'asin',
            'title', 'brand', 'rank', 'price', 'description' and 'image'.

    Returns:
        pandas.DataFrame: One row per kept record with the columns 'asin',
        'title', 'brand', 'rank', 'price', 'description' and 'image_url'.
        Missing 'price' / 'rank' values are replaced by the column median.
        An empty input yields an empty frame with the same columns.

    Notes:
        Records whose rank or price is present but cannot be parsed as a
        number are skipped (best effort), as before. Records with an empty
        'description' or 'image' list are kept and get '' / None instead of
        aborting the whole run with an IndexError.
    """
    processed_data = []
    for item in data:
        try:
            rank = _parse_rank(item.get('rank'))
            price = _parse_price(item.get('price'))
        except ValueError:
            # Unparseable numeric field: drop this record and carry on.
            continue

        processed_data.append({
            'asin': item.get('asin', ''),
            'title': item.get('title', ''),
            'brand': item.get('brand', ''),
            'rank': rank,
            'price': price,
            'description': _first_or_default(item.get('description'), ''),
            'image_url': _first_or_default(item.get('image'), None),
        })

    # Passing the column list keeps the schema stable even when no record survived.
    df = pd.DataFrame(processed_data, columns=_PROCESSED_COLUMNS)
    if df.empty:
        return df

    df['price'] = df['price'].fillna(df['price'].median())  # Handle missing values
    df['rank'] = df['rank'].fillna(df['rank'].median())
    return df

# Clean the raw records and persist the result. The CSV is written to the
# current working directory, without the DataFrame index column.
processed_df = preprocess_data(data)
processed_df.to_csv("processed_data.csv", index=False)
print("Data preprocessed and saved as 'processed_data.csv'.")


Data preprocessed and saved as 'processed_data.csv'.


In [12]:
#######################SEARCH FUNCTIONALITY###################
def search_products(query, data, key="title"):
    """Return the rows of `data` whose `key` column contains `query`.

    The match is a case-insensitive, literal substring match. Missing values
    in the column never match.

    Args:
        query: Text to look for.
        data: pandas DataFrame to search.
        key: Name of the text column to search in (default "title").

    Returns:
        pandas.DataFrame: The matching rows (possibly empty). When nothing
        matches, up to five randomly chosen values of the column are printed
        as fallback suggestions; they are not ranked by similarity.
    """
    column = data[key]
    # regex=False treats the query literally, so characters such as "+", "("
    # or "[" can neither raise re.error nor change what is matched.
    matches = column.str.contains(str(query), case=False, regex=False, na=False)
    results = data[matches]

    if results.empty:
        candidates = column.dropna()
        # Size the sample from the non-null values that actually exist;
        # asking sample() for more items than are available raises ValueError.
        suggestions = candidates.sample(n=min(5, len(candidates))).tolist()
        print("No products found. Suggestions:")
        for suggestion in suggestions:
            print(f"- {suggestion}")
    return results

# Demo: run one search against the preprocessed frame and show the matches.
# Nothing is printed here when there are no matches; search_products itself
# prints the "No products found" suggestions in that case.
query = "Workout Headphones"  # Example query
search_results = search_products(query, processed_df)  # Search for products matching the query
if not search_results.empty:
    print("Search Results:")
    print(search_results)


Search Results:
          asin                                   title            brand  \
0   0061073717  Workout Headphones by Arena Essentials    HarperCollins   
21  0984893229  Workout Headphones by Arena Essentials              NaN   
22  0990729702  Workout Headphones by Arena Essentials              NaN   
24  0996079203  Workout Headphones by Arena Essentials              NaN   
25  0996212205  Workout Headphones by Arena Essentials   College Pencil   
70  1933228032  Workout Headphones by Arena Essentials  Trivium Pursuit   

       rank   price                                        description  \
0   3235148  13.075                                                NaN   
21  1566583  13.075  Part 2 of the comic strip Queen of the Univers...   
22  1926486  13.075                                                NaN   
24  1352715  13.075  The Ubuntu Activity Guide is the key to enhanc...   
25  2229940  13.075  Get an edge in the competitive world of colleg...   
70  2483858  1

In [10]:
##################RECOMMENDATION ENGINE##############
from sklearn.neighbors import NearestNeighbors

def recommend_products(target_asin, data, num_recommendations=5):
    """Recommend products similar to the target product based on price and rank.

    Similarity is the Euclidean distance between the ('price', 'rank') pairs.
    NOTE(review): the two features are used unscaled, so the column with the
    larger numeric range dominates the distance.

    Args:
        target_asin: ASIN of the product to find neighbours for.
        data: pandas DataFrame with 'asin', 'price' and 'rank' columns.
        num_recommendations: Maximum number of products to return.

    Returns:
        pandas.DataFrame: Up to `num_recommendations` rows of `data`, nearest
        first, never containing the target product. An empty DataFrame (same
        columns as `data`) is returned when the ASIN is unknown or there is
        nothing to recommend, so callers can always rely on `.empty`.
    """
    feature_cols = ['price', 'rank']
    no_results = data.iloc[0:0]

    target = data[data['asin'] == target_asin]
    if target.empty:
        print(f"ASIN {target_asin} not found.")
        return no_results

    # Keep whole rows (not just the feature columns) for every product that
    # has both features. The neighbour positions returned by kneighbors refer
    # to this filtered frame, so they must be looked up in it rather than in
    # `data`.
    has_features = data[feature_cols].notna().all(axis=1)
    candidates = data[has_features]
    target_features = target[feature_cols].dropna().iloc[:1]
    if num_recommendations <= 0 or target_features.empty or len(candidates) < 2:
        return no_results

    # One extra neighbour because the target itself is part of the fitted set;
    # capped at the number of samples, which NearestNeighbors requires.
    n_neighbors = min(num_recommendations + 1, len(candidates))
    nbrs = NearestNeighbors(n_neighbors=n_neighbors, metric='euclidean').fit(candidates[feature_cols])
    _, indices = nbrs.kneighbors(target_features)

    neighbours = candidates.iloc[indices[0]]
    # Drop the target by ASIN instead of assuming it is the first hit: with
    # tied distances another product can come back first.
    return neighbours[neighbours['asin'] != target_asin].head(num_recommendations)

# Demo: look up neighbours for one hard-coded ASIN in the preprocessed frame
# and print whatever recommend_products returns.
example_asin = "0061073717"
recommendations = recommend_products(example_asin, processed_df)
print("Recommendations:")
print(recommendations)


Recommendations:
           asin                                              title  \
50   1467591424                                   Knucklehead Fred   
79   3939888141              Buch der Erinnerung. Juden in Dresden   
95   7538626107  Unique Custom Cast Iron Liner Shader Tattoo Ma...   
8    0967140196  The Change Management Toolkit for Implementati...   
101  8985802089  Shiseido Aquair Shampoo/Conditioner/Spray SET ...   

               brand     rank   price  \
50                    3218451  13.075   
79   Enter The Arena  3213576  13.075   
95                    3206658  13.075   
8      WinHope Press  3193265  13.075   
101                   3187010  13.075   

                                           description  \
50   KnuckleHead Fred is a whimsical, rhyming story...   
79                                                       
95                                                       
8    The Change Management Toolkit is designed as a...   
101                      

In [13]:
############GUI##################
import tkinter as tk
from tkinter import messagebox
import pandas as pd

# Load Data
# Read ASINs as text: if every ASIN in the file happens to be all digits,
# pandas would otherwise infer an integer column, dropping leading zeros
# (e.g. "0061073717") and making the string comparisons against the ASIN
# taken from the result list never match.
processed_df = pd.read_csv("processed_data.csv", dtype={"asin": str})

# Search Function
def search(query, data, key="title"):
    """Return the rows of `data` whose `key` column contains `query`.

    The match is a case-insensitive, literal substring match; rows with a
    missing value in the column never match.

    Args:
        query: Text typed by the user.
        data: pandas DataFrame to search.
        key: Name of the text column to search in (default "title").

    Returns:
        pandas.DataFrame: The matching rows (possibly empty).
    """
    # regex=False: the text comes straight from the search box, so it must be
    # matched literally. Interpreted as a regex, input such as "C++" raises
    # re.error and "(" or "." silently change what is matched.
    matches = data[key].str.contains(str(query), case=False, regex=False, na=False)
    return data[matches]

# Recommendation Function
def recommend(target_asin, data, num_recommendations=5):
    """Return products whose price and rank are closest to the target product.

    Similarity is the Euclidean distance between the ('price', 'rank') pairs.
    NOTE(review): the two features are used unscaled, so the column with the
    larger numeric range dominates the distance.

    Args:
        target_asin: ASIN of the product to find neighbours for.
        data: pandas DataFrame with 'asin', 'price' and 'rank' columns.
        num_recommendations: Maximum number of products to return.

    Returns:
        pandas.DataFrame: Up to `num_recommendations` rows of `data`, nearest
        first, never containing the target product. An empty DataFrame (same
        columns as `data`) is returned when the ASIN is unknown or there is
        nothing to recommend, so callers can always rely on `.empty`.
    """
    feature_cols = ['price', 'rank']
    no_results = data.iloc[0:0]

    target = data[data['asin'] == target_asin]
    if target.empty:
        return no_results

    # Keep whole rows for every product that has both features. The neighbour
    # positions returned by kneighbors refer to this filtered frame, so they
    # must be looked up in it rather than in `data`.
    has_features = data[feature_cols].notna().all(axis=1)
    candidates = data[has_features]
    target_features = target[feature_cols].dropna().iloc[:1]
    if num_recommendations <= 0 or target_features.empty or len(candidates) < 2:
        return no_results

    # One extra neighbour because the target itself is part of the fitted set;
    # capped at the number of samples, which NearestNeighbors requires.
    n_neighbors = min(num_recommendations + 1, len(candidates))
    nbrs = NearestNeighbors(n_neighbors=n_neighbors, metric='euclidean').fit(candidates[feature_cols])
    _, indices = nbrs.kneighbors(target_features)

    neighbours = candidates.iloc[indices[0]]
    # Exclude the target by ASIN instead of assuming it is the first hit: with
    # tied distances another product can come back first.
    return neighbours[neighbours['asin'] != target_asin].head(num_recommendations)

# GUI Implementation
def create_gui():
    """Build and run the Tkinter window for searching products and viewing recommendations.

    Uses the module-level `processed_df` as the data source and the
    module-level `search` and `recommend` functions for the two actions.
    The call blocks in `root.mainloop()` until the window is closed.
    """
    def perform_search():
        """Replace the list box content with the products matching the search box text."""
        query = search_entry.get()
        results = search(query, processed_df)
        result_list.delete(0, tk.END)
        for _, row in results.iterrows():
            # The "<asin>: <title>" format is parsed again in show_recommendations.
            result_list.insert(tk.END, f"{row['asin']}: {row['title']}")

    def show_recommendations():
        """Show recommendations for the product currently selected in the list box."""
        selected = result_list.curselection()
        if not selected:
            messagebox.showwarning("Warning", "Please select a product first.")
            return

        entry_text = result_list.get(selected[0])
        # Everything before the first ":" is the ASIN written by perform_search.
        target_asin = entry_text.partition(":")[0].strip()
        recommendations = recommend(target_asin, processed_df)

        # len() works for both an empty list and an empty DataFrame, so an
        # unknown ASIN yields the "no recommendations" dialog instead of an
        # AttributeError on `.empty`.
        if len(recommendations) == 0:
            messagebox.showinfo("Recommendations", "No recommendations found.")
            return

        messagebox.showinfo(
            "Recommendations",
            "\n".join(f"{row['asin']}: {row['title']}" for _, row in recommendations.iterrows())
        )

    # Create Main Window
    root = tk.Tk()
    root.title("Product Recommendation System")
    root.geometry("600x400")

    # Search Section
    tk.Label(root, text="Search Products:").pack(pady=5)
    search_entry = tk.Entry(root, width=50)
    search_entry.pack(pady=5)
    tk.Button(root, text="Search", command=perform_search).pack(pady=5)

    # Results Listbox
    result_list = tk.Listbox(root, width=80, height=15)
    result_list.pack(pady=10)

    # Recommendation Button
    tk.Button(root, text="Get Recommendations", command=show_recommendations).pack(pady=5)

    root.mainloop()

# Run the GUI. create_gui() ends in root.mainloop(), so this cell keeps
# running until the window is closed.
create_gui()
