## Importing the neccessary libraries and modules

In [1]:
import streamlit as st
import pandas as pd
import gzip
import json
import matplotlib.pyplot as plt

## Dataset Directory Path

In [2]:
file_path = "Dataset\meta_AMAZON_FASHION.json.gz"

## Function to read the gzipped JSON file

In [3]:
def read_gzipped_json(file_path, num_records=5):
    records = []
    with gzip.open(file_path, 'r') as file:
        for i, line in enumerate(file):
            if i >= num_records:
                break
            record = json.loads(line)
            records.append(record)
    return records

# Reading the first few records
sample_records = read_gzipped_json(file_path)
sample_records

[{'title': 'Slime Time Fall Fest [With CDROM and Collector Cards and Neutron Balls, Incredi-Ball and Glow Stick Necklace, Paper Fram',
  'image': ['https://images-na.ssl-images-amazon.com/images/I/51bSrINiWpL._US40_.jpg'],
  'brand': 'Group Publishing (CO)',
  'feature': ['Product Dimensions:\n                    \n8.7 x 3.6 x 11.4 inches',
   'Shipping Weight:\n                    \n2.4 pounds'],
  'rank': '13,052,976inClothing,Shoesamp;Jewelry(',
  'date': '8.70 inches',
  'asin': '0764443682'},
 {'title': "XCC Qi promise new spider snake preparing men's accessories alloy fittings magnet buckle bracelet jewelry",
  'image': ['https://images-na.ssl-images-amazon.com/images/I/41BCH7-4vHL._US40_.jpg',
   'https://images-na.ssl-images-amazon.com/images/I/41FpbB--SoL._US40_.jpg',
   'https://images-na.ssl-images-amazon.com/images/I/51twQYJo0NL._US40_.jpg'],
  'rank': '11,654,581inClothing,Shoesamp;Jewelry(',
  'date': '5 star',
  'asin': '1291691480'},
 {'description': ['For the professio

## Function to load the entire dataset and preprocess it

In [4]:
def load_and_preprocess_data(file_path):
    records = []
    with gzip.open(file_path, 'r') as file:
        for line in file:
            record = json.loads(line)
            title = record.get('title', '')
            brand = record.get('brand', '')
            price = record.get('price', None)
            asin = record.get('asin', '')
            records.append((title, brand, price, asin))

    # Creating a DataFrame from the extracted records
    df = pd.DataFrame(records, columns=['Title', 'Brand', 'Price', 'ASIN'])

    # Handling missing values
    df['Price'] = pd.to_numeric(df['Price'].str.replace('$', ''), errors='coerce')
    df['Brand'] = df['Brand'].fillna('Unknown')
    
    return df

# Loading and preprocessing the data
product_database = load_and_preprocess_data(file_path)

# Displaying the first few rows of the product database
product_database.head()


## Function to collect shopper preferences

In [None]:
def get_shopper_preferences(df):
    # Getting the top 10 most common brands
    top_brands = df['Brand'].value_counts().nlargest(10).index.tolist()
    top_brands.append('Any')

    print("Select a brand preference:")
    for i, brand in enumerate(top_brands):
        print(f"{i + 1}. {brand}")

    brand_choice = int(input("Enter the number corresponding to your brand preference (or 0 for no preference): ")) - 1
    selected_brand = top_brands[brand_choice] if brand_choice >= 0 else None

    min_price = input("Enter the minimum price (or 'Any' for no preference): ")
    max_price = input("Enter the maximum price (or 'Any' for no preference): ")

    min_price = float(min_price) if min_price.lower() != 'any' else None
    max_price = float(max_price) if max_price.lower() != 'any' else None

    preferences = {
        'Brand': selected_brand,
        'Min_Price': min_price,
        'Max_Price': max_price
    }
    
    return preferences

# NOTE: The following code will ask for user input. Uncomment and run it in a local environment to test the interface.
# shopper_preferences = get_shopper_preferences(product_database)
# shopper_preferences


## Function to recommend products based on shopper preferences

In [None]:
def recommend_products(df, preferences):
    # Filter by brand if specified
    if preferences['Brand'] and preferences['Brand'] != 'Any':
        df = df[df['Brand'] == preferences['Brand']]
    
    # Filter by minimum price if specified
    if preferences['Min_Price']:
        df = df[df['Price'] >= preferences['Min_Price']]
    
    # Filter by maximum price if specified
    if preferences['Max_Price']:
        df = df[df['Price'] <= preferences['Max_Price']]
    
    return df

# Example preferences for testing
example_preferences = {
    'Brand': 'Any',
    'Min_Price': 10,
    'Max_Price': 50
}

# Getting recommendations based on the example preferences
recommended_products_example = recommend_products(product_database, example_preferences)

# Displaying the first few recommended products
recommended_products_example.head()


Unnamed: 0,Title,Brand,Price,ASIN
17,"X. L. Carbon Fiber Money Clip, made in the USA",Roar Carbon,14.99,9654263246
69,Buxton Heiress Pik-Me-Up Framed Case,Buxton,16.95,B00007GDFV
118,Moria Orc Costume TM Mask from Lord of the Rings,,45.46,B0000AOE9U
272,Patriots Reebok Men's NFL Super Bowl XXXVIII L...,,39.99,B0001B97J8
331,Art Nouveau Sterling Silver Ornate Repousse He...,Silver Insanity,44.66,B00023JX9Y
