<a href="https://colab.research.google.com/github/PodiliSripoojitha/uprod/blob/main/Laptop_Recommendation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

In [None]:
# Read the 'Sheet1' worksheet of the Amazon dataset workbook into a DataFrame.
file_path = "/content/Dataset_Amazon.xlsx"
df = pd.read_excel(io=file_path, sheet_name='Sheet1')
# Bare trailing expression: the notebook renders the frame as this cell's output.
df

Unnamed: 0,url,url.1,ASIN,title,price,rating
0,https://www.amazon.com/s?k=hp+laptops&crid=2H0...,,,,,
1,,,,"Pavilion 15.6"" HD Touchscreen Anti-Glare Lapto...",404.99,
2,,,B0947BJ67M,"14 Laptop, Intel Celeron N4020, 4 GB RAM, 64 G...",177.99,4.1
3,,,B0CKTZVB17,"17 Laptop, 17.3"" HD+ Touchscreen Display, 12th...",840.00,4.2
4,,,B0DYF98C59,"2025 Pavilion 15.6"" Touchscreen Laptop Compute...",599.99,5.0
...,...,...,...,...,...,...
1441,,,B075Y72PHZ,"Laptop Backpack B210, 15.6-Inch Laptop/Tablet,...",16.99,4.6
1442,,,B0DQCXYN3Q,"V-Series V15 Laptop, 15.6"" FHD Display, Intel ...",389.00,4.0
1443,,,B0B1V2KKZC,"ThinkPad P16s (16"" 4K UHD (3840x2160) OLED, AM...",1699.99,4.4
1444,,,B0C9VPR1SY,"Yoga 7i 2-in-1 16"" WUXGA Touchscreen Laptop, I...",598.00,4.0


In [None]:
# Print each column's dtype and non-null count to see how much data is missing
# before any rows are dropped.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1446 entries, 0 to 1445
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   url     92 non-null     object 
 1   url.1   0 non-null      float64
 2   ASIN    1352 non-null   object 
 3   title   1444 non-null   object 
 4   price   1379 non-null   float64
 5   rating  1280 non-null   float64
dtypes: float64(3), object(3)
memory usage: 67.9+ KB


In [None]:
# Data Cleaning
# Built as one non-mutating chain so the cell is safe to re-run: the previous
# in-place drop raised KeyError on a second execution because 'url' and
# 'url.1' had already been removed from `df`.
df = (
    df
    # The URL columns are not part of the feature list used later; ignore them
    # if they are already gone.
    .drop(columns=['url', 'url.1'], errors='ignore')
    # Keep only rows that have all four fields the recommender relies on.
    .dropna(subset=['ASIN', 'title', 'price', 'rating'])
    # Renumber rows 0..n-1 so that row labels coincide with row positions.
    .reset_index(drop=True)
)

In [None]:
# Feature Engineering
# Min-max scale the price column: subtract the smallest price, then divide by
# the price range (max - min).
df['price_scaled'] = df['price'].sub(df['price'].min()).div(df['price'].max() - df['price'].min())
# Divide ratings by 5.0 (presumably the top of the rating scale — TODO confirm).
df['rating_scaled'] = df['rating'].div(5.0)

In [None]:
# Candidate feature columns for the combined text feature. Columns that the
# loaded dataset does not contain are filtered out in the next step, so this
# list may name more columns than the DataFrame actually has.
desired_columns = [
    'title',
    'price_scaled',
    'rating_scaled',
    'display_size',
    'review',
    'processor_type',
    'brand',
    'ram_memory_installed',
    'operating_system',
    'cpu_model_manufacturer',
    'hard_drive_size',
    'human_interface_input',
    'hard_disk_description',
    'special_features',
    'graphics_coprocessor',
    'laptop_weight',
    'battery_life',
]

In [None]:
# Restrict the wish-list to the columns this DataFrame really has.
available_columns = [name for name in desired_columns if name in df.columns]
# Build one text blob per row: blank out missing values, stringify every
# cell, then join the cells of each row with single spaces.
df['combined_features'] = (
    df[available_columns]
    .fillna('')
    .astype(str)
    .apply(lambda row: ' '.join(row), axis=1)
)


In [None]:
# Feature Extraction (TF-IDF Vectorization)
# Fit a TF-IDF vocabulary (English stop words removed) on the combined text
# column. Both `vectorizer` and `X` are reused by recommend_products.
vectorizer = TfidfVectorizer(stop_words='english')
# X holds one TF-IDF row per row of df, in the same order. The fillna('') is a
# defensive no-op: combined_features is produced by a string join, so it
# contains no missing values.
X = vectorizer.fit_transform(df['combined_features'].fillna(''))

In [None]:
# Recommendation Function
# NOTE(review): unfinished draft. It computes `top_indices` but has no return
# statement, so calling it yields None. A later cell defines
# `recommend_products` again with the same signature and replaces this one;
# this cell looks like a leftover that can be deleted.
def recommend_products(product_details, min_price=0, max_price=1000, min_rating=4.0, top_n=5):
    # Vectorise the free-text query with the already-fitted TF-IDF vocabulary.
    query_vector = vectorizer.transform([product_details])
    # Cosine similarity between the query and every row of X, flattened to 1-D.
    similarity_scores = cosine_similarity(query_vector, X).flatten()
    # Keep rows inside the inclusive price range that meet the minimum rating.
    df_filtered = df[(df['price'] >= min_price) & (df['price'] <= max_price) & (df['rating'] >= min_rating)]
    filtered_indices = df_filtered.index
    # Index labels are used to index the score array, i.e. labels are treated as positions.
    top_indices = sorted(filtered_indices, key=lambda i: similarity_scores[i], reverse=True)[:top_n]

In [None]:
    # NOTE(review): orphaned fragment — indented like a function body but
    # sitting alone in its own cell. If the cell executes, it rebinds the
    # notebook-level `available_columns` (previously the feature-column list)
    # to this output-column list. The same statement appears inside the full
    # recommend_products definition further down.
    # Include 'ASIN' in the columns to be selected
    available_columns = ['ASIN', 'title', 'price', 'rating', 'display_size', 'review', 'processor_type', 'brand', 'ram_memory_installed', 'operating_system', 'cpu_model_manufacturer', 'hard_drive_size', 'human_interface_input', 'hard_disk_description', 'special_features', 'graphics_coprocessor', 'laptop_weight', 'battery_life']

In [None]:
    # NOTE(review): orphaned fragment — indented like a function body but
    # sitting alone in its own cell; it reads and rebinds the notebook-level
    # `available_columns`. The same statement appears inside the full
    # recommend_products definition further down.
    # Filter available columns to ensure they exist in the dataframe
    available_columns = [col for col in available_columns if col in df.columns]

In [None]:
# Recommendation Function
def recommend_products(product_details, min_price=0, max_price=1000, min_rating=4.0, top_n=5):
    """Return the products most similar to a free-text description.

    The query is projected into the fitted TF-IDF space, compared with every
    product by cosine similarity, and the best-scoring products that pass the
    price and rating filters are returned.

    Args:
        product_details: Free-text description of the wanted product.
        min_price: Inclusive lower bound on ``price``.
        max_price: Inclusive upper bound on ``price``.
        min_rating: Inclusive lower bound on ``rating``.
        top_n: Maximum number of rows to return. Values below zero are
            treated as zero; ``None`` returns every matching row.

    Returns:
        A DataFrame copy of the selected rows, ordered by descending
        similarity, restricted to the known detail columns present in ``df``,
        plus a ``product_link`` column built from ``ASIN``. It is empty when
        nothing passes the filters.

    Raises:
        KeyError: If ``df`` has no ``ASIN`` column.
    """
    query_vector = vectorizer.transform([product_details])
    similarity_scores = cosine_similarity(query_vector, X).flatten()

    # Work with row *positions* throughout: similarity_scores and X are
    # positional, so this stays correct even if df's index labels are not 0..n-1.
    passes_filters = (
        (df['price'] >= min_price)
        & (df['price'] <= max_price)
        & (df['rating'] >= min_rating)
    )
    candidate_positions = np.flatnonzero(passes_filters.to_numpy())

    # A negative top_n would otherwise slice from the end ("all but the last k").
    limit = None if top_n is None else max(top_n, 0)

    # Stable sort on the negated scores: descending similarity, with ties kept
    # in their original row order.
    ranking = np.argsort(-similarity_scores[candidate_positions], kind='stable')
    top_positions = candidate_positions[ranking[:limit]]

    # Include 'ASIN' in the columns to be selected
    detail_columns = ['ASIN', 'title', 'price', 'rating', 'display_size', 'review', 'processor_type', 'brand', 'ram_memory_installed', 'operating_system', 'cpu_model_manufacturer', 'hard_drive_size', 'human_interface_input', 'hard_disk_description', 'special_features', 'graphics_coprocessor', 'laptop_weight', 'battery_life']

    # Filter available columns to ensure they exist in the dataframe
    available_columns = [col for col in detail_columns if col in df.columns]

    df_recommendations = df.iloc[top_positions][available_columns].copy()
    # Cast to str so a non-string ASIN cannot break the concatenation.
    df_recommendations['product_link'] = "https://www.amazon.com/dp/" + df_recommendations['ASIN'].astype(str)

    return df_recommendations  # Return the recommendations dataframe

In [None]:
# Example usage
product_input = "Touchscreen Laptop 15.6 inch 8GB RAM 1TB Backlit Keyboard Battery 1080P resolution integrated graphic card fingerprint reader windows 11pro operating system i7 CPU model"
price_limit = 1000
rating_limit = 5  # passed as min_rating, so only rows with rating >= 5 are kept
recommendations = recommend_products(product_input, max_price=price_limit, min_rating=rating_limit)
# Leave the frame as the cell's last expression so the notebook renders it as
# a table; print() flattened it to plain text and wrapped the columns into two
# separate blocks.
recommendations

            ASIN                                              title   price  \
1078  B0D4R89CH8  ThinkBook 15 Gen 4 Business Laptop, 15.6" FHD ...  729.99   
580   B0DQVS1HF5  Inspiron 15 Touchscreen Business Laptop | Inte...  699.99   
926   B0DSL82MWZ  Galaxy Book4 AI Business Laptop [windows 11 Pr...  799.00   
469   B0CN3XC5CL  Inspiron 3000 Series 3520 Laptop, 15.6" FHD To...  421.95   
569   B0D96RQD82  Inspiron 16" 7630 Business Convertible 2-in-1 ...  999.99   

      rating                          product_link  
1078     5.0  https://www.amazon.com/dp/B0D4R89CH8  
580      5.0  https://www.amazon.com/dp/B0DQVS1HF5  
926      5.0  https://www.amazon.com/dp/B0DSL82MWZ  
469      5.0  https://www.amazon.com/dp/B0CN3XC5CL  
569      5.0  https://www.amazon.com/dp/B0D96RQD82  


In [None]:
!apt-get install -y xvfb
!pip install pyvirtualdisplay

import os
os.system('Xvfb :1 -screen 0 1600x1200x16 &')
os.environ['DISPLAY'] = ':1'

from pyvirtualdisplay import Display
display = Display(visible=0, size=(1600, 1200))
display.start()

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
xvfb is already the newest version (2:21.1.4-2ubuntu1.7~22.04.14).
0 upgraded, 0 newly installed, 0 to remove and 34 not upgraded.
Collecting pyvirtualdisplay
  Downloading PyVirtualDisplay-3.0-py3-none-any.whl.metadata (943 bytes)
Downloading PyVirtualDisplay-3.0-py3-none-any.whl (15 kB)
Installing collected packages: pyvirtualdisplay
Successfully installed pyvirtualdisplay-3.0


<pyvirtualdisplay.display.Display at 0x7e097bc2a850>

In [None]:
#User Interface

import ipywidgets as widgets
from IPython.display import display, clear_output

# Create input widgets
# Keyword arguments that merely restated the ipywidgets defaults
# (disabled=False, orientation='horizontal', readout=True) are omitted.

# Free-text description of the product the user is looking for.
query_input = widgets.Textarea(
    description='Query:',
    placeholder='Enter your query here...',
    value='',
)

# Inclusive price bounds forwarded to recommend_products.
min_price_input = widgets.FloatText(description='Min Price:', value=0)
max_price_input = widgets.FloatText(description='Max Price:', value=1000)

# Minimum rating from 0.0 to 5.0 in steps of 0.1; the value only updates when
# the slider handle is released (continuous_update=False).
min_rating_input = widgets.FloatSlider(
    description='Min Rating:',
    value=4.0,
    min=0.0,
    max=5.0,
    step=0.1,
    continuous_update=False,
    readout_format='.1f',
)

# Number of recommendations to show.
top_n_input = widgets.IntText(description='Top N:', value=5)

# Area that the button callback clears and writes its results into.
output = widgets.Output()

def on_button_clicked(b):
    """Read the widget values, run the recommender and show the result.

    Everything is rendered inside the ``output`` widget, which is cleared on
    each click. Invalid input and empty results are reported as short
    messages instead of an arbitrary or empty table.

    Args:
        b: The clicked Button instance supplied by ``Button.on_click``; unused.
    """
    with output:
        clear_output()
        product_input = query_input.value.strip()
        min_price = min_price_input.value
        max_price = max_price_input.value
        min_rating = min_rating_input.value
        top_n = top_n_input.value

        # A blank query matches no vocabulary term, so every product would tie
        # and the "recommendations" would just be the first filtered rows.
        if not product_input:
            print("Please enter a query describing the product you are looking for.")
            return
        if min_price > max_price:
            print("Min Price must not be greater than Max Price.")
            return
        if top_n < 1:
            print("Top N must be at least 1.")
            return

        try:
            recommendations = recommend_products(product_input, min_price=min_price, max_price=max_price, min_rating=min_rating, top_n=top_n)
            if recommendations.empty:
                print("No products match the selected price and rating filters.")
            else:
                display(recommendations)
        except Exception as e:
            # Surface the failure in the output area; an exception raised in a
            # widget callback would otherwise not be shown to the user here.
            print(f"An error occurred: {e}")


# Button that triggers a recommendation run; on_button_clicked is called on every click.
button = widgets.Button(description="Recommend Products")
button.on_click(on_button_clicked)


# Render all input controls, the button, and finally the Output area that the
# click handler writes into.
display(query_input, min_price_input, max_price_input, min_rating_input, top_n_input, button, output)



Textarea(value='', description='Query:', placeholder='Enter your query here...')

FloatText(value=0.0, description='Min Price:')

FloatText(value=1000.0, description='Max Price:')

FloatSlider(value=4.0, continuous_update=False, description='Min Rating:', max=5.0, readout_format='.1f')

IntText(value=5, description='Top N:')

Button(description='Recommend Products', style=ButtonStyle())

Output()

In [None]:
# prompt: create a separate website for this recommendation system use tinker

# This code is not compatible with creating a separate website.
# It's designed to run within a Jupyter Notebook or Google Colab environment.

# To create a separate website, you would need to use a web framework like Flask or Django,
# and deploy the application to a web server.  You would need to rewrite the code
# to integrate with such a framework, handle HTTP requests, and render HTML templates.
# The code below remains the same as in the original request, with no modification
# for website deployment.

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import os
from pyvirtualdisplay import Display
import ipywidgets as widgets
from IPython.display import display, clear_output

# ... (rest of your existing code remains unchanged)
