In [51]:
import spacy
import pandas as pd
import difflib

# Load the laptop catalogue from CSV. The rest of this file reads the
# 'Price_euros', 'Ram' and 'Gpu' columns of this DataFrame.
laptops_df = pd.read_csv("laptops_cleaned.csv")
# Load the spaCy "en_core_web_lg" pipeline once at module level;
# preprocess_text() uses it to tokenize the user's request.
nlp = spacy.load("en_core_web_lg")

# Tokens (lower-cased) that start a GPU phrase.
_GPU_VENDORS = ("nvidia", "amd", "intel")
# Tokens (lower-cased) that end a GPU phrase.
_GPU_PHRASE_STOPS = (".", "and", "gpu", ",")


def _parse_number(text):
    """Return *text* as a float, or None if it is not a plain numeral."""
    try:
        # Thousands separators ("1,200") are accepted; number words ("ten")
        # and fractions ("1/2") are not and yield None.
        return float(text.replace(",", ""))
    except ValueError:
        return None


def preprocess_text(text):
    """Extract price, RAM and GPU hints from a free-text laptop request.

    The text is tokenized with the module-level spaCy pipeline ``nlp``.

    * price -- the number that follows a ``$`` token (last one wins).
    * ram   -- the number that precedes a ``GB`` token (last one wins),
      truncated to an int.
    * gpu   -- the words starting at a vendor token (nvidia/amd/intel) up to,
      but excluding, the next ``.``, ``,``, ``and`` or ``gpu`` token. When
      several vendor phrases occur, the first one that is closed by the
      word ``gpu`` is preferred; otherwise the first phrase is used.

    Args:
        text: The user's request as a string.

    Returns:
        A ``(price, ram, gpu)`` tuple. ``price`` is a float or None, ``ram``
        is an int or None, and ``gpu`` is a string (empty when no vendor
        token was found).
    """
    doc = nlp(text)
    price = None
    ram = None
    gpu_phrases = []      # list of (phrase, closed_by_gpu_keyword)
    consumed_upto = 0     # index just past the last scanned GPU phrase

    for i, token in enumerate(doc):
        lowered = token.text.lower()

        if lowered == "$":
            # The amount is the token right after the currency sign.
            if i + 1 < len(doc) and doc[i + 1].like_num:
                value = _parse_number(doc[i + 1].text)
                if value is not None:
                    price = value
        elif lowered == "gb":
            # The amount is the token right before the unit, so the guard
            # must look backwards (i > 0), not forwards.
            if i > 0 and doc[i - 1].like_num:
                value = _parse_number(doc[i - 1].text)
                if value is not None:
                    ram = int(value)

        # A vendor token inside an already-scanned phrase must not start a
        # second, overlapping phrase.
        if lowered in _GPU_VENDORS and i >= consumed_upto:
            words = [token.text]
            closed_by_gpu = False
            j = i + 1
            while j < len(doc):
                following = doc[j].text
                if following.lower() in _GPU_PHRASE_STOPS:
                    closed_by_gpu = following.lower() == "gpu"
                    break
                words.append(following)
                j += 1
            consumed_upto = j
            gpu_phrases.append((" ".join(words), closed_by_gpu))

    # Prefer the phrase the user explicitly labelled as a GPU, so that e.g.
    # an Intel CPU mention does not get mixed into the GPU name.
    gpu = next((phrase for phrase, closed in gpu_phrases if closed), None)
    if gpu is None:
        gpu = gpu_phrases[0][0] if gpu_phrases else ""
    return price, ram, gpu

def recommend_laptop(price, ram, gpu):
    """Recommend the cheapest laptop matching a price, RAM and GPU request.

    Filters the module-level ``laptops_df`` and returns the cheapest
    remaining row.

    GPU matching is resolved from the ``gpu`` argument itself (not from any
    module-level state):

    1. an exact match against the GPU names in the dataset, else
    2. fuzzy matches from ``find_similar_gpu`` on the full phrase, else
    3. fuzzy matches on the first word only (typically the vendor).

    Args:
        price: Maximum price (compared with 'Price_euros'), or None to skip
            the price filter.
        ram: Minimum RAM (compared with 'Ram'), or None to skip the RAM
            filter.
        gpu: Requested GPU name; empty/None/blank skips the GPU filter.

    Returns:
        The cheapest matching row as a pandas Series, or the string
        "No laptops match the specified criteria." when nothing matches.
    """
    candidates = laptops_df
    # Compare against None so that an explicit 0 is still treated as a
    # constraint instead of being silently ignored.
    if price is not None:
        candidates = candidates[candidates['Price_euros'] <= price]
    if ram is not None:
        candidates = candidates[candidates['Ram'] >= ram]

    # A blank string has no words to match on, so treat it as "no GPU given".
    if gpu and gpu.strip():
        known_gpus = laptops_df['Gpu'].dropna().unique().tolist()
        if gpu in known_gpus:
            matching_gpus = [gpu]
        else:
            matching_gpus = find_similar_gpu(gpu, known_gpus)
            if not matching_gpus:
                # Fall back to the first word only; find_similar_gpu applies
                # its most relaxed cutoff to single-word queries.
                matching_gpus = find_similar_gpu(gpu.split()[0], known_gpus)
        candidates = candidates[candidates['Gpu'].isin(matching_gpus)]

    # Number of laptops left after filtering (diagnostic output).
    print(len(candidates))

    if candidates.empty:
        return "No laptops match the specified criteria."
    # Cheapest match first.
    return candidates.sort_values(by='Price_euros').iloc[0]

# Example request used to exercise the pipeline: parse it into the
# (price, ram, gpu) values that are later passed to recommend_laptop().
sample_text = "I want a laptop with a ram of 1GB, around $1200 and AMD Radeon 520 gpu"
sample_price, sample_ram, sample_gpu = preprocess_text(sample_text)

def find_similar_gpu(user_gpu, gpus_list, threshold=0.8):
    """Find GPUs in the list that are similar to the user-specified GPU.

    Similarity is the case-insensitive ``difflib.SequenceMatcher`` ratio
    between ``user_gpu`` and each candidate. Short queries cannot score high
    against full product names, so the required ratio is relaxed for them:

    * one-word query  -> ``min(threshold, 0.4)``
    * two-word query  -> ``min(threshold, 0.6)``
    * anything else   -> ``threshold``

    With the default ``threshold`` of 0.8 this yields cutoffs of
    0.4 / 0.6 / 0.8.

    Args:
        user_gpu: GPU name (or fragment) requested by the user.
        gpus_list: Iterable of known GPU names. Non-string entries (for
            example NaN from a pandas column) are skipped.
        threshold: Minimum similarity ratio for queries of three or more
            words, and an upper bound on the relaxed cutoffs above.

    Returns:
        A list of the entries of ``gpus_list`` that meet the cutoff, in their
        original order.
    """
    word_count = len(user_gpu.split())
    if word_count == 1:
        cutoff = min(threshold, 0.4)
    elif word_count == 2:
        cutoff = min(threshold, 0.6)
    else:
        cutoff = threshold

    query = user_gpu.lower()  # lower-case once, not once per candidate
    similar_gpus = []
    for gpu in gpus_list:
        if not isinstance(gpu, str):
            continue
        ratio = difflib.SequenceMatcher(None, query, gpu.lower()).ratio()
        if ratio >= cutoff:
            similar_gpus.append(gpu)
    return similar_gpus

# Every distinct GPU name that appears in the dataset.
gpus_list = laptops_df['Gpu'].unique().tolist()
print(gpus_list)

# An exact hit is used as-is; otherwise fall back to fuzzy matching.
similar_gpus = (
    [sample_gpu]
    if sample_gpu in gpus_list
    else find_similar_gpu(sample_gpu, gpus_list)
)
print("\n", similar_gpus)

# Recommend a laptop for the sample request and show the result.
recommendation = recommend_laptop(sample_price, sample_ram, sample_gpu)
print(recommendation)

['Intel Iris Plus Graphics 640', 'Intel HD Graphics 6000', 'Intel HD Graphics 620', 'AMD Radeon Pro 455', 'Intel Iris Plus Graphics 650', 'AMD Radeon R5', 'Intel Iris Pro Graphics', 'Nvidia GeForce MX150', 'Intel UHD Graphics 620', 'Intel HD Graphics 520', 'AMD Radeon Pro 555', 'AMD Radeon R5 M430', 'Intel HD Graphics 615', 'AMD Radeon Pro 560', 'Nvidia GeForce 940MX', 'Intel HD Graphics 400', 'Nvidia GeForce GTX 1050', 'AMD Radeon R2', 'AMD Radeon 530', 'Nvidia GeForce 930MX', 'Intel HD Graphics', 'Intel HD Graphics 500', 'Nvidia GeForce 930MX ', 'Nvidia GeForce GTX 1060', 'Nvidia GeForce 150MX', 'Intel Iris Graphics 540', 'AMD Radeon RX 580', 'Nvidia GeForce 920MX', 'AMD Radeon R4 Graphics', 'AMD Radeon 520', 'Nvidia GeForce GTX 1070', 'Nvidia GeForce GTX 1050 Ti', 'Nvidia GeForce MX130', 'AMD R4 Graphics', 'Nvidia GeForce GTX 940MX', 'AMD Radeon RX 560', 'Nvidia GeForce 920M', 'AMD Radeon R7 M445', 'AMD Radeon RX 550', 'Nvidia GeForce GTX 1050M', 'Intel HD Graphics 515', 'AMD Radeon