This cell will load and define the initial data.

In [83]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder,StandardScaler
from sklearn.neighbors import NearestNeighbors
from difflib import get_close_matches

In [84]:
# Step 1: Prepare the DataFrame with categorical CPU and GPU values
data_duy = pd.read_csv('laptop_duy.csv',usecols=['Laptop_name','Price(VND)','GPU','CPU','RAM(GB)','Storage(GB)','Screen_size(inches)'])
data_huyen = pd.read_csv('clean_laptop.csv',usecols=['Laptop_name','Price(VND)','GPU','CPU','RAM(GB)','Storage(GB)','Screen_size(inches)'])

# Concatenate DataFrames vertically
data = pd.concat([data_duy, data_huyen])
# Save the DataFrame to a CSV file
data.to_csv('alldata.csv', index=False)

In [85]:
# Convert all values to lowercase
data = data.apply(lambda col: col.map(lambda x: x.lower() if isinstance(x, str) else x))
# Set 'laptop_name' as the index column
data.set_index('Laptop_name', inplace=True)

Label Encoding

In [87]:
# Initialize label encoders
cpu_encoder = LabelEncoder()
gpu_encoder = LabelEncoder()

# Encode CPU and GPU columns
data['CPU'] = cpu_encoder.fit_transform(data['CPU'])
data['GPU'] = gpu_encoder.fit_transform(data['GPU'])

Data Preprocessing

In [88]:
# Step 1: Preprocess the data

features = ['CPU', 'GPU', 'RAM(GB)', 'Storage(GB)', 'Screen_size(inches)','Price(VND)']

# Drop rows with missing values in relevant features
data = data.dropna(subset=features)

scaler = StandardScaler()
data_scaled = scaler.fit_transform(data[features])


In [93]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 927 entries,  laptop hp gaming victus 16-e0170ax 4r0u7pa  to macbook pro 14 inch m2 max 2023
Data columns (total 6 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Price(VND)           927 non-null    float64
 1   GPU                  927 non-null    int64  
 2   CPU                  927 non-null    int64  
 3   RAM(GB)              927 non-null    float64
 4   Storage(GB)          927 non-null    float64
 5   Screen_size(inches)  927 non-null    float64
dtypes: float64(4), int64(2)
memory usage: 83.0+ KB


Model Initialization

In [89]:
k = 5  # Adjustable 'k' value
knn = NearestNeighbors(n_neighbors=k, metric='euclidean')
knn.fit(data_scaled)


User Input

In [None]:
def get_user_preferences():
    print("Enter your preferences for each feature:")
    
    # Collect CPU model name and handle unseen values
    cpu_name = input("CPU (e.g., amd ryzen 7 6800h): ")
    if cpu_name not in cpu_encoder.classes_:
        # Suggest the closest match
        closest_cpu = get_close_matches(cpu_name, cpu_encoder.classes_, n=1)
        if closest_cpu:
            print(f"'{cpu_name}' not found. Did you mean '{closest_cpu[0]}'? Using '{closest_cpu[0]}' as fallback.")
            cpu_name = closest_cpu[0]
        else:
            print(f"Error: '{cpu_name}' not found and no close matches are available.")
            return None, None, None  # Stop and prompt the user to try again
    
    # Collect GPU model name and handle unseen values
    gpu_name = input("Graphic Card (e.g., nvidia geforce rtx 3050): ")
    if gpu_name not in gpu_encoder.classes_:
        # Suggest the closest match
        closest_gpu = get_close_matches(gpu_name, gpu_encoder.classes_, n=1)
        if closest_gpu:
            print(f"'{gpu_name}' not found. Did you mean '{closest_gpu[0]}'? Using '{closest_gpu[0]}' as fallback.")
            gpu_name = closest_gpu[0]
        else:
            print(f"Error: '{gpu_name}' not found and no close matches are available.")
            return None, None, None  # Stop and prompt the user to try again

    # Encode CPU and GPU names using the fitted label encoders
    cpu = cpu_encoder.transform([cpu_name])[0]
    gpu = gpu_encoder.transform([gpu_name])[0]
    
    # Collecting other preferences as before
    ram = int(input("RAM (e.g., 16 for 16 GB): "))
    storage = int(input("Storage (e.g., 512 for 512 GB): "))
    screen_size = float(input("Screen Size (e.g., 13.4 for 13.4 inch): "))
    min_price = float(input("Minimum Price (e.g., 6000000 for 6 mill(VND)): "))
    max_price = float(input("Maximum Price (e.g., 100000000 for 100 mill(VND)): "))
    
    return [[cpu, gpu, ram, storage, screen_size,(min_price+max_price)/2]], min_price, max_price


Recommendation Logic

In [91]:
def recommend_laptops(user_input):
    # Transform user input with the same scaler
    user_input_scaled = scaler.transform(user_input)
    # Find nearest neighbors
    distances, indices = knn.kneighbors(user_input_scaled)
    # Retrieve recommendations
    recommended_laptops = data.iloc[indices[0]].copy()
    recommended_laptops['Distance'] = distances[0]
    # Filter based on price range
    recommended_laptops = recommended_laptops[
        (recommended_laptops['Price'] >= min_price) & (recommended_laptops['Price'] <= max_price)
    ]
    return recommended_laptops


Execution

In [92]:
# Step 6: Retrieve user preferences
user_input, min_price, max_price = get_user_preferences()
if user_input is None:
    print("User input was invalid.")
else:
    # Filter the dataset based on price range
    data_filtered = data[(data['Price(VND)'] >= min_price) & (data['Price(VND)'] <= max_price)]
    if data_filtered.empty:
        print("No laptops found within the specified price range.")
    else:
        # Scale the filtered data
        data_filtered_scaled = scaler.transform(data_filtered[features])

        # Transform and scale the user input
        user_input_scaled = scaler.transform(user_input)

        # Find nearest neighbors
        distances, indices = knn.kneighbors(user_input_scaled)

        # Map the recommended indices back to the original DataFrame
        recommended_indices = indices[0]
        recommended_laptops = data.iloc[recommended_indices]

        # Filter the recommendations based on the price range again
        recommended_laptops = recommended_laptops[(recommended_laptops['Price(VND)'] >= min_price) & (recommended_laptops['Price(VND)'] <= max_price)]
        # Decode the CPU and GPU labels to display the original names
        recommended_laptops['CPU'] = cpu_encoder.inverse_transform(recommended_laptops['CPU'])
        recommended_laptops['GPU'] = gpu_encoder.inverse_transform(recommended_laptops['GPU'])
        
        print("Recommended Laptops with Full Specifications:\n", recommended_laptops[['CPU', 'GPU', 'RAM(GB)', 'Storage(GB)', 'Screen_size(inches)', 'Price(VND)']])
        

Enter your preferences for each feature:
Recommended Laptops with Full Specifications:
                                                              CPU  \
Laptop_name                                                        
laptop ai hp victus 16-s0138ax - 9q985pa      amd ryzen 7 7840hs   
laptop gaming hp victus 16-s0142ax - 9q989pa  amd ryzen 5 7640hs   
laptop ai hp victus 16-s0139ax - 9q986pa      amd ryzen 7 7840hs   
laptop ai hp victus 16-s0141ax - 9q988pa      amd ryzen 7 7840hs   
laptop ai hp victus 16-s0140ax - 9q987pa      amd ryzen 7 7840hs   

                                                                  GPU  \
Laptop_name                                                             
laptop ai hp victus 16-s0138ax - 9q985pa      nvidia geforce rtx 4070   
laptop gaming hp victus 16-s0142ax - 9q989pa  nvidia geforce rtx 4060   
laptop ai hp victus 16-s0139ax - 9q986pa      nvidia geforce rtx 4060   
laptop ai hp victus 16-s0141ax - 9q988pa      nvidia geforce rtx 4050 

