<a href="https://colab.research.google.com/github/Monali2219/Festival-Data-Analysis-/blob/main/laptop_recommendation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd

# Load the data to inspect its contents
file_path = '/content/amazon_laptop_prices_v01.csv'
laptop_data = pd.read_csv(file_path)

# Display the first few rows of the dataset to understand its structure
laptop_data.head()


Unnamed: 0,brand,model,screen_size,color,harddisk,cpu,ram,OS,special_features,graphics,graphics_coprocessor,cpu_speed,rating,price
0,ROKC,,14 Inches,Blue,1000 GB,Intel Core i7,8 GB,Windows 11,,Integrated,Intel,1.2 GHz,,$589.99
1,HP,,15.6 Inches,Silver,1000 GB,Intel Core i5,64 GB,Windows 11 Pro,Backlit Keyboard,Integrated,Intel,,4.5,$999.99
2,MSI,Vector GP66 12UGS-267,15.66 Inches,Core Black,,Intel Core i9,32 GB,Windows 11 Home,,Dedicated,,1.8 GHz,5.0,"$1,599.00"
3,Apple,MacBook Air,13.3 Inches,Silver,256 GB,Unknown,8 GB,Mac OS,Backlit Keyboard,Integrated,,,4.8,$689.99
4,Apple,MacBook Air,15.3 Inches,Midnight,256 GB,Unknown,8 GB,Mac OS,,Integrated,,,4.8,"$1,144.48"


## **EDA**

In [3]:
# Performing Exploratory Data Analysis (EDA) and preprocessing on the laptop dataset - Monali debug.

# Check for null values, data types, and unique values in each column
eda_report = {
    "Column": laptop_data.columns,
    "Data Type": laptop_data.dtypes,
    "Null Count": laptop_data.isnull().sum(),
    "Unique Values": laptop_data.nunique()
 }
eda_df = pd.DataFrame(eda_report)


# Cleaning and Preprocessing steps
# 1. Convert price and rating columns to appropriate numeric types and handle any NaN values

# Remove '$' and convert 'price' column to float
laptop_data['price'] = laptop_data['price'].replace('[\$,]', '', regex=True).astype(float)

# Convert 'rating' column to float, and replace NaN values with 0 (assuming unrated laptops)
laptop_data['rating'] = pd.to_numeric(laptop_data['rating'], errors='coerce').fillna(0)

# 2. Standardizing text data in RAM and hard disk columns to numerical values for analysis
# Extract numerical values for RAM and harddisk and handle NaNs
laptop_data['ram'] = laptop_data['ram'].str.extract('(\d+)').astype(float).fillna(0)
laptop_data['harddisk'] = laptop_data['harddisk'].str.extract('(\d+)').astype(float).fillna(0)

# 3. Fill or drop remaining NaNs for categorical columns (like brand, OS) as per analysis needs
# For simplicity, replace NaN with 'Unknown' in categorical fields
laptop_data.fillna(value={'brand': 'Unknown', 'model': 'Unknown', 'screen_size': 'Unknown',
                          'color': 'Unknown', 'cpu': 'Unknown', 'OS': 'Unknown',
                          'special_features': 'None', 'graphics': 'Unknown',
                          'graphics_coprocessor': 'Unknown', 'cpu_speed': 'Unknown'}, inplace=True)

# Display cleaned data sample after preprocessing
laptop_data_sample = laptop_data.iloc[4445:4446, 4:7]  # Displaying selected rows for user verification
laptop_data_sample




Unnamed: 0,harddisk,cpu,ram
4445,256.0,Core i5,0.0


In [8]:
# Performing Exploratory Data Analysis (EDA) and preprocessing on the laptop dataset - Asha debug

# Check for null values, data types, and unique values in each column
eda_report = {
    "Column": laptop_data.columns,
    "Data Type": laptop_data.dtypes,
    "Null Count": laptop_data.isnull().sum(),
    "Unique Values": laptop_data.nunique()
}
eda_df = pd.DataFrame(eda_report)


# Cleaning and Preprocessing steps
# 1. Convert price and rating columns to appropriate numeric types and handle any NaN values

# Remove '$' and convert 'price' column to float
laptop_data['price'] = laptop_data['price'].replace('[\$,]', '', regex=True).astype(float)

# Convert 'rating' column to float, and replace NaN values with 0 (assuming unrated laptops)
laptop_data['rating'] = pd.to_numeric(laptop_data['rating'], errors='coerce').fillna(0)

# 2. Standardizing text data in RAM and hard disk columns to numerical values for analysis
# Extract numerical values for RAM and harddisk and handle NaNs
# laptop_data['ram'] = laptop_data['ram'].str.extract('(\d+)').astype(float).fillna(0)
# laptop_data['harddisk'] = laptop_data['harddisk'].str.extract('(\d+)').astype(float).fillna(0)

# 3. Fill or drop remaining NaNs for categorical columns (like brand, OS) as per analysis needs
# For simplicity, replace NaN with 'Unknown' in categorical fields
laptop_data.fillna(value={'brand': 'Unknown', 'model': 'Unknown', 'screen_size': 'Unknown',
                          'color': 'Unknown', 'cpu': 'Unknown', 'OS': 'Unknown',
                          'special_features': 'None', 'graphics': 'Unknown',
                          'graphics_coprocessor': 'Unknown', 'cpu_speed': 'Unknown'}, inplace=True)

# Display cleaned data sample after preprocessing
laptop_data_sample = laptop_data.iloc[4445:4446, 4:7]  # Displaying selected rows for user verification
laptop_data_sample
eda_df

Unnamed: 0,Column,Data Type,Null Count,Unique Values
brand,brand,object,0,50
model,model,object,0,1084
screen_size,screen_size,object,0,36
color,color,object,0,188
harddisk,harddisk,float64,0,32
cpu,cpu,object,0,141
ram,ram,float64,0,18
OS,OS,object,0,49
special_features,special_features,object,0,189
graphics,graphics,object,0,110


In [9]:
laptop_data.head()

Unnamed: 0,brand,model,screen_size,color,harddisk,cpu,ram,OS,special_features,graphics,graphics_coprocessor,cpu_speed,rating,price
0,ROKC,Unknown,14 Inches,Blue,1000.0,Intel Core i7,8.0,Windows 11,,Integrated,Intel,1.2 GHz,0.0,589.99
1,HP,Unknown,15.6 Inches,Silver,1000.0,Intel Core i5,64.0,Windows 11 Pro,Backlit Keyboard,Integrated,Intel,Unknown,4.5,999.99
2,MSI,Vector GP66 12UGS-267,15.66 Inches,Core Black,0.0,Intel Core i9,32.0,Windows 11 Home,,Dedicated,Unknown,1.8 GHz,5.0,1599.0
3,Apple,MacBook Air,13.3 Inches,Silver,256.0,Unknown,8.0,Mac OS,Backlit Keyboard,Integrated,Unknown,Unknown,4.8,689.99
4,Apple,MacBook Air,15.3 Inches,Midnight,256.0,Unknown,8.0,Mac OS,,Integrated,Unknown,Unknown,4.8,1144.48


In [5]:
laptop_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4446 entries, 0 to 4445
Data columns (total 14 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   brand                 4446 non-null   object 
 1   model                 4446 non-null   object 
 2   screen_size           4446 non-null   object 
 3   color                 4446 non-null   object 
 4   harddisk              4446 non-null   float64
 5   cpu                   4446 non-null   object 
 6   ram                   4446 non-null   float64
 7   OS                    4446 non-null   object 
 8   special_features      4446 non-null   object 
 9   graphics              4446 non-null   object 
 10  graphics_coprocessor  4446 non-null   object 
 11  cpu_speed             4446 non-null   object 
 12  rating                4446 non-null   float64
 13  price                 4441 non-null   float64
dtypes: float64(4), object(10)
memory usage: 486.4+ KB


In [6]:
laptop_data.head

In [19]:
# Simple Recommendation App in Python using the laptop dataset - Godson

def recommend_laptops(brand=None, min_ram=None, min_storage=None, max_price=None, min_rating=None):
    """
    Recommend laptops based on user preferences.

    Parameters:
    - brand (str): Preferred laptop brand
    - min_ram (str): Minimum RAM required (e.g., '8 GB')
    - min_storage (str): Minimum storage required (e.g., '256 GB')
    - max_price (float): Maximum price user is willing to pay
    - min_rating (float): Minimum rating required

    Returns:
    - DataFrame: Filtered DataFrame with recommended laptops
    """

    # Convert price and rating columns to appropriate data types for filtering
    laptop_data['price'] = laptop_data['price'].replace('[\$,]', '', regex=True).astype(float)
    laptop_data['rating'] = pd.to_numeric(laptop_data['rating'], errors='coerce')
    print(brand, min_ram, min_storage, max_price, min_rating)
    # Filter based on the user's preferences
    filtered_data = laptop_data.copy()

    # Filter by brand if specified
    if brand:
        filtered_data = filtered_data[filtered_data['brand'].str.contains(brand, case=False, na=False)]

    # Filter by minimum RAM if specified
    if min_ram:
        filtered_data = filtered_data[pd.to_numeric(filtered_data['ram'], error='coerce')>= float(min_ram.split()[0])]

    # Filter by minimum storage if specified
    if min_storage:
        filtered_data = filtered_data[filtered_data['harddisk'].str.extract('(\d+)').astype(float) >= float(min_storage.split()[0])]

    # Filter by maximum price if specified
    if max_price:
        filtered_data = filtered_data[filtered_data['price'] <= max_price]

    # Filter by minimum rating if specified
    if min_rating:
        filtered_data = filtered_data[filtered_data['rating'] >= min_rating]

    # Sort the filtered data by rating and price for better recommendations
    # filtered_data = filtered_data.sort_values(by=['rating', 'price'], ascending=[False, True])
    print(filtered_data)
    # Display the results to the user
   # import ace_tools as tools; tools.display_dataframe_to_user(name="Recommended Laptops", dataframe=filtered_data)
    return filtered_data
eda_df



Unnamed: 0,Column,Data Type,Null Count,Unique Values
brand,brand,object,0,50
model,model,object,0,1084
screen_size,screen_size,object,0,36
color,color,object,0,188
harddisk,harddisk,float64,0,32
cpu,cpu,object,0,141
ram,ram,float64,0,18
OS,OS,object,0,49
special_features,special_features,object,0,189
graphics,graphics,object,0,110


In [20]:
# Simple Recommendation App in Python using the laptop dataset - Godson

def recommend_laptops(brand=None, min_ram=None, min_storage=None, max_price=None, min_rating=None):
    """
    Recommend laptops based on user preferences.

    Parameters:
    - brand (str): Preferred laptop brand
    - min_ram (str): Minimum RAM required (e.g., '8 GB')
    - min_storage (str): Minimum storage required (e.g., '256 GB')
    - max_price (float): Maximum price user is willing to pay
    - min_rating (float): Minimum rating required

    Returns:
    - DataFrame: Filtered DataFrame with recommended laptops
    """


In [24]:
# Example usage of the recommendation function
recommend_laptops(brand='Apple', min_ram='8 GB',min_storage='256 GB', max_price=1500, min_rating=4.5)

In [16]:
# Using the recommendation function to filter laptops under $1000 with good ratings (e.g., rating >= 4.0)

# Setting criteria for budget-friendly and highly-rated laptops
recommend_laptops(max_price=1000, min_rating=4.0)


None None None 1000 4.0
      brand          model  screen_size       color  harddisk            cpu  \
1        HP        Unknown  15.6 Inches      Silver    1000.0  Intel Core i5   
3     Apple    MacBook Air  13.3 Inches      Silver     256.0        Unknown   
5      Acer  A315-24P-R7VH  15.6 Inches      Silver     128.0        Ryzen 3   
6     Apple    MacBook Pro  13.3 Inches  Space Gray     256.0        Unknown   
7      Acer      CB315-3HT  15.6 Inches      Silver      64.0  Celeron N4020   
...     ...            ...          ...         ...       ...            ...   
4414     HP        Unknown  15.6 Inches       Black    1000.0  Pentium N5000   
4421     HP        Unknown  15.6 Inches      Silver    1000.0  Intel Core i5   
4434   ROKC        Unknown  15.6 Inches        Blue    1152.0        Pentium   
4435     HP        Unknown    14 Inches   Rose Gold      64.0  Celeron N4000   
4436     HP        Unknown  15.6 Inches       Black    1000.0  Pentium N5000   

       ram     

Unnamed: 0,brand,model,screen_size,color,harddisk,cpu,ram,OS,special_features,graphics,graphics_coprocessor,cpu_speed,rating,price
1,HP,Unknown,15.6 Inches,Silver,1000.0,Intel Core i5,64.0,Windows 11 Pro,Backlit Keyboard,Integrated,Intel,Unknown,4.5,999.99
3,Apple,MacBook Air,13.3 Inches,Silver,256.0,Unknown,8.0,Mac OS,Backlit Keyboard,Integrated,Unknown,Unknown,4.8,689.99
5,Acer,A315-24P-R7VH,15.6 Inches,Silver,128.0,Ryzen 3,8.0,Windows 11 S,Backlit Keyboard,Integrated,Unknown,Unknown,4.5,299.99
6,Apple,MacBook Pro,13.3 Inches,Space Gray,256.0,Unknown,8.0,Mac OS,Backlit Keyboard,Integrated,Unknown,Unknown,4.7,965.08
7,Acer,CB315-3HT,15.6 Inches,Silver,64.0,Celeron N4020,4.0,Chrome OS,Support Stylus,Integrated,Unknown,Unknown,4.4,239.99
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4414,HP,Unknown,15.6 Inches,Black,1000.0,Pentium N5000,16.0,Windows 11,,Integrated,Intel,1.1 GHz,4.4,459.99
4421,HP,Unknown,15.6 Inches,Silver,1000.0,Intel Core i5,64.0,Windows 11 Pro,Backlit Keyboard,Integrated,Intel,Unknown,4.5,999.99
4434,ROKC,Unknown,15.6 Inches,Blue,1152.0,Pentium,20.0,Windows 11,,Integrated,Intel,1.1 GHz,5.0,389.99
4435,HP,Unknown,14 Inches,Rose Gold,64.0,Celeron N4000,16.0,Windows 11,,Integrated,Intel,1.1 GHz,4.7,389.99


In [25]:
# prompt: why is the output of recommend_laptops empty dataframe

def recommend_laptops(brand=None, min_ram=None, min_storage=None, max_price=None, min_rating=None):
  """
  Recommend laptops based on user preferences.

  Parameters:
  - brand (str): Preferred laptop brand
  - min_ram (str): Minimum RAM required (e.g., '8 GB')
  - min_storage (str): Minimum storage required (e.g., '256 GB')
  - max_price (float): Maximum price user is willing to pay
  - min_rating (float): Minimum rating required

  Returns:
  - DataFrame: Filtered DataFrame with recommended laptops
  """

# Convert price and rating columns to appropriate data types for filtering
#laptop_data['price'] = pd.to_numeric(laptop_data['price'], errors='coerce')
#laptop_data['rating'] = pd.to_numeric(laptop_data['rating'], errors='coerce')

  # Filter based on the user's preferences
  filtered_data = laptop_data.copy()

  # Filter by brand if specified
  if brand:
    filtered_data = filtered_data[filtered_data['brand'].str.contains(brand, case=False, na=False)]

  # # Filter by minimum RAM if specified
  if min_ram:
    filtered_data =  filtered_data[pd.to_numeric(filtered_data['ram'], errors='coerce') >= float(min_ram.split()[0])]

  # # Filter by minimum storage if specified
  if min_storage:
    filtered_data = filtered_data[pd.to_numeric(filtered_data['harddisk'], errors='coerce') >= float(min_storage.split()[0])]

  # # Filter by maximum price if specified
  if max_price:
    filtered_data = filtered_data[filtered_data['price'] <= max_price]

  # # Filter by minimum rating if specified
  if min_rating:
    filtered_data = filtered_data[filtered_data['rating'] >= min_rating]

  # Sort the filtered data by rating and price for better recommendations
  filtered_data = filtered_data.sort_values(by=['rating', 'price'], ascending=[False, True])

  return filtered_data

# Example usage of the recommendation function
recommend_laptops(brand='Apple', min_ram='8 GB',min_storage='256 GB', max_price=1500, min_rating=4.5)

Unnamed: 0,brand,model,screen_size,color,harddisk,cpu,ram,OS,special_features,graphics,graphics_coprocessor,cpu_speed,rating,price
111,Apple,"2022 Apple MacBook Air M2, 16GB RAM, 256GB Sto...",13.6 Inches,Unknown,256.0,Apple M1,16.0,macOS 12 Monterey,,Integrated,Unknown,3.4,5.0,1255.94
3,Apple,MacBook Air,13.3 Inches,Silver,256.0,Unknown,8.0,Mac OS,Backlit Keyboard,Integrated,Unknown,Unknown,4.8,689.99
4,Apple,MacBook Air,15.3 Inches,Midnight,256.0,Unknown,8.0,Mac OS,,Integrated,Unknown,Unknown,4.8,1144.48
86,Apple,"2022 Apple MacBook Air M2, 16GB RAM, 512GB Sto...",13.6 Inches,Unknown,512.0,Apple M1,16.0,macOS 12 Monterey,Fingerprint Reader,Integrated,Apple Integrated Graphics,Unknown,4.8,1459.94
6,Apple,MacBook Pro,13.3 Inches,Space Gray,256.0,Unknown,8.0,Mac OS,Backlit Keyboard,Integrated,Unknown,Unknown,4.7,965.08
95,Apple,MacBook Pro,13 Inches,Unknown,256.0,Apple M1,16.0,macOS 12 Monterey,,Integrated,Unknown,3.4 GHz,4.7,1457.99
