In [1]:
pip install beautifulsoup4 requests fake-useragent

Defaulting to user installation because normal site-packages is not writeable
Collecting fake-useragent
  Downloading fake_useragent-2.2.0-py3-none-any.whl.metadata (17 kB)
Downloading fake_useragent-2.2.0-py3-none-any.whl (161 kB)
Installing collected packages: fake-useragent
Successfully installed fake-useragent-2.2.0
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [13]:
import pandas as pd
import re

# Load dataset
df = pd.read_csv('smartphones_cleaned.csv')

# View basic info
print(df.shape)
print(df.columns)
print(df.head())

# Example: Clean and normalize price column (remove currency symbols, commas, convert to int)
def clean_price(price):
    if pd.isnull(price):
        return None
    price = str(price)
    price = re.sub(r'[^\d]', '', price)  # Remove anything not digit
    return int(price) if price else None

df['price_clean'] = df['price'].apply(clean_price)

# Normalize RAM column (e.g. '6GB' -> 6)
def clean_ram(ram):
    if pd.isnull(ram):
        return None
    ram = str(ram).upper()
    match = re.search(r'(\d+)', ram)
    return int(match.group(1)) if match else None

df['ram_clean'] = df['ram'].apply(clean_ram)

# Similarly, clean storage, battery, camera specs as needed

print(df[['price', 'price_clean', 'ram', 'ram_clean']].head())

(1020, 11)
Index(['model', 'price', 'rating', 'sim', 'processor', 'ram', 'battery',
       'display', 'camera', 'card', 'os'],
      dtype='object')
                       model   price  rating  \
0              OnePlus 11 5G  54,999    89.0   
1  OnePlus Nord CE 2 Lite 5G  19,989    81.0   
2      Samsung Galaxy A14 5G  16,499    75.0   
3       Motorola Moto G62 5G  14,999    81.0   
4         Realme 10 Pro Plus  24,999    82.0   

                                       sim  \
0  Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC   
1       Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi   
2       Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi   
3       Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi   
4       Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi   

                                         processor                        ram  \
0  Snapdragon 8 Gen2, Octa Core, 3.2 GHz Processor  12 GB RAM, 256 GB inbuilt   
1     Snapdragon 695, Octa Core, 2.2 GHz Processor   6 GB RAM, 128 GB inbuilt   
2        Exynos 1330, Octa Core, 2.4 GHz P

In [None]:
print(df.isnull().sum())

model            0
price            0
rating         141
sim              0
processor        0
ram              0
battery          0
display          0
camera           1
card             7
os              17
price_clean      0
ram_clean        1
dtype: int64


In [14]:
# Fill missing rating with "N/A" or average
df['rating'] = df['rating'].fillna('N/A')

# Fill missing camera, card, os with "Unknown"
df['camera'] = df['camera'].fillna('Unknown')
df['card'] = df['card'].fillna('Unknown')
df['os'] = df['os'].fillna('Unknown')

# Fill missing ram_clean with median RAM value
df['ram_clean'] = df['ram_clean'].fillna(df['ram_clean'].median())

# Final check
print(df.isnull().sum())


model          0
price          0
rating         0
sim            0
processor      0
ram            0
battery        0
display        0
camera         0
card           0
os             0
price_clean    0
ram_clean      0
dtype: int64


In [22]:
import json
import re
import spacy

# Load spaCy English model
nlp = spacy.load("en_core_web_sm")

# Load your phone data JSON file
with open('phones_data_formatted.json', 'r', encoding='utf-8') as f:
    phones = json.load(f)  # List of phone dicts

def extract_filters(text):
    doc = nlp(text.lower())
    
    # Initialize filters with None or default values
    filters = {
        "price_max": None,
        "battery_min": None,
        "camera_min": None,
        "ram_min": None,
        "os": None,
        "brand": None
    }
    
    # Extract price max (e.g. "under 30k", "below 40000")
    price_patterns = [r'under\s*₹?(\d+[kK]?)', r'below\s*₹?(\d+[kK]?)', r'max(?:imum)?\s*₹?(\d+[kK]?)']
    for pattern in price_patterns:
        match = re.search(pattern, text)
        if match:
            val = match.group(1)
            val = val.lower().replace('k','000')
            filters['price_max'] = int(val)
            break
    
    # Extract battery min (e.g. "battery at least 4000 mAh")
    battery_match = re.search(r'battery.*?(\d{3,5})\s*mAh', text)
    if battery_match:
        filters['battery_min'] = int(battery_match.group(1))
    
    # Extract camera min (e.g. "camera 12 MP")
    camera_match = re.search(r'camera.*?(\d{2,3})\s*mp', text)
    if camera_match:
        filters['camera_min'] = int(camera_match.group(1))
    
    # Extract RAM min (e.g. "at least 6 GB RAM")
    ram_match = re.search(r'(\d{1,2})\s*gb\s*ram', text)
    if ram_match:
        filters['ram_min'] = int(ram_match.group(1))
    
    # Extract OS keywords (Android, iOS)
    if 'android' in text:
        filters['os'] = 'android'
    elif 'ios' in text or 'iphone' in text:
        filters['os'] = 'ios'
    
    # Extract brand (simple check for common brands)
    brands = ['oneplus', 'samsung', 'apple', 'realme', 'xiaomi', 'vivo', 'oppo', 'motorola', 'google']
    for brand in brands:
        if brand in text:
            filters['brand'] = brand
            break
    
    return filters

def phone_matches(phone, filters):
    # price check
    price = int(phone['price'].replace(',', ''))
    if filters['price_max'] and price > filters['price_max']:
        return False
    
    # battery check (extract numeric from "5000 mAh Battery with 100W Fast Charging")
    battery_val = int(re.search(r'(\d+)', phone['battery']).group(1))
    if filters['battery_min'] and battery_val < filters['battery_min']:
        return False
    
    # camera check (take highest MP from e.g. "50 MP + 48 MP + 32 MP Triple Rear")
    mp_list = re.findall(r'(\d{2,3})\s*MP', phone['camera'].upper())
    if mp_list:
        max_camera = max(int(mp) for mp in mp_list)
        if filters['camera_min'] and max_camera < filters['camera_min']:
            return False
    
    # ram check (extract RAM from "12 GB RAM, 256 GB inbuilt")
    ram_match = re.search(r'(\d+)\s*GB\s*RAM', phone['ram'].upper())
    if ram_match:
        ram_val = int(ram_match.group(1))
        if filters['ram_min'] and ram_val < filters['ram_min']:
            return False
    
    # os check (simple lowercase comparison)
    if filters['os'] and filters['os'] not in phone['os'].lower():
        return False
    
    # brand check
    if filters['brand'] and filters['brand'] not in phone['model'].lower():
        return False
    
    return True

def recommend_phones(user_input):
    filters = extract_filters(user_input)
    matched = [phone for phone in phones if phone_matches(phone, filters)]
    
    if not matched:
        return "Sorry, no phones found matching your criteria."
    
    # Return top 3 sorted by rating
    matched_sorted = sorted(matched, key=lambda x: x['rating'], reverse=True)[:3]
    response = "Here are some phones I recommend:\n"
    for phone in matched_sorted:
        response += f"- {phone['model']} priced at ₹{phone['price']}, Rating: {phone['rating']}\n"
    return response

# Example usage:
while True:
    query = input("Ask me about phones: ")
    if query.lower() in ['exit', 'quit']:
        break
    print(recommend_phones(query))


AttributeError: 'NoneType' object has no attribute 'group'