# Realme Mobiles

In [103]:
import requests
from bs4 import BeautifulSoup as bs
import pandas as pd
import os
import numpy as np
from itertools import zip_longest

In [104]:
def bs4_soup(pages, timeout=30):
    """Fetch one Flipkart search-results page for realme mobiles and parse it.

    Args:
        pages: Page number substituted into the ``page`` query parameter.
        timeout: Seconds to wait for the server before ``requests`` gives up
            and raises. Without a timeout a stalled connection would block
            the whole scrape indefinitely.

    Returns:
        A ``(soup, URL)`` tuple. ``soup`` is the ``div`` with class
        ``_1YokD2 _3Mn1Gg`` taken from the parsed page, or ``None`` when the
        page does not contain that element. ``URL`` is the address that was
        requested.
    """
    URL = f'https://www.flipkart.com/search?q=realme+mobiles&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off&page={pages}'
    response = requests.get(URL, timeout=timeout)
    page = bs(response.text, 'html.parser')
    # find() gives None when the container is missing; callers must check.
    soup = page.find('div', class_='_1YokD2 _3Mn1Gg')

    return soup, URL

In [105]:
def mob_specs(soup):
    """Extract the listing-level fields of every product on a results page.

    Args:
        soup: Parsed results container as returned by ``bs4_soup``. May be
            ``None`` when the page had no results container.

    Returns:
        A 4-tuple of lists ``(mob_names, mob_ratings, mob_price,
        mob_img_URL)``. Each list is collected independently by CSS class, so
        the lists are not guaranteed to have equal lengths. All four are
        empty when ``soup`` is ``None``.
    """
    if soup is None:
        return [], [], [], []

    mob_names = [tag.text for tag in soup.find_all('div', class_='_4rR01T')]
    mob_ratings = [tag.text for tag in soup.find_all('div', class_='_3LWZlK')]
    mob_price = [tag.text for tag in soup.find_all('div', class_='_30jeq3 _1_WHN1')]

    mob_img_URL = []
    for holder in soup.find_all('div', class_='CXW8mj'):
        img = holder.find('img')
        # .get() keeps an <img> without a src attribute from raising KeyError.
        mob_img_URL.append(img.get('src') if img is not None else None)

    return mob_names, mob_ratings, mob_price, mob_img_URL


In [106]:
def _matching_spec_tables(mobile_specs_soup, section_markers, required_tokens):
    """Return the table text of every spec section that matches both filters.

    A section qualifies when its full text contains every string in
    ``section_markers`` and the whitespace-split text of its ``_14cfVK`` table
    contains every word in ``required_tokens``. Sections without such a table
    are skipped.
    """
    tables = []
    for product_sections in mobile_specs_soup:
        for section in product_sections:
            section_text = str(section.text)
            if not all(marker in section_text for marker in section_markers):
                continue
            table = section.find('table', class_='_14cfVK')
            if table is None:
                continue
            table_text = table.text
            tokens = table_text.split()
            if all(token in tokens for token in required_tokens):
                tables.append(table_text)
    return tables


def features_from_mob_links(soup, timeout=30):
    """Follow every product link on a results page and collect spec tables.

    Args:
        soup: Parsed results container as returned by ``bs4_soup``. May be
            ``None`` when the page had no results container.
        timeout: Seconds to wait for each product page before ``requests``
            gives up and raises.

    Returns:
        A 7-tuple ``(mobile_specs_soup, storage_ram, os_processor, camera,
        display, network, battery)``. ``mobile_specs_soup`` holds, per product
        page, the list of ``_3k-BhJ`` spec sections (an empty list when the
        product page had no ``_1YokD2 _2GoDe3`` container). The six remaining
        lists hold table text flattened across all products, so a product
        that lacks a section contributes no entry to that list. When ``soup``
        is ``None`` the result is ``(None, [], [], [], [], [], [])``.
    """
    if soup is None:
        return None, [], [], [], [], [], []

    mobile_links = ['https://www.flipkart.com' + anchor['href'] for anchor in soup.find_all('a', class_='_1fQZEK')]
    mobile_req = [requests.get(url=link, timeout=timeout) for link in mobile_links]
    mobile_soup = [bs(response.text, 'html.parser').find('div', class_='_1YokD2 _2GoDe3') for response in mobile_req]
    # A product page without the expected container yields no sections
    # instead of raising AttributeError on None.
    mobile_specs_soup = [page.find_all('div', class_='_3k-BhJ') if page is not None else [] for page in mobile_soup]

    storage_ram_cross_checked = _matching_spec_tables(mobile_specs_soup, ('Memory', 'Storage'), ('Internal',))
    os_processor_cross_checked = _matching_spec_tables(mobile_specs_soup, ('Os', 'Processor'), ('Operating',))
    # Both words are now really required: the earlier form
    # `'Primary' and 'Camera' in tokens` only ever tested the second word.
    camera_cross_checked = _matching_spec_tables(mobile_specs_soup, ('Camera', 'FeaturesPrimary'), ('Primary', 'Camera'))
    display_cross_checked = _matching_spec_tables(mobile_specs_soup, ('Display', 'FeaturesDisplay'), ('Display', 'cm'))
    network_cross_checked = _matching_spec_tables(mobile_specs_soup, ('Connectivity', 'FeaturesNetwork'), ('Network',))
    battery_cross_checked = _matching_spec_tables(mobile_specs_soup, ('Battery', 'FeaturesBattery'), ('Battery',))

    return mobile_specs_soup, storage_ram_cross_checked, os_processor_cross_checked, camera_cross_checked, display_cross_checked, network_cross_checked, battery_cross_checked


In [107]:
def fill_nan_values_func(mob_names, mob_ratings, mob_price, mob_img_URL, storage_ram, os_processor, camera, display, network, battery):
    """Zip the ten column lists into row tuples, padding short ones with NaN.

    Returns a list with one 10-tuple per row; its length equals that of the
    longest input, and positions missing from shorter inputs hold ``np.nan``.
    """
    column_lists = (
        mob_names, mob_ratings, mob_price, mob_img_URL, storage_ram,
        os_processor, camera, display, network, battery,
    )
    return list(zip_longest(*column_lists, fillvalue=np.nan))
    

In [108]:
def print_len(mob_names, mob_ratings, mob_price, mob_img_URL, storage_ram, os_processor, camera, display, network, battery):
    """Print the number of entries in each scraped column, one line each."""
    labelled_columns = (
        ("mob_names", mob_names),
        ("mob_ratings", mob_ratings),
        ("mob_price", mob_price),
        ("mob_img_URL", mob_img_URL),
        ("storage_ram", storage_ram),
        ("os_processor", os_processor),
        ("camera", camera),
        ("display", display),
        ("network", network),
        ("battery", battery),
    )
    for label, values in labelled_columns:
        print(f"Length of {label}: {len(values)}")

In [None]:
# Column labels in the same order in which fill_nan_values_func zips the
# lists (storage_ram, os_processor, camera, ...), so that each label sits
# over the data it names.
columns = ['name', 'ratings', 'price', 'imgURL', 'storage_ram',
           'oS_Processor', 'camera', 'display', 'network', 'battery']

df = pd.DataFrame()

page_start = 1

for pages in range(page_start, 20):

    soup, URL = bs4_soup(pages=pages)

    # Check the page before handing it to the extractors: they dereference
    # soup immediately, so a later check would never be reached.
    if soup is None:
        print(f"Skipping page {pages} due to empty mobile_specs_soup or soup.")
        continue

    mob_names, mob_ratings, mob_price, mob_img_URL = mob_specs(soup)
    mobile_specs_soup, storage_ram, os_processor, camera, display, network, battery = features_from_mob_links(soup=soup)

    if mobile_specs_soup is None:
        print(f"Skipping page {pages} due to empty mobile_specs_soup or soup.")
        continue

    data = fill_nan_values_func(mob_names, mob_ratings, mob_price, mob_img_URL, storage_ram, os_processor, camera, display, network, battery)

    print_len(mob_names, mob_ratings, mob_price, mob_img_URL, storage_ram, os_processor, camera, display, network, battery)

    page_df = pd.DataFrame(data=data, columns=columns)

    if page_df.empty:
        continue

    df = pd.concat(objs=[df, page_df], ignore_index=True)
    print(f'scraping data from page:{pages}')
    print(f'working in {URL} Page')
    # sample() raises when asked for more rows than exist, so cap the preview
    # size for pages that yield a single row.
    print(f'page df : page_shape : {page_df.shape} \n {page_df.sample(min(2, len(page_df)))}')
    print(f'df : df shape : {df.shape} \n {df.sample(min(2, len(df)))}')
    print()


In [120]:
# Show the current working directory, switch to the project root, then show
# the directory again to confirm the change.
# NOTE(review): the absolute path is specific to one machine.
print(os.getcwd())
os.chdir(r'd:\vscode_machineLearning\BEST_PROJECTS\mobileRecommenderSystem')
print(os.getcwd())

d:\vscode_machineLearning\BEST_PROJECTS\mobileRecommenderSystem\notebook\data_Ingestion\scraping
d:\vscode_machineLearning\BEST_PROJECTS\mobileRecommenderSystem


In [121]:
# Write the accumulated frame to CSV without the index column. The path is
# relative, so it resolves against the current working directory.
df.to_csv(r'data\raw_data\realme_mobile_data.csv',index=False)

## Resolving the Error

In [8]:
soup, URL = bs4_soup(pages=4)

In [9]:
mob_names, mob_ratings, mob_price, mob_img_URL = mob_specs(soup)

In [18]:
soup

<div class="_1YokD2 _3Mn1Gg" style="flex-grow:1;overflow:auto"><div class="_1YokD2 _2GoDe3 col-12-12" style="background-color:#ffffff;align-items:flex-end"><div class="_1AtVbE" style="flex-grow:1;overflow:auto"><div class="W_R1IA"><div class="_1MR4o5"><div class="_3GIHBu"><a class="_2whKao" href="/">Home</a><svg class="_39X-Og" height="27" viewbox="0 0 16 27" width="16" xmlns="http://www.w3.org/2000/svg"><path class="DpXnhQ" d="M16 23.207L6.11 13.161 16 3.093 12.955 0 0 13.161l12.955 13.161z" fill="#fff"></path></svg></div><div class="_3GIHBu"><a class="_2whKao" href="/mobiles-accessories/pr?sid=tyy&amp;marketplace=FLIPKART">Mobiles &amp; Accessories</a><svg class="_39X-Og" height="27" viewbox="0 0 16 27" width="16" xmlns="http://www.w3.org/2000/svg"><path class="DpXnhQ" d="M16 23.207L6.11 13.161 16 3.093 12.955 0 0 13.161l12.955 13.161z" fill="#fff"></path></svg></div><div class="_3GIHBu"><a class="_2whKao" href="/mobiles/pr?sid=tyy,4io&amp;marketplace=FLIPKART">Mobiles</a><svg class=

In [19]:
# Inline copy of the first steps of features_from_mob_links, kept at cell
# level so the intermediate results can be inspected:
# build absolute product URLs, download each page, parse it down to its
# '_1YokD2 _2GoDe3' container, then collect the '_3k-BhJ' spec sections.
mobile_links = ['https://www.flipkart.com' + i['href'] for i in soup.find_all('a', class_='_1fQZEK')]
mobile_req = [requests.get(url=i) for i in mobile_links]
mobile_soup = [bs(i.text, 'html.parser').find('div', class_='_1YokD2 _2GoDe3') for i in mobile_req]
mobile_specs_soup = [i.find_all('div', class_='_3k-BhJ') for i in mobile_soup]

## features_from_mob_links Method

In [20]:
mobile_specs_soup

[[<div class="_3k-BhJ"><div class="flxcaE">General</div><table class="_14cfVK"><tbody><tr class="_1s_Smc row"><td class="_1hKmbr col col-3-12">In The Box</td><td class="URwL2w col col-9-12"><ul><li class="_21lJbe">Handset, Adapter, USB Cable, Important Info Booklet with Warranty Card, Quick Guide, Sim Card Tool</li></ul></td></tr><tr class="_1s_Smc row"><td class="_1hKmbr col col-3-12">Model Number</td><td class="URwL2w col col-9-12"><ul><li class="_21lJbe">RMX3627</li></ul></td></tr><tr class="_1s_Smc row"><td class="_1hKmbr col col-3-12">Model Name</td><td class="URwL2w col col-9-12"><ul><li class="_21lJbe">C33 2023</li></ul></td></tr><tr class="_1s_Smc row"><td class="_1hKmbr col col-3-12">Color</td><td class="URwL2w col col-9-12"><ul><li class="_21lJbe">Aqua Blue</li></ul></td></tr><tr class="_1s_Smc row"><td class="_1hKmbr col col-3-12">Browse Type</td><td class="URwL2w col col-9-12"><ul><li class="_21lJbe">Smartphones</li></ul></td></tr><tr class="_1s_Smc row"><td class="_1hKmbr 

In [44]:
# Collect the spec-table text of every section whose text mentions
# 'Memory', 'Storage' and 'Features'.
# list.append returns None, so the print call below emits "None" once per
# matching section; the collected text itself ends up in lst.
lst = []
for mob in mobile_specs_soup:
    for spc in mob:
        if 'Memory' in str(spc.text) and 'Storage' in str(spc.text) and 'Features' in str(spc.text) :
            # for i in spc:
            #     print(i.text)
            print(lst.append(spc.find('table', class_='_14cfVK').text))


None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None


In [66]:
# Shallow copy of the collected table texts, displayed for inspection.
rmove = list(lst)
rmove

['Internal Storage64 GBRAM4 GBExpandable Storage1 TBSupported Memory Card TypeMicroSDMemory Card Slot TypeDedicated Slot',
 'Internal Storage64 GBRAM4 GBExpandable Storage256 GBMemory Card Slot TypeDedicated Slot',
 'Internal Storage64 GBRAM4 GBExpandable Storage1 TBSupported Memory Card TypeMicroSDMemory Card Slot TypeDedicated Slot',
 'Internal Storage64 GBRAM4 GBExpandable Storage1 TBSupported Memory Card TypeMicroSDMemory Card Slot TypeDedicated Slot',
 'Internal Storage64 GBRAM4 GBExpandable Storage256 GBSupported Memory Card TypemicroSDMemory Card Slot TypeDedicated Slot',
 'Internal Storage64 GBRAM4 GBExpandable Storage1 TBSupported Memory Card TypeMicroSDMemory Card Slot TypeDedicated Slot',
 'SmartphoneYesTouchscreen TypeCapacitiveSIM SizeNano SimUser InterfaceRealme UI 2.0 (Based on Android 11)Graphics PPI401 PPISensorsMagnetic Induction Sensor, Light Sensor, Proximity Sensor, Acceleration SensorRingtones FormatOGGOther FeaturesGoogle Lens, Dual Speakers, 33 W Charging Power,

In [59]:
# Print the whitespace-split tokens of every entry that contains 'Internal'
# as a standalone token.
for i in lst:
    splitted = i.split()
    if 'Internal' in splitted:
        print(splitted)

['Internal', 'Storage64', 'GBRAM4', 'GBExpandable', 'Storage1', 'TBSupported', 'Memory', 'Card', 'TypeMicroSDMemory', 'Card', 'Slot', 'TypeDedicated', 'Slot']
['Internal', 'Storage64', 'GBRAM4', 'GBExpandable', 'Storage256', 'GBMemory', 'Card', 'Slot', 'TypeDedicated', 'Slot']
['Internal', 'Storage64', 'GBRAM4', 'GBExpandable', 'Storage1', 'TBSupported', 'Memory', 'Card', 'TypeMicroSDMemory', 'Card', 'Slot', 'TypeDedicated', 'Slot']
['Internal', 'Storage64', 'GBRAM4', 'GBExpandable', 'Storage1', 'TBSupported', 'Memory', 'Card', 'TypeMicroSDMemory', 'Card', 'Slot', 'TypeDedicated', 'Slot']
['Internal', 'Storage64', 'GBRAM4', 'GBExpandable', 'Storage256', 'GBSupported', 'Memory', 'Card', 'TypemicroSDMemory', 'Card', 'Slot', 'TypeDedicated', 'Slot']
['Internal', 'Storage64', 'GBRAM4', 'GBExpandable', 'Storage1', 'TBSupported', 'Memory', 'Card', 'TypeMicroSDMemory', 'Card', 'Slot', 'TypeDedicated', 'Slot']
['Internal', 'Storage128', 'GBRAM4', 'GBExpandable', 'Storage1', 'TBSupported', 'Mem

In [75]:
# Table text of every spec section, across all products, whose text mentions
# both 'Memory' and 'Storage'.
ls = [specs_.find('table', class_='_14cfVK').text if specs_ else None for mob_specs in mobile_specs_soup for specs_ in mob_specs if 'Memory' in str(specs_.text) and 'Storage' in str(specs_.text)]


In [None]:
# One flattened list per spec category. A section is selected when its text
# contains both marker strings; the second marker is the section heading's
# last word fused with the first word of its table (e.g. 'FeaturesPrimary').
camera = [specs_.find('table', class_='_14cfVK').text if specs_ else None for mob_specs in mobile_specs_soup for specs_ in mob_specs if 'Camera' in str(specs_.text) and 'FeaturesPrimary' in str(specs_.text)]
display = [specs_.find('table', class_='_14cfVK').text if specs_ else None for mob_specs in mobile_specs_soup for specs_ in mob_specs if 'Display' in str(specs_.text) and 'FeaturesDisplay' in str(specs_.text)]
network = [specs_.find('table', class_='_14cfVK').text if specs_ else None for mob_specs in mobile_specs_soup for specs_ in mob_specs if 'Connectivity' in str(specs_.text) and 'FeaturesNetwork' in str(specs_.text)]
battery = [specs_.find('table', class_='_14cfVK').text if specs_ else None for mob_specs in mobile_specs_soup for specs_ in mob_specs if 'Battery' in str(specs_.text) and 'FeaturesBattery' in str(specs_.text)]


In [91]:
battery

['Battery Capacity5000 mAh',
 'Battery Capacity5000 mAh',
 'Battery Capacity5000 mAh',
 'Battery Capacity5000 mAh',
 'Battery Capacity6000 mAh',
 'Battery Capacity5000 mAh',
 'Battery Capacity5000 mAh',
 'Battery Capacity5000 mAh',
 'Battery Capacity5000 mAh',
 'Battery Capacity5000 mAhBattery TypeLi-ion',
 'Battery Capacity5000 mAh',
 'Battery Capacity5000 mAh',
 'Battery Capacity6000 mAh',
 'Battery Capacity6000 mAh',
 'Battery Capacity5000 mAh',
 'Battery Capacity5000 mAh',
 'Battery Capacity5000 mAh',
 'Battery Capacity5000 mAh',
 'Battery Capacity5000 mAh',
 'Battery Capacity4000 mAhTalk Time15hrs',
 'Battery Capacity5000 mAh',
 'Battery Capacity5000 mAh',
 'Battery Capacity4300 mAh',
 'Battery Capacity4300 mAh']

In [92]:
# Keep only the entries that contain 'Battery' as a standalone
# whitespace-separated token, then display them.
batteryss = [i for i in battery if 'Battery' in i.split()]
batteryss

['Battery Capacity5000 mAh',
 'Battery Capacity5000 mAh',
 'Battery Capacity5000 mAh',
 'Battery Capacity5000 mAh',
 'Battery Capacity6000 mAh',
 'Battery Capacity5000 mAh',
 'Battery Capacity5000 mAh',
 'Battery Capacity5000 mAh',
 'Battery Capacity5000 mAh',
 'Battery Capacity5000 mAhBattery TypeLi-ion',
 'Battery Capacity5000 mAh',
 'Battery Capacity5000 mAh',
 'Battery Capacity6000 mAh',
 'Battery Capacity6000 mAh',
 'Battery Capacity5000 mAh',
 'Battery Capacity5000 mAh',
 'Battery Capacity5000 mAh',
 'Battery Capacity5000 mAh',
 'Battery Capacity5000 mAh',
 'Battery Capacity4000 mAhTalk Time15hrs',
 'Battery Capacity5000 mAh',
 'Battery Capacity5000 mAh',
 'Battery Capacity4300 mAh',
 'Battery Capacity4300 mAh']

In [88]:
network

['Network Type4G VOLTE, 4G, 3G, 2GSupported Networks4G VoLTE, 4G LTE, WCDMA, GSMInternet Connectivity4G, 3G, Wi-FiPre-installed BrowserGoogle ChromeMicro USB VersionUSB 2.0Bluetooth SupportYesBluetooth Versionv5.0Wi-Fi Version802.11 b/g/n (2.4 GHz)Wi-Fi HotspotYesInfraredNoUSB ConnectivityYesAudio Jack3.5mmMap SupportGoogle MapsGPS SupportYes',
 'Network Type4G VOLTE, 4G, 3G, 2GSupported Networks4G VoLTE, 4G LTE, WCDMA, GSMInternet Connectivity4G, 3G, Wi-Fi, EDGE, GPRS3GYesGPRSYesPre-installed BrowserGoogle ChromeBluetooth SupportYesBluetooth Versionv4.2Wi-Fi VersionIEEE 802.11 b/g/n (WiFi 4)Wi-Fi HotspotYesUSB ConnectivityYesEDGEYesAudio Jack3.5mmMap SupportGoogle MapsGPS SupportYes',
 'Network Type4G VOLTE, 4G, 3G, 2GSupported Networks4G VoLTE, 4G LTE, WCDMA, GSMInternet Connectivity4G, 3G, Wi-FiPre-installed BrowserGoogle ChromeMicro USB VersionUSB 2.0Bluetooth SupportYesBluetooth Versionv5.0Wi-Fi Version802.11 b/g/n (2.4 GHz)Wi-Fi HotspotYesInfraredNoUSB ConnectivityYesAudio Jack3.

In [90]:
# Keep only the entries that contain 'Network' as a standalone
# whitespace-separated token, then display them.
networkss = [i for i in network if 'Network' in i.split()]
networkss

['Network Type4G VOLTE, 4G, 3G, 2GSupported Networks4G VoLTE, 4G LTE, WCDMA, GSMInternet Connectivity4G, 3G, Wi-FiPre-installed BrowserGoogle ChromeMicro USB VersionUSB 2.0Bluetooth SupportYesBluetooth Versionv5.0Wi-Fi Version802.11 b/g/n (2.4 GHz)Wi-Fi HotspotYesInfraredNoUSB ConnectivityYesAudio Jack3.5mmMap SupportGoogle MapsGPS SupportYes',
 'Network Type4G VOLTE, 4G, 3G, 2GSupported Networks4G VoLTE, 4G LTE, WCDMA, GSMInternet Connectivity4G, 3G, Wi-Fi, EDGE, GPRS3GYesGPRSYesPre-installed BrowserGoogle ChromeBluetooth SupportYesBluetooth Versionv4.2Wi-Fi VersionIEEE 802.11 b/g/n (WiFi 4)Wi-Fi HotspotYesUSB ConnectivityYesEDGEYesAudio Jack3.5mmMap SupportGoogle MapsGPS SupportYes',
 'Network Type4G VOLTE, 4G, 3G, 2GSupported Networks4G VoLTE, 4G LTE, WCDMA, GSMInternet Connectivity4G, 3G, Wi-FiPre-installed BrowserGoogle ChromeMicro USB VersionUSB 2.0Bluetooth SupportYesBluetooth Versionv5.0Wi-Fi Version802.11 b/g/n (2.4 GHz)Wi-Fi HotspotYesInfraredNoUSB ConnectivityYesAudio Jack3.

In [87]:
# Keep only the entries whose whitespace-split tokens include both 'Display'
# and 'cm'. Writing `'Display' and 'cm' in tokens` would test 'cm' alone,
# because a non-empty string literal is always truthy.
displayss = [i for i in display if {'Display', 'cm'} <= set(i.split())]
displayss

['Display Size16.51 cm (6.5 inch)Resolution1600 x 720 PixelResolution TypeHD+GPUARM Mali G57Display TypeHD+ LCD DisplayDisplay Colors16.7MOther Display FeaturesRefresh Rate: 60 Hz, Aspect Ratio: 20:9, Screen-to-Body Ratio: 88.7%, Screen Contrast: 1500:1, Maximum Brightness: 400 nits, Color Saturation: 70%, Sunlight Screen Support',
 'Display Size16.51 cm (6.5 inch)Resolution1600 x 720 PixelsResolution TypeHD+GPUIMG 8322Display TypeHD+ LCD In-cellDisplay Colors16.7MOther Display Features60 Hz Refresh Rate, 20:09 Aspect Ratio, 89.50% Screen-to-Body Ratio, 120 Hz Touch Sampling Rate, Brightness: 360 nits (Min), 400nits (Typ), Color Saturation: NTSC 70% (Typ), Sunlight Screen Support, COG Sealing Process',
 'Display Size16.51 cm (6.5 inch)Resolution1600 x 720 PixelResolution TypeHD+GPUARM Mali G57Display TypeHD+ LCD DisplayDisplay Colors16.7MOther Display FeaturesRefresh Rate: 60 Hz, Aspect Ratio: 20:9, Screen-to-Body Ratio: 88.7%, Screen Contrast: 1500:1, Maximum Brightness: 400 nits, Col

In [85]:
# Keep only the entries whose whitespace-split tokens include both 'Primary'
# and 'Camera'. Writing `'Primary' and 'Camera' in tokens` would test
# 'Camera' alone, because a non-empty string literal is always truthy.
camerass = [i for i in camera if {'Primary', 'Camera'} <= set(i.split())]
camerass

['Primary Camera AvailableYesPrimary Camera50MP + 0.3MPPrimary Camera FeaturesDual Camera Setup: 50MP (Four-in-One Output: 12.5MP) Main Camera (Hynix Hi-5021S, f/1.8 Aperture, FOV: 79 Degree, Focal Length: 25.11 mm, 5P Lens, 1/2.55 inch Sensor Size, 07um Pixel Size, PDAF) + 0.3MP (BYD BF20A1CS, f/2.8 Aperture, FOV: 50 Degree, Focal Length: 42.28 mm, 2P Lens, 1/10 inch, 2.2um Pixel Size, Fixed Focus), CMOS, Camera Feature: 50MP Mode, Beauty, Filter, HDR, Panoramic View, Portrait, Timelapse, Expert, Super Night, 20 Continous ShootingSecondary Camera AvailableYesSecondary Camera5MP Front CameraSecondary Camera Features5MP Camera Setup: (SmartSens SC501CS-CRMNN00, f/2.2 Aperture, FOV: 77 Degree, 1/5 inch Sensor Size, 1.12um Pixel Size, 3P Lens, Fixed Focus), CMOS, Camera Feature: Beauty, Filter, HDRFlashRear: Single LED | Front: Screen FlashHD RecordingYesFull HD RecordingYesVideo RecordingYesVideo Recording ResolutionRear Camera: 1080p (at 30 fps), 720p (at 30 fps), 480p (at 30 fps) | Fro

In [82]:
# Table text of every section mentioning both 'Os' and 'Processor', narrowed
# to the entries that contain 'Operating' as a standalone token.
os_processor = [specs_.find('table', class_='_14cfVK').text if specs_ else None for mob_specs in mobile_specs_soup for specs_ in mob_specs if 'Os' in str(specs_.text) and 'Processor' in str(specs_.text)]
os_processorsss = [i for i in os_processor if 'Operating' in i.split()]
os_processorsss

['Operating SystemAndroid 12Processor TypeUnisoc T612Processor CoreOcta CorePrimary Clock Speed1.82 GHzSecondary Clock Speed1.82 GHzOperating Frequency2G GSM: 850/900/1800/1900 MHz, 3G WCDMA: B1/B5/B8, 4G FDD-LTE: B1/B3/B5/B8, 4G TD-LTE: B38/B40/B41 (2535-2655MHz)',
 'Operating SystemAndroid 11Processor TypeOcta-coreProcessor CoreOcta CorePrimary Clock Speed1.6 GHzSecondary Clock Speed1.2 GHzOperating Frequency2G GSM: 850/900/1800/1900 MHz, 3G WCDMA: 850/900/2100 MHz, 4G FDD-LTE: B1/B3/B5/B8, 4G TD-LTE: B38/B40/B41 (2535 - 2655 MHz)',
 'Operating SystemAndroid 12Processor TypeUnisoc T612Processor CoreOcta CorePrimary Clock Speed1.82 GHzSecondary Clock Speed1.82 GHzOperating Frequency2G GSM: 850/900/1800/1900 MHz, 3G WCDMA: B1/B5/B8, 4G FDD-LTE: B1/B3/B5/B8, 4G TD-LTE: B38/B40/B41 (2535-2655MHz)',
 'Operating SystemAndroid 12Processor TypeUnisoc T612Processor CoreOcta CorePrimary Clock Speed1.82 GHzSecondary Clock Speed1.82 GHzOperating Frequency2G GSM: 850/900/1800/1900 MHz, 3G WCDMA: 

In [80]:
# Drop the entries of ls that do not contain 'Internal' as a standalone
# token, then display the remainder.
lsss = [i for i in ls if 'Internal' in i.split()]
lsss

['Internal Storage64 GBRAM4 GBExpandable Storage1 TBSupported Memory Card TypeMicroSDMemory Card Slot TypeDedicated Slot',
 'Internal Storage64 GBRAM4 GBExpandable Storage256 GBMemory Card Slot TypeDedicated Slot',
 'Internal Storage64 GBRAM4 GBExpandable Storage1 TBSupported Memory Card TypeMicroSDMemory Card Slot TypeDedicated Slot',
 'Internal Storage64 GBRAM4 GBExpandable Storage1 TBSupported Memory Card TypeMicroSDMemory Card Slot TypeDedicated Slot',
 'Internal Storage64 GBRAM4 GBExpandable Storage256 GBSupported Memory Card TypemicroSDMemory Card Slot TypeDedicated Slot',
 'Internal Storage64 GBRAM4 GBExpandable Storage1 TBSupported Memory Card TypeMicroSDMemory Card Slot TypeDedicated Slot',
 'Internal Storage128 GBRAM4 GBExpandable Storage1 TBSupported Memory Card TypeMicroSDMemory Card Slot TypeDedicated Slot',
 'Internal Storage256 GBRAM12 GB',
 'Internal Storage128 GBRAM8 GB',
 'Internal Storage64 GBRAM4 GBExpandable Storage512 GBMemory Card Slot TypeDedicated Slot',
 'Inte

In [10]:
mobile_specs_soup, storage_ram, os_processor, camera, display, network, battery = features_from_mob_links(soup)


In [12]:
# Report how many values each scraped column produced for this page.
for label, values in (
    ("mob_names", mob_names),
    ("mob_ratings", mob_ratings),
    ("mob_price", mob_price),
    ("mob_img_URL", mob_img_URL),
    ("storage_ram", storage_ram),
    ("os_processor", os_processor),
    ("camera", camera),
    ("display", display),
    ("network", network),
    ("battery", battery),
):
    print(f"Length of {label}: {len(values)}")

Length of mob_names: 24
Length of mob_ratings: 24
Length of mob_price: 24
Length of mob_img_URL: 24
Length of storage_ram: 26
Length of os_processor: 24
Length of camera: 24
Length of display: 24
Length of network: 24
Length of battery: 24


In [13]:
storage_ram

['Internal Storage64 GBRAM4 GBExpandable Storage1 TBSupported Memory Card TypeMicroSDMemory Card Slot TypeDedicated Slot',
 'Internal Storage64 GBRAM4 GBExpandable Storage256 GBMemory Card Slot TypeDedicated Slot',
 'Internal Storage64 GBRAM4 GBExpandable Storage1 TBSupported Memory Card TypeMicroSDMemory Card Slot TypeDedicated Slot',
 'Internal Storage64 GBRAM4 GBExpandable Storage1 TBSupported Memory Card TypeMicroSDMemory Card Slot TypeDedicated Slot',
 'Internal Storage64 GBRAM4 GBExpandable Storage256 GBSupported Memory Card TypemicroSDMemory Card Slot TypeDedicated Slot',
 'Internal Storage64 GBRAM4 GBExpandable Storage1 TBSupported Memory Card TypeMicroSDMemory Card Slot TypeDedicated Slot',
 'SmartphoneYesTouchscreen TypeCapacitiveSIM SizeNano SimUser InterfaceRealme UI 2.0 (Based on Android 11)Graphics PPI401 PPISensorsMagnetic Induction Sensor, Light Sensor, Proximity Sensor, Acceleration SensorRingtones FormatOGGOther FeaturesGoogle Lens, Dual Speakers, 33 W Charging Power,

In [17]:
# Preview the zipped rows as a frame; no column names are passed, so the
# columns are numbered 0-9.
pd.DataFrame(fill_nan_values_func(mob_names, mob_ratings, mob_price, mob_img_URL, storage_ram, os_processor, camera, display, network, battery))

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,"realme C33 2023 (Aqua Blue, 64 GB)",4.4,"₹9,999",https://rukminim2.flixcart.com/image/312/312/x...,Internal Storage64 GBRAM4 GBExpandable Storage...,Operating SystemAndroid 12Processor TypeUnisoc...,Primary Camera AvailableYesPrimary Camera50MP ...,Display Size16.51 cm (6.5 inch)Resolution1600 ...,"Network Type4G VOLTE, 4G, 3G, 2GSupported Netw...",Battery Capacity5000 mAh
1,"realme C11 2021 (Cool Blue, 64 GB)",4.3,"₹8,999",https://rukminim2.flixcart.com/image/312/312/k...,Internal Storage64 GBRAM4 GBExpandable Storage...,Operating SystemAndroid 11Processor TypeOcta-c...,Primary Camera AvailableYesPrimary Camera8MP R...,Display Size16.51 cm (6.5 inch)Resolution1600 ...,"Network Type4G VOLTE, 4G, 3G, 2GSupported Netw...",Battery Capacity5000 mAh
2,"realme C33 (Aqua Blue, 64 GB)",4.4,"₹9,999",https://rukminim2.flixcart.com/image/312/312/x...,Internal Storage64 GBRAM4 GBExpandable Storage...,Operating SystemAndroid 12Processor TypeUnisoc...,Primary Camera AvailableYesPrimary Camera50MP ...,Display Size16.51 cm (6.5 inch)Resolution1600 ...,"Network Type4G VOLTE, 4G, 3G, 2GSupported Netw...",Battery Capacity5000 mAh
3,"realme C33 (Night Sea, 64 GB)",4.4,"₹9,999",https://rukminim2.flixcart.com/image/312/312/x...,Internal Storage64 GBRAM4 GBExpandable Storage...,Operating SystemAndroid 12Processor TypeUnisoc...,Primary Camera AvailableYesPrimary Camera50MP ...,Display Size16.51 cm (6.5 inch)Resolution1600 ...,"Network Type4G VOLTE, 4G, 3G, 2GSupported Netw...",Battery Capacity5000 mAh
4,"realme Narzo 50A (Oxygen Blue, 64 GB)",4.4,"₹11,498",https://rukminim2.flixcart.com/image/312/312/k...,Internal Storage64 GBRAM4 GBExpandable Storage...,Operating SystemAndroid 11Processor TypeMediaT...,Primary Camera AvailableYesPrimary Camera50MP ...,Display Size16.51 cm (6.5 inch)Resolution1600 ...,"Network Type4G VOLTE, 4G, 3G, 2GSupported Netw...",Battery Capacity6000 mAh
5,"realme 9i (Prism Blue, 64 GB)",4.5,"₹11,999",https://rukminim2.flixcart.com/image/312/312/k...,Internal Storage64 GBRAM4 GBExpandable Storage...,Operating SystemAndroid 11Processor TypeQualco...,Primary Camera AvailableYesPrimary Camera50MP ...,Display Size16.76 cm (6.6 inch)Resolution2412 ...,"Network Type4G VOLTE, 4G, 3G, 2GSupported Netw...",Battery Capacity5000 mAh
6,"realme C35 (Glowing Black, 128 GB)",4.4,"₹10,699",https://rukminim2.flixcart.com/image/312/312/l...,SmartphoneYesTouchscreen TypeCapacitiveSIM Siz...,Operating SystemAndroid 11Processor TypeUnisoc...,Primary Camera AvailableYesPrimary Camera50MP ...,Display Size16.76 cm (6.6 inch)Resolution2408 ...,"Network Type4G VOLTE, 4G, 3G, 2GSupported Netw...",Battery Capacity5000 mAh
7,"realme GT NEO 2 (NEO Black, 256 GB)",4.4,"₹35,999",https://rukminim2.flixcart.com/image/312/312/l...,Internal Storage128 GBRAM4 GBExpandable Storag...,Operating SystemAndroid 11Processor TypeQualco...,Primary Camera AvailableYesPrimary Camera64MP ...,Display Size16.81 cm (6.62 inch)Resolution2400...,"Network Type5G, 4G VOLTE, 4G, 3G, 2GSupported ...",Battery Capacity5000 mAh
8,"realme GT NEO 2 (NEO Black, 128 GB)",4.4,"₹31,999",https://rukminim2.flixcart.com/image/312/312/l...,Internal Storage256 GBRAM12 GB,Operating SystemAndroid 11Processor TypeQualco...,Primary Camera AvailableYesPrimary Camera64MP ...,Display Size16.81 cm (6.62 inch)Resolution2400...,"Network Type5G, 4G VOLTE, 4G, 3G, 2GSupported ...",Battery Capacity5000 mAh
9,"realme Narzo 50A Prime (Flash Black, 64 GB)",4.1,"₹12,799",https://rukminim2.flixcart.com/image/312/312/x...,Internal Storage128 GBRAM8 GB,Operating SystemAndroid 11Processor CoreOcta C...,Primary Camera AvailableYesPrimary Camera50MP ...,Display Size16.76 cm (6.6 inch)Resolution2408 ...,Network Type4G VOLTESupported Networks4G VoLTE...,Battery Capacity5000 mAhBattery TypeLi-ion


In [14]:
# Rebinds lst to a zip_longest iterator over the ten columns; shorter
# columns are padded with NaN. The iterator can be consumed only once.
lst = zip_longest(mob_names, mob_ratings, mob_price, mob_img_URL, storage_ram, os_processor, camera, display, network, battery,fillvalue=np.nan)

In [15]:
# Materialise the row tuples for display.
list(lst)

[('realme C33 2023 (Aqua Blue, 64 GB)',
  '4.4',
  '₹9,999',
  'https://rukminim2.flixcart.com/image/312/312/xif0q/mobile/9/z/o/-original-imaghuf9guqmb65z.jpeg?q=70',
  'Internal Storage64 GBRAM4 GBExpandable Storage1 TBSupported Memory Card TypeMicroSDMemory Card Slot TypeDedicated Slot',
  'Operating SystemAndroid 12Processor TypeUnisoc T612Processor CoreOcta CorePrimary Clock Speed1.82 GHzSecondary Clock Speed1.82 GHzOperating Frequency2G GSM: 850/900/1800/1900 MHz, 3G WCDMA: B1/B5/B8, 4G FDD-LTE: B1/B3/B5/B8, 4G TD-LTE: B38/B40/B41 (2535-2655MHz)',
  'Primary Camera AvailableYesPrimary Camera50MP + 0.3MPPrimary Camera FeaturesDual Camera Setup: 50MP (Four-in-One Output: 12.5MP) Main Camera (Hynix Hi-5021S, f/1.8 Aperture, FOV: 79 Degree, Focal Length: 25.11 mm, 5P Lens, 1/2.55 inch Sensor Size, 07um Pixel Size, PDAF) + 0.3MP (BYD BF20A1CS, f/2.8 Aperture, FOV: 50 Degree, Focal Length: 42.28 mm, 2P Lens, 1/10 inch, 2.2um Pixel Size, Fixed Focus), CMOS, Camera Feature: 50MP Mode, Bea

In [None]:
# pd.DataFrame(data=data)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,"realme 10 Pro+ 5G (Dark Matter, 128 GB)",4.4,"₹24,999",https://rukminim2.flixcart.com/image/312/312/x...,Internal Storage128 GBRAM6 GB,Operating SystemAndroid 13Processor TypeMediat...,Primary Camera AvailableYesPrimary Camera108MP...,Display Size17.02 cm (6.7 inch)Resolution2412 ...,"Network Type5G, 4G VOLTE, 4G, 3G, 2GSupported ...",Battery Capacity5000 mAh
1,"realme Narzo 50i (Mint Green, 32 GB)",4.5,"₹7,324",https://rukminim2.flixcart.com/image/312/312/k...,Internal Storage32 GBRAM2 GBExpandable Storage...,Operating SystemAndroid 11Processor TypeSC9863...,Primary Camera AvailableYesPrimary Camera8MP R...,Display Size16.51 cm (6.5 inch)Resolution1600 ...,"Network Type4G VOLTE, 4G, 3G, 2GSupported Netw...",Battery Capacity5000 mAh
2,"realme Narzo 50i Prime (Dark Blue, 64 GB)",4.2,"₹9,989",https://rukminim2.flixcart.com/image/312/312/x...,Internal Storage64 GBRAM4 GBMemory Card Slot T...,Operating SystemAndroid 11Processor TypeUnisoc...,Primary Camera AvailableYesPrimary Camera8MP R...,Display Size16.51 cm (6.5 inch)Resolution720 ×...,Network Type4G VOLTESupported Networks4G VoLTE...,Battery Capacity5000 mAh
3,"realme Narzo 50i (Carbon Black, 32 GB)",4.5,"₹7,059",https://rukminim2.flixcart.com/image/312/312/k...,Internal Storage32 GBRAM2 GBExpandable Storage...,Operating SystemAndroid 11Processor TypeSC9863...,Primary Camera AvailableYesPrimary Camera8MP R...,Display Size16.51 cm (6.5 inch)Resolution1600 ...,"Network Type4G VOLTE, 4G, 3G, 2GSupported Netw...",Battery Capacity5000 mAh
4,"realme Narzo 50 (Speed Black, 128 GB)",4.3,"₹12,999",https://rukminim2.flixcart.com/image/312/312/l...,Internal Storage128 GBRAM6 GBExpandable Storag...,Operating SystemAndroid 11Processor TypeMediat...,Primary Camera AvailableYesPrimary Camera50MP ...,Display Size16.76 cm (6.6 inch)Resolution2412 ...,"Network Type4G VOLTE, 4G, 3G, 2GSupported Netw...",Battery Capacity5000 mAh
5,"realme 9 Pro+ 5G (Midnight Black, 128 GB)",4.4,"₹26,999",https://rukminim2.flixcart.com/image/312/312/k...,Internal Storage128 GBRAM8 GB,Operating SystemAndroid 12Processor TypeMediat...,Primary Camera AvailableYesPrimary Camera50MP ...,Display Size16.26 cm (6.4 inch)Resolution2400 ...,"Network Type5G, 4G VOLTE, 4G, 3G, 2GSupported ...",Battery Capacity4500 mAh
6,"realme C30s (Stripe Black, 32 GB)",4.4,"₹6,999",https://rukminim2.flixcart.com/image/312/312/x...,Internal Storage32 GBRAM2 GBExpandable Storage...,Operating SystemAndroid 12Processor TypeUnisoc...,Primary Camera AvailableYesPrimary Camera8MP R...,Display Size16.51 cm (6.5 inch)Resolution1600 ...,"Network Type4G VOLTE, 4G, 3G, 2GSupported Netw...",Battery Capacity5000 mAh
7,"realme Narzo 50 (Speed Black, 64 GB)",4.4,"₹13,990",https://rukminim2.flixcart.com/image/312/312/l...,Internal Storage64 GBRAM4 GBExpandable Storage...,Operating SystemAndroid 11Processor TypeMediat...,Primary Camera AvailableYesPrimary Camera50MP ...,Display Size16.76 cm (6.6 inch)Resolution2412 ...,"Network Type4G VOLTE, 4G, 3G, 2GSupported Netw...",Battery Capacity5000 mAh
8,"realme C30s (Stripe Black, 64 GB)",4.2,"₹8,499",https://rukminim2.flixcart.com/image/312/312/x...,Internal Storage64 GBRAM4 GBExpandable Storage...,Operating SystemAndroid 12Processor TypeUnisoc...,Primary Camera AvailableYesPrimary Camera8MP R...,Display Size16.51 cm (6.5 inch)Resolution1600 ...,"Network Type4G VOLTE, 4G, 3G, 2GSupported Netw...",Battery Capacity5000 mAh
9,"realme Narzo 50i Prime (Dark Blue, 32 GB)",4.2,"₹7,995",https://rukminim2.flixcart.com/image/312/312/x...,Internal Storage32 GBRAM3 GBMemory Card Slot T...,Operating SystemAndroid 11Processor TypeUnisoc...,Primary Camera AvailableYesPrimary Camera8MP R...,Display Size16.51 cm (6.5 inch)Resolution720 x...,Network Type4G VOLTESupported Networks4G VoLTE...,Battery Capacity5000 mAh


## mob_specs

### I was not able to find any common expression here.

In [None]:
def mob_specs(soup):
    """Extract the listing-level fields of every product on a results page.

    Args:
        soup: Parsed results container as returned by ``bs4_soup``. May be
            ``None`` when the page had no results container.

    Returns:
        A 4-tuple of lists ``(mob_names, mob_ratings, mob_price,
        mob_img_URL)``. Each list is collected independently by CSS class, so
        the lists are not guaranteed to have equal lengths. All four are
        empty when ``soup`` is ``None``.
    """
    if soup is None:
        return [], [], [], []

    mob_names = [tag.text for tag in soup.find_all('div', class_='_4rR01T')]
    mob_ratings = [tag.text for tag in soup.find_all('div', class_='_3LWZlK')]
    mob_price = [tag.text for tag in soup.find_all('div', class_='_30jeq3 _1_WHN1')]

    mob_img_URL = []
    for holder in soup.find_all('div', class_='CXW8mj'):
        img = holder.find('img')
        # .get() keeps an <img> without a src attribute from raising KeyError.
        mob_img_URL.append(img.get('src') if img is not None else None)

    return mob_names, mob_ratings, mob_price, mob_img_URL


In [93]:
mob_names, mob_ratings, mob_price, mob_img_URL = mob_specs(soup)

In [94]:
mob_names

['realme C33 2023 (Aqua Blue, 64 GB)',
 'realme C11 2021 (Cool Blue, 64 GB)',
 'realme C33 (Aqua Blue, 64 GB)',
 'realme C33 (Night Sea, 64 GB)',
 'realme Narzo 50A (Oxygen Blue, 64 GB)',
 'realme 9i (Prism Blue, 64 GB)',
 'realme C35 (Glowing Black, 128 GB)',
 'realme GT NEO 2 (NEO Black, 256 GB)',
 'realme GT NEO 2 (NEO Black, 128 GB)',
 'realme Narzo 50A Prime (Flash Black, 64 GB)',
 'realme NARZO 50A PRIME (FLASH BLUE, 64 GB)',
 'realme 9 5G (Meteor Black, 64 GB)',
 'realme C25s (Watery Blue, 128 GB)',
 'realme Narzo 50A (Oxygen Blue, 128 GB)',
 'realme 9i (Prism Blue, 128 GB)',
 'realme GT NEO 2 (NEO Blue, 256 GB)',
 'realme Narzo 50 Pro 5G (Hyper Blue, 128 GB)',
 'realme 8 (Cyber Black, 128 GB)',
 'realme Narzo N55 (Prime Black, 64 GB)',
 'realme XT (Pearl White, 64 GB)',
 'realme 8 (Punk Black, 128 GB)',
 'realme Narzo 50i Prime (Mint Green, 64 GB)',
 'realme GT Master Edition (Voyager Grey, 128 GB)',
 'realme GT Master Edition (Luna White, 128 GB)']

In [95]:
mob_ratings

['4.4',
 '4.3',
 '4.4',
 '4.4',
 '4.4',
 '4.5',
 '4.4',
 '4.4',
 '4.4',
 '4.1',
 '4.1',
 '4.5',
 '4.4',
 '4.4',
 '4.5',
 '4.4',
 '4.2',
 '4.4',
 '4.2',
 '4.5',
 '4.4',
 '4.2',
 '4.3',
 '4.3']

In [102]:
# Split every price string on whitespace to inspect its tokens.
pricesss = [price.split() for price in mob_price]
pricesss

[['₹9,999'],
 ['₹8,999'],
 ['₹9,999'],
 ['₹9,999'],
 ['₹11,498'],
 ['₹11,999'],
 ['₹10,699'],
 ['₹35,999'],
 ['₹31,999'],
 ['₹12,799'],
 ['₹10,890'],
 ['₹13,499'],
 ['₹11,990'],
 ['₹12,999'],
 ['₹13,499'],
 ['₹35,999'],
 ['₹21,495'],
 ['₹15,499'],
 ['₹11,754'],
 ['₹16,940'],
 ['₹13,499'],
 ['₹9,989'],
 ['₹22,999'],
 ['₹22,999']]

In [96]:
mob_price

['₹9,999',
 '₹8,999',
 '₹9,999',
 '₹9,999',
 '₹11,498',
 '₹11,999',
 '₹10,699',
 '₹35,999',
 '₹31,999',
 '₹12,799',
 '₹10,890',
 '₹13,499',
 '₹11,990',
 '₹12,999',
 '₹13,499',
 '₹35,999',
 '₹21,495',
 '₹15,499',
 '₹11,754',
 '₹16,940',
 '₹13,499',
 '₹9,989',
 '₹22,999',
 '₹22,999']