In [13]:
import requests
from bs4 import BeautifulSoup as bs
import pandas as pd
import os
import numpy as np
from itertools import zip_longest

In [14]:
def bs4_soup(pages):
    URL = f'https://www.flipkart.com/search?q=google+mobile&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off&page={pages}'
    request = requests.get(URL)
    soup = bs(request.text,'html.parser')
    soup = soup.find('div',class_ = '_1YokD2 _3Mn1Gg')

    return soup , URL

In [15]:
def mob_specs(soup):
    names = soup.find_all('div', class_='_4rR01T')
    mob_names = [i.text if i else None for i in names]

    ratings = soup.find_all('div', class_='_3LWZlK')
    mob_ratings = [i.text if i else None for i in ratings]

    price = soup.find_all('div', class_='_30jeq3 _1_WHN1')
    mob_price = [i.text if i else None for i in price]

    img = soup.find_all('div', class_='CXW8mj')
    mob_img_URL = [i.find('img')['src'] if i.find('img') else None for i in img]

    return mob_names, mob_ratings, mob_price, mob_img_URL


In [16]:
def features_from_mob_links(soup):
    mobile_links = ['https://www.flipkart.com' + i['href'] for i in soup.find_all('a', class_='_1fQZEK')]
    mobile_req = [requests.get(url=i) for i in mobile_links]
    mobile_soup = [bs(i.text, 'html.parser').find('div', class_='_1YokD2 _2GoDe3') for i in mobile_req]
    mobile_specs_soup = [i.find_all('div', class_='_3k-BhJ') for i in mobile_soup]

    storage_ram = [specs_.find('table', class_='_14cfVK').text if specs_ else None for mob_specs in mobile_specs_soup for specs_ in mob_specs if 'Memory' in str(specs_.text) and 'Storage' in str(specs_.text)]
    storage_ram_cross_checked = [i for i in storage_ram if 'Internal' in i.split()]

    os_processor = [specs_.find('table', class_='_14cfVK').text if specs_ else None for mob_specs in mobile_specs_soup for specs_ in mob_specs if 'Os' in str(specs_.text) and 'Processor' in str(specs_.text)]
    os_processor_cross_checked = [i for i in os_processor if 'Operating' in i.split()]

    camera = [specs_.find('table', class_='_14cfVK').text if specs_ else None for mob_specs in mobile_specs_soup for specs_ in mob_specs if 'Camera' in str(specs_.text) and 'FeaturesPrimary' in str(specs_.text)]
    camera_cross_checked = [i for i in camera if 'Primary' and 'Camera' in i.split()]

    display = [specs_.find('table', class_='_14cfVK').text if specs_ else None for mob_specs in mobile_specs_soup for specs_ in mob_specs if 'Display' in str(specs_.text) and 'FeaturesDisplay' in str(specs_.text)]
    display_cross_checked = [i for i in display if 'Display' and 'cm' in i.split()]

    network = [specs_.find('table', class_='_14cfVK').text if specs_ else None for mob_specs in mobile_specs_soup for specs_ in mob_specs if 'Connectivity' in str(specs_.text) and 'FeaturesNetwork' in str(specs_.text)]
    network_cross_checked = [i for i in network if 'Network' in i.split()]

    battery = [specs_.find('table', class_='_14cfVK').text if specs_ else None for mob_specs in mobile_specs_soup for specs_ in mob_specs if 'Battery' in str(specs_.text) and 'FeaturesBattery' in str(specs_.text)]
    battery_cross_checked = [i for i in battery if 'Battery' in i.split()]

    return mobile_specs_soup, storage_ram_cross_checked, os_processor_cross_checked, camera_cross_checked, display_cross_checked, network_cross_checked, battery_cross_checked


In [17]:
def fill_nan_values_func(mob_names, mob_ratings, mob_price, mob_img_URL, storage_ram, os_processor, camera, display, network, battery):
    tuple_data = zip_longest(mob_names, mob_ratings, mob_price, mob_img_URL, storage_ram, os_processor, camera, display, network, battery,fillvalue=np.nan)
    
    data = [i for i in tuple_data]
    return data
    

In [18]:
def print_len(mob_names, mob_ratings, mob_price, mob_img_URL, storage_ram, os_processor, camera, display, network, battery):
    print(f"Length of mob_names: {len(mob_names)}")
    print(f"Length of mob_ratings: {len(mob_ratings)}")
    print(f"Length of mob_price: {len(mob_price)}")
    print(f"Length of mob_img_URL: {len(mob_img_URL)}")
    print(f"Length of storage_ram: {len(storage_ram)}")
    print(f"Length of os_processor: {len(os_processor)}")
    print(f"Length of camera: {len(camera)}")
    print(f"Length of display: {len(display)}")
    print(f"Length of network: {len(network)}")
    print(f"Length of battery: {len(battery)}")   

In [19]:
df = pd.DataFrame()

page_start = 1
page_ends = 2

for pages in range(page_start, page_ends):

    soup, URL = bs4_soup(pages=pages)
    
    mob_names, mob_ratings, mob_price, mob_img_URL = mob_specs(soup)
    mobile_specs_soup, storage_ram, os_processor, camera, display, network, battery = features_from_mob_links(soup=soup)
    
    data = fill_nan_values_func(mob_names, mob_ratings, mob_price, mob_img_URL, storage_ram, os_processor, camera, display, network, battery)

    if mobile_specs_soup is not None and soup is not None:
        
        print_len(mob_names, mob_ratings, mob_price, mob_img_URL, storage_ram, os_processor, camera, display, network, battery)

        page_df = pd.DataFrame(data=data,columns=['name', 'ratings', 'price', 'imgURL', 'storage_ram', 'camera',
       'oS_Processor', 'display', 'network', 'battery'])

        if not page_df.empty:
            df = pd.concat(objs=[df, page_df], ignore_index=True)
            print(f'scraping data from page:{pages}')
            print(f'working in {URL} Page')
            print(f'page df : page_shape : {page_df.shape} \n {page_df.sample(2)}')
            print(f'df : df shape : {df.shape} \n {df.sample(2)}')
            print()

    else:
        print(f"Skipping page {pages} due to empty mobile_specs_soup or soup.")


Length of mob_names: 19
Length of mob_ratings: 16
Length of mob_price: 19
Length of mob_img_URL: 19
Length of storage_ram: 19
Length of os_processor: 19
Length of camera: 19
Length of display: 19
Length of network: 19
Length of battery: 19
scraping data from page:1
working in https://www.flipkart.com/search?q=google+mobile&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off&page=1 Page
page df : page_shape : (19, 10) 
                                       name ratings    price   
11  Google Pixel 3A (Clearly White, 64 GB)     4.5  ₹17,499  \
9    Google Pixel 7 Pro (Obsidian, 128 GB)     4.4  ₹70,999   

                                               imgURL   
11  https://rukminim2.flixcart.com/image/312/312/x...  \
9   https://rukminim2.flixcart.com/image/312/312/x...   

                                          storage_ram   
11  Internal Storage64 GBRAM4 GBMemory Card Slot T...  \
9                      Internal Storage128 GBRAM12 GB   

                       

In [20]:
df

Unnamed: 0,name,ratings,price,imgURL,storage_ram,camera,oS_Processor,display,network,battery
0,"Google Pixel 7a (Sea, 128 GB)",4.0,"₹43,999",https://rukminim2.flixcart.com/image/312/312/x...,Internal Storage128 GBRAM8 GB,Operating SystemAndroid 13Processor TypeTensor...,Primary Camera AvailableYesPrimary Camera64MP ...,Display Size15.49 cm (6.1 inch)Resolution2400 ...,"Network Type5GSupported Networks5G, 4G VoLTE, ...",Battery Capacity4300 mAh
1,"Google Pixel 7a (Charcoal, 128 GB)",4.0,"₹43,999",https://rukminim2.flixcart.com/image/312/312/x...,Internal Storage128 GBRAM8 GB,Operating SystemAndroid 13Processor TypeTensor...,Primary Camera AvailableYesPrimary Camera64MP ...,Display Size15.49 cm (6.1 inch)Resolution2400 ...,"Network Type5GSupported Networks5G, 4G VoLTE, ...",Battery Capacity4300 mAh
2,"Google Pixel 7a (Snow, 128 GB)",4.0,"₹43,999",https://rukminim2.flixcart.com/image/312/312/x...,Internal Storage128 GBRAM8 GB,Operating SystemAndroid 13Processor TypeTensor...,Primary Camera AvailableYesPrimary Camera64MP ...,Display Size15.49 cm (6.1 inch)Resolution2400 ...,"Network Type5GSupported Networks5G, 4G VoLTE, ...",Battery Capacity4300 mAh
3,"Google Pixel 6a (Charcoal, 128 GB)",4.3,"₹27,999",https://rukminim2.flixcart.com/image/312/312/x...,Internal Storage128 GBRAM6 GB,Operating SystemAndroid 12Processor TypeGoogle...,Primary Camera AvailableYesPrimary Camera12.2M...,Display Size15.6 cm (6.14 inch)Resolution2400 ...,"Network Type5G, 4G, 3G, 2GSupported Networks5G...",Battery Capacity4410 mAh
4,"Google Pixel 6a (Chalk, 128 GB)",4.3,"₹27,999",https://rukminim2.flixcart.com/image/312/312/x...,Internal Storage128 GBRAM6 GB,Operating SystemAndroid 12Processor TypeGoogle...,Primary Camera AvailableYesPrimary Camera12.2M...,Display Size15.6 cm (6.14 inch)Resolution2400 ...,"Network Type5G, 4G, 3G, 2GSupported Networks5G...",Battery Capacity4410 mAh
5,"Google Pixel 7 (Snow, 128 GB)",4.4,"₹49,999",https://rukminim2.flixcart.com/image/312/312/x...,Internal Storage128 GBRAM8 GB,Operating SystemAndroid 13Processor TypeGoogle...,Primary Camera AvailableYesPrimary Camera50MP ...,Display Size16.0 cm (6.3 inch)Resolution2400 x...,"Network Type5G, 4G, 3G, 2GSupported Networks5G...",Battery Capacity4270 mAh
6,"Google Pixel 7 (Obsidian, 128 GB)",4.4,"₹49,999",https://rukminim2.flixcart.com/image/312/312/x...,Internal Storage128 GBRAM8 GB,Operating SystemAndroid 13Processor TypeGoogle...,Primary Camera AvailableYesPrimary Camera50MP ...,Display Size16.0 cm (6.3 inch)Resolution2400 x...,"Network Type5G, 4G, 3G, 2GSupported Networks5G...",Battery Capacity4270 mAh
7,"Google Pixel 7 (Lemongrass, 128 GB)",4.4,"₹49,999",https://rukminim2.flixcart.com/image/312/312/x...,Internal Storage128 GBRAM8 GB,Operating SystemAndroid 13Processor TypeGoogle...,Primary Camera AvailableYesPrimary Camera50MP ...,Display Size16.0 cm (6.3 inch)Resolution2400 x...,"Network Type5G, 4G, 3G, 2GSupported Networks5G...",Battery Capacity4270 mAh
8,"Google Pixel 7 Pro (Snow, 128 GB)",4.4,"₹70,999",https://rukminim2.flixcart.com/image/312/312/x...,Internal Storage128 GBRAM12 GB,Operating SystemAndroid 13Processor TypeGoogle...,Primary Camera AvailableYesPrimary Camera50MP ...,Display Size17.02 cm (6.7 inch)Resolution3120 ...,"Network Type5G, 4G, 3G, 2GSupported Networks5G...",Battery Capacity4926 mAh
9,"Google Pixel 7 Pro (Obsidian, 128 GB)",4.4,"₹70,999",https://rukminim2.flixcart.com/image/312/312/x...,Internal Storage128 GBRAM12 GB,Operating SystemAndroid 13Processor TypeGoogle...,Primary Camera AvailableYesPrimary Camera50MP ...,Display Size17.02 cm (6.7 inch)Resolution3120 ...,"Network Type5G, 4G, 3G, 2GSupported Networks5G...",Battery Capacity4926 mAh


In [21]:
print(os.getcwd())
os.chdir(r'd:\vscode_machineLearning\BEST_PROJECTS\mobileRecommenderSystem')
print(os.getcwd())

d:\vscode_machineLearning\BEST_PROJECTS\mobileRecommenderSystem
d:\vscode_machineLearning\BEST_PROJECTS\mobileRecommenderSystem


In [22]:
df.to_csv(r'data\raw_data\google_mobile_data.csv',index=False)