# Aim
This project aims to scrape laptop data — product name, retail price, maximum retail price (MRP), and rating — from the Flipkart e-commerce site using the BeautifulSoup library and to store it in a CSV file.

 ---

# Importing libraries

In [7]:
# Importing libraries
import pandas as pd
import csv
from bs4 import BeautifulSoup 
import requests

---
# Creating empty lists

In [8]:
# One accumulator list per scraped field (name, price, rating, MRP)
product, price, rating, mrp = [], [], [], []

---
# Scraping required data. 

In [25]:
# Scrape every search-results page and collect name, price, MRP and rating per listing.

# Search-results URL; the page number is appended to it.
SEARCH_URL = 'https://www.flipkart.com/search?q=laptops&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off&page='
LAST_PAGE = 63          # pages 1..LAST_PAGE are requested
REQUEST_TIMEOUT = 30    # seconds; without a timeout a stalled connection hangs the cell forever


def _text_or_none(card, tag, css_class):
    """Return the text of the first `tag` with class `css_class` inside `card`, or None if absent."""
    element = card.find(tag, class_=css_class)
    return element.text if element is not None else None


# Empty the lists first so that re-running this cell does not append duplicate rows.
for column in (product, price, mrp, rating):
    column.clear()

for j in range(1, LAST_PAGE + 1):

    # creating url variable
    url = SEARCH_URL + str(j)

    # Requesting the webpage; fail loudly on an HTTP error instead of
    # silently parsing an error page that contains no listings
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    request = response.text

    # Creating soup object
    soup = BeautifulSoup(request, 'lxml')

    # Each '_13oc-S' div is treated as one listing
    for i in soup.find_all('div', class_='_13oc-S'):
        # A missing field is stored as None so the four lists always stay the same length
        product.append(_text_or_none(i, 'div', '_4rR01T'))          # Product name
        price.append(_text_or_none(i, 'div', '_30jeq3 _1_WHN1'))    # Price
        mrp.append(_text_or_none(i, 'div', '_3I9_wc _27UcVY'))      # MRP
        rating.append(_text_or_none(i, 'div', '_3LWZlK'))           # Rating

        

In [10]:
# `soup` holds the last page parsed by the scraping loop; show only the start of
# its markup as a sanity check instead of dumping the entire HTML document.
print(str(soup)[:500])

<!DOCTYPE html>
<html lang="en"><head><link href="https://rukminim1.flixcart.com" rel="preconnect"/><link href="//static-assets-web.flixcart.com/fk-p-linchpin-web/fk-cp-zion/css/app_modules.chunk.905c37.css" rel="stylesheet"/><link href="//static-assets-web.flixcart.com/fk-p-linchpin-web/fk-cp-zion/css/app.chunk.4fb0ab.css" rel="stylesheet"/><meta content="text/html; charset=utf-8" http-equiv="Content-type"/><meta content="IE=Edge" http-equiv="X-UA-Compatible"/><meta content="102988293558" property="fb:page_id"/><meta content="658873552,624500995,100000233612389" property="fb:admins"/><meta content="noodp" name="robots"/><link href="https:///www/promos/new/20150528-140547-favicon-retina.ico" rel="shortcut icon"/><link href="/osdd.xml?v=2" rel="search" type="application/opensearchdescription+xml"/><meta content="website" property="og:type"/><meta content="Flipkart.com" name="og_site_name" property="og:site_name"/><link href="/apple-touch-icon-57x57.png" rel="apple-touch-icon" sizes="57x

---
# Checking the length of each list to ensure that the data was scraped correctly.

In [11]:
# Report the size of each list, one per line, in the order product, price, MRP, rating
print(*(len(column) for column in (product, price, mrp, rating)), sep='\n')

984
984
984
984


Since the lengths of all four lists are equal, every scraped listing has an entry (a value or `None`) in each list, so the data was scraped consistently.

----

# Showing data stored in each variable.

In [14]:
# First five scraped product names
product[:5]

['Lenovo IdeaPad Slim 3 Intel Core i3 11th Gen - (8 GB/512 GB SSD/Windows 11 Home) 81X800J3IN|81X800LGIN...',
 'Lenovo IdeaPad 3 Intel Core i3 11th Gen - (8 GB/256 GB SSD/Windows 11 Home) 14ITL05 Thin and Light Lap...',
 'ASUS Vivobook 15 Core i3 11th Gen - (8 GB/512 GB SSD/Windows 11 Home) X515EA-EJ322WS | X515EA-EJ328WS ...',
 'ASUS VivoBook 14 (2021) Celeron Dual Core - (4 GB/256 GB SSD/Windows 11 Home) X415MA-BV011W Thin and L...',
 'HP Laptop Core i3 11th Gen - (8 GB/512 GB SSD/Windows 11 Home) 15s-fq2717TU Thin and Light Laptop']

In [16]:
# First five scraped prices (kept as the raw text taken from the page)
price[:5]

['₹37,500', '₹33,990', '₹35,990', '₹21,990', '₹39,990']

In [17]:
# First five scraped MRP values (kept as the raw text taken from the page)
mrp[:5]

['₹68,790', '₹60,890', '₹50,990', '₹33,990', '₹49,025']

In [19]:
# First five scraped ratings (kept as the raw text taken from the page)
rating[:5]

['4.3', '4.2', '4.2', '4.1', '4.2']

---
# Creating a pandas dataframe from the scraped data

In [20]:
# Assemble the four scraped lists into one table, one column per field
scraped_columns = {
    'product': product,
    'price': price,
    'MRP': mrp,
    'ratings': rating,
}
df = pd.DataFrame(scraped_columns)
df

Unnamed: 0,product,price,MRP,ratings
0,Lenovo IdeaPad Slim 3 Intel Core i3 11th Gen -...,"₹37,500","₹68,790",4.3
1,Lenovo IdeaPad 3 Intel Core i3 11th Gen - (8 G...,"₹33,990","₹60,890",4.2
2,ASUS Vivobook 15 Core i3 11th Gen - (8 GB/512 ...,"₹35,990","₹50,990",4.2
3,ASUS VivoBook 14 (2021) Celeron Dual Core - (4...,"₹21,990","₹33,990",4.1
4,HP Laptop Core i3 11th Gen - (8 GB/512 GB SSD/...,"₹39,990","₹49,025",4.2
...,...,...,...,...
979,Lenovo IdeaPad 3 Intel Core i5 12th Gen - (16 ...,"₹60,990","₹82,490",4.1
980,Nokia PureBook S14 Core i5 11th Gen - (16 GB/5...,"₹39,999","₹82,990",3.9
981,LG Gram Core i5 12th Gen - (8 GB/512 GB SSD/Wi...,"₹89,990","₹1,27,000",
982,DELL Inspiron Core i3 11th Gen - (8 GB/1 TB HD...,"₹43,000","₹50,693",4


In [23]:
# (number of rows, number of columns) of the assembled dataframe
df.shape

(984, 4)

The scraped data is stored in a `984 × 4` dataframe (984 rows, 4 columns).

---
# Saving the dataframe as `csv` file

In [30]:
# Write the dataframe to a CSV file in the current working directory
output_path = 'flipkart_laptop_scraping.csv'
df.to_csv(output_path)

The data from the `Flipkart` site has been scraped using BeautifulSoup and stored in a `csv` file.


----