# # Web scraping Flipkart product data and storing it in MongoDB

In [1]:
import pandas as pd
from bs4 import BeautifulSoup
import requests
import pymongo

# Flipkart search-results page filtered to the Apple brand.
url = 'https://www.flipkart.com/search?sid=tyy%2C4io&otracker=CLP_Filters&p%5B%5D=facets.brand%255B%255D%3DApple'

# requests has no default timeout; without one a stalled connection hangs the cell forever.
req_source = requests.get(url, timeout=30)
# Fail loudly on HTTP errors (4xx/5xx) instead of parsing an error page into an empty table.
req_source.raise_for_status()

# Parse the HTML content returned by the website.
soup = BeautifulSoup(req_source.content, 'html.parser')
# Tip: print(soup.prettify()) shows the parsed HTML in a structured, indented form.

# Tags holding the product names (find_all() would work as well).
name = soup.select('._3wU53n')
print(len(name))   # number of products found on this page

# Tags holding the product ratings.
rating = soup.select('.hGSR34')
print(len(rating))   # number of ratings found on this page

# Tags holding the product prices.
price = soup.find_all(class_="_1vC4OE _2rQ-NK")
print(len(price))  # number of prices found on this page

# The three lists are collected independently of each other, so they only line up
# row-by-row when every product contributes exactly one tag to each list. zip() would
# silently drop or shift entries otherwise (e.g. a product without a rating would
# receive the next product's rating), so refuse to build a misaligned table.
if not (len(name) == len(rating) == len(price)):
    raise ValueError(
        "Scraped lists are misaligned: %d names, %d ratings, %d prices"
        % (len(name), len(rating), len(price))
    )

# Build all rows first and create the DataFrame once; appending row-by-row with
# df.loc[...] re-allocates the frame on every insert.
rows = [(i.text, j.text, z.text) for i, j, z in zip(name, rating, price)]
df = pd.DataFrame(rows, columns=['Name', 'Rating', 'Price'])

product = len(df)  # number of products stored (same final value the row counter used to hold)

print(df)

24
24
24
                                      Name Rating    Price
0            Apple iPhone 7 (Black, 32 GB)    4.5  ₹29,499
1          Apple iPhone 11 (White, 128 GB)    4.7  ₹73,600
2       Apple iPhone 7 Plus (Black, 32 GB)    4.5  ₹36,999
3           Apple iPhone 11 (White, 64 GB)    4.7  ₹68,300
4   Apple iPhone 7 Plus (Rose Gold, 32 GB)    4.5  ₹36,999
5           Apple iPhone 11 (Black, 64 GB)    4.7  ₹68,300
6       Apple iPhone 8 (Space Grey, 64 GB)    4.5  ₹38,999
7          Apple iPhone 11 (Purple, 64 GB)    4.7  ₹68,300
8            Apple iPhone XS (Gold, 64 GB)    4.7  ₹62,999
9           Apple iPhone 8 (Silver, 64 GB)    4.5  ₹38,999
10         Apple iPhone 11 (Black, 128 GB)    4.7  ₹73,600
11          Apple iPhone 11 (Green, 64 GB)    4.7  ₹68,300
12       Apple iPhone 7 Plus (Gold, 32 GB)    4.5  ₹36,999
13         Apple iPhone XR (Black, 128 GB)    4.6  ₹57,800
14         Apple iPhone XS (Silver, 64 GB)    4.7  ₹62,999
15     Apple iPhone XS (Space Grey, 64 GB)    4

In [2]:
# MongoDB stores documents as dictionary-like (JSON-style) objects, so convert the
# DataFrame into a list with one {column: value} dictionary per row.
# NOTE: the name `dict` shadows the Python builtin of the same name; it is kept
# because the MongoDB insert step reads this variable.
dict = df.to_dict(orient='records')
print(dict)

[{'Name': 'Apple iPhone 7 (Black, 32 GB)', 'Rating': '4.5', 'Price': '₹29,499'}, {'Name': 'Apple iPhone 11 (White, 128 GB)', 'Rating': '4.7', 'Price': '₹73,600'}, {'Name': 'Apple iPhone 7 Plus (Black, 32 GB)', 'Rating': '4.5', 'Price': '₹36,999'}, {'Name': 'Apple iPhone 11 (White, 64 GB)', 'Rating': '4.7', 'Price': '₹68,300'}, {'Name': 'Apple iPhone 7 Plus (Rose Gold, 32 GB)', 'Rating': '4.5', 'Price': '₹36,999'}, {'Name': 'Apple iPhone 11 (Black, 64 GB)', 'Rating': '4.7', 'Price': '₹68,300'}, {'Name': 'Apple iPhone 8 (Space Grey, 64 GB)', 'Rating': '4.5', 'Price': '₹38,999'}, {'Name': 'Apple iPhone 11 (Purple, 64 GB)', 'Rating': '4.7', 'Price': '₹68,300'}, {'Name': 'Apple iPhone XS (Gold, 64 GB)', 'Rating': '4.7', 'Price': '₹62,999'}, {'Name': 'Apple iPhone 8 (Silver, 64 GB)', 'Rating': '4.5', 'Price': '₹38,999'}, {'Name': 'Apple iPhone 11 (Black, 128 GB)', 'Rating': '4.7', 'Price': '₹73,600'}, {'Name': 'Apple iPhone 11 (Green, 64 GB)', 'Rating': '4.7', 'Price': '₹68,300'}, {'Name': '

In [3]:
# Create the MongoDB client for the local server.
dbConn = pymongo.MongoClient("mongodb://localhost:27017/")
# MongoClient() returns immediately without contacting the server, so send a cheap
# "ping" command to confirm the server is reachable before reporting success.
dbConn.admin.command('ping')
db = dbConn['flipdb']  # database named "flipdb" (created lazily on first write)
print("Connection established to mongoDb")
data = db.product_info   # collection named "product_info"

if dict:
    # insert_many() adds an '_id' key to each dictionary in place; re-running this
    # cell without regenerating `dict` will therefore hit duplicate-key errors.
    data.insert_many(dict)  # store every scraped record as one MongoDB document
    print('stored data in DB')
else:
    # insert_many() raises TypeError on an empty list, so skip the write explicitly.
    print('No products were scraped; nothing stored in DB')


Connection established to mongoDb
stored data in DB
