# Trying to Scrape Samsung phone data from Flipkart

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas

In [2]:
headers = {"User-Agent": "Mozilla/5.0"}
r = requests.get("https://www.flipkart.com/search?q=samsung&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off", headers=headers) # &page=
c = r.content

In [3]:
soup = BeautifulSoup(c, "html.parser")

In [4]:
products = soup.find_all("div", {"class":"_1AtVbE col-12-12"})
len(products)

30

## Locating details of the mobile phones
For example: Model Name, Price, Stars, Specifications, User Ratings and Reviews

In [5]:
products[2].find("div", {"class": "_4rR01T"}).text

'SAMSUNG Galaxy F22 (Denim Blue, 128 GB)'

In [6]:
products[2].find("div", {"class": "_3LWZlK"}).text

'4.3'

In [7]:
products[2].find("div", {"class": "_30jeq3 _1_WHN1"}).text.lstrip("₹")

'14,999'

In [8]:
products[2].find("ul", {"class": "_1xgFaf"}).find_all("li")[0].text

'6 GB RAM | 128 GB ROM | Expandable Upto 1 TB'

In [107]:
products[2].find("span", {"class": "_2_R_DZ"}).text.split()

['18,496', 'Ratings', '&', '1,612', 'Reviews']

## Automating the process and creating a list for the DataFrame


In [9]:
def make_df_list(products):
    df_li = []
    for item in products:
        d = {}
        try:
            d["Name"] = item.find("div", {"class": "_4rR01T"}).text
        except:
            continue
        try:
            d["Stars"] = item.find("div", {"class": "_3LWZlK"}).text
        except:
            d["Stars"] = "Unrated"
        try:
            d["Price"] = item.find("div", {"class": "_30jeq3 _1_WHN1"}).text.lstrip("₹")
        except:
            continue
        try:
            temp = item.find("ul", {"class": "_1xgFaf"}).find_all("li")[0].text.split("|")
            d["RAM"] = temp[0]
            d["ROM"] = temp[1]
        except:
            d["RAM"] = "No info"
            d["ROM"] = "No info"
        try:
            temp = item.find("span", {"class": "_2_R_DZ"}).text.split()
            d["Rating"] = temp[0]
            d["Review"] = temp[3]
        except:
            d["Rating"] = "0"
            d["Review"] = "0"
        df_li.append(d)
    return df_li
li = make_df_list(products)
li

[{'Name': 'SAMSUNG Galaxy F22 (Denim Blue, 128 GB)',
  'Stars': '4.3',
  'Price': '14,999',
  'RAM': '6 GB RAM ',
  'ROM': ' 128 GB ROM ',
  'Rating': '18,496',
  'Review': '1,612'},
 {'Name': 'SAMSUNG Galaxy F12 (Sky Blue, 64 GB)',
  'Stars': '4.2',
  'Price': '11,499',
  'RAM': '4 GB RAM ',
  'ROM': ' 64 GB ROM ',
  'Rating': '1,65,748',
  'Review': '13,168'},
 {'Name': 'SAMSUNG Galaxy F12 (Celestial Black, 64 GB)',
  'Stars': '4.2',
  'Price': '11,499',
  'RAM': '4 GB RAM ',
  'ROM': ' 64 GB ROM ',
  'Rating': '1,65,748',
  'Review': '13,168'},
 {'Name': 'SAMSUNG Galaxy F12 (Sea Green, 64 GB)',
  'Stars': '4.2',
  'Price': '11,499',
  'RAM': '4 GB RAM ',
  'ROM': ' 64 GB ROM ',
  'Rating': '1,65,748',
  'Review': '13,168'},
 {'Name': 'SAMSUNG Galaxy F22 (Denim Blue, 64 GB)',
  'Stars': '4.3',
  'Price': '12,999',
  'RAM': '4 GB RAM ',
  'ROM': ' 64 GB ROM ',
  'Rating': '33,205',
  'Review': '3,172'},
 {'Name': 'SAMSUNG Galaxy F22 (Denim Black, 64 GB)',
  'Stars': '4.3',
  'Price': 

In [10]:
df = pandas.DataFrame(li)

In [11]:
df

Unnamed: 0,Name,Stars,Price,RAM,ROM,Rating,Review
0,"SAMSUNG Galaxy F22 (Denim Blue, 128 GB)",4.3,14999,6 GB RAM,128 GB ROM,18496,1612
1,"SAMSUNG Galaxy F12 (Sky Blue, 64 GB)",4.2,11499,4 GB RAM,64 GB ROM,165748,13168
2,"SAMSUNG Galaxy F12 (Celestial Black, 64 GB)",4.2,11499,4 GB RAM,64 GB ROM,165748,13168
3,"SAMSUNG Galaxy F12 (Sea Green, 64 GB)",4.2,11499,4 GB RAM,64 GB ROM,165748,13168
4,"SAMSUNG Galaxy F22 (Denim Blue, 64 GB)",4.3,12999,4 GB RAM,64 GB ROM,33205,3172
5,"SAMSUNG Galaxy F22 (Denim Black, 64 GB)",4.3,12999,4 GB RAM,64 GB ROM,33205,3172
6,"SAMSUNG Galaxy M12 (Blue, 64 GB)",4.2,11277,4 GB RAM,64 GB ROM,4799,362
7,SAMSUNG Guru 1200,4.3,1410,No info,No info,174399,15129
8,"SAMSUNG M32 5G (Slate Black, 128 GB)",4.2,18999,6 GB RAM,128 GB ROM,160,8
9,"SAMSUNG Galaxy F22 (Denim Black, 128 GB)",4.3,14999,6 GB RAM,128 GB ROM,18496,1612


## Sorting according to the price

In [12]:
df.sort_values(by=["Price"], ascending=False)

Unnamed: 0,Name,Stars,Price,RAM,ROM,Rating,Review
15,"SAMSUNG Galaxy M02 (Gray, 32 GB)",4.0,8999,2 GB RAM,32 GB ROM,1094,94
18,"SAMSUNG Galaxy M52 5G (Icy Blue, 128 GB)",4.5,31489,8 GB RAM,128 GB ROM,56,6
14,"SAMSUNG Galaxy M52 5G (Icy Blue, 128 GB)",4.4,26999,6 GB RAM,128 GB ROM,89,9
22,"SAMSUNG Galaxy A70s (Prism Crush Red, 128 GB)",4.4,19999,8 GB RAM,128 GB ROM,1224,124
19,"SAMSUNG Galaxy A70s (Prism Crush White, 128 GB)",4.4,19999,8 GB RAM,128 GB ROM,1224,124
21,"SAMSUNG M32 5G (Sky blue, 128 GB)",4.2,19269,6 GB RAM,128 GB ROM,160,8
11,"SAMSUNG Galaxy A22 5G (Gray, 128 GB)",4.3,19188,6 GB RAM,128 GB ROM,408,48
10,"SAMSUNG Galaxy M31 (Ocean Blue, 128 GB)",4.3,19125,8 GB RAM,128 GB ROM,2033,135
8,"SAMSUNG M32 5G (Slate Black, 128 GB)",4.2,18999,6 GB RAM,128 GB ROM,160,8
0,"SAMSUNG Galaxy F22 (Denim Blue, 128 GB)",4.3,14999,6 GB RAM,128 GB ROM,18496,1612


## Automating for multiple pages
Using 10 pages for less use of resources and time of code.

In [13]:
base_url = "https://www.flipkart.com/search?q=samsung&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off&page="
main_list = []
for page in range(1, 10):
    r = requests.get(base_url+str(page))
    soup = BeautifulSoup(r.content, "html.parser")
    products = soup.find_all("div", {"class":"_1AtVbE col-12-12"})
    main_list.extend(make_df_list(products))
main_list

[{'Name': 'SAMSUNG Galaxy F22 (Denim Blue, 128 GB)',
  'Stars': '4.3',
  'Price': '14,999',
  'RAM': '6 GB RAM ',
  'ROM': ' 128 GB ROM ',
  'Rating': '18,496',
  'Review': '1,612'},
 {'Name': 'SAMSUNG Galaxy F12 (Sky Blue, 64 GB)',
  'Stars': '4.2',
  'Price': '11,499',
  'RAM': '4 GB RAM ',
  'ROM': ' 64 GB ROM ',
  'Rating': '1,65,748',
  'Review': '13,168'},
 {'Name': 'SAMSUNG Galaxy F12 (Celestial Black, 64 GB)',
  'Stars': '4.2',
  'Price': '11,499',
  'RAM': '4 GB RAM ',
  'ROM': ' 64 GB ROM ',
  'Rating': '1,65,748',
  'Review': '13,168'},
 {'Name': 'SAMSUNG Galaxy F12 (Sea Green, 64 GB)',
  'Stars': '4.2',
  'Price': '11,499',
  'RAM': '4 GB RAM ',
  'ROM': ' 64 GB ROM ',
  'Rating': '1,65,748',
  'Review': '13,168'},
 {'Name': 'SAMSUNG Galaxy F22 (Denim Blue, 64 GB)',
  'Stars': '4.3',
  'Price': '12,999',
  'RAM': '4 GB RAM ',
  'ROM': ' 64 GB ROM ',
  'Rating': '33,205',
  'Review': '3,172'},
 {'Name': 'SAMSUNG Galaxy F22 (Denim Black, 64 GB)',
  'Stars': '4.3',
  'Price': 

In [14]:
df = pandas.DataFrame(main_list)
df

Unnamed: 0,Name,Stars,Price,RAM,ROM,Rating,Review
0,"SAMSUNG Galaxy F22 (Denim Blue, 128 GB)",4.3,14999,6 GB RAM,128 GB ROM,18496,1612
1,"SAMSUNG Galaxy F12 (Sky Blue, 64 GB)",4.2,11499,4 GB RAM,64 GB ROM,165748,13168
2,"SAMSUNG Galaxy F12 (Celestial Black, 64 GB)",4.2,11499,4 GB RAM,64 GB ROM,165748,13168
3,"SAMSUNG Galaxy F12 (Sea Green, 64 GB)",4.2,11499,4 GB RAM,64 GB ROM,165748,13168
4,"SAMSUNG Galaxy F22 (Denim Blue, 64 GB)",4.3,12999,4 GB RAM,64 GB ROM,33205,3172
...,...,...,...,...,...,...,...
211,"SAMSUNG Galaxy J5 Prime (Black, 16 GB)",4,14500,2 GB RAM,16 GB ROM,1205,345
212,"SAMSUNG Galaxy S6 Edge (Black Sapphire, 32 GB)",4,37299,3 GB RAM,32 GB ROM,1219,264
213,"SAMSUNG Galaxy A10 (Blue, 32 GB)",4.4,7990,2 GB RAM,32 GB ROM,13142,1051
214,"SAMSUNG Galaxy A6 (Black, 64 GB)",4.3,23748,4 GB RAM,64 GB ROM,692,70


In [15]:
df.to_csv("SamsungData.csv")