-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwebscrap2.py
139 lines (135 loc) · 5.76 KB
/
webscrap2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
from operator import lt, div
from bs4 import BeautifulSoup as soup
from urllib.request import urlopen as uReq
# Flipkart search-results URL (query "samsung mobiles") that this script scrapes.
my_url="https://www.flipkart.com/search?q=samsung+mobiles&sid=tyy%2C4io&as=on&as-show=on&otracker=AS_QueryStore_HistoryAutoSuggest_0_2&otracker1=AS_QueryStore_HistoryAutoSuggest_0_2&as-pos=0&as-type=HISTORY&as-searchtext=sa"

# Download the raw page. The context manager closes the connection even when
# read() raises, which the manual close() call could not guarantee.
with uReq(my_url) as uClient:
    page_html = uClient.read()

# Parse the markup with the stdlib-backed parser (no extra dependency needed).
page_soup = soup(page_html, "html.parser")

# Collect every <div class="_3O0U0u">; in the sample output each such div
# wraps one product card. The class name is site-generated and may change,
# in which case this list is simply empty.
containers = page_soup.find_all("div", {"class": "_3O0U0u"})
print(len(containers))

# Show the first card's markup for inspection, but only if the selector
# matched anything -- indexing an empty result would raise IndexError.
if containers:
    print(containers[0].prettify())
<div class="_3O0U0u">
<div data-id="MOBFRZZHMHQVNDFA" style="width:100%">
<div class="_1UoZlX">
<a class="_31qSD5" href="/samsung-galaxy-m01-blue-32-gb/p/itmc068b26305a0d?pid=MOBFRZZHMHQVNDFA&amp;lid=LSTMOBFRZZHMHQVNDFAZXGBO6&amp;marketplace=FLIPKART&amp;srno=s_1_1&amp;otracker=AS_QueryStore_HistoryAutoSuggest_0_2&amp;otracker1=AS_QueryStore_HistoryAutoSuggest_0_2&amp;fm=organic&amp;iid=f9a57085-7ab9-4aba-b59d-5a4cbecd03e9.MOBFRZZHMHQVNDFA.SEARCH&amp;ssid=zu9bg122ao0000001596818422200&amp;qH=0258c7d48242959a" rel="noopener noreferrer" target="_blank">
<div class="_3SQWE6">
<div class="_1OCn9C">
<div>
<div class="_3BTv9X" style="height:200px;width:200px">
<img alt="Samsung Galaxy M01 (Blue, 32 GB)" class="_1Nyybr" src="//img1a.flixcart.com/www/linchpin/fk-cp-zion/img/placeholder_9951d0.svg"/>
</div>
</div>
</div>
<div class="_2lesQu">
<div class="_1O_CiZ">
<span class="_1iHA1p">
<div class="_2kFyHg">
<label>
<input class="_3uUUD5" readonly="" type="checkbox"/>
<div class="_1p7h2j">
</div>
</label>
</div>
</span>
<label class="_10TB-Q">
<span>
Add to Compare
</span>
</label>
</div>
</div>
<div class="_3gDSOa _32A6AP">
<div class="DsQ2eg">
<svg class="_2oLiqr" height="16" viewbox="0 0 20 16" width="16" xmlns="http://www.w3.org/2000/svg">
<path class="_35Y7Yo" d="M8.695 16.682C4.06 12.382 1 9.536 1 6.065 1 3.219 3.178 1 5.95 1c1.566 0 3.069.746 4.05 1.915C10.981 1.745 12.484 1 14.05 1 16.822 1 19 3.22 19 6.065c0 3.471-3.06 6.316-7.695 10.617L10 17.897l-1.305-1.215z" fill="#2874F0" fill-rule="evenodd" opacity=".9" stroke="#FFF">
</path>
</svg>
</div>
</div>
</div>
<div class="_1-2Iqu row">
<div class="col col-7-12">
<div class="_3wU53n">
Samsung Galaxy M01 (Blue, 32 GB)
</div>
<div class="niH0FQ">
<span class="_2_KrJI" id="productRating_LSTMOBFRZZHMHQVNDFAZXGBO6_MOBFRZZHMHQVNDFA_">
<div class="hGSR34">
4.2
<img class="_2lQ_WZ" src="data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIxMyIgaGVpZ2h0PSIxMiI+PHBhdGggZmlsbD0iI0ZGRiIgZD0iTTYuNSA5LjQzOWwtMy42NzQgMi4yMy45NC00LjI2LTMuMjEtMi44ODMgNC4yNTQtLjQwNEw2LjUuMTEybDEuNjkgNC4wMSA0LjI1NC40MDQtMy4yMSAyLjg4Mi45NCA0LjI2eiIvPjwvc3ZnPg=="/>
</div>
</span>
<span class="_38sUEc">
<span>
<span>
5,040 Ratings
</span>
<span class="_1VpSqZ">
&amp;
</span>
<span>
371 Reviews
</span>
</span>
</span>
</div>
<div class="_3ULzGw">
<ul class="vFw0gD">
<li class="tVe95H">
3 GB RAM | 32 GB ROM | Expandable Upto 512 GB
</li>
<li class="tVe95H">
14.48 cm (5.7 inch) HD+ Display
</li>
<li class="tVe95H">
13MP + 2MP | 5MP Front Camera
</li>
<li class="tVe95H">
4000 mAh Lithium-ion Battery
</li>
<li class="tVe95H">
Qualcomm Snapdragon (SDM439) Octa Core Processor
</li>
<li class="tVe95H">
1 Year Manufacturer Warranty for Phone and 6 Months Warranty for in the Box Accessories
</li>
</ul>
</div>
</div>
<div class="col col-5-12 _2o7WAb">
<div class="_6BWGkk">
<div class="_1uv9Cb">
<div class="_1vC4OE _2rQ-NK">
₹9,899
</div>
</div>
</div>
<div class="_3n6o0t">
<img height="21" src="//img1a.flixcart.com/www/linchpin/fk-cp-zion/img/fa_8b4b59.png"/>
</div>
</div>
</div>
</a>
</div>
</div>
</div>
import csv  # local import: only this export section needs the csv module

# Preview the fields extracted from the first product card. Guarded so that an
# empty result set (e.g. the site changed its class names) does not crash here.
if containers:
    container = containers[0]
    print(container.div.img["alt"])
    price = container.find_all("div", {"class": "col col-5-12 _2o7WAb"})
    print(price[0].text)
    ratings = container.find_all("div", {"class": "niH0FQ"})
    print(ratings[0].text)

filename = "products.csv"

# The with-block guarantees the file is flushed and closed. newline="" is what
# the csv module expects, and UTF-8 is required because prices contain the
# rupee sign, which many platform-default encodings cannot represent.
with open(filename, "w", newline="", encoding="utf-8") as f:
    # csv.writer quotes fields containing commas (prices look like "9,899"),
    # which a hand-built comma-joined line would split into extra columns.
    writer = csv.writer(f)
    # Header without the stray padding spaces, so column names are exact.
    writer.writerow(["Product_Name", "Pricing", "Ratings"])

    for container in containers:
        product_name = container.div.img["alt"]

        # A card may lack the price or rating section; fall back to an empty
        # string instead of failing with IndexError on [0].
        price_container = container.find_all("div", {"class": "col col-5-12 _2o7WAb"})
        price = price_container[0].text.strip() if price_container else ""

        rating_container = container.find_all("div", {"class": "niH0FQ"})
        rating = rating_container[0].text if rating_container else ""

        print("Product_Name:"+ product_name)
        print("Price: " + price)
        print("Ratings:" + rating)

        # Persist the row; previously only the header ever reached the file.
        writer.writerow([product_name, price, rating])