# Webscraping - Adidas
https://www.adidas.com

In [1]:
!pip install bs4
!pip install requests



# Import the required libraries

In [None]:
#importing required libraries
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Send request to the website server 

In [None]:
page = requests.get('https://www.adidas.co.uk/search?q=shoes')
page

# Page contents

In [None]:
soup = BeautifulSoup(page.content)
soup

In [13]:
#Scrapping the shoe
name = soup.find('h3', class_="w-full mobile:text-sm mobile:pr-0 font-bold text-base pr-5 line-clamp-2")
name

<h3 class="w-full mobile:text-sm mobile:pr-0 font-bold text-base pr-5 line-clamp-2">PUMA x LAMELO BALL MB.03 Toxic Men's Basketball Shoes<span class="sr-only">Purple Glimmer-Green Gecko</span></h3>

In [14]:
name.text

"PUMA x LAMELO BALL MB.03 Toxic Men's Basketball ShoesPurple Glimmer-Green Gecko"

In [16]:
#Scrapping the price
price = soup.find('div', class_="flex flex-col flex-none mobile:items-start items-end text-sm md:text-base mobile:mt-2")
price

<div class="flex flex-col flex-none mobile:items-start items-end text-sm md:text-base mobile:mt-2"><span class="whitespace-nowrap text-base font-bold override:opacity-100" data-test-id="price">$125.00</span></div>

In [17]:
price.text

'$125.00'

In [33]:
#Scrapping the number of available color
color = soup.find('span', class_="group-hover:hidden group-focus-within:hidden text-puma-black-300 uppercase text-xs whitespace-nowrap")
color

<span class="group-hover:hidden group-focus-within:hidden text-puma-black-300 uppercase text-xs whitespace-nowrap">4<!-- --> <!-- -->Colors</span>

In [34]:
color.text

'4 Colors'

In [24]:
#retain only the numeric value and make it an integer
no_of_color = int(''.join(filter(str.isdigit, color.text)))
no_of_color

1

In [25]:
#Scrapping the review
review = soup.find('span', class_="ml-1 pl-px text-sm")
review

<span class="ml-1 pl-px text-sm">(<!-- -->2<!-- -->)<span class="sr-only">Reviews</span></span>

In [26]:
review.text

'(2)Reviews'

# Scraping multiple 'names', 'price', 'no of color' and 'category'

In [39]:
#scraping multiple names
name = []

for i in soup.find_all('h3', class_="w-full mobile:text-sm mobile:pr-0 font-bold text-base pr-5 line-clamp-2"):
    name.append(i.text)

name

["PUMA x LAMELO BALL MB.03 Toxic Men's Basketball ShoesPurple Glimmer-Green Gecko",
 "Viz Runner Repeat Men's Running SneakersPuma Black-Puma White",
 "Smash 3.0 Men's SneakersGray Tile-PUMA Black-PUMA White",
 "Trinity Women's SneakersPUMA White-PUMA Black-Cool Light Gray",
 "PUMA x LAMELO BALL MB.03 LaFrancé Men's Basketball ShoesFluro Green Pes-PUMA Green-Fluro Yellow Pes",
 "Carina 2.0 Women's SneakersPuma White-Puma White-Puma Team Gold-Puma Black",
 "Star Vital Women's Training ShoesPUMA Black-PUMA Silver",
 "Carina Street Women's SneakersPUMA White-PUMA White-PUMA Gold",
 "Pacer Future Men's SneakersPuma Black-Puma Black",
 "SCUDERIA FERRARI x JOSHUA VIDES Speedcat Pro Men's Driving ShoesPUMA White-Rosso Corsa",
 "ST Runner v3 L Men's SneakersPuma White-Puma White-Gum",
 "Star Vital Refresh Men's Running ShoesPUMA Black-PUMA White",
 "PUMA x Formula 1® Las Vegas Grand Prix Speedcat Pro Men's Driving ShoesPUMA Black-PUMA Silver-Pop Red",
 "ST Runner v3 L Men's SneakersPuma Black-

In [38]:
#scraping multiple prices
price = []

for i in soup.find_all('div', class_="flex flex-col flex-none mobile:items-start items-end text-sm md:text-base mobile:mt-2"):
    price.append(i.text)

price

['$125.00',
 '$37.99$65.00',
 '$30.99$60.00',
 '$49.99$90.00',
 '$125.00',
 '$34.99$70.00',
 '$39.99$65.00',
 '$34.99$75.00',
 '$39.99$70.00',
 '$450.00',
 '$34.99$60.00',
 '$34.99$70.00',
 '$450.00',
 '$34.99$60.00',
 '$450.00',
 '$32.99$65.00',
 '$450.00',
 '$160.00',
 '$337.99$450.00',
 '$125.00',
 '$39.99$65.00',
 '$34.99$70.00',
 '$69.99$120.00',
 '$39.99$65.00']

In [35]:
#scraping multiple number of available color
no_of_colors = []

for i in soup.find_all('span', class_="group-hover:hidden group-focus-within:hidden text-puma-black-300 uppercase text-xs whitespace-nowrap"):
    no_of_colors.append(i.text)

no_of_colors

['4 Colors',
 '3 Colors',
 '2 Colors',
 '2 Colors',
 '3 Colors',
 '5 Colors',
 '2 Colors',
 '5 Colors',
 '2 Colors',
 '2 Colors',
 '4 Colors',
 '2 Colors',
 '2 Colors']

In [37]:
#scraping multiple category
review = []

for i in soup.find_all('span', class_="ml-1 pl-px text-sm"):
    review.append(i.text)
                  
review

['(2)Reviews',
 '(37)Reviews',
 '(48)Reviews',
 '(73)Reviews',
 '(5)Reviews',
 '(42)Reviews',
 '(89)Reviews',
 '(131)Reviews',
 '(1)Reviews',
 '(131)Reviews',
 '(13)Reviews',
 '(2)Reviews',
 '(1)Reviews',
 '(3)Reviews']

In [42]:
#Finding the length
print(len(name),len(price),len(no_of_colors),len(review))

24 24 13 14


In [48]:
import numpy as np
import pandas as pd

In [53]:
# Pad shorter arrays with a placeholder (np.nan)

# Find the maximum length among the arrays
max_length = max(len(name),len(price),len(no_of_colors),len(review))
                 
no_of_colors += [np.nan] * (max_length - len(no_of_colors))
review += [np.nan] * (max_length - len(review))

In [54]:
# Create a DataFrame
df = pd.DataFrame({
    'Name': name,
    'Price': price,
    'No of Color': no_of_colors,
    'Review': review
})

# Display the DataFrame
print(df)

                                                 Name           Price  \
0   PUMA x LAMELO BALL MB.03 Toxic Men's Basketbal...         $125.00   
1   Viz Runner Repeat Men's Running SneakersPuma B...    $37.99$65.00   
2   Smash 3.0 Men's SneakersGray Tile-PUMA Black-P...    $30.99$60.00   
3   Trinity Women's SneakersPUMA White-PUMA Black-...    $49.99$90.00   
4   PUMA x LAMELO BALL MB.03 LaFrancé Men's Basket...         $125.00   
5   Carina 2.0 Women's SneakersPuma White-Puma Whi...    $34.99$70.00   
6   Star Vital Women's Training ShoesPUMA Black-PU...    $39.99$65.00   
7   Carina Street Women's SneakersPUMA White-PUMA ...    $34.99$75.00   
8    Pacer Future Men's SneakersPuma Black-Puma Black    $39.99$70.00   
9   SCUDERIA FERRARI x JOSHUA VIDES Speedcat Pro M...         $450.00   
10  ST Runner v3 L Men's SneakersPuma White-Puma W...    $34.99$60.00   
11  Star Vital Refresh Men's Running ShoesPUMA Bla...    $34.99$70.00   
12  PUMA x Formula 1® Las Vegas Grand Prix Speedca.

In [55]:
# To extract numeric part of Color column and convert to int
def extract_numeric(s):
    return int(''.join(filter(str.isdigit, s)))

In [60]:
df

Unnamed: 0,Name,Price,No of Color,Review
0,PUMA x LAMELO BALL MB.03 Toxic Men's Basketbal...,$125.00,4 Colors,(2)Reviews
1,Viz Runner Repeat Men's Running SneakersPuma B...,$37.99$65.00,3 Colors,(37)Reviews
2,Smash 3.0 Men's SneakersGray Tile-PUMA Black-P...,$30.99$60.00,2 Colors,(48)Reviews
3,Trinity Women's SneakersPUMA White-PUMA Black-...,$49.99$90.00,2 Colors,(73)Reviews
4,PUMA x LAMELO BALL MB.03 LaFrancé Men's Basket...,$125.00,3 Colors,(5)Reviews
5,Carina 2.0 Women's SneakersPuma White-Puma Whi...,$34.99$70.00,5 Colors,(42)Reviews
6,Star Vital Women's Training ShoesPUMA Black-PU...,$39.99$65.00,2 Colors,(89)Reviews
7,Carina Street Women's SneakersPUMA White-PUMA ...,$34.99$75.00,5 Colors,(131)Reviews
8,Pacer Future Men's SneakersPuma Black-Puma Black,$39.99$70.00,2 Colors,(1)Reviews
9,SCUDERIA FERRARI x JOSHUA VIDES Speedcat Pro M...,$450.00,2 Colors,(131)Reviews


In [62]:
df2 = df.copy()
df2

Unnamed: 0,Name,Price,No of Color,Review
0,PUMA x LAMELO BALL MB.03 Toxic Men's Basketbal...,$125.00,4 Colors,(2)Reviews
1,Viz Runner Repeat Men's Running SneakersPuma B...,$37.99$65.00,3 Colors,(37)Reviews
2,Smash 3.0 Men's SneakersGray Tile-PUMA Black-P...,$30.99$60.00,2 Colors,(48)Reviews
3,Trinity Women's SneakersPUMA White-PUMA Black-...,$49.99$90.00,2 Colors,(73)Reviews
4,PUMA x LAMELO BALL MB.03 LaFrancé Men's Basket...,$125.00,3 Colors,(5)Reviews
5,Carina 2.0 Women's SneakersPuma White-Puma Whi...,$34.99$70.00,5 Colors,(42)Reviews
6,Star Vital Women's Training ShoesPUMA Black-PU...,$39.99$65.00,2 Colors,(89)Reviews
7,Carina Street Women's SneakersPUMA White-PUMA ...,$34.99$75.00,5 Colors,(131)Reviews
8,Pacer Future Men's SneakersPuma Black-Puma Black,$39.99$70.00,2 Colors,(1)Reviews
9,SCUDERIA FERRARI x JOSHUA VIDES Speedcat Pro M...,$450.00,2 Colors,(131)Reviews


In [63]:
# Save DataFrame to a CSV file
df2.to_csv('Table1puma.csv', index=False)