In [None]:
#installing selenium
pip install selenium
pip install pymongo


In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
import pandas as pd
import re
from pymongo import MongoClient
import config

# Now you can access the MongoDB connection URL from the config module
mongo_url = config.MONGO_URL

# creating a chrome webdriver to run automation
driver = webdriver.Chrome()

# going to the link
driver.get("https://www.nike.com/ie/w/mens-shoes-nik1zy7ok")


# extracting the link of all the shoes from the product page and stroring it in a list
shoe_ele = driver.find_elements(
    By.CSS_SELECTOR, '[data-testid="product-card__link-overlay"]')

all_shoe_links = []

for i in range(len(shoe_ele)):
    link = shoe_ele[i].get_attribute("href")
    print(link)
    all_shoe_links.append(link)


# creating an empty list to store the values
product_title_list = []
product_category_list = []
product_price_list = []
product_description_list = []
review_count_list = []
product_rating_list = []

for i in range(len(all_shoe_links)):
    driver.get(all_shoe_links[i])
    time.sleep(3)

    # product title
    product_title = driver.find_elements(
        By.CSS_SELECTOR, '[data-test="product-title"]')[1].text
    product_title_list.append(product_title)
    print(product_title)

    # product category
    product_category = driver.find_elements(
        By.CSS_SELECTOR, '[data-test="product-sub-title"]')[1].text
    product_category_list.append(product_category)
    print(product_category)

    # product_price
    product_price = driver.find_elements(
        By.CSS_SELECTOR, '[data-test="product-price"]')[1].text
    product_price_list.append(product_price)
    print(product_price)

    # product_description
    product_description = driver.find_element(
        By.CSS_SELECTOR, '[class="description-preview body-2 css-1pbvugb"]').text
    product_description_list.append(product_description)
    print(product_description)

    # review_count
    review_count = driver.find_elements(
        By.CSS_SELECTOR, '[class="headline-4"]')[-1].text
    review_count_list.append(review_count)
    print(review_count)

    # product rating
    product_rating_sel = driver.find_element(By.CSS_SELECTOR, '.css-n209rx')

    # Get the value of the aria-label attribute
    product_rating = product_rating_sel.get_attribute("aria-label")

    product_rating_list.append(product_rating)

    # Extract the rating from the aria-label attribute
    print("Rating:", product_rating)


# creating a Dataframe from the dictonary
product_details = {
    "product_title": product_title_list,
    "product_category": product_category_list,
    "product_price": product_price_list,
    "product_description": product_description_list,
    "review_count": review_count_list,
    "product_rating": product_rating_list
}

df = pd.DataFrame(product_details)

print(df)


""" 
data cleaning using python library called re(regex) to remove the euro sign and other strings to convert the price into float and also extract the value of review_count from reviews
"""

# Extract numeric values from strings and convert to float
df['product_price'] = df['product_price'].str.extract(
    r'(\d+\.\d+)').astype(float)


# Extract numeric values from strings and replace non-matching rows with "Not Applicable"
df['review_count'] = df['review_count'].apply(lambda x: re.findall(
    r'\d+', x)[0] if re.findall(r'\d+', x) else "Not Applicable")


""" 
Sending the dataframe to a NoSQL Database called Mongodb

"""

try:
    # Connect to MongoDB
    client = MongoClient(
        mongo_url)
    db = client['nike_product_scraper']
    collection = db['nike_product_details']

    # Convert DataFrame to dictionary
    data = df.to_dict(orient='records')
    print(data)

    # Insert data into MongoDB collection
    collection.insert_many(data)

    print("Data inserted successfully into MongoDB")

except Exception as e:
    print("Error:", e)

finally:
    # Close connection
    client.close()

In [28]:
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
import pandas as pd

In [2]:

# creating a chrome webdriver to run automation
driver = webdriver.Chrome()
driver.get("https://www.nike.com/ie/w/mens-shoes-nik1zy7ok")

In [6]:
driver.get("https://www.nike.com/ie/w/mens-shoes-nik1zy7ok")

In [4]:
shoe_ele = driver.find_elements(
    By.CSS_SELECTOR, '[data-testid="product-card__link-overlay"]')

In [5]:
len(shoe_ele)

24

In [8]:
all_shoe_links = []

for i in range(len(shoe_ele)):
    link = shoe_ele[i].get_attribute("href")
    print(link)
    all_shoe_links.append(link)

https://www.nike.com/ie/t/air-jordan-1-low-shoes-WgK3MP/553558-140
https://www.nike.com/ie/t/dunk-low-retro-shoe-wwlDHh/DD1391-100
https://www.nike.com/ie/t/air-jordan-1-low-se-shoes-8Npn8f/CZ8455-100
https://www.nike.com/ie/t/air-max-90-shoes-56b7nN/FJ4218-001
https://www.nike.com/ie/t/air-force-1-flyknit-2-shoes-L5q9GR/AV3042-100
https://www.nike.com/ie/t/air-max-alpha-trainer-5-workout-shoes-rdbbk2/DM0829-010
https://www.nike.com/ie/t/air-max-plus-shoes-1gMZsK/604133-050
https://www.nike.com/ie/t/air-max-dn-shoes-rZwC5C/DV3337-003
https://www.nike.com/ie/t/jordan-max-aura-5-shoes-xstMgH/DZ4353-001
https://www.nike.com/ie/t/air-max-90-shoes-ggrFKJ/FB9658-400
https://www.nike.com/ie/t/air-force-1-07-shoes-CFVMS0/CW2288-111
https://www.nike.com/ie/t/air-max-pulse-shoes-tLDqhN/FN7459-400
https://www.nike.com/ie/t/air-max-plus-iii-shoes-mJkmzK/CJ9684-002
https://www.nike.com/ie/t/air-jordan-1-low-shoes-QnthX1/DC0774-101
https://www.nike.com/ie/t/court-vision-low-shoes-qNJnrx/FD0320-133
h

In [9]:
driver.get(all_shoe_links[6])

In [44]:
# dictornary to store all product details
product_title_list = []
product_category_list = []
product_price_list = []
product_description_list = []
review_count_list = []
product_rating_list = []

for i in range(6, 7):
    # for i in range(len(all_shoe_links)):
    driver.get(all_shoe_links[i])
    time.sleep(3)

    # product title
    product_title = driver.find_elements(
        By.CSS_SELECTOR, '[data-test="product-title"]')[1].text
    product_title_list.append(product_title)
    print(product_title)

    # product category
    product_category = driver.find_elements(
        By.CSS_SELECTOR, '[data-test="product-sub-title"]')[1].text
    product_category_list.append(product_category)
    print(product_category)

    # product_price
    product_price = driver.find_elements(
        By.CSS_SELECTOR, '[data-test="product-price"]')[1].text
    product_price_list.append(product_price)
    print(product_price)

    # product_description
    product_description = driver.find_element(
        By.CSS_SELECTOR, '[class="description-preview body-2 css-1pbvugb"]').text
    product_description_list.append(product_description)
    print(product_description)

    # review_count
    review_count = driver.find_elements(
        By.CSS_SELECTOR, '[class="headline-4"]')[-1].text
    review_count_list.append(review_count)
    print(review_count)

    # product rating
    product_rating_sel = driver.find_element(By.CSS_SELECTOR, '.css-n209rx')

    # Get the value of the aria-label attribute
    product_rating = product_rating_sel.get_attribute("aria-label")

    product_rating_list.append(product_rating)

    # Extract the rating from the aria-label attribute
    print("Rating:", product_rating)

Nike Air Max Plus
Men's Shoes
€189.99
Let your attitude have the edge in your Nike Air Max Plus, a Tuned Air experience that offers premium stability and unbelievable cushioning. Featuring the original's wavy design lines, TPU accents and airy mesh on the upper, it celebrates defiant style.
Colour Shown: Black/Black/Black
Style: 604133-050
Reviews (361)
Rating: 4.3


In [33]:
product_details = {
    "product_title": product_title_list,
    "product_category": product_category_list,
    "product_price": product_price_list,
    "product_description": product_description_list,
    "review_count": review_count_list,
    "product_rating": product_rating_list
}

df = pd.DataFrame(product_details)

Unnamed: 0,product_title,product_category,product_price,product_description,review_count,product_rating
0,Air Jordan 1 Low,Men's Shoes,€129.99,"Inspired by the original that debuted in 1985,...",Reviews (1058),4.8
1,Nike Dunk Low Retro,Men's Shoe,€119.99,Created for the hardwood but taken to the stre...,Reviews (956),4.7
2,Air Jordan 1 Low SE,Men's Shoes,€139.99,"Always fresh and never out of style, the Air J...",Reviews (3),4.3
3,Nike Air Max 90,Men's Shoes,€159.99,"Nothing as fly, nothing as comfortable, nothin...",Reviews (8),4.0
4,Nike Air Force 1 Flyknit 2.0,Shoes,€119.99,Inspired by the shoe that's been reigning the ...,Reviews (19),4.4
5,Nike Air Max Alpha Trainer 5,Men's Workout Shoes,€84.99,Finish your last rep with power and rack it wi...,Reviews (219),4.4
6,Nike Air Max Plus,Men's Shoes,€189.99,Let your attitude have the edge in your Nike A...,Free Delivery and Returns,4.3
7,Nike Air Max Dn,Men's Shoes,€169.99,Say hello to the next generation of Air techno...,How This Was Made,4.7
8,Jordan Max Aura 5,Men's Shoes,Discounted from\n€129.99,"Whenyou need a shoe that's ready 24/7, it's go...",Free Delivery and Returns,4.6
9,Nike Air Max 90,Men's Shoes,€159.99,Lace up and feel the legacy in this champion r...,Reviews (41),4.9


df


In [38]:
# cleaning up the dataframe
import re
# Extract numeric values from strings and convert to float
df['product_price'] = df['product_price'].str.extract(
    r'(\d+\.\d+)').astype(float)

# Now df['product_price'] contains only float values
print(df)

                   product_title          product_category  product_price  \
0               Air Jordan 1 Low               Men's Shoes         129.99   
1            Nike Dunk Low Retro                Men's Shoe         119.99   
2            Air Jordan 1 Low SE               Men's Shoes         139.99   
3                Nike Air Max 90               Men's Shoes         159.99   
4   Nike Air Force 1 Flyknit 2.0                     Shoes         119.99   
5   Nike Air Max Alpha Trainer 5       Men's Workout Shoes          84.99   
6              Nike Air Max Plus               Men's Shoes         189.99   
7                Nike Air Max Dn               Men's Shoes         169.99   
8              Jordan Max Aura 5               Men's Shoes         129.99   
9                Nike Air Max 90               Men's Shoes         159.99   
10          Nike Air Force 1 '07               Men's Shoes         119.99   
11            Nike Air Max Pulse               Men's Shoes         159.99   

In [41]:
# Extract numeric values from strings and replace non-matching rows with "Not Applicable"
df['review_count'] = df['review_count'].apply(lambda x: re.findall(
    r'\d+', x)[0] if re.findall(r'\d+', x) else "Not Applicable")

In [42]:
df

Unnamed: 0,product_title,product_category,product_price,product_description,review_count,product_rating
0,Air Jordan 1 Low,Men's Shoes,129.99,"Inspired by the original that debuted in 1985,...",1058,4.8
1,Nike Dunk Low Retro,Men's Shoe,119.99,Created for the hardwood but taken to the stre...,956,4.7
2,Air Jordan 1 Low SE,Men's Shoes,139.99,"Always fresh and never out of style, the Air J...",3,4.3
3,Nike Air Max 90,Men's Shoes,159.99,"Nothing as fly, nothing as comfortable, nothin...",8,4.0
4,Nike Air Force 1 Flyknit 2.0,Shoes,119.99,Inspired by the shoe that's been reigning the ...,19,4.4
5,Nike Air Max Alpha Trainer 5,Men's Workout Shoes,84.99,Finish your last rep with power and rack it wi...,219,4.4
6,Nike Air Max Plus,Men's Shoes,189.99,Let your attitude have the edge in your Nike A...,Not Applicable,4.3
7,Nike Air Max Dn,Men's Shoes,169.99,Say hello to the next generation of Air techno...,Not Applicable,4.7
8,Jordan Max Aura 5,Men's Shoes,129.99,"Whenyou need a shoe that's ready 24/7, it's go...",Not Applicable,4.6
9,Nike Air Max 90,Men's Shoes,159.99,Lace up and feel the legacy in this champion r...,41,4.9


In [19]:
mongodb+srv://sohailansari1969:Liahos15%40@nikeproductscraper.aph6abq.mongodb.net/

Reviews (1058)


Defaulting to user installation because normal site-packages is not writeable
Collecting pymongo
  Downloading pymongo-4.6.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (676 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m676.9/676.9 KB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting dnspython<3.0.0,>=1.16.0
  Downloading dnspython-2.6.1-py3-none-any.whl (307 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m307.7/307.7 KB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: dnspython, pymongo
Successfully installed dnspython-2.6.1 pymongo-4.6.3
Note: you may need to restart the kernel to use updated packages.


In [36]:

from pymongo import MongoClient

try:
    # Connect to MongoDB
    client = MongoClient(
        mongo_url)
    db = client['nike_product_scraper']
    collection = db['nike_product_details']

    # Convert DataFrame to dictionary
    data = df.to_dict(orient='records')
    print(data)

    # Insert data into MongoDB collection
    collection.insert_many(data)

    print("Data inserted successfully into MongoDB")

except Exception as e:
    print("Error:", e)

finally:
    # Close connection
    client.close()

[{'product_title': 'Air Jordan 1 Low', 'product_category': "Men's Shoes", 'product_price': '€129.99', 'product_description': "Inspired by the original that debuted in 1985, the Air Jordan 1 Low offers a clean, classic look that's familiar yet always fresh. With an iconic design that pairs perfectly with any 'fit, these kicks ensure you'll always be on point.\nColour Shown: White/Black/White/Royal Blue\nStyle: 553558-140", 'review_count': 'Reviews (1058)', 'product_rating': '4.8'}, {'product_title': 'Nike Dunk Low Retro', 'product_category': "Men's Shoe", 'product_price': '€119.99', 'product_description': "Created for the hardwood but taken to the streets, the Nike Dunk Low Retro returns with crisp overlays and original team colours. This basketball icon channels '80s vibes with premium leather in the upper that looks good and breaks in even better. Modern footwear technology helps bring the comfort into the 21st century.\nColour Shown: White/White/Black\nStyle: DD1391-100", 'review_cou