In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Column headers shared by every per-page DataFrame.
columns = [
    "Drive_Name",
    "Description",
    "Price",
    "Stock",
]

# Accumulator for the per-page DataFrames; they are concatenated into a single
# table once scraping has finished.
data_frames = list()

# URL of the first listing page, 96 items per page. The page number is carried
# by the `page` query parameter (it comes before `max_items`, not at the end).
# drives_data() builds its own per-page URLs rather than reading this value.
base_url = "https://www.driveswarehouse.com/drives?page=1&max_items=96"

def _text_or_na(container, tag, attrs):
    """Return the stripped text of the first matching descendant, or 'N/A'.

    'N/A' is returned only when no matching element exists; an element that
    is present but has no text yields an empty string.
    """
    node = container.find(tag, attrs=attrs)
    if node is None:
        return 'N/A'
    return node.get_text(strip=True)


def _parse_drive_rows(html):
    """Parse one listing page and return a list of [name, description, price, stock] rows."""
    soup = BeautifulSoup(html, 'html.parser')

    # A multi-word class string is matched against the exact value of the
    # class attribute, so this depends on the site keeping this class order.
    drives = soup.find_all('div', attrs={'class': 'type-1 BlockBox item-holder'})

    return [
        [
            _text_or_na(drive, 'div', {'class': 'name'}),
            _text_or_na(drive, 'div', {'class': 'description'}),
            _text_or_na(drive, 'span', {'itemprop': 'price'}),
            _text_or_na(drive, 'div', {'class': 'stock'}),
        ]
        for drive in drives
    ]


def drives_data(first_page=1, last_page=10, timeout=30):
    """Scrape the drive listing pages and collect one DataFrame per page.

    Each page in ``first_page..last_page`` (inclusive) is fetched, its HTTP
    status code is printed, and a DataFrame with the module-level ``columns``
    is appended to the module-level ``data_frames`` list.

    Args:
        first_page: Number of the first listing page to fetch.
        last_page: Number of the last listing page to fetch (inclusive).
        timeout: Seconds to wait for the server before giving up on a
            request; without it a stalled connection would block forever.

    Returns:
        None. Results are accumulated in ``data_frames``.

    Raises:
        requests.RequestException: Propagated if a request fails at the
            network level (connection error, timeout, ...).
    """
    for page in range(first_page, last_page + 1):
        url = f'https://www.driveswarehouse.com/drives?page={page}&max_items=96'

        response = requests.get(url, timeout=timeout)
        print(response.status_code)  # Lets the caller see which pages succeeded

        # An error response is not a product listing, so do not parse it.
        # An empty frame is still appended so that the list always holds one
        # entry per requested page and a later pd.concat never receives an
        # empty list.
        rows = _parse_drive_rows(response.text) if response.ok else []

        data_frames.append(pd.DataFrame(rows, columns=columns))

# Run the scraper; it fills the module-level `data_frames` list as a side effect.
drives_data()

# Stack the per-page frames into one table, renumbering rows from zero.
product_data = pd.concat(
    data_frames,
    ignore_index=True,
)

# Preview the top of the combined table.
preview = product_data.head()
print(preview)

# Write the combined table to disk as CSV, leaving out the index column.
product_data.to_csv(
    r'C:\Users\srich\Documents\Innomatics Course\Assignments\Assignment Tasks\Project 1\drives_warehouse_products.csv',
    index=False,
)


200
200
200
200
200
200
200
200
200
200
          Drive_Name                                        Description  \
0  ODE-3-120070-1F12                                                      
1  ODE-3-120023-1F1B                                                      
2  ODE-3-210058-104B  1.5 HP, 5.8 Amp, 110-115V AC INPUT, 230V AC Ou...   
3    EP66-0075T3I2U5  EP66-0075T3I2U5: Constant Torque, Sensorless V...   
4    EP66-0110T3I3U5  EP66-0110T3I3U5: Constant Torque, Sensorless V...   

      Price      Stock  
0    298.00   In stock  
1    475.20   In stock  
2    681.60  2-4 weeks  
3  1,265.49   In stock  
4  1,380.36   In stock  
