Commit c3e0965

Merge pull request #364 from AHNAF14924/main
Added Scripts by AHNAF14924
2 parents 4275206 + 455b462

File tree

3 files changed: +98 -0 lines

Binary file (9.37 KB) not shown.

AUTOMATION/Web_Scraper/README.md

# Introduction

This Python program is a web scraper that extracts data about graphics cards from a specific website. It uses the BeautifulSoup library to parse the HTML content of the page and the Requests library to fetch it.
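To make the parsing step concrete, here is a minimal, self-contained sketch that applies the same `find`/`class_` pattern the script uses to an inline HTML snippet; the markup below is illustrative, not the real site's.

```python
from bs4 import BeautifulSoup

# Illustrative HTML mimicking one product card; the real site's markup may differ.
html = """
<div class="product-layout">
  <div class="name"><a>MSI GeForce RTX 4060</a></div>
  <div class="price"><span>32,500</span></div>
</div>
"""

soup = BeautifulSoup(html, 'html.parser')
name = soup.find('div', class_='name').a.text    # text of the <a> inside div.name
price = soup.find('div', class_='price').span.text
print(name, price)
```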
## Requirements

- Python 3.x
- BeautifulSoup library (`beautifulsoup4`)
- Requests library (`requests`)
- Openpyxl library (`openpyxl`)

You can install the required libraries using pip:

```
pip install beautifulsoup4 requests openpyxl
```
## How to Use

1. Clone this repository or download the files.

2. Open a terminal or command prompt and navigate to the project directory.

3. Run the Python script `app.py`:

```
python app.py
```

4. The program will start scraping data from the website and display the brand, name, and price of each graphics card on the console.

5. Once the scraping is complete, the program will save the data to an Excel file named `Graphics Card.xlsx`.
## Configuration

You can modify the URL in the `scrape_graphics_cards_data()` function inside the `app.py` file to scrape data from a different website or adjust the query parameters as needed.
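For example, rather than hand-editing the query string, the URL can be rebuilt with the standard library. The parameter names (`sort`, `order`, `fq`, `limit`) are taken from the URL in `app.py`; whether the site accepts other values is an assumption.

```python
from urllib.parse import urlencode

# Rebuild the query string from app.py with a smaller page size (limit=50).
base = 'https://www.techlandbd.com/pc-components/graphics-card'
params = {'sort': 'p.price', 'order': 'ASC', 'fq': 1, 'limit': 50}
url = f'{base}?{urlencode(params)}'
print(url)
```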
## Output

The program will generate an Excel file `Graphics Card.xlsx` containing the scraped data. Each row in the Excel file represents a graphics card and includes the columns `Brand`, `Name`, and `Price`.
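As a quick sanity check of the output format, this sketch writes a workbook in the same `Brand`/`Name`/`Price` layout and reads it back with `openpyxl`; the sample row is made up.

```python
import openpyxl

# Write a workbook in the layout described above (sample row is made up).
wb = openpyxl.Workbook()
sheet = wb.active
sheet.title = "price"
sheet.append(['Brand', 'Name', 'Price'])
sheet.append(['MSI', 'GeForce RTX 4060', '32,500'])
wb.save('Graphics Card.xlsx')

# Read it back and inspect the rows as tuples.
rows = list(openpyxl.load_workbook('Graphics Card.xlsx')['price'].values)
print(rows[0])  # header row
print(rows[1])  # data row
```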
## Disclaimer

This web scraper is provided for educational and informational purposes only. Please be respectful of the website's terms of service and scraping policies. Always obtain proper authorization before scraping any website, and use the scraper responsibly and ethically.

AUTOMATION/Web_Scraper/app.py

```python
from bs4 import BeautifulSoup
import requests
import openpyxl


def extract_brand_name_and_title(name):
    # The first word is the brand; the rest is the title.
    # partition (unlike split) does not raise if the name has no space.
    brand, _, title = name.partition(' ')
    return brand, title


def scrape_graphics_cards_data():
    try:
        # Create a new Excel workbook and set up the worksheet
        excel = openpyxl.Workbook()
        sheet = excel.active
        sheet.title = "price"
        sheet.append(['Brand', 'Name', 'Price'])

        url = 'https://www.techlandbd.com/pc-components/graphics-card?sort=p.price&order=ASC&fq=1&limit=100'
        response = requests.get(url)
        response.raise_for_status()

        # Parse the HTML content
        soup = BeautifulSoup(response.text, 'html.parser')

        # Find all product cards on the webpage
        cards = soup.find('div', class_='main-products product-grid').find_all(
            'div', class_='product-layout has-extra-button')

        for card in cards:
            # Extract the product name
            name = card.find('div', class_='name').a.text

            # Split the name to get the brand and title
            brand, title = extract_brand_name_and_title(name)

            # Extract the product price
            price = card.find('div', class_='price').span.text

            # Print the product details and add them to the Excel sheet
            print(brand, title, price)
            sheet.append([brand, title, price])

        # Save the Excel file
        excel.save('Graphics Card.xlsx')

    except Exception as e:
        print("An error occurred:", e)


if __name__ == "__main__":
    # Call the main scraping function
    scrape_graphics_cards_data()
```
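The brand/title split can be exercised on its own. This standalone sketch uses `str.partition`, which, unlike `str.split`, returns an empty title instead of raising when a product name contains no space:

```python
def extract_brand_name_and_title(name):
    # First word is the brand; the remainder (possibly empty) is the title.
    brand, _, title = name.partition(' ')
    return brand, title

print(extract_brand_name_and_title('MSI GeForce RTX 4060'))
print(extract_brand_name_and_title('ZOTAC'))  # no space: empty title, no crash
```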
