In [2]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time

# Set up Chrome options
chrome_options = Options()
chrome_options.add_argument('--headless')  # Run in headless mode
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
chrome_options.add_argument('--disable-blink-features=AutomationControlled')
chrome_options.add_argument('--disable-extensions')
chrome_options.add_argument('--disable-gpu')
chrome_options.add_argument('--window-size=1920,1080')
chrome_options.add_argument('--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36')

# Initialize the WebDriver
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=chrome_options)

# URL to scrape
url = "https://nanoreview.net/en/soc-list/rating?page=2"

try:
    # Open the URL
    print("Loading page...")
    driver.get(url)
    
    # Wait for the table to be present
    print("Waiting for table to load...")
    wait = WebDriverWait(driver, 20)
    table = wait.until(EC.presence_of_element_located((By.CLASS_NAME, "table-list")))
    
    # Give some time for any lazy-loaded content
    time.sleep(5)
    
    # Get all rows
    rows = table.find_elements(By.TAG_NAME, "tr")
    print(f"Found {len(rows)} rows in the table")
    
    # Initialize lists to store data
    data = []
    
    # Process each row
    for row in rows[1:]:  # Skip header row
        try:
            cols = row.find_elements(By.TAG_NAME, "td")
            if len(cols) >= 8:  # Ensure we have enough columns
                row_data = {
                    'Rank': cols[0].text.split('\n')[0].strip(),
                    'Processor': cols[1].text.split('\n')[0].strip(),
                    'Rating': cols[2].text.strip(),
                    'AnTuTu10': cols[3].text.split('\n')[0].strip(),
                    'Geekbench6': cols[4].text.split('\n')[0].strip(),
                    'Cores': cols[5].text.strip(),
                    'Clock': cols[6].text.strip(),
                    'GPU': cols[7].text.strip()
                }
                data.append(row_data)
        except Exception as e:
            print(f"Error processing row: {e}")
            continue
    
    # Create DataFrame
    df = pd.DataFrame(data)
    
    # Clean the data
    df['Company'] = df['Processor'].apply(lambda x: 'MediaTek' if 'Dimensity' in x else ('Qualcomm' if 'Snapdragon' in x else 'Other'))
    df['Processor'] = df['Processor'].str.replace('MediaTek', '').str.replace('Qualcomm', '').str.strip()
    
    # Clean AnTuTu score (remove commas and convert to int)
    df['AnTuTu10'] = df['AnTuTu10'].str.replace(',', '').astype(int, errors='ignore')
    
    # Split Geekbench scores
    df[['Single-Core', 'Multi-Core']] = df['Geekbench6'].str.extract(r'(\d+)\s*/\s*(\d+)')
    df = df.drop('Geekbench6', axis=1)
    
    # Reorder columns
    df = df[['Rank', 'Company', 'Processor', 'Rating', 'AnTuTu10', 'Single-Core', 'Multi-Core', 'Cores', 'Clock', 'GPU']]
    
    # Save to CSV
    df.to_csv('processor_ratings2.csv', index=False)
    print("Data has been successfully scraped and saved to 'processor_ratings.csv'")
    
    # Display first few rows
    print("\nFirst 5 rows of the scraped data:")
    display(df.head())
    
except Exception as e:
    print(f"An error occurred: {str(e)}")
    
finally:
    # Close the browser
    driver.quit()
    print("\nBrowser closed.")

Loading page...
Waiting for table to load...
Found 9 rows in the table
Data has been successfully scraped and saved to 'processor_ratings.csv'

First 5 rows of the scraped data:


Unnamed: 0,Rank,Company,Processor,Rating,AnTuTu10,Single-Core,Multi-Core,Cores,Clock,GPU
0,201,Other,Exynos 7870,17 D,144731,172,492,8 (8),1600 MHz,Mali-T830 MP1
1,202,Other,Unisoc SC9863A,15 D,119450,164,725,8 (4+4),1600 MHz,PowerVR GE8322
2,203,Other,MT6739,15 D,59165,456,1499,4 (4),1500 MHz,PowerVR GE8100
3,204,Qualcomm,Snapdragon 430,14 D,134889,205,827,8 (8),1400 MHz,Adreno 505
4,205,Qualcomm,Snapdragon 801,13 D,125245,205,698,4 (4),2500 MHz,Adreno 330



Browser closed.
