In [7]:
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
import pandas as pd
import time
import os
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import plotly.express as px
import matplotlib.pyplot as plt
import requests
from bs4 import BeautifulSoup

In [None]:
url = 'https://www.christies.com/en/results?language=en&month=11&year=2025&component=e7d92272-7bcc-4dba-ae5b-28e4f3729ae8'

In [23]:
def  parse_art_auction_page(driver, auction_title, month, year):
    auction_data = []
    try:
        print(f"Парсим аукцион: {auction_title}")
        time.sleep(5)
        
        img_folder = f"auction_images/{year}_{month:02d}_{auction_title}"
        os.makedirs(img_folder, exist_ok=True)
        lots = []
        
        try:
            found_lots = driver.find_elements(By.CSS_SELECTOR, "chr-lot-tile")
            if found_lots:
                lots.extend(found_lots)
        except Exception as e:
            print(f"Ошибка при поиске лотов: {e}")
        for i, lot in enumerate(lots):
            try:
                lot_info = extract_lot_info(driver, lot, img_folder, i+1)
                if lot_info:
                    lot_info.update({
                        'auction_title': auction_title,
                        'auction_month': month,
                        'auction_year': year,
                        'auction_url': driver.current_url
                    })
                    auction_data.append(lot_info)
            except Exception as e:
                print(f"Ошибка парсинга лота {i+1}: {e}")
                continue
        if auction_data:
            save_to_csv(auction_data, f"christies_auctions_{year}_{month:02d}_{auction_title}.csv")
    except Exception as e:
        print(f"Ошибка при парсинге страницы аукциона: {e}")
    return auction_data

In [24]:
def extract_lot_info(driver, lot_element, img_folder, lot_number):
    lot_info = {}
    try:
        #название
        try:
            title_element = lot_element.find_element(By.CLASS_NAME, "chr-lot-tile__link")
            lot_info['title'] = title_element.text.strip()
            print(title_element.text.strip())
        except:
            lot_info['title'] = "Название не найдено"
        #estimate стоимость
        try:
            estimate_element = lot_element.find_element(By.CLASS_NAME, "chr-lot-tile__price-value")
            lot_info['estimate_price'] = estimate_element.text
        except:
            lot_info['estimate_price'] = "Не указана"
        #realized price
        try:
            realized_element = lot_element.find_element(By.CLASS_NAME, "chr-lot-tile__secondary-price-value")
            lot_info['realized_price'] = realized_element.text
        except:
            lot_info['realized_price'] = "Не указана"
        #изображение
        try:
            img_url = download_lot_image(driver, lot_element, img_folder, lot_number, lot_info['title'])
            lot_info['image_path'] = img_url
        except Exception as e:
            print(f"Ошибка загрузки изображения: {e}")
            lot_info['image_path'] = "Не загружено"
        #ссылка на лот
        try:
            link_element = lot_element.find_element(By.CSS_SELECTOR, "a[href]")
            lot_info['lot_url'] = link_element.get_attribute('href')
        except:
            lot_info['lot_url'] = driver.current_url
    except Exception as e:
        print(f"Ошибка извлечения информации о лоте: {e}")
        return None
    return lot_info
            

In [25]:
def download_lot_image(driver, lot_element, folder_path, lot_number, title):
    try:
        time.sleep(3)
        try:
            driver.execute_script("arguments[0].scrollIntoView({behavior: 'smooth', block: 'center'});", lot_element)
            time.sleep(3)
        except:
            pass
        img_element = lot_element.find_element(By.CSS_SELECTOR, "img")
        img_url = img_element.get_attribute('srcset')
        time.sleep(3)
        
        safe_title = "".join(c for c in title[:50] if c.isalnum() or c in (' ', '-', '_')).rstrip()
        safe_title = safe_title.replace(' ', '_')
        
        filename = f"lot_{lot_number:03d}_{safe_title}.jpg"
        filepath = os.path.join(folder_path, filename)
        
        response = requests.get(img_url, timeout=10)
        time.sleep(3)
        if response.status_code == 200:
            with open(filepath, 'wb') as f:
                f.write(response.content)
            return filepath
        return "Изображение не найдено"
    except Exception as e:
        return f"Ошибка загрузки: {e}"

In [26]:
def save_to_csv(auction_data, filename):
    """
    Сохраняет данные в CSV файл
    """
    try:
        df = pd.DataFrame(auction_data)
        
        # Создаем колонки в правильном порядке
        columns_order = [
            'auction_year', 'auction_month', 'auction_title', 
            'lot_number', 'title', 'secondary_title',
            'estimate_price', 'realized_price', 'lot_url',
            'image_path', 'auction_url'
        ]
        
        # Добавляем только существующие колонки
        existing_columns = [col for col in columns_order if col in df.columns]
        df = df[existing_columns]
        
        # Сохраняем в CSV
        df.to_csv(filename, index=False, encoding='utf-8')
        print(f"Данные сохранены в {filename}")
        
    except Exception as e:
        print(f"Ошибка сохранения CSV: {e}")

def append_to_main_csv(auction_data, main_filename="christies_art_auctions.csv"):
    """
    Добавляет данные в основной CSV файл
    """
    try:
        df = pd.DataFrame(auction_data)
        
        if os.path.exists(main_filename):
            existing_df = pd.read_csv(main_filename)
            df = pd.concat([existing_df, df], ignore_index=True)
        
        df.to_csv(main_filename, index=False, encoding='utf-8')
        print(f"Данные добавлены в {main_filename}")
        
    except Exception as e:
        print(f"Ошибка добавления в основной CSV: {e}")

In [27]:
def get_christies_data(start_year=2024, end_year=2024, delay=30):
    options = webdriver.ChromeOptions()
    options.add_argument("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36")
    options.add_argument(r"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome")  
    options.add_argument('--profile-directory=Default')
    options.add_argument('--disable-blink-features=AutomationControlled')
    options.add_argument('--disable-notifications')
    options.add_argument('--disable-popup-blocking')
    driver = webdriver.Chrome(options=options)
    
    driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
    
    wait = WebDriverWait(driver, 10)
    
    all_auction_data = []
    try:
        current_year = 2025
        current_month = 11
        
        for year in range(start_year, end_year - 1, -1):  
            print(f"Обрабатываем {year} год")
            
            if year == current_year:
                start_month = current_month
            else:
                start_month = 12
            
            if year == end_year:
                end_month = 1
            else:
                end_month = 1
            
            for month in range(start_month, end_month - 1, -1):
                url = f'https://www.christies.com/en/results?language=en&month={month}&year={year}&component=e7d92272-7bcc-4dba-ae5b-28e4f3729ae8'
                
                print(f"Переходим на: {url}")
                driver.get(url)
                
                time.sleep(delay)
                
                #try:
                    #rejection_button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "path")))
                    #rejection_button.click()
                    #time.sleep(10)
                #except Exception as e:
                    #print("pop-up закрыт")
                    
                #убираю попап с cookies
                #try:
                    #reject_button = wait.until(EC.element_to_be_clickable((By.CLASS_NAME, "ot-button-order-1")))
                    #reject_button.click()
                    #time.sleep(10)
                #except Exception as e:
                    #print("pop-up закрыт")
                    
                
                
                try:
                    products = wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, "chr-event-tile__title")))
                    art_auctions = []
                    for product in products:
                        try:
                            title_text = product.text.lower()
                            if 'art' in title_text:
                                auction_url = product.get_attribute('href')
                                print(product.text)
                                art_auctions.append({
                                    'title': product.text,
                                    'url': auction_url})
                                
                        except Exception as e:
                            print(e)
                            continue
                    for i, art_auction in enumerate(art_auctions):
                        try:
                            auction_title = art_auction['title']
                            auction_url = art_auction['url']
                            
                            print(f"Обрабатываем аукцион {i+1}: {auction_title}")
                            driver.get(auction_url)
                            time.sleep(delay)
                            
                            auction_data = parse_art_auction_page(driver, auction_title, month, year)
                            all_auction_data.extend(auction_data)
                            
                            driver.back()
                            time.sleep(delay)
                            
                            products = wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, "chr-event-tile__title")))
                            
                        except Exception as e:
                            print(f"Ошибка при обработке аукциона: {e}")
                            # Пытаемся вернуться назад при ошибке
                            try:
                                driver.back()
                                time.sleep(delay)
                                products = wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, "chr-event-tile__title")))
                            except:
                                continue
                        
                    continue
                except Exception as e:
                    print(f"  {month:02d}/{year}: Ошибка - {e}")
                    continue
        if all_auction_data:
            append_to_main_csv(all_auction_data)
            print(f"Всего собрано данных: {len(all_auction_data)} записей")
                
    except Exception as e:
        print(f"Ошибка: {e}")
    
    finally:
        driver.quit()
        print("Завершено")

In [28]:
get_christies_data(start_year=2010, end_year=2010, delay=30)


Обрабатываем 2010 год
Переходим на: https://www.christies.com/en/results?language=en&month=12&year=2010&component=e7d92272-7bcc-4dba-ae5b-28e4f3729ae8
20th Century British Art
Art d'Asie
Christie's Interiors - including Russian Silver, Ceramics and Works of Art
Important 20th Century Decorative Art & Design Including Property from The Collection of Max Palevsky
Old Masters & 19th Century Art
500 Years: Important Decorative Arts Europe
Art Contemporain
Art Impressionniste et Moderne
Art Africain et Océanien
Important Chinese Ceramics and Works of Art
Six Chefs-d'Oeuvre d'Art Africain de la Collection Kahane
Обрабатываем аукцион 1: 20th Century British Art
Парсим аукцион: 20th Century British Art
Michael Rothenstein, R.A. (1908-1993)
Patrick Heron (1920-1999)
John Piper, C.H. (1903-1992)
Alan Reynolds (b. 1926)
Sir Peter Blake, R.A. (b. 1932)
Walter James Steggles (1902-1997)
Vanessa Bell (1879-1961)
Duncan Grant (1885-1978)
Doreen Carter, 20th Century
Stephen Bone (1904-1958)
Walter Gre

A PAIR OF RUSSIAN SILVER-GILT AND SHADED CLOISONNE ENAMEL BEAKERS
A RUSSIAN SILVER-GILT SALT IN THE FORM OF A KOVSH
FOUR CASED RUSSIAN SILVER-GILT AND NIELLO SPOONS
Завершено


KeyboardInterrupt: 