In [9]:
import requests
import pandas as pd
from tqdm import tqdm
import time
import json

url = "https://api.cian.ru/search-offers/v2/search-offers-desktop/"

COOKIES = "frontend_session_id=abc123xyz; device_id=xyz098abc; session_region_id=1"

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                  "AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/118.0.0.0 Safari/537.36",
    "Accept": "application/json, text/plain, */*",
    "Accept-Language": "ru,en;q=0.9",
    "Content-Type": "application/json;charset=UTF-8",
    "Origin": "https://www.cian.ru",
    "Referer": "https://www.cian.ru/",
    "X-Requested-With": "XMLHttpRequest",
    "Cookie": COOKIES,
}

def get_page(page_num):
    payload = {
        "jsonQuery": {
            "_type": "commercialrent",
            "region": {"type": "terms", "value": [1]},
            "floor_types": {"type": "terms", "value": [1]},
            "commercial_type": {"type": "terms", "value": [4, 6]}
        }
    }

    r = requests.post(url, headers=headers, json=payload)
    if r.status_code == 403:
        raise Exception("403 Forbidden — проверь cookies")
    r.raise_for_status()
    return r.json()

def parse_offers(data):
    offers = []
    for offer in data.get("data", {}).get("offersSerialized", []):
        info = {
            "id": offer.get("id"),
            "title": offer.get("title"),
            "address": offer.get("geo", {}).get("address"),
            "latitude": offer.get("geo", {}).get("coordinates", {}).get("lat"),
            "longitude": offer.get("geo", {}).get("coordinates", {}).get("lng"),
            "price": offer.get("bargainTerms", {}).get("priceRur"),
            "area": offer.get("totalArea"),
            "floor": offer.get("floorNumber"),
            "floors_total": offer.get("building", {}).get("floorsCount"),
            "description": offer.get("description"),
            "link": f'https://www.cian.ru/rent/commercial/{offer.get("id")}/',
        }
        offers.append(info)
    return offers

all_offers = []

for page in tqdm(range(1, 280)):
    try:
        data = get_page(page)
        offers = parse_offers(data)
        all_offers.extend(offers)

        if page % 20 == 0:
            pd.DataFrame(all_offers).to_csv("cian_offices_temp.csv", index=False, encoding="utf-8-sig")
            print(f"Сохранено: {len(all_offers)} объявлений")

        time.sleep(1.5)
    except Exception as e:
        print(f"Ошибка на странице {page}: {e}")
        time.sleep(3)

df = pd.DataFrame(all_offers)
df.to_csv("cian_offices.csv", index=False, encoding="utf-8-sig")
print(f"Сохранено {len(df)} объявлений")

  7%|▋         | 19/258 [01:17<16:11,  4.07s/it]

Сохранено: 560 объявлений


 15%|█▌        | 39/258 [02:36<14:22,  3.94s/it]

Сохранено: 1120 объявлений


 23%|██▎       | 59/258 [03:58<13:43,  4.14s/it]

Сохранено: 1680 объявлений


 31%|███       | 79/258 [05:17<12:00,  4.03s/it]

Сохранено: 2240 объявлений


 38%|███▊      | 99/258 [06:38<10:35,  4.00s/it]

Сохранено: 2800 объявлений


 46%|████▌     | 119/258 [08:00<09:25,  4.07s/it]

Сохранено: 3360 объявлений


 54%|█████▍    | 139/258 [09:21<08:01,  4.04s/it]

Сохранено: 3920 объявлений


 62%|██████▏   | 159/258 [10:43<06:36,  4.01s/it]

Сохранено: 4480 объявлений


 69%|██████▉   | 179/258 [12:03<05:14,  3.98s/it]

Сохранено: 5040 объявлений


 77%|███████▋  | 199/258 [13:25<03:58,  4.03s/it]

Сохранено: 5600 объявлений


 85%|████████▍ | 219/258 [14:45<02:33,  3.92s/it]

Сохранено: 6160 объявлений


 93%|█████████▎| 239/258 [16:08<01:16,  4.04s/it]

Сохранено: 6720 объявлений


100%|██████████| 258/258 [17:25<00:00,  4.05s/it]


Сохранено 7224 объявлений


In [37]:
pd.set_option('display.max_colwidth', 100)   # Максимальная ширина 500 символов
pd.set_option('display.max_columns', 20)     # Максимум 20 колонок
pd.set_option('display.width', 100)         # Ширина вывода 1000 символов

In [16]:
cian = pd.read_csv('cian_offices.csv')

In [19]:
del cian['title']

In [21]:
cian = cian[cian['floor'] == 1]

In [22]:
del cian['floor']

In [38]:
cian.head(1)

Unnamed: 0,id,address,latitude,longitude,price,area,floors_total,description,link
0,306607073,"Москва, ЦАО, р-н Пресненский, м. Краснопресненская, Рочдельская улица, 15С13",55.755986,37.56512,6330850,1433.4,4,"Трехгорная мануфактура исторический квартал с развитой инфраструктурой, в котором богатое прошл...",https://www.cian.ru/rent/commercial/306607073/


In [39]:
import ast

def extract_titles(address_str):
        address_list = ast.literal_eval(address_str)
        return ', '.join([item['title'] for item in address_list if 'title' in item])

cian['address'] = cian['address'].apply(extract_titles)

SyntaxError: invalid syntax (<unknown>, line 1)

In [43]:
cian.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5898 entries, 0 to 7222
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   id            5898 non-null   int64  
 1   address       5898 non-null   object 
 2   latitude      5898 non-null   float64
 3   longitude     5898 non-null   float64
 4   price         5898 non-null   int64  
 5   area          5898 non-null   float64
 6   floors_total  5898 non-null   int64  
 7   description   5898 non-null   object 
 8   link          5898 non-null   object 
dtypes: float64(3), int64(3), object(3)
memory usage: 460.8+ KB
