In [1]:
# pip install undetected-chromedriver bs4 pandas
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import pandas as pd
import time, random

# ================== Ayarlar ==================
BASE = "https://tr.indeed.com/jobs?q=&l=Türkiye&start={start}"
MAX_PAGES     = 10          # daha fazlası verification riskini artırır
PAGE_WAIT     = 20          # sayfa yükleme bekleme (sn)
SLOW_DELAY    = (1.0, 2.0)  # kart okuma arası bekleme
HEADLESS      = False       # mümkünse False kalsın; headless daha kolay yakalanır
CSV_PATH      = "indeed_tr_tum_ilanlar.csv"
# =============================================

def looks_like_verification(driver):
    html = driver.page_source.lower()
    return ("additional verification required" in html or
            "cloudflare" in html or
            "captcha" in html)

def text_or_none(el, selector):
    try:
        return el.find_element(By.CSS_SELECTOR, selector).text.strip()
    except: 
        return None

def snippet_or_none(el, selector):
    try:
        txt = el.find_element(By.CSS_SELECTOR, selector).text
        return " ".join(txt.split())
    except:
        return None

# ---- Chrome başlat ----
opts = uc.ChromeOptions()
opts.add_argument("--start-maximized")
opts.add_argument("--lang=tr-TR")
opts.add_argument("--disable-blink-features=AutomationControlled")
# İstersen User-Agent de sabitle:
# opts.add_argument("--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
if HEADLESS:
    opts.add_argument("--headless=new")

driver = uc.Chrome(options=opts)
wait = WebDriverWait(driver, PAGE_WAIT)

all_rows, seen = [], set()

try:
    for page in range(MAX_PAGES):
        start = page * 10   # Indeed genelde 10'luk artırır
        url = BASE.format(start=start)
        print(f"\n>>> Sayfa {page+1}: {url}")
        driver.get(url)

        # Verification çıktıysa elle geç, sonra Enter'a bas
        if looks_like_verification(driver):
            print("⚠️ Verification/CAPTCHA görünüyor. Lütfen pencerede doğrula, sonra terminalde Enter'a bas.")
            input("Devam için Enter...")
        
        # Sonuçlar konteynerini bekle
        try:
            wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#mosaic-jobResults, .jobsearch-ResultsList")))
        except:
            # bazen geç geliyor
            time.sleep(3)

        # Kartları al
        cards = driver.find_elements(By.CSS_SELECTOR, "a.tapItem")  # Indeed'in yeni kartları anchor
        print(f"Bulunan kart sayısı: {len(cards)}")
        if not cards:
            # hiç kart yoksa muhtemelen blok/verification ya da bitti
            if looks_like_verification(driver):
                print("Tekrar verification geldi. Durduruyorum.")
                break

        for i, card in enumerate(cards, start=1):
            try:
                href = card.get_attribute("href")
                if not href or href in seen: 
                    continue

                title    = text_or_none(card, "h2.jobTitle span")
                company  = text_or_none(card, ".companyName")
                location = text_or_none(card, ".companyLocation")
                summary  = snippet_or_none(card, ".job-snippet")

                # bazı başlıklar 'new' gibi badge span'ları içerir; BeautifulSoup ile temizlik:
                if not title:
                    # fallback: soup üzerinden
                    soup = BeautifulSoup(card.get_attribute("outerHTML"), "html.parser")
                    h2 = soup.select_one("h2.jobTitle")
                    if h2: title = " ".join(h2.get_text(" ", strip=True).split())

                all_rows.append({
                    "Title": title,
                    "Company": company,
                    "Location": location,
                    "Summary": summary,
                    "Link": href,
                    "Page": page+1
                })
                seen.add(href)

                time.sleep(random.uniform(*SLOW_DELAY))
            except Exception as e:
                # tek kartta hata olsa da devam edelim
                # print("Kart hatası:", e)
                continue

        # sayfalar arası nazik bekleme
        time.sleep(random.uniform(1.0, 2.0))

    df = pd.DataFrame(all_rows)
    print(f"\nToplam {len(df)} ilan toplandı.")
    display(df.head(12))
    df.to_csv(CSV_PATH, index=False)
    print(f"CSV kaydedildi: {CSV_PATH}")

finally:
    driver.quit()



>>> Sayfa 1: https://tr.indeed.com/jobs?q=&l=Türkiye&start=0
⚠️ Verification/CAPTCHA görünüyor. Lütfen pencerede doğrula, sonra terminalde Enter'a bas.


Devam için Enter... 


Bulunan kart sayısı: 0
Tekrar verification geldi. Durduruyorum.

Toplam 0 ilan toplandı.


CSV kaydedildi: indeed_tr_tum_ilanlar.csv


In [5]:
# pip install undetected-chromedriver bs4 pandas
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import pandas as pd
import time, random, os

# ====== Ayarlar ======
BASE = "https://tr.indeed.com/jobs?q=&l=Türkiye&start={start}"
MAX_PAGES   = 5
PAGE_WAIT   = 25
SLOW_DELAY  = (1.2, 2.4)
HEADLESS    = False
CSV_PATH    = "indeed_tr_tum_ilanlar.csv"

# Kendi Chrome profilin (çerezler kalır): YOLU KENDİNE GÖRE DÜZENLE!
USER_DATA_DIR = os.path.expanduser("~/Library/Application Support/Google/Chrome/Default")  # Mac
# Windows örnek: r"C:\Users\Kullanici\AppData\Local\Google\Chrome\User Data\Default"
# Linux örnek: "~/.config/google-chrome/Default"
# ==============

def looks_like_verification(driver):
    h = driver.page_source.lower()
    return ("verification" in h) or ("cloudflare" in h) or ("captcha" in h)

def text_or_none(el, css):
    try:
        return el.find_element(By.CSS_SELECTOR, css).text.strip()
    except:
        return None

def humanize(driver):
    # ufak rastgele hareketler/scroll (çok abartma)
    for _ in range(random.randint(2,4)):
        driver.execute_script(f"window.scrollBy(0, {random.randint(300,900)});")
        time.sleep(random.uniform(0.5, 1.2))

# ---- Chrome başlat ----
opts = uc.ChromeOptions()
opts.add_argument("--disable-blink-features=AutomationControlled")
opts.add_argument("--lang=tr-TR")
opts.add_argument(f"--user-data-dir={os.path.dirname(USER_DATA_DIR)}")
opts.add_argument(f"--profile-directory={os.path.basename(USER_DATA_DIR)}")
# random pencere boyutu
opts.add_argument(f"--window-size={random.randint(1100,1400)},{random.randint(720,900)}")
if HEADLESS:
    opts.add_argument("--headless=new")

driver = uc.Chrome(options=opts)
wait = WebDriverWait(driver, PAGE_WAIT)

all_rows, seen = [], set()

try:
    # 1) Önce ana sayfaya git, gerekiyorsa manüel doğrula
    driver.get("https://tr.indeed.com/")
    time.sleep(2)
    humanize(driver)
    if looks_like_verification(driver):
        print("⚠️ İlk doğrulama görünüyor. Pencerede doğrula, sonra konsolda Enter'a bas.")
        input("Devam etmek için Enter...")

    for page in range(MAX_PAGES):
        start = page * 10
        url = BASE.format(start=start)
        print(f"\n>>> Sayfa {page+1}: {url}")
        driver.get(url)
        humanize(driver)

        if looks_like_verification(driver):
            print("⚠️ Doğrulama çıktı. Pencerede tamamla, sonra Enter.")
            input("Devam için Enter...")

        # sonuç listesi container'ı
        try:
            wait.until(EC.presence_of_element_located((
                By.CSS_SELECTOR, "#mosaic-jobResults, .jobsearch-ResultsList"
            )))
        except:
            time.sleep(3)

        # kartlar: yeni yapı anchor 'a.tapItem', bazen 'td.resultContent' içinde
        cards = driver.find_elements(By.CSS_SELECTOR, "a.tapItem")
        if not cards:
            # bir kez yenile
            driver.refresh()
            humanize(driver)
            cards = driver.find_elements(By.CSS_SELECTOR, "a.tapItem")

        print("Bulunan kart:", len(cards))
        if not cards:
            if looks_like_verification(driver):
                print("Tekrar verification geldi, durduruyorum.")
                break
            else:
                print("Kart bulunamadı, sayfa atlanıyor.")
                continue

        for c in cards:
            try:
                href = c.get_attribute("href")
                if not href or href in seen:
                    continue

                # başlık/company/location/snippet
                title = text_or_none(c, "h2.jobTitle") or text_or_none(c, "h2.jobTitle span")
                company = text_or_none(c, ".companyName")
                location = text_or_none(c, ".companyLocation")
                summary = text_or_none(c, ".job-snippet")

                # fallback temizlik
                if not title:
                    soup = BeautifulSoup(c.get_attribute("outerHTML"), "html.parser")
                    h2 = soup.select_one("h2.jobTitle")
                    if h2: title = " ".join(h2.get_text(" ", strip=True).split())

                all_rows.append({
                    "Title": title,
                    "Company": company,
                    "Location": location,
                    "Summary": summary,
                    "Link": href,
                    "Page": page+1
                })
                seen.add(href)

                time.sleep(random.uniform(*SLOW_DELAY))
            except Exception:
                continue

        time.sleep(random.uniform(1.0, 2.0))

    df = pd.DataFrame(all_rows)
    print(f"\nToplam {len(df)} ilan toplandı.")
    display(df.head(12))
    df.to_csv(CSV_PATH, index=False)
    print("CSV kaydedildi:", CSV_PATH)

finally:
    driver.quit()


SessionNotCreatedException: Message: session not created: cannot connect to chrome at 127.0.0.1:53674
from chrome not reachable; For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#sessionnotcreatedexception
Stacktrace:
0   undetected_chromedriver             0x0000000104e38e98 undetected_chromedriver + 5918360
1   undetected_chromedriver             0x0000000104e3042a undetected_chromedriver + 5882922
2   undetected_chromedriver             0x00000001048fcc63 undetected_chromedriver + 429155
3   undetected_chromedriver             0x00000001048e8832 undetected_chromedriver + 346162
4   undetected_chromedriver             0x000000010493cf29 undetected_chromedriver + 692009
5   undetected_chromedriver             0x000000010493240d undetected_chromedriver + 648205
6   undetected_chromedriver             0x0000000104983307 undetected_chromedriver + 979719
7   undetected_chromedriver             0x0000000104982996 undetected_chromedriver + 977302
8   undetected_chromedriver             0x0000000104974ce3 undetected_chromedriver + 920803
9   undetected_chromedriver             0x000000010494129b undetected_chromedriver + 709275
10  undetected_chromedriver             0x0000000104941f81 undetected_chromedriver + 712577
11  undetected_chromedriver             0x0000000104df5ba0 undetected_chromedriver + 5643168
12  undetected_chromedriver             0x0000000104df9a54 undetected_chromedriver + 5659220
13  undetected_chromedriver             0x0000000104dd1412 undetected_chromedriver + 5493778
14  undetected_chromedriver             0x0000000104dfa4ff undetected_chromedriver + 5661951
15  undetected_chromedriver             0x0000000104dc03b4 undetected_chromedriver + 5424052
16  undetected_chromedriver             0x0000000104e1d718 undetected_chromedriver + 5805848
17  undetected_chromedriver             0x0000000104e1d8e0 undetected_chromedriver + 5806304
18  undetected_chromedriver             0x0000000104e30001 undetected_chromedriver + 5881857
19  libsystem_pthread.dylib             0x00007ff811540df1 _pthread_start + 99
20  libsystem_pthread.dylib             0x00007ff81153c857 thread_start + 15


In [7]:
pip install -U undetected-chromedriver selenium bs4 pandas

Collecting bs4
  Downloading bs4-0.0.2-py2.py3-none-any.whl.metadata (411 bytes)
Collecting pandas
  Downloading pandas-2.3.1-cp312-cp312-macosx_11_0_arm64.whl.metadata (91 kB)
Downloading bs4-0.0.2-py2.py3-none-any.whl (1.2 kB)
Downloading pandas-2.3.1-cp312-cp312-macosx_11_0_arm64.whl (10.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.7/10.7 MB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m00:01[0m0:01[0m
[?25hInstalling collected packages: pandas, bs4
  Attempting uninstall: pandas
    Found existing installation: pandas 2.2.2
    Uninstalling pandas-2.2.2:
      Successfully uninstalled pandas-2.2.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
aext-panels 4.1.0 requires anaconda-cloud-auth>=0.7.1, which is not installed.
aext-panels-server 4.1.0 requires anaconda-cloud-auth>=0.7.1, which is not installed.[0m[31m
[0mSuccessf

In [3]:
# === INDEED (TR) SCRAPER — UC + AYRI PROFIL + PAGINATION ===
import os, time, random
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import pandas as pd

# ------------------ AYARLAR ------------------
BASE          = "https://tr.indeed.com/jobs?q=&l=Türkiye&start={start}"
MAX_PAGES     = 8          # yavaş yavaş artır (CAPTCHA riskini unutma)
PAGE_WAIT     = 25         # sayfa yükleme bekleme süresi (sn)
SLOW_DELAY    = (1.2, 2.2) # kartlar arası bekleme
HEADLESS      = False      # mümkünse False kalsın
CSV_PATH      = "indeed_tr_tum_ilanlar.csv"

# Ayrı profil klasörü (Default yerine) — yolu kendine göre değiştirmen gerekmiyor; bu klasörü biz oluşturuyoruz
PROFILE_DIR   = os.path.expanduser("~/uc_profiles/indeed_profile")
os.makedirs(PROFILE_DIR, exist_ok=True)
# ----------------------------------------------


def looks_like_verification(driver):
    html = driver.page_source.lower()
    return ("verification" in html) or ("cloudflare" in html) or ("captcha" in html)


def humanize(driver, steps=(2,4)):
    """Ufak kaydırmalar yaparak daha insansı davran."""
    for _ in range(random.randint(*steps)):
        driver.execute_script(f"window.scrollBy(0, {random.randint(350,900)});")
        time.sleep(random.uniform(0.5, 1.1))


def text_or_none(el, css):
    try:
        return el.find_element(By.CSS_SELECTOR, css).text.strip()
    except:
        return None


def start_driver(headless=False):
    opts = uc.ChromeOptions()
    opts.add_argument("--lang=tr-TR")
    opts.add_argument("--disable-blink-features=AutomationControlled")
    # Ayrı kullanıcı verisi klasörü (Default profille çakışmasın)
    opts.add_argument(f"--user-data-dir={PROFILE_DIR}")
    # Hafif “insansı” pencere
    opts.add_argument(f"--window-size={random.randint(1100,1400)},{random.randint(760,920)}")
    # İstersen sabit UA:
    # opts.add_argument("--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
    if headless:
        opts.add_argument("--headless=new")
    # Chrome major sürümünde uyumsuzluk yaşarsan:
    # return uc.Chrome(options=opts, version_main=120)
    return uc.Chrome(options=opts)


def safe_quit(driver):
    try:
        time.sleep(1.3)
        driver.quit()
    except Exception:
        pass


# ------------------ ANA AKIŞ ------------------
driver = start_driver(headless=HEADLESS)
wait = WebDriverWait(driver, PAGE_WAIT)
all_rows, seen = [], set()

try:
    # İlk giriş: bazen direkt verification gelebilir
    driver.get("https://tr.indeed.com/")
    time.sleep(2)
    humanize(driver)
    if looks_like_verification(driver):
        print("⚠️ İlk doğrulama görünüyor. Pencerede doğrula (CAPTCHA vs.), sonra burada Enter'a bas.")
        input("Devam için Enter...")

    for page in range(MAX_PAGES):
        start = page * 10  # Indeed genelde 10'luk artıyor
        url = BASE.format(start=start)
        print(f"\n>>> Sayfa {page+1}/{MAX_PAGES}: {url}")
        driver.get(url)
        humanize(driver)

        if looks_like_verification(driver):
            print("⚠️ Doğrulama çıktı. Pencerede tamamla, sonra Enter'a bas.")
            input("Devam için Enter...")

        # Sonuç listesi container'ını bekle
        try:
            wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#mosaic-jobResults, .jobsearch-ResultsList")))
        except:
            time.sleep(3)

        # Kartlar (Indeed yeni yapıda anchor 'a.tapItem')
        cards = driver.find_elements(By.CSS_SELECTOR, "a.tapItem")
        if not cards:
            # bir kez refresh dene
            driver.refresh()
            humanize(driver)
            cards = driver.find_elements(By.CSS_SELECTOR, "a.tapItem")

        print(f"Bulunan kart: {len(cards)}")
        if not cards:
            if looks_like_verification(driver):
                print("Tekrar verification geldi, döngüyü bitiriyorum.")
                break
            else:
                print("Kart bulunamadı, sayfa atlandı.")
                continue

        for i, c in enumerate(cards, start=1):
            try:
                href = c.get_attribute("href")
                if not href or href in seen:
                    continue

                title    = text_or_none(c, "h2.jobTitle span") or text_or_none(c, "h2.jobTitle")
                company  = text_or_none(c, ".companyName")
                location = text_or_none(c, ".companyLocation")
                summary  = text_or_none(c, ".job-snippet")

                # fallback: soup ile başlık temizliği
                if not title:
                    soup = BeautifulSoup(c.get_attribute("outerHTML"), "html.parser")
                    h2 = soup.select_one("h2.jobTitle")
                    if h2:
                        title = " ".join(h2.get_text(" ", strip=True).split())

                all_rows.append({
                    "Title": title,
                    "Company": company,
                    "Location": location,
                    "Summary": summary,
                    "Link": href,
                    "Page": page + 1
                })
                seen.add(href)

                time.sleep(random.uniform(*SLOW_DELAY))
            except Exception:
                # tek kart patlarsa geç
                continue

        # sayfalar arası nazik bekleme
        time.sleep(random.uniform(1.0, 2.0))

    df = pd.DataFrame(all_rows)
    print(f"\nToplam {len(df)} ilan toplandı.")
    display(df.head(12))
    df.to_csv(CSV_PATH, index=False)
    print("CSV kaydedildi:", CSV_PATH)

finally:
    safe_quit(driver)


⚠️ İlk doğrulama görünüyor. Pencerede doğrula (CAPTCHA vs.), sonra burada Enter'a bas.


Devam için Enter... 



>>> Sayfa 1/8: https://tr.indeed.com/jobs?q=&l=Türkiye&start=0
⚠️ Doğrulama çıktı. Pencerede tamamla, sonra Enter'a bas.


Devam için Enter... 


Bulunan kart: 0
Tekrar verification geldi, döngüyü bitiriyorum.

Toplam 0 ilan toplandı.


CSV kaydedildi: indeed_tr_tum_ilanlar.csv


In [5]:
# --- Indeed TR Scraper (Lazy Load fix + Soup fallback) ---
import time, random, os
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
import pandas as pd

BASE_URL = "https://tr.indeed.com/jobs?q=&l=Türkiye&start={start}"
MAX_PAGES = 5
SLOW_DELAY = (1.0, 1.8)
CSV_PATH = "indeed_tr_tum_ilanlar.csv"

PROFILE_DIR = os.path.expanduser("~/uc_profiles/indeed_profile")
os.makedirs(PROFILE_DIR, exist_ok=True)

def start_driver():
    opts = uc.ChromeOptions()
    opts.add_argument("--lang=tr-TR")
    opts.add_argument(f"--user-data-dir={PROFILE_DIR}")
    opts.add_argument("--disable-blink-features=AutomationControlled")
    opts.add_argument("--window-size=1280,900")
    return uc.Chrome(options=opts)

def scroll_all(driver, pause=1):
    """Sayfanın sonuna kadar kaydırıp tüm kartları yükletir."""
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(pause)
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height

driver = start_driver()
all_rows, seen = [], set()

try:
    for page in range(MAX_PAGES):
        start = page * 10
        url = BASE_URL.format(start=start)
        print(f"\n>>> Sayfa {page+1}: {url}")
        driver.get(url)
        time.sleep(2)

        # tüm ilanları yüklet
        scroll_all(driver)

        soup = BeautifulSoup(driver.page_source, "html.parser")
        cards = soup.select("a.tapItem")
        print(f"Bulunan kart sayısı: {len(cards)}")

        for c in cards:
            href = c.get("href")
            if href and not href.startswith("http"):
                href = "https://tr.indeed.com" + href
            if not href or href in seen:
                continue

            title = c.select_one("h2.jobTitle span")
            company = c.select_one(".companyName")
            location = c.select_one(".companyLocation")
            summary = c.select_one(".job-snippet")

            all_rows.append({
                "Title": title.get_text(strip=True) if title else None,
                "Company": company.get_text(strip=True) if company else None,
                "Location": location.get_text(strip=True) if location else None,
                "Summary": summary.get_text(" ", strip=True) if summary else None,
                "Link": href,
                "Page": page + 1
            })
            seen.add(href)
            time.sleep(random.uniform(*SLOW_DELAY))

    df = pd.DataFrame(all_rows)
    print(f"\nToplam {len(df)} ilan toplandı.")
    display(df.head(10))
    df.to_csv(CSV_PATH, index=False)
    print("CSV kaydedildi:", CSV_PATH)

finally:
    driver.quit()



>>> Sayfa 1: https://tr.indeed.com/jobs?q=&l=Türkiye&start=0
Bulunan kart sayısı: 0

>>> Sayfa 2: https://tr.indeed.com/jobs?q=&l=Türkiye&start=10
Bulunan kart sayısı: 0

>>> Sayfa 3: https://tr.indeed.com/jobs?q=&l=Türkiye&start=20
Bulunan kart sayısı: 0

>>> Sayfa 4: https://tr.indeed.com/jobs?q=&l=Türkiye&start=30
Bulunan kart sayısı: 0

>>> Sayfa 5: https://tr.indeed.com/jobs?q=&l=Türkiye&start=40
Bulunan kart sayısı: 0

Toplam 0 ilan toplandı.


CSV kaydedildi: indeed_tr_tum_ilanlar.csv


In [7]:
# --- Indeed TR: overlay kapatma + scroll + soup ---
import os, time, random
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
import pandas as pd

BASE_URL   = "https://tr.indeed.com/jobs?q=&l=Türkiye&start={start}"
MAX_PAGES  = 5
CSV_PATH   = "indeed_tr_tum_ilanlar.csv"

PROFILE_DIR = os.path.expanduser("~/uc_profiles/indeed_profile")
os.makedirs(PROFILE_DIR, exist_ok=True)

def start_driver():
    opts = uc.ChromeOptions()
    opts.add_argument("--lang=tr-TR")
    opts.add_argument(f"--user-data-dir={PROFILE_DIR}")
    opts.add_argument("--disable-blink-features=AutomationControlled")
    opts.add_argument("--window-size=1280,900")
    return uc.Chrome(options=opts)

def dismiss_overlays(driver):
    """Çerez bannerı ve sign-in modalı kapat."""
    time.sleep(1.0)
    # 1) Çerez banner
    for sel in [
        "#onetrust-accept-btn-handler",
        "button#onetrust-accept-btn-handler",
        "button[aria-label*='kabul' i]",
        "button:contains('Kabul')"
    ]:
        try:
            el = driver.find_element(By.CSS_SELECTOR, sel)
            driver.execute_script("arguments[0].click();", el)
            time.sleep(0.6)
            break
        except: pass

    # 2) Sign-in / modal kapatma
    for sel in [
        "button[aria-label='Kapat']",
        "button[aria-label='Close']",
        ".icl-CloseButton",                          # eski close
        ".mosaic-modal [data-testid='modal-x']",
        ".mosaic-modal button[aria-label*='close' i]",
        ".mosaic-modal button[aria-label*='kapat' i]"
    ]:
        try:
            btns = driver.find_elements(By.CSS_SELECTOR, sel)
            if btns:
                driver.execute_script("arguments[0].click();", btns[0])
                time.sleep(0.6)
                break
        except: pass

    # 3) ESC gönder
    try:
        driver.switch_to.active_element.send_keys(Keys.ESCAPE)
        time.sleep(0.4)
    except: pass

    # 4) Son çare: role=dialog’ları kaldır
    try:
        driver.execute_script("""
            document.querySelectorAll('[role="dialog"], .mosaic-modal').forEach(x=>x.remove());
            document.querySelectorAll('html, body').forEach(el=>{el.style.overflow='auto'});
        """)
        time.sleep(0.3)
    except: pass

def scroll_all(driver, pause=0.9, max_steps=20):
    last = 0
    for _ in range(max_steps):
        driver.execute_script("window.scrollBy(0, 1200);")
        time.sleep(pause)
        now = driver.execute_script("return document.documentElement.scrollTop || document.body.scrollTop;")
        if abs(now - last) < 50:
            break
        last = now

driver = start_driver()
rows, seen = [], set()

try:
    # ana sayfada da overlay gelebilir
    driver.get("https://tr.indeed.com/")
    dismiss_overlays(driver)

    for page in range(MAX_PAGES):
        url = BASE_URL.format(start=page*10)
        print(f"\n>>> Sayfa {page+1}: {url}")
        driver.get(url)
        time.sleep(1.5)
        dismiss_overlays(driver)          # kritik: önce kapat
        scroll_all(driver, pause=0.8, max_steps=25)

        soup = BeautifulSoup(driver.page_source, "html.parser")
        cards = soup.select("a.tapItem")
        print("Bulunan kart:", len(cards))

        for c in cards:
            href = c.get("href")
            if href and not href.startswith("http"):
                href = "https://tr.indeed.com" + href
            if not href or href in seen:
                continue

            title    = c.select_one("h2.jobTitle span") or c.select_one("h2.jobTitle")
            company  = c.select_one(".companyName")
            location = c.select_one(".companyLocation")
            summary  = c.select_one(".job-snippet")

            rows.append({
                "Title": title.get_text(" ", strip=True) if title else None,
                "Company": company.get_text(" ", strip=True) if company else None,
                "Location": location.get_text(" ", strip=True) if location else None,
                "Summary": summary.get_text(" ", strip=True) if summary else None,
                "Link": href,
                "Page": page+1
            })
            seen.add(href)
            time.sleep(random.uniform(0.8, 1.6))

    df = pd.DataFrame(rows)
    print(f"\nToplam {len(df)} ilan toplandı.")
    display(df.head(12))
    df.to_csv(CSV_PATH, index=False)
    print("CSV kaydedildi:", CSV_PATH)
finally:
    try:
        driver.quit()
    except: pass



>>> Sayfa 1: https://tr.indeed.com/jobs?q=&l=Türkiye&start=0
Bulunan kart: 0

>>> Sayfa 2: https://tr.indeed.com/jobs?q=&l=Türkiye&start=10
Bulunan kart: 0

>>> Sayfa 3: https://tr.indeed.com/jobs?q=&l=Türkiye&start=20
Bulunan kart: 0

>>> Sayfa 4: https://tr.indeed.com/jobs?q=&l=Türkiye&start=30
Bulunan kart: 0

>>> Sayfa 5: https://tr.indeed.com/jobs?q=&l=Türkiye&start=40
Bulunan kart: 0

Toplam 0 ilan toplandı.


CSV kaydedildi: indeed_tr_tum_ilanlar.csv


In [9]:
# Indeed TR — overlay kapat + scroll + çoklu seçici + debug dump
import os, time, random
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
import pandas as pd

BASE_URL   = "https://tr.indeed.com/jobs?q=&l=Türkiye&start={start}"
MAX_PAGES  = 5
CSV_PATH   = "indeed_tr_tum_ilanlar.csv"

PROFILE_DIR = os.path.expanduser("~/uc_profiles/indeed_profile")
os.makedirs(PROFILE_DIR, exist_ok=True)

def start_driver():
    opts = uc.ChromeOptions()
    opts.add_argument("--lang=tr-TR")
    opts.add_argument("--disable-blink-features=AutomationControlled")
    opts.add_argument(f"--user-data-dir={PROFILE_DIR}")
    opts.add_argument("--window-size=1280,900")
    return uc.Chrome(options=opts)

def dismiss_overlays(driver):
    time.sleep(1.0)
    # çerez
    for sel in [
        "#onetrust-accept-btn-handler",
        "button#onetrust-accept-btn-handler",
        "button[aria-label*='kabul' i]",
    ]:
        try:
            el = driver.find_element(By.CSS_SELECTOR, sel)
            driver.execute_script("arguments[0].click();", el)
            time.sleep(0.4)
            break
        except: pass
    # modallar
    for sel in [
        "button[aria-label='Kapat']",
        "button[aria-label='Close']",
        ".icl-CloseButton",
        ".mosaic-modal [data-testid='modal-x']",
        ".mosaic-modal button[aria-label*='close' i]",
        ".mosaic-modal button[aria-label*='kapat' i]",
    ]:
        try:
            btns = driver.find_elements(By.CSS_SELECTOR, sel)
            if btns:
                driver.execute_script("arguments[0].click();", btns[0])
                time.sleep(0.4)
                break
        except: pass
    # ESC
    try:
        driver.switch_to.active_element.send_keys(Keys.ESCAPE)
        time.sleep(0.2)
    except: pass
    # dialogları zorla kaldır
    try:
        driver.execute_script("""
            document.querySelectorAll('[role="dialog"], .mosaic-modal').forEach(x=>x.remove());
            document.querySelectorAll('html, body').forEach(el=>{el.style.overflow='auto'});
        """)
    except: pass

def scroll_all(driver, pause=0.8, max_steps=30):
    last = 0
    for _ in range(max_steps):
        driver.execute_script("window.scrollBy(0, 1400);")
        time.sleep(pause)
        now = driver.execute_script("return document.documentElement.scrollTop || document.body.scrollTop;")
        if abs(now - last) < 50:
            break
        last = now

def pick_cards_from_html(html, page_idx):
    soup = BeautifulSoup(html, "html.parser")
    selector_candidates = [
        "a.tapItem",                 # yeni yapı (çoğu sayfa)
        "h2.jobTitle a",             # alternatif
        "a.jcs-JobTitle",            # bazı sayfalarda bu
        ".resultContent a.jcs-JobTitle",
    ]
    best = []
    counts = []
    for sel in selector_candidates:
        found = soup.select(sel)
        counts.append((sel, len(found)))
        if len(found) > len(best):
            best = found
    print("Seçici sayıları:", ", ".join(f"{s}:{n}" for s,n in counts))

    # Hiç yoksa debug için html dump et
    if not best:
        dump_path = f"indeed_debug_p{page_idx}.html"
        with open(dump_path, "w", encoding="utf-8") as f:
            f.write(html)
        print(f"⚠️ Kart bulunamadı. HTML dump: {dump_path}")
    return best

driver = start_driver()
rows, seen = [], set()

try:
    driver.get("https://tr.indeed.com/")
    dismiss_overlays(driver)

    for page in range(MAX_PAGES):
        url = BASE_URL.format(start=page*10)
        print(f"\n>>> Sayfa {page+1}: {url}")
        driver.get(url)
        time.sleep(1.5)
        dismiss_overlays(driver)
        scroll_all(driver, pause=0.8, max_steps=30)

        html = driver.page_source
        cards = pick_cards_from_html(html, page+1)
        print("Bulunan kart:", len(cards))

        for c in cards:
            href = c.get("href")
            if href and not href.startswith("http"):
                href = "https://tr.indeed.com" + href
            if not href or href in seen:
                continue

            title    = c.select_one("h2.jobTitle span") or c.select_one("h2.jobTitle") or c
            company  = c.select_one(".companyName")
            location = c.select_one(".companyLocation")
            summary  = c.select_one(".job-snippet")

            rows.append({
                "Title": title.get_text(" ", strip=True) if title else None,
                "Company": company.get_text(" ", strip=True) if company else None,
                "Location": location.get_text(" ", strip=True) if location else None,
                "Summary": summary.get_text(" ", strip=True) if summary else None,
                "Link": href,
                "Page": page+1
            })
            seen.add(href)
            time.sleep(random.uniform(0.9, 1.6))

    df = pd.DataFrame(rows)
    print(f"\nToplam {len(df)} ilan toplandı.")
    display(df.head(12))
    df.to_csv(CSV_PATH, index=False)
    print("CSV kaydedildi:", CSV_PATH)
finally:
    try: driver.quit()
    except: pass



>>> Sayfa 1: https://tr.indeed.com/jobs?q=&l=Türkiye&start=0
Seçici sayıları: a.tapItem:0, h2.jobTitle a:15, a.jcs-JobTitle:15, .resultContent a.jcs-JobTitle:15
Bulunan kart: 15

>>> Sayfa 2: https://tr.indeed.com/jobs?q=&l=Türkiye&start=10
Seçici sayıları: a.tapItem:0, h2.jobTitle a:15, a.jcs-JobTitle:15, .resultContent a.jcs-JobTitle:15
Bulunan kart: 15

>>> Sayfa 3: https://tr.indeed.com/jobs?q=&l=Türkiye&start=20
Seçici sayıları: a.tapItem:0, h2.jobTitle a:15, a.jcs-JobTitle:15, .resultContent a.jcs-JobTitle:15
Bulunan kart: 15

>>> Sayfa 4: https://tr.indeed.com/jobs?q=&l=Türkiye&start=30
Seçici sayıları: a.tapItem:0, h2.jobTitle a:15, a.jcs-JobTitle:15, .resultContent a.jcs-JobTitle:15
Bulunan kart: 15

>>> Sayfa 5: https://tr.indeed.com/jobs?q=&l=Türkiye&start=40
Seçici sayıları: a.tapItem:0, h2.jobTitle a:15, a.jcs-JobTitle:15, .resultContent a.jcs-JobTitle:15
Bulunan kart: 15

Toplam 75 ilan toplandı.


Unnamed: 0,Title,Company,Location,Summary,Link,Page
0,Customer Support Travel (m/w/d) 300€ Startprämie!,,,,https://tr.indeed.com/pagead/clk?mo=r&ad=-6NYl...,1
1,Call Center Agent deutsch (m/w/d) 300€ Startpr...,,,,https://tr.indeed.com/pagead/clk?mo=r&ad=-6NYl...,1
2,"Managing Consultant, Technical Program Managem...",,,,https://tr.indeed.com/pagead/clk?mo=r&ad=-6NYl...,1
3,Telefonischer Kundenberater (m/w/d) Outbound B...,,,,https://tr.indeed.com/pagead/clk?mo=r&ad=-6NYl...,1
4,Üretim Yöneticisi (İnegöl),,,,https://tr.indeed.com/pagead/clk?mo=r&ad=-6NYl...,1
5,Telefonischer Kundenberater (m/w/d) Inbound Sales,,,,https://tr.indeed.com/pagead/clk?mo=r&ad=-6NYl...,1
6,Satış Temsilcisi (Tire/İzmir),,,,https://tr.indeed.com/pagead/clk?mo=r&ad=-6NYl...,1
7,"Consultant, Advisors & Consulting Services, St...",,,,https://tr.indeed.com/pagead/clk?mo=r&ad=-6NYl...,1
8,Language Data Quality Reviewer for Uzbek (Free...,,,,https://tr.indeed.com/pagead/clk?mo=r&ad=-6NYl...,1
9,Vasıflı - Vasıfsız Üretim Personeli,,,,https://tr.indeed.com/rc/clk?jk=e4ed7769ec465a...,1


CSV kaydedildi: indeed_tr_tum_ilanlar.csv


In [13]:
# -*- coding: utf-8 -*-
# Indeed (TR) — UC + overlay kapatma + scroll + çoklu seçici + Company/Location doldurma

import os, time, random
import pandas as pd
from bs4 import BeautifulSoup

import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

# ----------------- AYARLAR -----------------
BASE_URL   = "https://tr.indeed.com/jobs?q=&l=Türkiye&start={start}"
MAX_PAGES  = 5                     # yavaş artır (CAPTCHA riskine dikkat)
CSV_PATH   = "indeed_tr_tum_ilanlar.csv"
PROFILE_DIR = os.path.expanduser("~/uc_profiles/indeed_profile")
os.makedirs(PROFILE_DIR, exist_ok=True)
# -------------------------------------------

def start_driver():
    opts = uc.ChromeOptions()
    opts.add_argument("--lang=tr-TR")
    opts.add_argument("--disable-blink-features=AutomationControlled")
    opts.add_argument(f"--user-data-dir={PROFILE_DIR}")
    opts.add_argument("--window-size=1280,900")
    return uc.Chrome(options=opts)

def dismiss_overlays(driver):
    """Çerez ve sign-in modallarını kapatmaya çalış."""
    time.sleep(1.0)
    # çerez
    for sel in [
        "#onetrust-accept-btn-handler",
        "button#onetrust-accept-btn-handler",
        "button[aria-label*='kabul' i]",
        "button:contains('Kabul')",
    ]:
        try:
            el = driver.find_element(By.CSS_SELECTOR, sel)
            driver.execute_script("arguments[0].click();", el)
            time.sleep(0.4)
            break
        except: pass
    # modal kapat
    for sel in [
        "button[aria-label='Kapat']",
        "button[aria-label='Close']",
        ".icl-CloseButton",
        ".mosaic-modal [data-testid='modal-x']",
        ".mosaic-modal button[aria-label*='close' i]",
        ".mosaic-modal button[aria-label*='kapat' i]",
    ]:
        try:
            btns = driver.find_elements(By.CSS_SELECTOR, sel)
            if btns:
                driver.execute_script("arguments[0].click();", btns[0])
                time.sleep(0.4)
                break
        except: pass
    # ESC
    try:
        driver.switch_to.active_element.send_keys(Keys.ESCAPE)
        time.sleep(0.2)
    except: pass
    # role=dialog’ları son çare kaldır
    try:
        driver.execute_script("""
            document.querySelectorAll('[role="dialog"], .mosaic-modal').forEach(x=>x.remove());
            document.querySelectorAll('html, body').forEach(el=>{el.style.overflow='auto'});
        """)
    except: pass

def scroll_all(driver, pause=0.8, max_steps=30):
    """Sayfayı aşağı kaydırarak lazy-load içeriği getir."""
    last = 0
    for _ in range(max_steps):
        driver.execute_script("window.scrollBy(0, 1400);")
        time.sleep(pause)
        now = driver.execute_script("return document.documentElement.scrollTop || document.body.scrollTop;")
        if abs(now - last) < 50:
            break
        last = now

def pick_cards_from_html(html, page_idx):
    """Farklı şablonlar için alternatif kart seçicilerini dene."""
    soup = BeautifulSoup(html, "html.parser")
    selector_candidates = [
        "a.tapItem",                 # yaygın yeni şablon
        "h2.jobTitle a",             # alternatif
        "a.jcs-JobTitle",            # diğer şablon
        ".resultContent a.jcs-JobTitle",
    ]
    best = []
    counts = []
    for sel in selector_candidates:
        found = soup.select(sel)
        counts.append((sel, len(found)))
        if len(found) > len(best):
            best = found
    print("Seçici sayıları:", ", ".join(f"{s}:{n}" for s,n in counts))
    if not best:
        # debug için dump bırak
        dump_path = f"indeed_debug_p{page_idx}.html"
        with open(dump_path, "w", encoding="utf-8") as f:
            f.write(html)
        print(f"⚠️ Kart bulunamadı. HTML dump: {dump_path}")
    return best

def get_text(el):
    return " ".join(el.get_text(" ", strip=True).split()) if el else None

def extract_company_location_summary(anchor_tag):
    """
    Company/Location/Summary bazen anchor'ın kardeş/ebeveyn bloklarında.
    Bu yüzden yukarı doğru 5 seviye kadar tırmanıp çoklu seçici deneriz.
    """
    company = location = summary = None

    # 1) Önce anchor içinde bir şey var mı?
    company  = anchor_tag.select_one(".companyName, [data-testid='company-name'], .company, .companyTitle")
    location = anchor_tag.select_one(".companyLocation, [data-testid='text-location'], .company_location, .company_location span")
    summary  = anchor_tag.select_one(".job-snippet, .job-snippet-list")

    # 2) Ebeveynlere tırman ve ara
    parent = anchor_tag.parent
    hops = 0
    while hops < 5 and not (company and location and summary):
        if parent:
            if not company:
                company = parent.select_one(".companyName, [data-testid='company-name'], .company, .companyTitle, div.company_location > span")
            if not location:
                location = parent.select_one(".companyLocation, [data-testid='text-location'], .company_location, .company_location span")
            if not summary:
                summary = parent.select_one(".job-snippet, .job-snippet-list")
            parent = parent.parent
            hops += 1
        else:
            break

    # 3) Özel kapsayıcılar: resultContent / slider_container / jobCard_mainContent
    if not (company or location or summary):
        rc = anchor_tag.find_parent(lambda t: t and t.has_attr("class") and any(
            cls in t.get("class", [])
            for cls in ["resultContent", "slider_container", "jobCard_mainContent"]
        ))
        if rc:
            company  = rc.select_one(".companyName, [data-testid='company-name'], .company, .companyTitle")
            location = rc.select_one(".companyLocation, [data-testid='text-location'], .company_location, .company_location span")
            summary  = rc.select_one(".job-snippet, .job-snippet-list")

    return get_text(company), get_text(location), get_text(summary)

# --------------- ÇALIŞTIR ----------------
driver = start_driver()
rows, seen = [], set()

try:
    # ana sayfada da overlay çıkabiliyor
    driver.get("https://tr.indeed.com/")
    dismiss_overlays(driver)

    for page in range(MAX_PAGES):
        url = BASE_URL.format(start=page*10)
        print(f"\n>>> Sayfa {page+1}: {url}")
        driver.get(url)
        time.sleep(1.3)
        dismiss_overlays(driver)
        scroll_all(driver, pause=0.8, max_steps=30)

        html = driver.page_source
        cards = pick_cards_from_html(html, page+1)
        print("Bulunan kart:", len(cards))

        for c in cards:
            # ---- Link ----
            href = c.get("href")
            if href and not href.startswith("http"):
                href = "https://tr.indeed.com" + href
            if not href or href in seen:
                continue

            # ---- Title ----
            title_el = c.select_one("h2.jobTitle span") or c.select_one("h2.jobTitle") or c
            title = get_text(title_el)

            # ---- Company / Location / Summary ----
            company_txt, location_txt, summary_txt = extract_company_location_summary(c)

            rows.append({
                "Title": title,
                "Company": company_txt,
                "Location": location_txt,
                "Summary": summary_txt,
                "Link": href,
                "Page": page+1
            })
            seen.add(href)
            time.sleep(random.uniform(0.8, 1.5))

    df = pd.DataFrame(rows)
    print(f"\nToplam {len(df)} ilan toplandı.")
    display(df.head(20))
    df.to_csv(CSV_PATH, index=False)
    print("CSV kaydedildi:", CSV_PATH)

finally:
    try: driver.quit()
    except: pass



>>> Sayfa 1: https://tr.indeed.com/jobs?q=&l=Türkiye&start=0
Seçici sayıları: a.tapItem:0, h2.jobTitle a:15, a.jcs-JobTitle:15, .resultContent a.jcs-JobTitle:15
Bulunan kart: 15

>>> Sayfa 2: https://tr.indeed.com/jobs?q=&l=Türkiye&start=10
Seçici sayıları: a.tapItem:0, h2.jobTitle a:15, a.jcs-JobTitle:15, .resultContent a.jcs-JobTitle:15
Bulunan kart: 15

>>> Sayfa 3: https://tr.indeed.com/jobs?q=&l=Türkiye&start=20
Seçici sayıları: a.tapItem:0, h2.jobTitle a:15, a.jcs-JobTitle:15, .resultContent a.jcs-JobTitle:15
Bulunan kart: 15

>>> Sayfa 4: https://tr.indeed.com/jobs?q=&l=Türkiye&start=30
Seçici sayıları: a.tapItem:0, h2.jobTitle a:15, a.jcs-JobTitle:15, .resultContent a.jcs-JobTitle:15
Bulunan kart: 15

>>> Sayfa 5: https://tr.indeed.com/jobs?q=&l=Türkiye&start=40
Seçici sayıları: a.tapItem:0, h2.jobTitle a:15, a.jcs-JobTitle:15, .resultContent a.jcs-JobTitle:15
Bulunan kart: 15

Toplam 75 ilan toplandı.


Unnamed: 0,Title,Company,Location,Summary,Link,Page
0,Customer Support Travel (m/w/d) 300€ Startprämie!,Avedo - Eine Marke der Ströer X GmbH,Avedo - Eine Marke der Ströer X GmbH Antalya,,https://tr.indeed.com/pagead/clk?mo=r&ad=-6NYl...,1
1,Call Center Agent deutsch (m/w/d) 300€ Startpr...,Avedo - Eine Marke der Ströer X GmbH,Avedo - Eine Marke der Ströer X GmbH Antalya,,https://tr.indeed.com/pagead/clk?mo=r&ad=-6NYl...,1
2,"Managing Consultant, Technical Program Managem...",Mastercard,Mastercard Beşiktaş,,https://tr.indeed.com/pagead/clk?mo=r&ad=-6NYl...,1
3,Language Data Quality Reviewer for Uzbek (Free...,Volga Partners,Volga Partners Türkiye,,https://tr.indeed.com/pagead/clk?mo=r&ad=-6NYl...,1
4,Üretim Yöneticisi (İnegöl),Mondi,"Mondi 3,3 İnegöl",,https://tr.indeed.com/pagead/clk?mo=r&ad=-6NYl...,1
5,Telefonischer Kundenberater (m/w/d) Inbound Sales,KiKxxl GmbH,KiKxxl GmbH İstanbul,,https://tr.indeed.com/pagead/clk?mo=r&ad=-6NYl...,1
6,Telefonischer Kundenberater (m/w/d) Outbound B...,KiKxxl GmbH,KiKxxl GmbH İstanbul,,https://tr.indeed.com/pagead/clk?mo=r&ad=-6NYl...,1
7,Satış Temsilcisi (Tire/İzmir),Mondi,"Mondi 3,3 Tire",,https://tr.indeed.com/pagead/clk?mo=r&ad=-6NYl...,1
8,Metin Yazarı Arıyoruz,OneWord Dijital Reklam ve Video Prodüksiyon...,OneWord Dijital Reklam ve Video Prodüksiyon......,,https://tr.indeed.com/rc/clk?jk=b996c4311a9c3a...,1
9,"Consultant, Advisors & Consulting Services, St...",Mastercard,Mastercard Beşiktaş,,https://tr.indeed.com/pagead/clk?mo=r&ad=-6NYl...,1


CSV kaydedildi: indeed_tr_tum_ilanlar.csv


In [1]:
# Indeed (TR) — Summary doldurma: çoklu seçici + detay sayfası fallback

import os, time, random
import pandas as pd
from bs4 import BeautifulSoup

import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

# ---------- AYARLAR ----------
BASE_URL    = "https://tr.indeed.com/jobs?q=&l=Türkiye&start={start}"
MAX_PAGES   = 5
CSV_PATH    = "indeed_tr_tum_ilanlar.csv"
PROFILE_DIR = os.path.expanduser("~/uc_profiles/indeed_profile"); os.makedirs(PROFILE_DIR, exist_ok=True)

# Detay sayfasından özet zenginleştirmesi
ENRICH_FROM_DETAIL = True
MAX_ENRICH         = 40          # en fazla kaç ilanın detayına gidilsin
DETAIL_SNIPPET_LEN = 280         # detaydan alınacak özet uzunluğu (karakter)
# -----------------------------

def start_driver():
    opts = uc.ChromeOptions()
    opts.add_argument("--lang=tr-TR")
    opts.add_argument("--disable-blink-features=AutomationControlled")
    opts.add_argument(f"--user-data-dir={PROFILE_DIR}")
    opts.add_argument("--window-size=1280,900")
    return uc.Chrome(options=opts)

def dismiss_overlays(driver):
    time.sleep(1.0)
    for sel in ["#onetrust-accept-btn-handler","button#onetrust-accept-btn-handler","button[aria-label*='kabul' i]"]:
        try:
            el = driver.find_element(By.CSS_SELECTOR, sel)
            driver.execute_script("arguments[0].click();", el); time.sleep(0.4); break
        except: pass
    for sel in ["button[aria-label='Kapat']","button[aria-label='Close']",".icl-CloseButton",
                ".mosaic-modal [data-testid='modal-x']",".mosaic-modal button[aria-label*='close' i]",
                ".mosaic-modal button[aria-label*='kapat' i]"]:
        try:
            btns = driver.find_elements(By.CSS_SELECTOR, sel)
            if btns: driver.execute_script("arguments[0].click();", btns[0]); time.sleep(0.4); break
        except: pass
    try: driver.switch_to.active_element.send_keys(Keys.ESCAPE); time.sleep(0.2)
    except: pass
    try:
        driver.execute_script("""
            document.querySelectorAll('[role="dialog"], .mosaic-modal').forEach(x=>x.remove());
            document.querySelectorAll('html, body').forEach(el=>{el.style.overflow='auto'});
        """)
    except: pass

def scroll_all(driver, pause=0.8, max_steps=30):
    last = 0
    for _ in range(max_steps):
        driver.execute_script("window.scrollBy(0, 1400);"); time.sleep(pause)
        now = driver.execute_script("return document.documentElement.scrollTop || document.body.scrollTop;")
        if abs(now - last) < 50: break
        last = now

def pick_cards_from_html(html, page_idx):
    soup = BeautifulSoup(html, "html.parser")
    candidates = ["a.tapItem","h2.jobTitle a","a.jcs-JobTitle",".resultContent a.jcs-JobTitle"]
    best, counts = [], []
    for sel in candidates:
        found = soup.select(sel); counts.append((sel, len(found)))
        if len(found) > len(best): best = found
    print("Seçici sayıları:", ", ".join(f"{s}:{n}" for s,n in counts))
    if not best:
        with open(f"indeed_debug_p{page_idx}.html","w",encoding="utf-8") as f: f.write(html)
        print(f"⚠️ Kart bulunamadı. Dump: indeed_debug_p{page_idx}.html")
    return best

def t(el): return " ".join(el.get_text(" ", strip=True).split()) if el else None

def extract_title(anchor):
    return t(anchor.select_one("h2.jobTitle span") or anchor.select_one("h2.jobTitle") or anchor)

def extract_company_location(anchor):
    company = anchor.select_one(".companyName, [data-testid='company-name'], .company, .companyTitle")
    location = anchor.select_one(".companyLocation, [data-testid='text-location'], .company_location, .company_location span")
    parent, hops = anchor.parent, 0
    while hops < 5 and (company is None or location is None):
        if parent:
            if company is None:
                company = parent.select_one(".companyName, [data-testid='company-name'], .company, .companyTitle, div.company_location > span")
            if location is None:
                location = parent.select_one(".companyLocation, [data-testid='text-location'], .company_location, .company_location span")
            parent = parent.parent; hops += 1
        else: break
    if company is None or location is None:
        rc = anchor.find_parent(lambda t: t and t.has_attr("class") and any(
            cls in t.get("class", []) for cls in ["resultContent","slider_container","jobCard_mainContent"]))
        if rc:
            if company is None:
                company  = rc.select_one(".companyName, [data-testid='company-name'], .company, .companyTitle")
            if location is None:
                location = rc.select_one(".companyLocation, [data-testid='text-location'], .company_location, .company_location span")
    return t(company), t(location)

def extract_summary_listpage(anchor):
    # liste sayfasında olabilecek tüm varyantlar
    for sel in [".job-snippet","[data-testid='snippet']","[data-testid='text-snippet']",
                ".job-snippet-list","ul.job-snippet-list li"]:
        el = anchor.select_one(sel)
        if el: return t(el)
    # ebeveynde ara
    parent, hops = anchor.parent, 0
    while hops < 5:
        if parent:
            for sel in [".job-snippet","[data-testid='snippet']","[data-testid='text-snippet']",
                        ".job-snippet-list","ul.job-snippet-list li"]:
                el = parent.select_one(sel)
                if el: return t(el)
            parent = parent.parent; hops += 1
        else: break
    # özel kapsayıcılar
    rc = anchor.find_parent(lambda t: t and t.has_attr("class") and any(
        cls in t.get("class", []) for cls in ["resultContent","slider_container","jobCard_mainContent"]))
    if rc:
        for sel in [".job-snippet","[data-testid='snippet']","[data-testid='text-snippet']",
                    ".job-snippet-list","ul.job-snippet-list li"]:
            el = rc.select_one(sel)
            if el: return t(el)
    return None

def enrich_summary_from_detail(driver, link):
    try:
        driver.execute_script("window.open('about:blank','_enrich');")
        driver.switch_to.window(driver.window_handles[-1])
        driver.get(link); time.sleep(1.2)
        dismiss_overlays(driver)
        # detay sayfası seçicileri
        html = driver.page_source
        soup = BeautifulSoup(html, "html.parser")
        box = soup.select_one("#jobDescriptionText, [data-testid='jobsearch-Viewjob'], article")
        txt = t(box)
        driver.close()
        driver.switch_to.window(driver.window_handles[0])
        if txt:
            return (txt[:DETAIL_SNIPPET_LEN] + "…") if len(txt) > DETAIL_SNIPPET_LEN else txt
    except Exception:
        try:
            driver.close(); driver.switch_to.window(driver.window_handles[0])
        except: pass
    return None

# ---------- ÇALIŞTIR ----------
driver = start_driver()
rows, seen = [], set()

try:
    driver.get("https://tr.indeed.com/"); dismiss_overlays(driver)

    for page in range(MAX_PAGES):
        url = BASE_URL.format(start=page*10)
        print(f"\n>>> Sayfa {page+1}: {url}")
        driver.get(url); time.sleep(1.3)
        dismiss_overlays(driver)
        scroll_all(driver, pause=0.8, max_steps=30)

        html = driver.page_source
        cards = pick_cards_from_html(html, page+1)
        print("Bulunan kart:", len(cards))

        for c in cards:
            href = c.get("href")
            if href and not href.startswith("http"): href = "https://tr.indeed.com" + href
            if not href or href in seen: continue

            title    = extract_title(c)
            company, location = extract_company_location(c)
            summary  = extract_summary_listpage(c)  # önce liste sayfasında dene

            rows.append({
                "Title": title,
                "Company": company,
                "Location": location,
                "Summary": summary,
                "Link": href,
                "Page": page+1
            })
            seen.add(href)
            time.sleep(random.uniform(0.8, 1.5))

    # --- Detay sayfasından özet zenginleştirme (opsiyonel) ---
    if ENRICH_FROM_DETAIL:
        missing = [i for i,r in enumerate(rows) if not r["Summary"]]
        print(f"\nDetaydan özet zenginleştirme: {min(len(missing), MAX_ENRICH)} / {len(missing)} ilan")
        for idx in missing[:MAX_ENRICH]:
            s = enrich_summary_from_detail(driver, rows[idx]["Link"])
            if s: rows[idx]["Summary"] = s
            time.sleep(random.uniform(0.9, 1.6))

    df = pd.DataFrame(rows)
    print(f"\nToplam {len(df)} ilan toplandı.")
    display(df.head(20))
    df.to_csv(CSV_PATH, index=False)
    print("CSV kaydedildi:", CSV_PATH)

finally:
    try: driver.quit()
    except: pass



>>> Sayfa 1: https://tr.indeed.com/jobs?q=&l=Türkiye&start=0
Seçici sayıları: a.tapItem:0, h2.jobTitle a:15, a.jcs-JobTitle:15, .resultContent a.jcs-JobTitle:15
Bulunan kart: 15

>>> Sayfa 2: https://tr.indeed.com/jobs?q=&l=Türkiye&start=10
Seçici sayıları: a.tapItem:0, h2.jobTitle a:15, a.jcs-JobTitle:15, .resultContent a.jcs-JobTitle:15
Bulunan kart: 15

>>> Sayfa 3: https://tr.indeed.com/jobs?q=&l=Türkiye&start=20
Seçici sayıları: a.tapItem:0, h2.jobTitle a:15, a.jcs-JobTitle:15, .resultContent a.jcs-JobTitle:15
Bulunan kart: 15

>>> Sayfa 4: https://tr.indeed.com/jobs?q=&l=Türkiye&start=30
Seçici sayıları: a.tapItem:0, h2.jobTitle a:15, a.jcs-JobTitle:15, .resultContent a.jcs-JobTitle:15
Bulunan kart: 15

>>> Sayfa 5: https://tr.indeed.com/jobs?q=&l=Türkiye&start=40
Seçici sayıları: a.tapItem:0, h2.jobTitle a:15, a.jcs-JobTitle:15, .resultContent a.jcs-JobTitle:15
Bulunan kart: 15

Detaydan özet zenginleştirme: 40 / 75 ilan

Toplam 75 ilan toplandı.


Unnamed: 0,Title,Company,Location,Summary,Link,Page
0,Call Center Agent deutsch (m/w/d) 300€ Startpr...,Avedo - Eine Marke der Ströer X GmbH,Avedo - Eine Marke der Ströer X GmbH Antalya,,https://tr.indeed.com/pagead/clk?mo=r&ad=-6NYl...,1
1,Customer Support Travel (m/w/d) 300€ Startprämie!,Avedo - Eine Marke der Ströer X GmbH,Avedo - Eine Marke der Ströer X GmbH Antalya,,https://tr.indeed.com/pagead/clk?mo=r&ad=-6NYl...,1
2,"Temizlik ,Çay Kahve servisi",Dr.FİLİZ CAN KURTİŞ,Dr.FİLİZ CAN KURTİŞ Levent,,https://tr.indeed.com/pagead/clk?mo=r&ad=-6NYl...,1
3,"Consultant, Program Management, Advisors & Con...",Mastercard,Mastercard Beşiktaş,,https://tr.indeed.com/pagead/clk?mo=r&ad=-6NYl...,1
4,Metin Yazarı Arıyoruz,OneWord Dijital Reklam ve Video Prodüksiyon...,OneWord Dijital Reklam ve Video Prodüksiyon......,,https://tr.indeed.com/rc/clk?jk=b996c4311a9c3a...,1
5,Telefonischer Kundenberater (m/w/d) Outbound B...,KiKxxl GmbH,KiKxxl GmbH İstanbul,,https://tr.indeed.com/pagead/clk?mo=r&ad=-6NYl...,1
6,Quereinsteiger telefonischer Kundenservice (m/...,KiKxxl GmbH,KiKxxl GmbH İstanbul,,https://tr.indeed.com/pagead/clk?mo=r&ad=-6NYl...,1
7,Homeoffice Müşteri Temsilcisi Türkiye,Bezmi Alem Çağrı Hizmetleri,Bezmi Alem Çağrı Hizmetleri Türkiye,,https://tr.indeed.com/rc/clk?jk=fd4e3c6326d6b8...,1
8,Mitarbeiter Kundenbetreuung (m/w/d) Deutsch,KiKxxl GmbH,KiKxxl GmbH İstanbul,,https://tr.indeed.com/pagead/clk?mo=r&ad=-6NYl...,1
9,Almanca bilen cagri merkezi müsteri temsilcisi,KiKxxl GmbH,KiKxxl GmbH İstanbul,,https://tr.indeed.com/pagead/clk?mo=r&ad=-6NYl...,1


CSV kaydedildi: indeed_tr_tum_ilanlar.csv
