In [7]:
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import time
import json
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def scrape_news(emiten_list, output_file, wait_timeout=10, page_load_delay=3):
    """Search iqplus.info for each ticker and dump the matching headlines to JSON.

    For every entry in ``emiten_list`` the search page is opened in Chrome, the
    ticker is typed into the form field named ``search`` and the form is
    submitted. Each ``<li style="text-transform:capitalize;">`` element of the
    resulting page is treated as one news item; an item is kept only when its
    title contains ``"<emiten>:"``.

    Args:
        emiten_list: Iterable of ticker codes to search for (presumably
            strings such as ``"SIDO"`` -- confirm against the input files).
        output_file: Path of the JSON file to write. It is overwritten.
        wait_timeout: Seconds to wait for the search input to appear before
            giving up on the current ticker.
        page_load_delay: Seconds to sleep after submitting the search so the
            result page can render before it is parsed.

    Returns:
        None. The collected items are written to ``output_file`` as a JSON
        list of objects with the keys ``Emiten``, ``Date``, ``Title`` and
        ``Link``.
    """
    search_url = "http://www.iqplus.info/news/search/"
    news_data = []

    # Selenium 4 takes the driver binary through a Service object; passing the
    # path positionally is interpreted as ``options`` on current releases.
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

    try:
        for emiten in emiten_list:
            print(f"Searching news for {emiten}...")
            driver.get(search_url)

            try:
                search_input = WebDriverWait(driver, wait_timeout).until(
                    EC.presence_of_element_located((By.NAME, "search"))
                )
                search_input.send_keys(emiten)
                search_input.submit()
            except Exception:
                # Best effort per ticker: skip it, but (unlike a bare except)
                # let KeyboardInterrupt/SystemExit stop the run.
                print(f"Search input element not found for {emiten}. Moving to next ticker.")
                continue

            time.sleep(page_load_delay)  # Give the result page time to load

            soup = BeautifulSoup(driver.page_source, "html.parser")
            news_list = soup.find_all("li", style="text-transform:capitalize;")

            if not news_list:
                print(f"No news found for {emiten}.")
                continue

            print(f"Found {len(news_list)} news items for {emiten}")
            for news in news_list:
                # Look each tag up once instead of once per field.
                date_tag = news.find("b")
                anchor = news.find("a")

                date_time = date_tag.text.strip() if date_tag is not None else "No Date"
                title = anchor.text.strip() if anchor is not None else "No Title"
                # .get() keeps the "#" placeholder for anchors without an href
                # instead of raising KeyError.
                link = anchor.get("href", "#") if anchor is not None else "#"

                # The search also returns items that merely mention the ticker
                # text; keep only headlines of the form "<emiten>: ...".
                if f"{emiten}:" in title:
                    news_data.append({
                        "Emiten": emiten,
                        "Date": date_time,
                        "Title": title,
                        "Link": link
                    })
                else:
                    print(f"Skipping news item as title does not contain '{emiten}:'")
    finally:
        # Always release the browser, even when a ticker raises unexpectedly.
        driver.quit()

    # Save the news data to a JSON file
    with open(output_file, "w", encoding="utf-8") as json_file:
        json.dump(news_data, json_file, indent=4, ensure_ascii=False)

    print(f"News data saved to {output_file}")


In [8]:
# Process the ticker lists stored in emiten_list_pt1.json .. emiten_list_pt5.json
for i in range(1, 6):
    input_file = f"emiten_list_pt{i}.json"
    # NOTE(review): this name ends in "_pt{i}" rather than ".json"; it is kept
    # as-is because other cells may read these exact paths -- confirm.
    output_file = f"stock_news.json_pt{i}"

    # Only the input load is guarded, so an error raised while scraping or
    # writing the output is not misreported as a missing/invalid input file.
    try:
        with open(input_file, "r", encoding="utf-8") as file:
            emiten_list = json.load(file)
    except FileNotFoundError:
        print(f"Error: {input_file} not found. Skipping...")
        continue
    except json.JSONDecodeError:
        print(f"Error: Invalid JSON format in {input_file}. Skipping...")
        continue

    print(f"Successfully loaded {len(emiten_list)} emiten from {input_file}")

    # Scrape news for the current emiten list
    scrape_news(emiten_list, output_file)

Successfully loaded 200 emiten from emiten_list_pt4.json
Searching news for SDPC...
Found 1 news items for SDPC
Searching news for SDRA...
Found 1 news items for SDRA
Searching news for SGRO...
Found 3 news items for SGRO
Searching news for SHID...
Found 1 news items for SHID
Skipping news item as title does not contain 'SHID:'
Searching news for SIDO...
Found 27 news items for SIDO
Skipping news item as title does not contain 'SIDO:'
Skipping news item as title does not contain 'SIDO:'
Skipping news item as title does not contain 'SIDO:'
Skipping news item as title does not contain 'SIDO:'
Skipping news item as title does not contain 'SIDO:'
Skipping news item as title does not contain 'SIDO:'
Skipping news item as title does not contain 'SIDO:'
Skipping news item as title does not contain 'SIDO:'
Skipping news item as title does not contain 'SIDO:'
Searching news for SILO...
Found 9 news items for SILO
Skipping news item as title does not contain 'SILO:'
Searching news for SIMA...
Fo

Found 1 news items for STTP
Searching news for SUGI...
Found 3 news items for SUGI
Skipping news item as title does not contain 'SUGI:'
Skipping news item as title does not contain 'SUGI:'
Skipping news item as title does not contain 'SUGI:'
Searching news for SULI...
Found 5 news items for SULI
Skipping news item as title does not contain 'SULI:'
Searching news for SUPR...
Found 8 news items for SUPR
Skipping news item as title does not contain 'SUPR:'
Skipping news item as title does not contain 'SUPR:'
Skipping news item as title does not contain 'SUPR:'
Skipping news item as title does not contain 'SUPR:'
Skipping news item as title does not contain 'SUPR:'
Skipping news item as title does not contain 'SUPR:'
Searching news for TALF...
Found 1 news items for TALF
Searching news for TARA...
Found 50 news items for TARA
Skipping news item as title does not contain 'TARA:'
Skipping news item as title does not contain 'TARA:'
Skipping news item as title does not contain 'TARA:'
Skippin

Found 5 news items for TRIM
Skipping news item as title does not contain 'TRIM:'
Skipping news item as title does not contain 'TRIM:'
Skipping news item as title does not contain 'TRIM:'
Searching news for TRIO...
Found 1 news items for TRIO
Skipping news item as title does not contain 'TRIO:'
Searching news for TRIS...
Found 6 news items for TRIS
Skipping news item as title does not contain 'TRIS:'
Skipping news item as title does not contain 'TRIS:'
Searching news for TRST...
No news found for TRST.
Searching news for TRUS...
Found 8 news items for TRUS
Skipping news item as title does not contain 'TRUS:'
Skipping news item as title does not contain 'TRUS:'
Skipping news item as title does not contain 'TRUS:'
Skipping news item as title does not contain 'TRUS:'
Skipping news item as title does not contain 'TRUS:'
Skipping news item as title does not contain 'TRUS:'
Skipping news item as title does not contain 'TRUS:'
Skipping news item as title does not contain 'TRUS:'
Searching news

Found 2 news items for DPUM
Searching news for IDPR...
Found 8 news items for IDPR
Searching news for JGLE...
Found 1 news items for JGLE
Searching news for KINO...
Found 3 news items for KINO
Searching news for MARI...
Found 29 news items for MARI
Skipping news item as title does not contain 'MARI:'
Skipping news item as title does not contain 'MARI:'
Skipping news item as title does not contain 'MARI:'
Skipping news item as title does not contain 'MARI:'
Skipping news item as title does not contain 'MARI:'
Skipping news item as title does not contain 'MARI:'
Skipping news item as title does not contain 'MARI:'
Skipping news item as title does not contain 'MARI:'
Skipping news item as title does not contain 'MARI:'
Skipping news item as title does not contain 'MARI:'
Skipping news item as title does not contain 'MARI:'
Skipping news item as title does not contain 'MARI:'
Skipping news item as title does not contain 'MARI:'
Skipping news item as title does not contain 'MARI:'
Skipping 

Found 2 news items for CSIS
Searching news for TGRA...
Found 2 news items for TGRA
Searching news for FIRE...
No news found for FIRE.
Searching news for TOPS...
No news found for TOPS.
Searching news for KMTR...
Found 3 news items for KMTR
Searching news for ARMY...
No news found for ARMY.
Searching news for MAPB...
No news found for MAPB.
Searching news for WOOD...
Found 5 news items for WOOD
Skipping news item as title does not contain 'WOOD:'
Searching news for HRTA...
Found 16 news items for HRTA
Searching news for MABA...
No news found for MABA.
Searching news for HOKI...
Found 6 news items for HOKI
Searching news for MPOW...
Found 1 news items for MPOW
Skipping news item as title does not contain 'MPOW:'
Searching news for MARK...
Found 34 news items for MARK
Skipping news item as title does not contain 'MARK:'
Skipping news item as title does not contain 'MARK:'
Skipping news item as title does not contain 'MARK:'
Skipping news item as title does not contain 'MARK:'
Skipping new

Found 17 news items for HEAL
Skipping news item as title does not contain 'HEAL:'
Skipping news item as title does not contain 'HEAL:'
Skipping news item as title does not contain 'HEAL:'
Skipping news item as title does not contain 'HEAL:'
Searching news for TRUK...
Found 50 news items for TRUK
Skipping news item as title does not contain 'TRUK:'
Skipping news item as title does not contain 'TRUK:'
Skipping news item as title does not contain 'TRUK:'
Skipping news item as title does not contain 'TRUK:'
Skipping news item as title does not contain 'TRUK:'
Skipping news item as title does not contain 'TRUK:'
Skipping news item as title does not contain 'TRUK:'
Skipping news item as title does not contain 'TRUK:'
Skipping news item as title does not contain 'TRUK:'
Skipping news item as title does not contain 'TRUK:'
Skipping news item as title does not contain 'TRUK:'
Skipping news item as title does not contain 'TRUK:'
Skipping news item as title does not contain 'TRUK:'
Skipping news 

Found 50 news items for LAND
Skipping news item as title does not contain 'LAND:'
Skipping news item as title does not contain 'LAND:'
Skipping news item as title does not contain 'LAND:'
Skipping news item as title does not contain 'LAND:'
Skipping news item as title does not contain 'LAND:'
Skipping news item as title does not contain 'LAND:'
Skipping news item as title does not contain 'LAND:'
Skipping news item as title does not contain 'LAND:'
Skipping news item as title does not contain 'LAND:'
Skipping news item as title does not contain 'LAND:'
Skipping news item as title does not contain 'LAND:'
Skipping news item as title does not contain 'LAND:'
Skipping news item as title does not contain 'LAND:'
Skipping news item as title does not contain 'LAND:'
Skipping news item as title does not contain 'LAND:'
Skipping news item as title does not contain 'LAND:'
Skipping news item as title does not contain 'LAND:'
Skipping news item as title does not contain 'LAND:'
Skipping news ite

No news found for DUCK.
Searching news for GOOD...
Found 9 news items for GOOD
Skipping news item as title does not contain 'GOOD:'
Skipping news item as title does not contain 'GOOD:'
Skipping news item as title does not contain 'GOOD:'
Skipping news item as title does not contain 'GOOD:'
Skipping news item as title does not contain 'GOOD:'
Skipping news item as title does not contain 'GOOD:'
Skipping news item as title does not contain 'GOOD:'
Skipping news item as title does not contain 'GOOD:'
Searching news for SKRN...
Found 4 news items for SKRN
Searching news for YELO...
Found 1 news items for YELO
Searching news for CAKK...
Found 8 news items for CAKK
Searching news for SATU...
Found 41 news items for SATU
Skipping news item as title does not contain 'SATU:'
Skipping news item as title does not contain 'SATU:'
Skipping news item as title does not contain 'SATU:'
Skipping news item as title does not contain 'SATU:'
Skipping news item as title does not contain 'SATU:'
Skipping ne

No news found for HRME.
Searching news for POSA...
Found 7 news items for POSA
Skipping news item as title does not contain 'POSA:'
Skipping news item as title does not contain 'POSA:'
Skipping news item as title does not contain 'POSA:'
Skipping news item as title does not contain 'POSA:'
Skipping news item as title does not contain 'POSA:'
Searching news for JAST...
Found 18 news items for JAST
Searching news for FITT...
Found 5 news items for FITT
Searching news for BOLA...
Found 4 news items for BOLA
Skipping news item as title does not contain 'BOLA:'
Skipping news item as title does not contain 'BOLA:'
Skipping news item as title does not contain 'BOLA:'
Searching news for CCSI...
Found 3 news items for CCSI
Searching news for SFAN...
Found 3 news items for SFAN
Skipping news item as title does not contain 'SFAN:'
Searching news for POLU...
Found 8 news items for POLU
Skipping news item as title does not contain 'POLU:'
Searching news for KJEN...
Found 5 news items for KJEN
Skipp

Found 2 news items for SMKL
Searching news for HDIT...
No news found for HDIT.
Searching news for KEEN...
Found 6 news items for KEEN
Skipping news item as title does not contain 'KEEN:'
Skipping news item as title does not contain 'KEEN:'
Skipping news item as title does not contain 'KEEN:'
Searching news for BAPI...
No news found for BAPI.
Searching news for TFAS...
No news found for TFAS.
Searching news for GGRP...
Found 12 news items for GGRP
Searching news for OPMS...
Found 1 news items for OPMS
Searching news for NZIA...
Found 1 news items for NZIA
Searching news for SLIS...
No news found for SLIS.
Searching news for PURE...
Found 1 news items for PURE
Searching news for IRRA...
No news found for IRRA.
Searching news for DMMX...
Found 9 news items for DMMX
Searching news for SINI...
Found 1 news items for SINI
Searching news for WOWS...
Found 1 news items for WOWS
Searching news for ESIP...
No news found for ESIP.
Searching news for TEBE...
Found 1 news items for TEBE
Searching n

Found 10 news items for AMOR
Skipping news item as title does not contain 'AMOR:'
Skipping news item as title does not contain 'AMOR:'
Skipping news item as title does not contain 'AMOR:'
Skipping news item as title does not contain 'AMOR:'
Skipping news item as title does not contain 'AMOR:'
Skipping news item as title does not contain 'AMOR:'
Skipping news item as title does not contain 'AMOR:'
Searching news for TRIN...
Found 15 news items for TRIN
Skipping news item as title does not contain 'TRIN:'
Skipping news item as title does not contain 'TRIN:'
Skipping news item as title does not contain 'TRIN:'
Skipping news item as title does not contain 'TRIN:'
Skipping news item as title does not contain 'TRIN:'
Skipping news item as title does not contain 'TRIN:'
Skipping news item as title does not contain 'TRIN:'
Skipping news item as title does not contain 'TRIN:'
Skipping news item as title does not contain 'TRIN:'
Skipping news item as title does not contain 'TRIN:'
Skipping news 

Found 1 news items for AYLS
Searching news for DADA...
Found 1 news items for DADA
Skipping news item as title does not contain 'DADA:'
Searching news for ASPI...
Found 6 news items for ASPI
Skipping news item as title does not contain 'ASPI:'
Skipping news item as title does not contain 'ASPI:'
Skipping news item as title does not contain 'ASPI:'
Skipping news item as title does not contain 'ASPI:'
Skipping news item as title does not contain 'ASPI:'
Skipping news item as title does not contain 'ASPI:'
Searching news for ESTA...
Found 50 news items for ESTA
Skipping news item as title does not contain 'ESTA:'
Skipping news item as title does not contain 'ESTA:'
Skipping news item as title does not contain 'ESTA:'
Skipping news item as title does not contain 'ESTA:'
Skipping news item as title does not contain 'ESTA:'
Skipping news item as title does not contain 'ESTA:'
Skipping news item as title does not contain 'ESTA:'
Skipping news item as title does not contain 'ESTA:'
Skipping ne

Found 2 news items for PGUN
Searching news for SOFA...
No news found for SOFA.
Searching news for PPGL...
Found 11 news items for PPGL
Searching news for TOYS...
No news found for TOYS.
Searching news for SGER...
Found 4 news items for SGER
Searching news for TRJA...
Found 1 news items for TRJA
Searching news for PNGO...
Found 3 news items for PNGO
Searching news for SCNP...
Found 1 news items for SCNP
Searching news for BBSI...
Found 10 news items for BBSI
Searching news for KMDS...
Found 2 news items for KMDS
Searching news for PURI...
Found 11 news items for PURI
Skipping news item as title does not contain 'PURI:'
Skipping news item as title does not contain 'PURI:'
Skipping news item as title does not contain 'PURI:'
Skipping news item as title does not contain 'PURI:'
Searching news for SOHO...
Found 4 news items for SOHO
Skipping news item as title does not contain 'SOHO:'
Searching news for HOMI...
Found 1 news items for HOMI
Searching news for ROCK...
Found 3 news items for RO

No news found for ARCI.
Searching news for IPAC...
Found 2 news items for IPAC
Searching news for MASB...
Found 1 news items for MASB
Searching news for BMHS...
Found 5 news items for BMHS
Searching news for FLMC...
Found 1 news items for FLMC
Searching news for NICL...
Found 1 news items for NICL
Searching news for UVCR...
Found 11 news items for UVCR
Searching news for BUKA...
Found 50 news items for BUKA
Skipping news item as title does not contain 'BUKA:'
Skipping news item as title does not contain 'BUKA:'
Skipping news item as title does not contain 'BUKA:'
Skipping news item as title does not contain 'BUKA:'
Skipping news item as title does not contain 'BUKA:'
Skipping news item as title does not contain 'BUKA:'
Skipping news item as title does not contain 'BUKA:'
Skipping news item as title does not contain 'BUKA:'
Skipping news item as title does not contain 'BUKA:'
Skipping news item as title does not contain 'BUKA:'
Skipping news item as title does not contain 'BUKA:'
Skippi

No news found for BSML.
Searching news for DRMA...
Found 13 news items for DRMA
Searching news for ADMR...
Found 2 news items for ADMR
Searching news for SEMA...
Found 35 news items for SEMA
Skipping news item as title does not contain 'SEMA:'
Skipping news item as title does not contain 'SEMA:'
Skipping news item as title does not contain 'SEMA:'
Skipping news item as title does not contain 'SEMA:'
Skipping news item as title does not contain 'SEMA:'
Skipping news item as title does not contain 'SEMA:'
Skipping news item as title does not contain 'SEMA:'
Skipping news item as title does not contain 'SEMA:'
Skipping news item as title does not contain 'SEMA:'
Skipping news item as title does not contain 'SEMA:'
Skipping news item as title does not contain 'SEMA:'
Skipping news item as title does not contain 'SEMA:'
Skipping news item as title does not contain 'SEMA:'
Skipping news item as title does not contain 'SEMA:'
Skipping news item as title does not contain 'SEMA:'
Skipping news 

Found 7 news items for RAFI
Skipping news item as title does not contain 'RAFI:'
Skipping news item as title does not contain 'RAFI:'
Skipping news item as title does not contain 'RAFI:'
Skipping news item as title does not contain 'RAFI:'
Skipping news item as title does not contain 'RAFI:'
Skipping news item as title does not contain 'RAFI:'
Searching news for KKES...
No news found for KKES.
Searching news for ELPI...
Found 6 news items for ELPI
Skipping news item as title does not contain 'ELPI:'
Skipping news item as title does not contain 'ELPI:'
Skipping news item as title does not contain 'ELPI:'
Skipping news item as title does not contain 'ELPI:'
Skipping news item as title does not contain 'ELPI:'
Searching news for EURO...
Found 15 news items for EURO
Skipping news item as title does not contain 'EURO:'
Skipping news item as title does not contain 'EURO:'
Skipping news item as title does not contain 'EURO:'
Skipping news item as title does not contain 'EURO:'
Skipping news i

No news found for ISAP.
Searching news for VTNY...
Found 7 news items for VTNY
Searching news for SOUL...
Found 6 news items for SOUL
Searching news for ELIT...
Found 25 news items for ELIT
Skipping news item as title does not contain 'ELIT:'
Skipping news item as title does not contain 'ELIT:'
Skipping news item as title does not contain 'ELIT:'
Skipping news item as title does not contain 'ELIT:'
Skipping news item as title does not contain 'ELIT:'
Skipping news item as title does not contain 'ELIT:'
Skipping news item as title does not contain 'ELIT:'
Skipping news item as title does not contain 'ELIT:'
Skipping news item as title does not contain 'ELIT:'
Skipping news item as title does not contain 'ELIT:'
Skipping news item as title does not contain 'ELIT:'
Skipping news item as title does not contain 'ELIT:'
Skipping news item as title does not contain 'ELIT:'
Skipping news item as title does not contain 'ELIT:'
Skipping news item as title does not contain 'ELIT:'
Skipping news i

Found 3 news items for NCKL
Searching news for MENN...
Found 6 news items for MENN
Skipping news item as title does not contain 'MENN:'
Searching news for AWAN...
Found 50 news items for AWAN
Skipping news item as title does not contain 'AWAN:'
Skipping news item as title does not contain 'AWAN:'
Skipping news item as title does not contain 'AWAN:'
Skipping news item as title does not contain 'AWAN:'
Skipping news item as title does not contain 'AWAN:'
Skipping news item as title does not contain 'AWAN:'
Skipping news item as title does not contain 'AWAN:'
Skipping news item as title does not contain 'AWAN:'
Skipping news item as title does not contain 'AWAN:'
Skipping news item as title does not contain 'AWAN:'
Skipping news item as title does not contain 'AWAN:'
Skipping news item as title does not contain 'AWAN:'
Skipping news item as title does not contain 'AWAN:'
Skipping news item as title does not contain 'AWAN:'
Skipping news item as title does not contain 'AWAN:'
Skipping news

Found 2 news items for RMKO
Searching news for CNMA...
Found 9 news items for CNMA
Searching news for FOLK...
No news found for FOLK.
Searching news for HBAT...
Found 1 news items for HBAT
Searching news for GRIA...
Found 2 news items for GRIA
Searching news for PPRI...
Found 11 news items for PPRI
Skipping news item as title does not contain 'PPRI:'
Skipping news item as title does not contain 'PPRI:'
Searching news for ERAL...
Found 33 news items for ERAL
Skipping news item as title does not contain 'ERAL:'
Skipping news item as title does not contain 'ERAL:'
Skipping news item as title does not contain 'ERAL:'
Skipping news item as title does not contain 'ERAL:'
Skipping news item as title does not contain 'ERAL:'
Skipping news item as title does not contain 'ERAL:'
Skipping news item as title does not contain 'ERAL:'
Skipping news item as title does not contain 'ERAL:'
Skipping news item as title does not contain 'ERAL:'
Skipping news item as title does not contain 'ERAL:'
Skipping

Found 50 news items for BAIK
Skipping news item as title does not contain 'BAIK:'
Skipping news item as title does not contain 'BAIK:'
Skipping news item as title does not contain 'BAIK:'
Skipping news item as title does not contain 'BAIK:'
Skipping news item as title does not contain 'BAIK:'
Skipping news item as title does not contain 'BAIK:'
Skipping news item as title does not contain 'BAIK:'
Skipping news item as title does not contain 'BAIK:'
Skipping news item as title does not contain 'BAIK:'
Skipping news item as title does not contain 'BAIK:'
Skipping news item as title does not contain 'BAIK:'
Skipping news item as title does not contain 'BAIK:'
Skipping news item as title does not contain 'BAIK:'
Skipping news item as title does not contain 'BAIK:'
Skipping news item as title does not contain 'BAIK:'
Skipping news item as title does not contain 'BAIK:'
Skipping news item as title does not contain 'BAIK:'
Skipping news item as title does not contain 'BAIK:'
Skipping news ite

Found 21 news items for PART
Skipping news item as title does not contain 'PART:'
Skipping news item as title does not contain 'PART:'
Skipping news item as title does not contain 'PART:'
Skipping news item as title does not contain 'PART:'
Skipping news item as title does not contain 'PART:'
Skipping news item as title does not contain 'PART:'
Skipping news item as title does not contain 'PART:'
Skipping news item as title does not contain 'PART:'
Skipping news item as title does not contain 'PART:'
Skipping news item as title does not contain 'PART:'
Skipping news item as title does not contain 'PART:'
Skipping news item as title does not contain 'PART:'
Skipping news item as title does not contain 'PART:'
Skipping news item as title does not contain 'PART:'
Skipping news item as title does not contain 'PART:'
Skipping news item as title does not contain 'PART:'
Skipping news item as title does not contain 'PART:'
Skipping news item as title does not contain 'PART:'
Skipping news ite

Found 30 news items for RATU
Skipping news item as title does not contain 'RATU:'
Skipping news item as title does not contain 'RATU:'
Skipping news item as title does not contain 'RATU:'
Skipping news item as title does not contain 'RATU:'
Skipping news item as title does not contain 'RATU:'
Skipping news item as title does not contain 'RATU:'
Skipping news item as title does not contain 'RATU:'
Skipping news item as title does not contain 'RATU:'
Skipping news item as title does not contain 'RATU:'
Skipping news item as title does not contain 'RATU:'
Skipping news item as title does not contain 'RATU:'
Skipping news item as title does not contain 'RATU:'
Skipping news item as title does not contain 'RATU:'
Skipping news item as title does not contain 'RATU:'
Skipping news item as title does not contain 'RATU:'
Skipping news item as title does not contain 'RATU:'
Skipping news item as title does not contain 'RATU:'
Skipping news item as title does not contain 'RATU:'
Searching news fo