In [1]:
import datetime
import time
from pathlib import Path
from urllib.request import urlopen, Request

import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

In [2]:
# Download the Thai-language economic calendar page and collect its event rows.
# A browser-like User-Agent header is sent with the request.
r = Request('https://th.investing.com/economic-calendar/', headers={'User-Agent': 'Mozilla/5.0'})

# Fixed 2-second pause before issuing the request.
time.sleep(2)

# The context manager closes the HTTP response even if read() fails, and the
# timeout keeps a stalled connection from blocking the kernel indefinitely.
with urlopen(r, timeout=30) as http_response:
    response = http_response.read()

soup = BeautifulSoup(response, "html.parser")

# Every element carrying the "js-event-item" class is treated as one calendar event row.
table = soup.find_all(class_ = "js-event-item")

# Start from an empty module-level `result` list on every run of this cell.
result = []

In [3]:
def _cell_text(row, css_class):
    """Return the stripped text of the first child of `row` matching `css_class`, or '' if absent."""
    cell = row.find(class_=css_class)
    return cell.text.strip() if cell is not None else ''


def event_calendar(rows=None):
    """Convert scraped economic-calendar rows into a list of plain dicts.

    Parameters
    ----------
    rows : iterable, optional
        Row elements to parse. Each element must provide ``get``, ``find`` and
        ``find_all`` the way BeautifulSoup tags do. When omitted, the
        module-level ``table`` is used.

    Returns
    -------
    list of dict
        One dict per row with the keys ``'currency'``, ``'date'``, ``'time'``,
        ``'intensity'`` and ``'event'``. A new list is built on every call, so
        calling the function repeatedly never accumulates duplicate rows.

    Notes
    -----
    ``intensity`` is derived from the number of ``grayFullBullishIcon``
    elements inside the row's sentiment cell: 3 icons -> 3, 2 icons -> 2, any
    other count -> 1. It stays 0 when the row has no sentiment cell at all.
    Missing text cells and a missing or partial ``data-event-datetime``
    attribute yield empty strings instead of raising.
    """
    if rows is None:
        rows = table

    events = []
    for row in rows:
        currency = _cell_text(row, "left flagCur noWrap")
        event = _cell_text(row, "left event")

        intensity = 0
        for sentiment_cell in row.find_all(class_="left textNum sentiment noWrap"):
            filled = len(sentiment_cell.find_all(class_="grayFullBullishIcon"))
            # Only 2 and 3 are passed through; every other count maps to 1.
            intensity = filled if filled in (2, 3) else 1

        # The attribute is split on spaces into "<date> <time>"; rows without
        # the attribute, or without a time part, fall back to '' for the gap.
        stamp_parts = (row.get('data-event-datetime') or '').strip().split(' ')
        date = stamp_parts[0]
        # Named event_time so the imported `time` module is not shadowed.
        event_time = stamp_parts[1] if len(stamp_parts) > 1 else ''

        events.append({'currency' : currency, 'date' : date, 'time' : event_time, 'intensity' : intensity, 'event':event})

    return events

In [4]:
# Parse the scraped rows and load them into a DataFrame.
news = event_calendar()

# Pinning the columns keeps the schema stable even when no rows were scraped;
# an empty list would otherwise produce a frame without an 'intensity' column.
news_df = pd.DataFrame(news, columns=['currency', 'date', 'time', 'intensity', 'event'])

In [5]:
# Quick sanity check: (rows, columns) followed by the first ten events.
display(news_df.shape, news_df.head(10))

(77, 5)

Unnamed: 0,currency,date,time,intensity,event
0,USD,2025/06/18,00:00:00,2,การประมูลพันธบัตร TIPS อายุ 5 ปี
1,USD,2025/06/18,03:30:00,2,รายงานสินค้าคงเหลือของน้ำมันดิบประจำไตรมาสจาก API
2,NZD,2025/06/18,04:00:00,2,ดัชนีความเชื่อมั่นผู้บริโภคจาก Westpac (ไตรมา...
3,NZD,2025/06/18,05:45:00,2,บัญชีเดินสะพัด (Current Account) (ไตรมาสต่อไตร...
4,NZD,2025/06/18,05:45:00,2,Current Account (ปีต่อปี) (ไตรมาส 1)
5,NZD,2025/06/18,05:45:00,1,% ของดุลบัญชีเดินสะพัดต่อค่าจีดีพี (ไตรมาส 1)
6,JPY,2025/06/18,06:00:00,1,ดัชนีจากการสำรวจ Reuters Tankan (มิ.ย.)
7,JPY,2025/06/18,06:50:00,2,ดุลการค้าที่ได้ปรับรายการ
8,JPY,2025/06/18,06:50:00,1,ยอดคำสั่งซื้อสินค้าประเภทเครื่องจักร (เดือนต่อ...
9,JPY,2025/06/18,06:50:00,1,ยอดคำสั่งซื้อเครื่องจักรกล (ปีต่อปี) (เม.ย.)


In [6]:
# Save a timestamped CSV snapshot of the scraped events.
EXPORT_DIR = Path('Data/Event_Calendar')
# to_csv does not create missing folders, so make sure the target exists first.
EXPORT_DIR.mkdir(parents=True, exist_ok=True)

now = datetime.datetime.now()
# Filename-safe local timestamp, e.g. 2025-06-18_14-05-09.
date_time = now.strftime("%Y-%m-%d_%H-%M-%S")
news_df.to_csv(EXPORT_DIR / f'{date_time}.csv', index=False)

In [7]:
# List the currency and event name of every row whose intensity equals 3.
high_impact = news_df['intensity'] == 3
print(news_df.loc[high_impact, ['currency', 'event']])

   currency                                        event
19      GBP  ดัชนีราคาผู้บริโภค (CPI) (ปีต่อปี)  ( พ.ค.)
37      EUR  ดัชนีราคาผู้บริโภค (CPI) (ปีต่อปี)  ( พ.ค.)
56      USD  จำนวนคนที่ยื่นขอรับสวัสดิการว่างงานครั้งแรก
58      USD                        สินค้าคงคลังน้ำมันดิบ


In [8]:
import os
from dotenv import load_dotenv
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi

# Read the connection string from the env file rather than hardcoding it.
load_dotenv("../SentimentAnalysis/GPT/secret.env")

mongo_connection_string = os.getenv("MONGO_CONNECTION_STRING")

# Fail early: MongoClient(None) would not error but fall back to its default
# host, so a missing variable could silently target the wrong server.
if not mongo_connection_string:
    raise RuntimeError(
        "MONGO_CONNECTION_STRING is not set; check ../SentimentAnalysis/GPT/secret.env"
    )

try:
    client = MongoClient(mongo_connection_string)

    # Round-trip to the server so connection problems surface in this cell.
    client.admin.command('ping')
    print("Pinged your deployment. You successfully connected to MongoDB!")

except Exception as e:
    print(f"An error occurred: {e}")
    # Re-raise so execution stops here instead of continuing with an
    # undefined (or stale) `collection` in the cells that follow.
    raise

# Handles used by the insert cell below.
db = client['stock_news_db']
collection = db['event_calendar']

Pinged your deployment. You successfully connected to MongoDB!


In [9]:
# One MongoDB document per DataFrame row.
complete_dict = news_df.to_dict(orient='records')

if complete_dict:
    result = collection.insert_many(complete_dict, ordered=True)
    # Report the count only; printing every ObjectId floods the cell output.
    print(f"Successfully inserted {len(result.inserted_ids)} documents")
else:
    # insert_many rejects an empty document list, so skip the write entirely.
    print("No events to insert; skipping MongoDB write.")

Successfully inserted document with id: [ObjectId('6852637ab070ecb53f98d0ce'), ObjectId('6852637ab070ecb53f98d0cf'), ObjectId('6852637ab070ecb53f98d0d0'), ObjectId('6852637ab070ecb53f98d0d1'), ObjectId('6852637ab070ecb53f98d0d2'), ObjectId('6852637ab070ecb53f98d0d3'), ObjectId('6852637ab070ecb53f98d0d4'), ObjectId('6852637ab070ecb53f98d0d5'), ObjectId('6852637ab070ecb53f98d0d6'), ObjectId('6852637ab070ecb53f98d0d7'), ObjectId('6852637ab070ecb53f98d0d8'), ObjectId('6852637ab070ecb53f98d0d9'), ObjectId('6852637ab070ecb53f98d0da'), ObjectId('6852637ab070ecb53f98d0db'), ObjectId('6852637ab070ecb53f98d0dc'), ObjectId('6852637ab070ecb53f98d0dd'), ObjectId('6852637ab070ecb53f98d0de'), ObjectId('6852637ab070ecb53f98d0df'), ObjectId('6852637ab070ecb53f98d0e0'), ObjectId('6852637ab070ecb53f98d0e1'), ObjectId('6852637ab070ecb53f98d0e2'), ObjectId('6852637ab070ecb53f98d0e3'), ObjectId('6852637ab070ecb53f98d0e4'), ObjectId('6852637ab070ecb53f98d0e5'), ObjectId('6852637ab070ecb53f98d0e6'), ObjectId(