In [59]:
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
from bs4 import BeautifulSoup as bs
from datetime import datetime

url = 'http://www.ipostock.co.kr/sub03/ipo06.asp'

In [62]:
def html_parsing(url):
    # 세션 생성
    session = requests.Session()
    
    # 재시도 로직 설정
    retries = Retry(total=5, backoff_factor=0.1, status_forcelist=[500, 502, 503, 504])
    adapter = HTTPAdapter(max_retries=retries)
    session.mount('http://', adapter)
    
    try:
        response = session.get(url)
        response.raise_for_status()  # HTTP 에러가 발생하면 예외를 발생시킴
        response.encoding='UTF-8' # 한글 깨지는 문제 예방
        html_text = response.text
        return html_text
        # print(response.text)
    except requests.exceptions.RequestException as e:
        print(f"Error: {e}")

In [83]:
ipo_cal = html_parsing(url)

In [84]:
soup = bs(ipo_cal, 'html.parser')

In [90]:
# 데이터 저장을 위한 딕셔너리 초기화
data = {}

# td 태그를 모두 찾음
tds = soup.find_all('td', class_='days')

for td in tds:
    date_tag = td.find('strong')
    if date_tag:
        date = date_tag.text.strip()
        rows = td.find_all('tr')
        
        for row in rows:
            img_tag = row.find('img')
            a_tag = row.find('a')
            
            if img_tag and a_tag:
                img_src = img_tag['src']
                link = a_tag['href']
                title = a_tag['title']
                
                # 날짜를 키로 사용하여 링크, 제목, 이미지 src를 저장
                if date not in data:
                    data[date] = []
                data[date].append({'link': link, 'title': title, 'img_src': img_src})

In [91]:
# 결과 출력
for date, infos in data.items():
    print(f"Date: {date}")
    for info in infos:
        print(f"  Link: {info['link']}, Title: {info['title']}, Img Src: {info['img_src']}")

Date: 1
  Link: /view_pg/view_04.asp?code=B202405291, Title: 신한글로벌액티브리츠, Img Src: ../image/icon/ipo7.jpg
Date: 2
  Link: /view_pg/view_04.asp?code=B202312111, Title: 이노스페이스, Img Src: ../image/icon/ipo7.jpg
Date: 3
  Link: /view_pg/view_04.asp?code=B202402201, Title: 하스, Img Src: ../image/icon/ipo7.jpg
  Link: /view_pg/view_04.asp?code=B202403051, Title: 시프트업, Img Src: ../image/icon/ipo4.gif
  Link: /view_pg/view_04.asp?code=B202403221, Title: 이베스트스팩6호, Img Src: ../image/icon/ipo4.gif
Date: 4
  Link: /view_pg/view_04.asp?code=B202104161, Title: 엑셀세라퓨틱스, Img Src: ../image/icon/ipo4.gif
Date: 5
  Link: /view_pg/view_04.asp?code=B202403051, Title: 시프트업, Img Src: ../image/icon/ipo6.jpg
  Link: /view_pg/view_04.asp?code=B202403221, Title: 이베스트스팩6호, Img Src: ../image/icon/ipo6.jpg
Date: 8
  Link: /view_pg/view_04.asp?code=B202104161, Title: 엑셀세라퓨틱스, Img Src: ../image/icon/ipo6.jpg
Date: 10
  Link: /view_pg/view_04.asp?code=B202404022, Title: 에스케이증권제13호스팩, Img Src: ../image/icon/ipo3.jpg
  Lin

In [25]:
today = datetime.now().strftime("%d")

In [153]:
today_ipo_data = {}

today = '3'

if data[today]:
    for i in range(len(data[today])):
        link = data[today][i]['link']
        detail_url = 'http://www.ipostock.co.kr' + link
        detail_soup = bs(html_parsing(detail_url), 'html.parser')
        table = detail_soup.find_all('table', class_='view_tb')
        ipo_sub_date, ipo_refund_date, ipo_public_date, confirmed_ipo_price, company = detail_crawling(table)
        today_ipo_data[data[today][i]['title']] = [ipo_sub_date, ipo_refund_date, ipo_public_date, confirmed_ipo_price, company]
        
else:
    print('no')

In [154]:
today_ipo_data

{'하스': ['2024.06.24 ~ 06.25',
  '2024.06.27',
  '2024.07.03',
  '16,000 원',
  ['삼성증권']],
 '시프트업': ['2024.07.02 ~ 07.03',
  '2024.07.05',
  '2024.07.11',
  '60,000 원',
  ['한국투자증권', 'NH투자증권', '신한투자증권']],
 '이베스트스팩6호': ['2024.07.02 ~ 07.03',
  '2024.07.05',
  '2024.07.12',
  '2,000 원',
  ['이베스트투자']]}

In [111]:
detail_url = 'http://www.ipostock.co.kr' + link
detail_soup = bs(html_parsing(detail_url), 'html.parser')

In [112]:
table = detail_soup.find_all('table', class_='view_tb')

In [148]:
def detail_crawling(table):
    # 필요한 정보 저장을 위한 변수 초기화
    ipo_sub_date = None
    ipo_refund_date = None
    ipo_public_date = None
    confirmed_ipo_price = None
    company = []
    
    for row in table[0].find_all('tr'):
        cells = row.find_all('td')
        if '공모청약일' in cells[0].text:
            ipo_sub_date = cells[1].text.strip()
        elif '환불일' in cells[0].text:
            ipo_refund_date = cells[1].text.strip()
        elif '상장일' in cells[0].text:
            ipo_public_date = cells[1].text.strip()
    
    for row in table[1].find_all('tr'):
        cells = row.find_all('td')
        if '(확정)공모가격' in cells[0].text:
            confirmed_ipo_price = cells[1].text.strip()
    
    for row in table[3].find_all('tr'):
        cells = row.find_all('td')
        if '증권회사' not in cells[0].text:
            company.append(cells[0].text.strip())

    return ipo_sub_date, ipo_refund_date, ipo_public_date, confirmed_ipo_price, company

In [142]:
# 결과 출력
print(f"공모청약일: {ipo_dates.get('공모청약일')}")
print(f"환불일: {ipo_dates.get('환불일')}")
print(f"상장일: {ipo_dates.get('상장일')}")
print(f"(확정)공모가격: {confirmed_ipo_price}")
print(f"증권회사: {company}")

공모청약일: 2024.07.15 ~ 07.16
환불일: 2024.07.18
상장일: 2024.07.25
(확정)공모가격: 2,000 원
증권회사: ['SK증권']
