In [8]:
import requests
from bs4 import BeautifulSoup
from webdriver_manager.chrome import ChromeDriverManager
import json
import os
from selenium.webdriver.common.keys import Keys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from datetime import datetime
import platform
import time
import re

In [9]:
from Webtoon import Webtoon 
from Episode import Episode 
from Platform import Platform 
from Crawler import Crawler 

In [10]:
# Running counter shared by every crawl() call in this notebook: LezhinWebtoonCrawler
# reads and increments it through `global id` to number webtoons ("lz<id>" episode files).
# NOTE: this name shadows the built-in id(); it is kept because the class refers to it by name.
id = 0

In [11]:
# Ask webdriver_manager to install chromedriver, then start the Chrome session that
# every crawler cell below shares.
# NOTE(review): the value returned by install() is discarded, so webdriver.Chrome()
# locates the driver binary on its own — confirm this matches the installed Selenium version.
ChromeDriverManager().install()
driver = webdriver.Chrome()

In [12]:
# Day abbreviations understood by LezhinWebtoonCrawler, Monday first.
days = "mon tue wed thu fri sat sun".split()

In [13]:
class LezhinWebtoonCrawler(Crawler):
    """Crawl one day of the Lezhin weekly schedule and save the results as JSON.

    For the configured day the crawler opens the schedule page with Selenium,
    then downloads each listed webtoon page with ``requests`` and reads the
    ``__LZ_PRODUCT__`` object embedded in one of its ``<script>`` tags.

    ``createFolder`` and ``saveToJson`` are not defined in this class; they are
    presumably inherited from ``Crawler`` (verify against that module).
    """

    # Day abbreviation -> number used both in the `day` query parameter and in
    # the `scheduled-day-N` element id of the schedule page.
    DAY_TO_NUMBER = {
        "mon": 1,
        "tue": 2,
        "wed": 3,
        "thu": 4,
        "fri": 5,
        "sat": 6,
        "sun": 0,
    }

    # Seconds to wait for the link/genre nodes inside each schedule item.
    ELEMENT_WAIT_SECONDS = 3

    # Seconds before an HTTP request for a webtoon page is abandoned.
    REQUEST_TIMEOUT_SECONDS = 30

    # Textual patches that turn the JavaScript object literal assigned to
    # __LZ_PRODUCT__ into parseable JSON. They are applied strictly in this order.
    PRODUCT_JSON_PATCHES = (
        ("productType:", '"productType":'),
        ("product:", '"product":'),
        ("'comic',", '"comic",'),
        ("prefree:", '"prefree":'),
        ("departure:", '"departure":'),
        ("''", '""'),
        ('""\\', ''),
        ("all:", '"all":'),
    )

    def __init__(self, driver, day):
        """Store the Selenium driver and day, and prepare the Platform record.

        Args:
            driver: Selenium WebDriver used to load the schedule page.
            day: Day abbreviation; must be a key of ``DAY_TO_NUMBER`` for
                ``crawl`` to succeed.
        """
        self.driver = driver
        self.platformName = "Lezhin"
        self.day = day

        self.platform = Platform()
        self.platform.setName(self.platformName)
        self.platform.setDay(self.day)

        self.createFolder()

    def crawl(self):
        """Crawl every webtoon listed for ``self.day`` and write the JSON files.

        For each schedule item the webtoon page is fetched once; the parsed
        product is used both for the webtoon summary and for its episode list.
        Episodes go to ``Lezhin/webtoons/lz<id>`` and the collected platform
        record to ``Lezhin/Lezhin_<day>`` (paths as handed to ``saveToJson``).

        Raises:
            KeyError: If ``self.day`` is not a known day abbreviation.
        """
        # Notebook-level counter that numbers webtoons across all crawl() calls.
        global id

        dayNumber = str(self.DAY_TO_NUMBER[self.day])
        url = "https://www.lezhin.com/ko/scheduled?day=" + dayNumber
        self.driver.get(url)

        dayElement = self.driver.find_element(By.ID, 'scheduled-day-' + dayNumber)

        webtoonElementList = dayElement.find_elements(By.CLASS_NAME, 'lzComic__item')
        for webtoonElement in webtoonElementList:
            # The wait is scoped to this item, so the lookups only search inside it.
            wait = WebDriverWait(webtoonElement, self.ELEMENT_WAIT_SECONDS)
            webtoonUrl = wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'lzComic__link'))).get_attribute('href')
            imageUrl = "https://ccdn.lezhin.com/v2/comics/" + webtoonElement.get_attribute("data-id") + "/images/wide.webp"
            genre = wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'lzComic__genre'))).text

            # Download and parse the page once instead of once per consumer.
            webtoonJson = self.getWebtoonJson(webtoonUrl)

            webtoon = self.getWebtoon(webtoonUrl, imageUrl, genre, webtoonJson=webtoonJson)
            self.platform.addWebtoon(webtoon)

            episodes = self.getEpisodes(webtoonUrl, webtoonJson=webtoonJson)
            episodeFilePath = os.path.join(self.platformName, "webtoons", "lz" + str(id))
            self.saveToJson(episodeFilePath, episodes)
            id += 1

        webtoonFilePath = os.path.join(self.platformName, self.platformName + "_" + self.day)
        self.saveToJson(webtoonFilePath, self.platform)

    def getWebtoon(self, webtoonUrl, imageUrl, genre, webtoonJson=None):
        """Build a ``Webtoon`` from the product data of ``webtoonUrl``.

        Args:
            webtoonUrl: URL of the webtoon page.
            imageUrl: Cover image URL to store on the webtoon.
            genre: Genre text taken from the schedule page.
            webtoonJson: Already parsed product dict for ``webtoonUrl``; when
                omitted the page is fetched with ``getWebtoonJson``.

        Returns:
            The populated ``Webtoon`` instance.
        """
        webtoon = Webtoon()

        if webtoonJson is None:
            webtoonJson = self.getWebtoonJson(webtoonUrl)
        title = webtoonJson["display"]["title"]
        author = self.getAuthors(webtoonJson["artists"])
        serializedDay = webtoonJson["display"]["schedule"]
        summary = webtoonJson["display"]["synopsis"]

        # `id` is the notebook-level counter, read here before crawl() increments it.
        webtoon.setWebtoon(self.platformName, self.day, id, title, author, serializedDay, webtoonUrl, imageUrl, genre, summary)
        return webtoon

    def getAuthors(self, authorList):
        """Concatenate the ``name`` of every author, each followed by one space.

        The result keeps a trailing space (and is ``""`` for an empty list).
        """
        return "".join(author["name"] + " " for author in authorList)

    def getEpisodes(self, webtoonUrl, webtoonJson=None):
        """Build the list of ``Episode`` objects for one webtoon.

        Args:
            webtoonUrl: URL of the webtoon page; episode URLs are derived from it.
            webtoonJson: Already parsed product dict for ``webtoonUrl``; when
                omitted the page is fetched with ``getWebtoonJson``.

        Returns:
            A list of ``Episode`` objects in the order given by the product data.
        """
        if webtoonJson is None:
            webtoonJson = self.getWebtoonJson(webtoonUrl)

        webtoonId = webtoonJson["id"]
        # The doubled braces leave a `{}` placeholder that is filled per episode below.
        imageBaseUrl = f"https://ccdn.lezhin.com/v2/comics/{webtoonId}/episodes/{{}}/images/cover.jpg"

        episodeList = []
        for episodeJson in webtoonJson["episodes"]:
            title = episodeJson["display"]["title"]
            url = webtoonUrl + "/" + episodeJson["name"]
            imageUrl = imageBaseUrl.format(episodeJson["id"])

            try:
                publicationDate = self.getPublicationDate(episodeJson["freedAt"])
            except (KeyError, TypeError, ValueError, OverflowError, OSError):
                # "freedAt" is missing, not a number, or not convertible to a
                # date: fall back to "publishedAt".
                publicationDate = self.getPublicationDate(episodeJson["publishedAt"])

            episodeList.append(Episode(title, url, imageUrl, publicationDate))

        return episodeList

    def getWebtoonJson(self, webtoonUrl):
        """Download a webtoon page and return its embedded product data.

        The page assigns a JavaScript object literal to ``__LZ_PRODUCT__``; the
        literal is patched into valid JSON, parsed, and its ``"product"`` entry
        is returned.

        Raises:
            RuntimeError: If the HTTP status is not 200.
            ValueError: If no ``__LZ_PRODUCT__`` assignment can be found, or if
                the patched text is not valid JSON (``json.JSONDecodeError``);
                in the latter case the text is first written to ``dump.json``.
        """
        response = requests.get(webtoonUrl, timeout=self.REQUEST_TIMEOUT_SECONDS)
        if response.status_code != 200:
            print(webtoonUrl)
            print("요청 실패:", response.status_code)
            raise RuntimeError(
                f"Request to {webtoonUrl} failed with status {response.status_code}"
            )

        soup = BeautifulSoup(response.text, 'html.parser')

        productScript = None
        for scriptTag in soup.find_all('script'):
            scriptContent = scriptTag.string
            if scriptContent and '__LZ_PRODUCT__' in scriptContent:
                productScript = scriptContent
                break
        if productScript is None:
            raise ValueError(f"No __LZ_PRODUCT__ script found in {webtoonUrl}")

        match = re.search(r'__LZ_PRODUCT__\s*=\s*({.*?});', productScript, re.DOTALL)
        if match is None:
            raise ValueError(f"Could not extract the __LZ_PRODUCT__ object from {webtoonUrl}")
        product = match.group(1)

        for jsText, jsonText in self.PRODUCT_JSON_PATCHES:
            product = product.replace(jsText, jsonText)

        try:
            parsed = json.loads(product)
        except json.JSONDecodeError:
            # Keep the unparseable text for inspection, then surface the real error.
            with open("dump.json", "w", encoding="utf-8") as file:
                file.write(product)
            raise

        return parsed["product"]

    def getPublicationDate(self, timestamp):
        """Format a millisecond timestamp as ``yy.mm.dd``.

        The value is divided by 1000 and converted with
        ``datetime.fromtimestamp``, i.e. interpreted in the local timezone.
        """
        seconds = timestamp / 1000
        return datetime.fromtimestamp(seconds).strftime("%y.%m.%d")

In [14]:
# Crawl the Monday schedule with the shared Chrome driver and save its JSON output.
# The six cells below repeat this for the remaining days.
lezhinCrawler = LezhinWebtoonCrawler(driver, "mon")
lezhinCrawler.crawl()

Folder 'C:\Users\pyj78\Desktop\webtoon\Lezhin' created successfully
Folder 'Lezhin\webtoons' created successfully


In [15]:
# Crawl the Tuesday schedule; the webtoon numbering continues from the previous cell.
lezhinCrawler = LezhinWebtoonCrawler(driver, "tue")
lezhinCrawler.crawl()

Folder 'C:\Users\pyj78\Desktop\webtoon\Lezhin' already exists
Folder 'Lezhin\webtoons' already exists


In [16]:
# Crawl the Wednesday schedule; the webtoon numbering continues from the previous cell.
lezhinCrawler = LezhinWebtoonCrawler(driver, "wed")
lezhinCrawler.crawl()

Folder 'C:\Users\pyj78\Desktop\webtoon\Lezhin' already exists
Folder 'Lezhin\webtoons' already exists


In [17]:
# Crawl the Thursday schedule; the webtoon numbering continues from the previous cell.
lezhinCrawler = LezhinWebtoonCrawler(driver, "thu")
lezhinCrawler.crawl()

Folder 'C:\Users\pyj78\Desktop\webtoon\Lezhin' already exists
Folder 'Lezhin\webtoons' already exists


In [18]:
# Crawl the Friday schedule; the webtoon numbering continues from the previous cell.
lezhinCrawler = LezhinWebtoonCrawler(driver, "fri")
lezhinCrawler.crawl()

Folder 'C:\Users\pyj78\Desktop\webtoon\Lezhin' already exists
Folder 'Lezhin\webtoons' already exists


In [19]:
# Crawl the Saturday schedule; the webtoon numbering continues from the previous cell.
lezhinCrawler = LezhinWebtoonCrawler(driver, "sat")
lezhinCrawler.crawl()

Folder 'C:\Users\pyj78\Desktop\webtoon\Lezhin' already exists
Folder 'Lezhin\webtoons' already exists


In [20]:
# Crawl the Sunday schedule (day number 0 on the site); numbering continues from the previous cell.
lezhinCrawler = LezhinWebtoonCrawler(driver, "sun")
lezhinCrawler.crawl()

Folder 'C:\Users\pyj78\Desktop\webtoon\Lezhin' already exists
Folder 'Lezhin\webtoons' already exists
