In [185]:
#import all libraries
import pandas as pd
import requests
import bs4
import re
import logging
import concurrent.futures
import threading
from time import sleep
from datetime import datetime

In [186]:
def get_links():
    urls = list(pd.read_csv('books_url.csv')['link'])
    return urls

In [187]:
def get_response(input_url):
    headers = {
        'User-Agent': 'My User Agent 1.0',
        "Accept-Language": "en-US,en;q=0.5"
    }
    response = requests.get(input_url, headers=headers)
    return response

In [188]:
def get_soup(input_url):
    headers = {
        'User-Agent': 'My User Agent 1.0',
        "Accept-Language": "en-US,en;q=0.5"
    }
    response = requests.get(input_url, headers=headers)
    if response.status_code != 200:
        print("Error in getting link")
        print("response code is : ", response.status_code)
    book_urls.remove(page_url)
    soup = bs4.BeautifulSoup(response.text, 'html.parser')
    return soup

In [189]:
def get_fa_title(soup):
    title = soup.select('.product-name strong')[0]
    return title.text


In [190]:
def get_en_title(soup):
    title = soup.select('.product-name-englishname')[0]
    return title.text

In [191]:
def get_price(soup):
    price = soup.select('.price-broken , .col-md-7 .price:nth-child(1)')[0].text
    return int(price.replace(',', ''))

In [192]:
def get_discount(soup):
    try:
        discount_price = int(soup.select('.col-md-12+ .clearfix .price-special')[0].text.replace(',', ''))
        discount_price = ((get_price(soup) - discount_price) / get_price(soup)) * 100
    except Exception:
        discount_price = 0
        logging.exception("This book has no discount!")
    return discount_price

In [193]:
def get_score(soup):
    soup = soup.find('div', {'class': 'col-md-7'}).find('li', {'class': 'pull-left'}).find('div',
                                                                                           {'class': 'my-rating'})
    soup_str = str(soup)

    match = re.search(r'data-rating="(\d+\.\d+)"', soup_str)
    if match:
        data_rating = match.group(1)
        return data_rating


In [194]:
def get_publisher(soup):
    try:
        publisher_tag = soup.select('div.prodoct-attribute-items:nth-child(1) > a')[0]
        publisher_link = publisher_tag.get('href')
        publisher_id = publisher_link.split('/')[2].split('-')[0]
        publisher_name = publisher_tag.text.strip()
    except Exception:
        publisher_link = -1
        publisher_id = -1
        publisher_name = -1
    return {'id': publisher_id, 'name': publisher_name, 'link': publisher_link}

In [195]:
def get_author(soup):
    authors_list = []
    try:
        authors_a_tag = soup.select('.prodoct-attribute-items+ .prodoct-attribute-items > a')
        if (len(authors_a_tag)) == 0:
            return authors_list
        for author_a_tag in authors_a_tag:
            author_link = author_a_tag.get('href')
            author_id = author_link.split('/')[2].split('-')[0]
            author_name = author_a_tag.text.strip()
            authors_list.append({'id': author_id, 'name': author_name, 'link': author_link})
    except Exception:
         authors_list=[]         
    return authors_list

In [196]:
def is_author_available(soup):
    try:
        existence = soup.select('.pull-left+ li span')[0].text
    except:
        existence = None
        logging.exception("This book has no author!")
    return existence


In [197]:
def get_book_attribute(soup):
    rows = soup.find('table', {'class': 'product-table'}).findAll('td')
    code = -1
    isbn = -1
    size = -1
    pages = -1
    per_cal = -1
    ad_cal = -1
    material = -1
    series = -1
    send_time = -1
    language = 'فارسی'
    code_flag = 0
    isbn_flag = 0
    size_flag = 0
    pages_flag = 0
    per_cal_flag = 0
    ad_cal_flag = 0
    material_flag = 0
    language_flag = 0
    series_flag = 0
    send_time_flag = 0

    for row in rows:
        text = row.text.strip()
        if code_flag == 1:
            code = int(text)
            code_flag = 0
        elif isbn_flag == 1:
            isbn = text
            isbn = re.sub('[^0-9-]', '', isbn)
            isbn_flag = 0
        elif size_flag == 1:
            size = text
            size_flag = 0
        elif pages_flag == 1:
            pages = int(text)
            pages_flag = 0
        elif per_cal_flag == 1:
            per_cal = int(text)
            per_cal_flag = 0
        elif ad_cal_flag == 1:
            ad_cal = int(text)
            ad_cal_flag = 0
        elif material_flag == 1:
            material = text
            material_flag = 0
        elif language_flag == 1:
            language = text
            language_flag = 0
        elif series_flag == 1:
            series = int(text)
            series_flag = 0
        elif send_time_flag == 1:
            send_time = text
            send_time_flag = 0

        if 'کد کتاب' in text:
            code_flag = 1
        elif 'شابک' in text:
            isbn_flag = 1
        elif 'قطع' in text:
            size_flag = 1
        elif 'تعداد صفحه' in text:
            pages_flag = 1
        elif 'سال انتشار شمسی' in text:
            per_cal_flag = 1
        elif 'سال انتشار میلادی' in text:
            ad_cal_flag = 1
        elif 'نوع جلد' in text:
            material_flag = 1
        elif 'زبان کتاب' in text:
            language_flag = 1
        elif 'سری چاپ' in text:
            series_flag = 1
        elif 'زودترین زمان ارسال' in text:
            send_time_flag = 1

    return [code, isbn, size, pages, per_cal, ad_cal, material, language, series, send_time]

In [198]:
def get_summary(soup):
    summary = soup.select('.product-description')[0].text.strip()
    return summary

In [199]:
def get_tags(soup):
    tags = soup.select('.product-tags-item')
    tags_list = []
    for tag in tags:
        tags_list += [tag.text.strip()]
    return tags_list

In [200]:
def get_book_detail(book_soup, site_index):
    book_fa_title = get_fa_title(book_soup)
    book_en_title = get_en_title(book_soup)
    book_price = get_price(book_soup)
    book_discount_percent = get_discount(book_soup)
    book_score = get_score(book_soup)
    book_publisher = get_publisher(book_soup)
    book_author = get_author(book_soup)
    book_author_presence = is_author_available(book_soup)

    [book_code, book_Isbn, book_size, book_pages, book_publication_per_date, book_publication_ad_date,
     book_cover_material,book_language, book_print_series, book_earliest_send_time] = get_book_attribute(book_soup)

    price_history_data_list.append({'book_id':int(book_code),'price':book_price,'discount':int(book_discount_percent),'date':str(datetime.today())})
    writer_page_data_list.extend(book_author)     #9th index is the writer column which is a list of writers
    publishers_data_list.append(book_publisher)  #8th column is dict of publisher
    writers_list_of_dict=book_author.copy()        #writes list which is a list of dictionary
    if len(writers_list_of_dict)==0:
        books_writers_data_list.append({'book_id':int(book_code),'writer_id':-1})
    else:
        for w_id in writers_list_of_dict:
            books_writers_data_list.append({'book_id':int(book_code),'writer_id':w_id['id']})
    book_publisher=book_publisher['id']               #convert 8th column from dict to the publisher's id
    book_data = [site_index, int(book_code), book_Isbn, book_fa_title, book_en_title,
                 book_score, book_publisher,
                 int(book_pages), int(book_publication_per_date), int(book_publication_ad_date), book_size,
                 book_cover_material,
                  book_language,int(book_print_series), book_earliest_send_time, book_author_presence]
    return book_data

In [201]:
def get_book_site_veneration(soup, site_index):
    div = soup.find('div', attrs = {'class':'col-md-6 col-xs-12'})
    try:
        english_bars = div.find_all('div', attrs = {'class':'english-bar ltr'})
        persian_bars = div.find_all('div', attrs = {'class':'persian-bar'})
        prise_writers = div.find_all('div', attrs = {'class':'prise-writer ltr'})
        
        n = len(english_bars)
        ven_lst = list()
        for i in range(n):
            english_quote = english_bars[i].text.strip()
            persian_quote = persian_bars[i].text.strip()
            prise_writer = prise_writers[i].text.strip()

            ven_dic = {'id':site_index,
                       'English_Quote': english_quote,
                       'Persian_Quote': persian_quote,
                       'Prise_Writer': prise_writer}
            ven_lst.append(ven_dic)
        return(ven_lst)
    except:
        return []

In [202]:
def get_book_site_summary(book_soup, site_index):
    try:
        book_summary = get_summary(book_soup)
    except Exception:
        book_summary = None
        logging.exception("This book has no summary!")
    return [site_index, book_summary]

In [203]:
def get_book_site_tags(book_soup, site_index):
    book_tags = get_tags(book_soup)
    book_tags_list = []
    for tag in book_tags:
        book_tags_list += [[site_index, tag]]
    return book_tags_list

In [204]:
def get_site_awards(soup, site_index):
    awards_list = []
    awards = soup.select('book_soup, site_index')

    for award in awards:
        print(award.text)
        awards += [award.text]

In [205]:
def get_req_list(list, req_count):
    if len(list) >= req_count:
        request_list = list[:req_count].copy()
    else:
        request_list = list.copy()
    return request_list

In [206]:
def scrape(site_soup):
    try:
        with lock:
            global site_index
            site_summary_data_list.append(get_book_site_summary(site_soup, site_index))
            site_tags_data_list.extend(get_book_site_tags(site_soup, site_index))
            site_page_books = site_soup.select('.clearfix .clearfix .row')
            for book_index in range(0, len(site_page_books), 2):
                data = get_book_detail(site_page_books[book_index], site_index)
                books_data_list.append(data)
            site_index += 1
    except Exception:
        logging.exception("An error occurred")

In [207]:
def fast_scrape(link):
    try:
        site_soup = get_soup(link)
        with lock:
            global site_index
            site_summary_data_list.append(get_book_site_summary(site_soup, site_index))
            site_tags_data_list.extend(get_book_site_tags(site_soup, site_index))
            book_veneration_data_list.extend(get_book_site_veneration(site_soup,site_index))
            site_page_books = site_soup.select('.clearfix .clearfix .row')
            for book_index in range(0, len(site_page_books), 2):
                data = get_book_detail(site_page_books[book_index], site_index)
                # price_history_data_list.append({'book_id':data[1],'price':data[5],'discount':data[6],'date':str(datetime.today())})
                # writer_page_data_list.extend(data[9])     #9th index is the writer column which is a list of writers
                # publishers_data_list.append(data[8])  #8th column is dict of publisher
                # data[8]=data[8]['id']                 #convert 8th column from dict to the publisher's id
                # writers_list_of_dict=data[9]          #writes list which is a list of dictionary
                # data.pop(9)                           #remove 9th column from data(9th column was writers)
                # data.pop(5)                           #remove 5th column which is price column
                # data.pop(5)                           #remove 5th column which is discount after deleting price
                # for w_id in writers_list_of_dict:
                #      books_writers_data_list.append({'book_id':data[1],'writer_id':w_id['id']})
                books_data_list.append(data)
            site_index += 1
    except Exception:
        logging.exception("An error occurred")


<h1>Detailed Scraper</h1>

In [208]:
links = get_links()[:200] + ['https://www.iranketab.ir/book/270-gone-with-the-wind']

page_response = []
books_data_list = []
site_tags_data_list = []
site_summary_data_list = []

site_index = 1
sleep_time = 0.5
max_threads = 20
book_count_request = 20  #number of requests per time

lock = threading.Lock()
book_urls = links.copy()

while len(book_urls):
    sleep(sleep_time)  #sleep so that the site does not ban us
    request_list = get_req_list(book_urls, book_count_request)  #list of book's urls we want to send request 
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_threads) as executor:
        future_list = executor.map(get_response, request_list)
        for future in future_list:
            try:
                data = future
                page_response.append(data)
            except Exception as exc:
                continue
        for item in page_response:
            if item.status_code == 200:
                page_url = item.url
                if page_url in request_list:
                    page_soup = bs4.BeautifulSoup(item.content, 'html.parser')
                    scrape(page_soup)
                    book_urls.remove(page_url)

<h1>Fast Scraper</h1>

In [209]:
links = get_links()[:500] #+ ['https://www.iranketab.ir/book/270-gone-with-the-wind']

books_data_list = []
site_tags_data_list = []
site_summary_data_list = []
writer_page_data_list=[]
publishers_data_list=[]
books_writers_data_list=[]
price_history_data_list=[]
book_veneration_data_list=[]

site_index = 1
max_threads = 20

lock = threading.Lock()

with concurrent.futures.ThreadPoolExecutor(max_workers=max_threads) as executor:
    executor.map(fast_scrape, links)


ERROR:root:This book has no discount!
Traceback (most recent call last):
  File "C:\Users\Asus\AppData\Local\Temp\ipykernel_37076\1188541366.py", line 3, in get_discount
    discount_price = int(soup.select('.col-md-12+ .clearfix .price-special')[0].text.replace(',', ''))
IndexError: list index out of range
ERROR:root:This book has no summary!
Traceback (most recent call last):
  File "C:\Users\Asus\AppData\Local\Temp\ipykernel_37076\1156613770.py", line 3, in get_book_site_summary
    book_summary = get_summary(book_soup)
  File "C:\Users\Asus\AppData\Local\Temp\ipykernel_37076\3952702039.py", line 2, in get_summary
    summary = soup.select('.product-description')[0].text.strip()
IndexError: list index out of range
ERROR:root:This book has no summary!
Traceback (most recent call last):
  File "C:\Users\Asus\AppData\Local\Temp\ipykernel_37076\1156613770.py", line 3, in get_book_site_summary
    book_summary = get_summary(book_soup)
  File "C:\Users\Asus\AppData\Local\Temp\ipykernel_37

<h1>Check Completnes</h1>

In [210]:
if len(book_urls) == 0:
    print('All links scraped!')
else:
    print('Something wrong happened!',len(book_urls),)

Something wrong happened! Missed  11 links while scraping.


<h1>Make Dataframes</h1>

In [211]:
tableOfData = pd.DataFrame(books_data_list,
                           columns=['site_index', 'code', 'Isbn', 'fa_title', 'en_title', 'score',
                                    'publisher_id', 'pages', 'publication_per_date', 'publication_ad_date',
                                    'size', 'cover_material', 'print_series','language' ,'earliest_send_time', 'presence'])
tableOfData

Unnamed: 0,site_index,code,Isbn,fa_title,en_title,score,publisher_id,pages,publication_per_date,publication_ad_date,size,cover_material,print_series,language,earliest_send_time,presence
0,1,-1,978-9648014327,دفتر یادداشت ترکیبی,Notebook,3.39,1773,-1,-1,-1,پالتویی,-1,فارسی,-1,---,تمام شد ، اما میاریمش 😏
1,2,107977,978-9641721932,کتاب قاصدک ها در هوا ایستاده اند,Dandelions are standing in the air,3.18,1122,123,1401,-1,رقعی,شومیز,فارسی,1,6 مهر,موجود
2,3,101571,978-6007405529,کتاب نقدی بر مارکسیسم نواسمیتی,Criticism of Neo-Smithian Marxism,3.73,33,261,1399,-1,رقعی,شومیز,فارسی,2,5 مهر,موجود
3,4,41233,978-9644238291,کتاب زبان و ادب فارسی در قلمرو عثمانی,Persian language and literature in the Ottoma...,3.95,90,304,1396,-1,وزیری,زرکوب,فارسی,3,5 مهر,موجود
4,5,110534,978-6229262764,کتاب ترلان ارسباران,Tarlan Arasbaran,3.36,1389,648,1402,-1,رقعی,شومیز,فارسی,1,6 مهر,موجود
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
210,196,19734,978-6004623056,کتاب این کتاب عکس ندارد!,The Book with No Pictures,3.84,28,48,1400,2014,رحلی,شومیز,فارسی,2,5 مهر,موجود
211,197,34264,978-6008267355,کتاب جهان باکتری ها,The Surprising World of Bacteria with Max Axi...,3.3,2193,32,1395,2010,وزیری,شومیز,فارسی,2,5 مهر,موجود
212,198,15719,978-6229606506,کتاب زندگانی من و روزگار سخت,My Life and Hard Times,3.54,1142,116,1398,1933,رقعی,شومیز,فارسی,1,5 مهر,موجود
213,198,79410,978-6220108443,روزهای دشوار زندگی من,My Life and Hard Times,3.01,33,130,1401,1933,رقعی,شومیز,فارسی,1,5 مهر,موجود


In [212]:
table_of_cover_type=pd.DataFrame(tableOfData['cover_material'].drop_duplicates())
table_of_cover_type=table_of_cover_type[table_of_cover_type['cover_material']!=-1].reset_index(drop=True)
table_of_cover_type.to_csv('./cover_type.csv',encoding='utf-8')
table_of_cover_type

Unnamed: 0,cover_material
0,شومیز
1,زرکوب
2,جلد سخت
3,جلد نرم
4,سلفونی


In [213]:
table_of_format=pd.DataFrame(tableOfData['size'].drop_duplicates())
table_of_format=table_of_format[table_of_format['size']!=-1].reset_index(drop=True)
table_of_format.to_csv('./format.csv',encoding='utf-8')
table_of_format

Unnamed: 0,size
0,پالتویی
1,رقعی
2,وزیری
3,خشتی
4,رحلی
5,جیبی
6,بیاضی


In [214]:
def convert_size_to_int(size):
    if size=='وزیری':
        return 0
    elif size=='رقعی':
        return 1
    elif size=='جیبی':
        return 2
    elif size=='رحلی':
        return 3
    elif size=='پالتویی':
        return 4
    elif size=='خشتی':
        return 5
    elif size=='بیاضی':
        return 6
    else:
        return -1

tableOfData['size']=tableOfData['size'].apply(convert_size_to_int)

In [215]:
def convert_cover_type_to_int(material):
    if material=='شومیز':
        return 0
    elif material=='زرکوب':
        return 1
    elif material=='جلد سخت':
        return 2
    elif material=='جلد نرم':
        return 3
    elif material=='سلفونی':
        return 4
    else:
        return -1

tableOfData['cover_material']=tableOfData['cover_material'].apply(convert_cover_type_to_int)

In [216]:
tableOfData

Unnamed: 0,site_index,code,Isbn,fa_title,en_title,score,publisher_id,pages,publication_per_date,publication_ad_date,size,cover_material,print_series,language,earliest_send_time,presence
0,1,-1,978-9648014327,دفتر یادداشت ترکیبی,Notebook,3.39,1773,-1,-1,-1,4,-1,فارسی,-1,---,تمام شد ، اما میاریمش 😏
1,2,107977,978-9641721932,کتاب قاصدک ها در هوا ایستاده اند,Dandelions are standing in the air,3.18,1122,123,1401,-1,1,0,فارسی,1,6 مهر,موجود
2,3,101571,978-6007405529,کتاب نقدی بر مارکسیسم نواسمیتی,Criticism of Neo-Smithian Marxism,3.73,33,261,1399,-1,1,0,فارسی,2,5 مهر,موجود
3,4,41233,978-9644238291,کتاب زبان و ادب فارسی در قلمرو عثمانی,Persian language and literature in the Ottoma...,3.95,90,304,1396,-1,0,1,فارسی,3,5 مهر,موجود
4,5,110534,978-6229262764,کتاب ترلان ارسباران,Tarlan Arasbaran,3.36,1389,648,1402,-1,1,0,فارسی,1,6 مهر,موجود
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
210,196,19734,978-6004623056,کتاب این کتاب عکس ندارد!,The Book with No Pictures,3.84,28,48,1400,2014,3,0,فارسی,2,5 مهر,موجود
211,197,34264,978-6008267355,کتاب جهان باکتری ها,The Surprising World of Bacteria with Max Axi...,3.3,2193,32,1395,2010,0,0,فارسی,2,5 مهر,موجود
212,198,15719,978-6229606506,کتاب زندگانی من و روزگار سخت,My Life and Hard Times,3.54,1142,116,1398,1933,1,0,فارسی,1,5 مهر,موجود
213,198,79410,978-6220108443,روزهای دشوار زندگی من,My Life and Hard Times,3.01,33,130,1401,1933,1,0,فارسی,1,5 مهر,موجود


In [217]:
file_path = "bookData.csv"
tableOfData.to_csv(file_path, index=False, encoding='utf-8')

In [218]:
tableOfSummaryData = pd.DataFrame(site_summary_data_list, columns=['site_index', 'summary']).drop_duplicates(subset=['site_index','summary'])
tableOfSummaryData

Unnamed: 0,site_index,summary
0,1,دفتر يادداشت تركيبي پنگوئن:خط دار،بي خط،نقطه ا...
1,2,
2,3,از نظر برنر، می‌توان روایت پخته‌تر و منسجم‌تری...
3,4,"کتاب ""زبان و ادب فارسی در قلمرو عثمانی"" نوشته ..."
4,5,
...,...,...
195,194,در زمانی که بسیاری از پسران در بحران هستند ، ی...
196,195,
197,196,هشدار!خب بله، این کتاب بسیار بسیار بسیاااااااا...
198,197,با ابر دانشمند مکس آکسیوم، دیدار کنید او توانا...


In [219]:
file_path = "BookSummaryData.csv"
tableOfData.to_csv(file_path, index=False, encoding='utf-8')

In [220]:
tableOfSiteTagsData = pd.DataFrame(site_tags_data_list, columns=['site_index', 'tag']).drop_duplicates(subset=['site_index','tag'])
tableOfSiteTagsData

Unnamed: 0,site_index,tag
0,1,دفتر یادداشت
1,1,دفتر یادداشت نقطه ای
2,1,دفتر یادداشت خط دار
3,1,دفتر یادداشت بی خط
4,2,شعر
...,...,...
1054,198,داستان کمدی (طنز)
1055,198,خود زندگی نامه
1056,198,ادبیات واقع گرایانه
1057,198,دهه 1930 میلادی


In [221]:
file_path = "bookTagsData.csv"
tableOfData.to_csv(file_path, index=False, encoding='utf-8')

In [222]:
table_of_publisher=pd.DataFrame(publishers_data_list).drop_duplicates(subset=['id','name','link'])
table_of_publisher.to_csv('./publisher.csv',index=False,encoding='utf-8')
table_of_publisher

Unnamed: 0,id,name,link
0,1773,کارگاه فیلم و گرافیک سپاس,/publisher/1773-%da%a9%d8%a7%d8%b1%da%af%d8%a7...
1,1122,دنیای نو,/publisher/1122-%d8%af%d9%86%db%8c%d8%a7%db%8c...
2,33,نشر چشمه,/publisher/33-%d9%86%d8%b4%d8%b1-%da%86%d8%b4%...
3,90,اطلاعات,/publisher/90-%d8%a7%d8%b7%d9%84%d8%a7%d8%b9%d...
4,1389,آوای مهدیس,/publisher/1389-%d8%a2%d9%88%d8%a7%db%8c-%d9%8...
...,...,...,...
206,48,ققنوس,/publisher/48-%d9%82%d9%82%d9%86%d9%88%d8%b3
208,53,کتاب کوله پشتی,/publisher/53-%da%a9%d8%aa%d8%a7%d8%a8-%da%a9%...
209,85,نیلا,/publisher/85-%d9%86%db%8c%d9%84%d8%a7
211,2193,آوای روزان,/publisher/2193-%d8%a2%d9%88%d8%a7%db%8c-%d8%b...


In [223]:
table_of_writer_page=pd.DataFrame(writer_page_data_list).drop_duplicates(subset=['id','name','link'])
table_of_writer_page.to_csv('./writer_page.csv',index=False,encoding='utf-8')
table_of_writer_page

Unnamed: 0,id,name,link
0,63375,مهدی حسینی (مهد),/profile/63375-%d9%85%d9%87%d8%af%db%8c-%d8%ad...
1,12330,رابرت برنر,/profile/12330-robert-brenner
2,5839,محمد امین ریاحی,/profile/5839-mohammad-amin-riahi
3,4627,اعظم فرخزاد,/profile/4627-azam-farokhzad
4,43502,آدام گرینفیلد,/profile/43502-adam-greenfield
...,...,...,...
202,22621,مایکل سی ریچرت,/profile/22621-michael-c-reichert
203,22975,محمد رحمانیان,/profile/22975-%d9%85%d8%ad%d9%85%d8%af-%d8%b1...
204,9801,بی جی نواک,/profile/9801-b-j-novak
205,19773,انیسکا بسکاپ,/profile/19773-agnieszka-biskup


In [224]:
books_writers_data_list=list(filter(bool, books_writers_data_list))
table_of_writer=pd.DataFrame(books_writers_data_list).drop_duplicates(subset=['book_id','writer_id'])
table_of_writer.to_csv('./writer.csv',index=False,encoding='utf-8')
table_of_writer

Unnamed: 0,book_id,writer_id
0,-1,-1
1,107977,63375
2,101571,12330
3,41233,5839
4,110534,4627
...,...,...
218,19734,9801
219,34264,19773
220,15719,9113
221,79410,9113


In [225]:
table_of_price_history=pd.DataFrame(price_history_data_list).drop_duplicates(subset=['book_id','price','discount','date'])
table_of_price_history.to_csv('./price-history.csv',index=False,encoding='utf-8')
table_of_price_history

Unnamed: 0,book_id,price,discount,date
0,-1,175000,0,2023-09-25 00:09:01.195104
1,107977,69000,20,2023-09-25 00:09:02.329761
2,101571,52000,15,2023-09-25 00:09:04.810411
3,41233,22000,10,2023-09-25 00:09:06.243034
4,110534,380000,15,2023-09-25 00:09:06.378880
...,...,...,...,...
210,19734,49000,20,2023-09-25 00:09:44.718560
211,34264,35000,20,2023-09-25 00:09:44.819662
212,15719,35000,15,2023-09-25 00:09:44.918792
213,79410,60000,0,2023-09-25 00:09:44.932052


In [226]:
book_veneration_data_list=list(filter(bool, book_veneration_data_list))
table_of_book_veneration=pd.DataFrame(book_veneration_data_list).drop_duplicates(subset=['id','English_Quote','Persian_Quote','Prise_Writer'])
table_of_book_veneration.to_csv('./book_veneration.csv',index=False,encoding='utf-8')
table_of_book_veneration

Unnamed: 0,id,English_Quote,Persian_Quote,Prise_Writer
0,6,A tremendously intelligent and stylish book.,کتابی فوق العاده هوشمندانه و شیوا.,Guardian
1,6,A cautionary tale in which each chapter walks ...,داستانی هشداردهنده که هر فصل از آن، مخاطبین را...,Times Literary Supplement
2,6,A very valuable contribution to the discussion...,مشارکتی بسیار ارزشمند در بحث درباره این که آین...,Morning Star
3,18,"Beautifully written, intricately plotted.",با نثری زیبا و داستانی چندوجهی.,Barnes & Noble
4,18,"An extraordinary, gripping novel about bravery...",رمانی جذاب و فوق العاده درباره ی شجاعت، سوگ و ...,Goodreads
5,18,An intelligent and humane historical mystery.,یک داستان معمایی/تاریخی هوشمندانه و انسانی.,Wall Street Journal
6,25,"This detailed, interdisciplinary study is reco...",این مطالعه دقیق و بین رشته ای برای همه کتابخان...,Library Journal
7,59,[Paris] builds a nice plot and brings some ori...,پاریس طرح خوبی از داستان می سازد و اصالت جدیدی...,New York Times Book Review
8,59,Paris once again proves her suspense chops wit...,پاریس بار دیگر اثبات می کند که تعلیق های او با...,Library Journal
9,59,Outstanding Hitchcockian thriller...Paris play...,تریلری هیچکاکی و برجسته...پاریس با این پرداخت ...,Publishers Weekly
