Scraping Part

In [1]:
import requests
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
from tqdm import tqdm
import datetime
import pytz

In [2]:
class BookScraper:
    def __init__(self):
        self.collections = [] # List to store collection URLs
        self.all_books_url = [] # List to store URLs of all books
        self.all_books_data = [] # List to store data of all books
        self.df = None # DataFrame to store book data

    def scrape_collections(self, url):
        # Scrape collection URLs from the main page
        r = requests.get(url)
        soup = BeautifulSoup(r.text, 'html.parser')

        for i in soup.find_all(href=True):
            if i['href'].startswith('/collection/knigi-') or i['href'].startswith('/collection/books'):
                self.collections.append('https://alinino.az/' + i['href'])

    def scrape_collection_books(self, collection_url):
        # Scrape book URLs from a collection page
        r = requests.get(collection_url)
        soup = BeautifulSoup(r.text, 'html.parser')

        category_div = soup.find_all('div', {'class': 'product-card-image'})

        for i in category_div:
            self.all_books_url.append(('https://alinino.az' + i.find(href=True)['href']).strip())

    def scrape_book(self, news_url):
        #Scrape data of an individual book
        result_dict = {}
        r = requests.get(news_url)
        soup = BeautifulSoup(r.text, 'html.parser')

        product_title = soup.find('h1', attrs={'itemprop': 'name'}).text

        product_price = soup.find('div', attrs={'class': 'price js-product-price on-page'}).text
        product_price = product_price.split('\xa0')[0] + ' ' + product_price.split('\xa0')[1]

        try:
            author = ((soup.find_all('div', attrs={'class': 'properties_item'})[1]).get_text(strip=True)).split("Müəllif:")[1]
        except:
            author = '----'

        try:
            publishing = ((soup.find_all('div', attrs={'class': 'properties_item'})[0]).get_text(strip=True)).split(":")[1]
        except:
            publishing = '----'

        result_dict = {"Kitabın adı": product_title,
                       "Müəllif": author,
                       "Nəşriyyat": publishing,
                       "Qiymət": product_price}
        return result_dict

    def scrape_all_books(self):
        # Scrape data of all books from the collection URLs
        for i in self.collections:
            self.scrape_collection_books(i)

        for i in tqdm(self.all_books_url):
            try:
                self.all_books_data.append(self.scrape_book(i))
            except:
                pass

    def generate_dataframe(self):
        # Generate a DataFrame from the scraped book data
        self.df = pd.DataFrame(self.all_books_data)
        self.df["Stok"] = np.random.randint(0, 18, size=len(self.df))
        self.df["ID"] = np.random.choice(range(100000, 150000), size=len(self.df), replace=False)
        self.df.set_index('ID', inplace=True)


In [3]:
scraper = BookScraper()

In [4]:
scraper.scrape_collections('https://alinino.az/#')
scraper.scrape_all_books()
scraper.generate_dataframe()
df_all_books = scraper.df

100%|██████████| 210/210 [05:05<00:00,  1.45s/it]


In [5]:
df_all_books.to_csv('list_of_books.csv')

In [6]:
df_all_books

Unnamed: 0_level_0,Kitabın adı,Müəllif,Nəşriyyat,Qiymət,Stok
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
108319,Evdə və məktəbdə addım-addım montessori təlimi,İrem Savcı Köroğlu,Qanun Nəşriyyatı,8.79 AZN,8
111414,Zamanın sınağında,----,Cəmil Həsənli,9.50 AZN,2
145358,İnsan nədir?,Mark Tven,Qanun Nəşriyyatı,3.19 AZN,13
122619,Açıq cəmiyyət və onun düşmənləri ( I cild ),Karl Raymund Popper,Qanun Nəşriyyatı,14.39 AZN,12
130238,Yol,Kormak Makkarti,Qanun Nəşriyyatı,9.59 AZN,5
...,...,...,...,...,...
143099,Fanny- French,----,Marcel Pagnol,14.48 AZN,7
129601,La fille du train- French,----,PAULA HAWKINS,24 AZN,9
115256,La Nausee - French,----,Jean-Paul Sartre,17.28 AZN,11
124046,LA FILLE DE LA CORNE D OR - French,----,Kurban Said,17.20 AZN,0


Main part

In [14]:
class Bookstore:
    """
    A class representing a bookstore.
    This class manages the inventory and sales operations of the bookstore.
    It provides methods for adding books, generating sales reports, and
    maintaining the stock.
    """
    def __init__(self):
        """
        Initialize the Bookstore object.
        Load the bookstore inventory from a CSV file and set the index to book ID.
        Initialize an empty DataFrame for the sales report.
        Initialize empty lists and strings for book and count information.
        """
        self.bookstore_data = pd.read_csv("list_of_books.csv")
        self.bookstore_data.set_index('ID', inplace=True)
    
        self.df_sales_report = pd.DataFrame(columns=['Date', 'Book Name', 'Count', "Price", 'Total Price'])

        self.book_enough_id = []
        self.book_enough_count = []

        self.book_not_enough_id = []
        self.book_not_enough_count =[]

        self.not_found = []
        self.book_string = ''
        self.count_string = ''
        self.book_not_found = ''

    def add_book(self, id, count):
        """
        Add books to the inventory.
        Args:
        id (int): ID of the book.
        count (int): Number of books to add.
        """
        self.bookstore_data.loc[id, 'Stok'] += count
        print(f"Kitab əlavə edildi. Artıq bu kitabdan {self.bookstore_data.loc[id, 'Stok']} ədəd vardır.")

    def delete_book(self, id):
        """
        Delete a book from the inventory.
        Args:
            id (int): ID of the book.
        """
        if id not in self.bookstore_data.index:
            print("Kitab tapılmadı.")
        else:
            self.bookstore_data = self.bookstore_data.drop(id)
            print("Kitab silindi.")

    def reset_var(self):
        """
        Reset the variables used for book tracking.
        """
        self.book_enough_id = []
        self.book_enough_count = []
        self.book_not_enough_id = []
        self.book_not_enough_count =[]

        self.not_found = []
        self.book_string = ''
        self.count_string = ''
        self.book_not_found = ''

    def sell_book(self):
        """
        Sell books and generate sales report.
        """
        book_ids = list(map(int,input("ID'ləri daxil edin (boşluq ilə ayırın): ").split()))
        book_num = list(map(int,input("Sayları daxil edin (boşluq ilə ayırın): ").split()))
        self.not_found = [id for id in book_ids if id not in self.bookstore_data.index ]

        for id in range(len(book_ids)): #user's preferences
            if book_ids[id] in self.bookstore_data.index:#if they match with our products
                if self.bookstore_data.loc[book_ids[id], 'Stok']==0:
                    self.not_found+=[book_ids[id]]
                elif self.bookstore_data.loc[book_ids[id], 'Stok'] < book_num[id]:#Supply<Demand=>group them like "not enough"
                    self.book_not_enough_id += [book_ids[id]] 
                    self.book_not_enough_count += [self.bookstore_data.loc[book_ids[id], 'Stok']]
                else:
                    #Supply>=Demand=>group them like "enough"
                    self.book_enough_id += [book_ids[id]]
                    self.book_enough_count +=[book_num[id]]

        if len(self.not_found)>0:
            self.book_not_found = ' '.join(map(str, self.not_found))
            print(f"Təəssüf ki, {self.book_not_found} kitabları tapılmadı.")

        if len(self.book_not_enough_id)>0:
            self.book_string = ' '.join(map(str, self.book_not_enough_id))
            self.count_string = ' '.join(map(str, self.book_not_enough_count))
            print(f"Təəssüf ki, {self.book_string} kitablarından {self.count_string} sayda qalıb.")

            if input(f"Sayı az olan {self.book_string} id-li kitabları almaq istəyirsinizmi?") == "+":
                self.book_enough_id += self.book_not_enough_id
                self.book_enough_count += self.book_not_enough_count

        if len(self.book_enough_id)>0:
            if input("Uyğun olan kitabları almaq üçün sifarişi təsdiq etmək istəyirsinizmi?") == "+":
                self.sales_report()
                self.receipt()
        else:
            self.reset_var()
            print("Sifariş ləğv olundu!")


    def receipt(self):    
        """
        Generate a receipt for the sold books.
        """
        local_tz = pytz.timezone('Asia/Baku')
        current_date = datetime.datetime.now(local_tz)
        formatted_datetime = current_date.strftime("%Y-%m-%d %I:%M %p")

        self.bookstore_data.loc[self.book_enough_id, 'Stok'] -= self.book_enough_count

        print('-'*60)
        print('elEGant Book Store')
        print('30 Khojali Avenue, Baku, Azerbaijan')
        print('-'*60)
        print(f'Date: {formatted_datetime}')
        print('Receipt Number: 00123')
        print('-'*60)

        category2= self.book_enough_id   #will be the names of the books"
        category3= self.book_enough_count  #count of each
        category4=[float(str(self.bookstore_data.loc[i, 'Qiymət']).split('AZN')[0]) for i in self.book_enough_id ]  #price of each
        category5=[i*j for i,j in zip(category3,category4)]  #count*price of each 

        a1="Item";a2="Quantity";a3="Price";a4="Subtotal"
        print(f"{a1}{' '*5}\t{a2}{' '*10}\t{a3}{' '*10}\t{a4}") #header

        for a1, a2, a3, a4 in zip(category2,category3,category4,category5):
            print(f"{a1}{' '*10}\t{a2}{' '*10}\t{round(a3,2)}{' '*5}\t{round(a4,2)}")
        print()
        print(f"Subtotal:{' '*46}{round(sum(category5),2)}")
        print(f"Tax:     {' '*47}{round(sum(category5)*0.07,2)}")
        print('-'*60)

        print(f"Grand Total:{' '*42}{round(sum(category5)*0.93,2)}")
        print('-'*60)

        self.reset_var()


    def sales_report(self):
        """
        Generate a sales report for the sold books.
        """
        local_tz = pytz.timezone('Asia/Baku')
        current_date = datetime.datetime.now(local_tz)
        formatted_datetime = current_date.strftime("%Y-%m-%d %I:%M %p")

        book_name = self.book_enough_id  # will be the names of the books
        book_count = self.book_enough_count  # count of each
        book_price = [float(str(self.bookstore_data.loc[i, 'Qiymət']).split('AZN')[0]) for i in self.book_enough_id]  # price of each
        total_price = [i * j for i, j in zip(book_count, book_price)]  # count * price of each

        report_data = {
            "Date": [formatted_datetime] * len(self.book_enough_id),
            "Book Name": book_name,
            "Count": book_count,
            "Price": book_price,
            "Total Price": total_price}
        new_df = pd.DataFrame(report_data)

        self.df_sales_report = pd.concat([self.df_sales_report, new_df], ignore_index=True)


    def return_df(self):
        """
        Return the bookstore inventory DataFrame.
        """
        return self.bookstore_data

    def return_sales_report(self):
        """
        Generate and return the sales report DataFrame.
        """
        self.df_sales_report.to_csv("sales_report.csv", index=False)
        print("Sales report generated successfully!")
        return self.df_sales_report

    def alert_system(self):
        """
        Check for books with low stock and return the alert data.
        """
        alert_data = self.bookstore_data[self.bookstore_data['Stok']<5]
        return alert_data


In [15]:
bookstore=Bookstore()

In [18]:
bookstore.add_book(145358,15)

Kitab əlavə edildi. Artıq bu kitabdan 28 ədəd vardır.


In [20]:
bookstore.delete_book(145358)

Kitab silindi.


In [21]:
bookstore.alert_system()

Unnamed: 0_level_0,Kitabın adı,Müəllif,Nəşriyyat,Qiymət,Stok
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
111414,Zamanın sınağında,----,Cəmil Həsənli,9.50 AZN,2
125611,Qara qu quşu,Nassim Nikolas Taleb,Qanun Nəşriyyatı,12.79 AZN,0
109013,Məsumluğumu itirərkən,Riçard Branson,Qanun Nəşriyyatı,16.79 AZN,4
114648,Gənc şairin dilemması,Cəlil Cavanşir,Qanun Nəşriyyatı,3.99 AZN,1
103021,Uşaqla necə ünsiyyət saxlamalı,Səidə İkigai,Qanun Nəşriyyatı,8 AZN,4
130036,Yadlar ölkəsi,Leyla Slimani,Qanun Nəşriyyatı,10.39 AZN,2
123759,Müdriklik məbədi,Əlisa Nicat,Qanun Nəşriyyatı,31.99 AZN,0
149477,Lokla 90 dəqiqə,Pol Stretern,Qanun Nəşriyyatı,3.99 AZN,0
128523,Konsulun köpəyi,Şərəf Yılmaz,Qanun Nəşriyyatı,6.39 AZN,0
110104,Kərim Xan Zənd,Məhbubə Tehrani,Qanun Nəşriyyatı,9.59 AZN,2


In [22]:
bookstore.return_df()

Unnamed: 0_level_0,Kitabın adı,Müəllif,Nəşriyyat,Qiymət,Stok
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
108319,Evdə və məktəbdə addım-addım montessori təlimi,İrem Savcı Köroğlu,Qanun Nəşriyyatı,8.79 AZN,8
111414,Zamanın sınağında,----,Cəmil Həsənli,9.50 AZN,2
122619,Açıq cəmiyyət və onun düşmənləri ( I cild ),Karl Raymund Popper,Qanun Nəşriyyatı,14.39 AZN,12
130238,Yol,Kormak Makkarti,Qanun Nəşriyyatı,9.59 AZN,5
131649,Ronaldonu al!,Mario Leo və Aleks Fon Kuçkovski,Qanun Nəşriyyatı,11.99 AZN,16
...,...,...,...,...,...
143099,Fanny- French,----,Marcel Pagnol,14.48 AZN,7
129601,La fille du train- French,----,PAULA HAWKINS,24 AZN,9
115256,La Nausee - French,----,Jean-Paul Sartre,17.28 AZN,11
124046,LA FILLE DE LA CORNE D OR - French,----,Kurban Said,17.20 AZN,0


In [23]:
bookstore.sell_book()

ID'ləri daxil edin (boşluq ilə ayırın): 122619 143099
Sayları daxil edin (boşluq ilə ayırın): 15 5
Təəssüf ki, 122619 kitablarından 12 sayda qalıb.
Sayı az olan 122619 id-li kitabları almaq istəyirsinizmi?+
Uyğun olan kitabları almaq üçün sifarişi təsdiq etmək istəyirsinizmi?+
------------------------------------------------------------
elEGant Book Store
30 Khojali Avenue, Baku, Azerbaijan
------------------------------------------------------------
Date: 2023-05-25 12:43 PM
Receipt Number: 00123
------------------------------------------------------------
Item     	Quantity          	Price          	Subtotal
143099          	5          	14.48     	72.4
122619          	12          	14.39     	172.68

Subtotal:                                              245.08
Tax:                                                    17.16
------------------------------------------------------------
Grand Total:                                          227.92
------------------------------------------

In [24]:
bookstore.return_sales_report()

Sales report generated successfully!


Unnamed: 0,Date,Book Name,Count,Price,Total Price
0,2023-05-25 12:43 PM,143099,5,14.48,72.4
1,2023-05-25 12:43 PM,122619,12,14.39,172.68


In [25]:
bookstore.sell_book()

ID'ləri daxil edin (boşluq ilə ayırın): 111414 145358
Sayları daxil edin (boşluq ilə ayırın): 2 4
Təəssüf ki, 145358 kitabları tapılmadı.
Uyğun olan kitabları almaq üçün sifarişi təsdiq etmək istəyirsinizmi?+
------------------------------------------------------------
elEGant Book Store
30 Khojali Avenue, Baku, Azerbaijan
------------------------------------------------------------
Date: 2023-05-25 12:44 PM
Receipt Number: 00123
------------------------------------------------------------
Item     	Quantity          	Price          	Subtotal
111414          	2          	9.5     	19.0

Subtotal:                                              19.0
Tax:                                                    1.33
------------------------------------------------------------
Grand Total:                                          17.67
------------------------------------------------------------


In [26]:
bookstore.return_sales_report()

Sales report generated successfully!


Unnamed: 0,Date,Book Name,Count,Price,Total Price
0,2023-05-25 12:43 PM,143099,5,14.48,72.4
1,2023-05-25 12:43 PM,122619,12,14.39,172.68
2,2023-05-25 12:44 PM,111414,2,9.5,19.0
