# In [None]:
import sqlite3
from datetime import datetime
from os import getenv, makedirs, path
from threading import Event

from bs4 import BeautifulSoup
from dotenv import load_dotenv
from requests import get, exceptions
from unidecode import unidecode

# Configuration: the listing URL to scrape is read from the URL_ADDRESS
# environment variable (after load_dotenv() has had a chance to populate it).
load_dotenv()
url_list_advertises = getenv('URL_ADDRESS')

# One SQLite file per calendar day, e.g. databases/data_2021_03_12.db.
date_database = datetime.today().strftime('%Y_%m_%d')
database_path = f'databases/data_{date_database}.db'

# sqlite3 does not create missing parent directories, so make sure the target
# folder exists before the first connection is opened.
makedirs(path.dirname(database_path), exist_ok=True)

# CREATE TABLE IF NOT EXISTS keeps the script re-runnable on the same day and
# also repairs a database file that exists without the table (for example
# after an interrupted first run), which a plain "does the file exist" check
# cannot detect.
conn = sqlite3.connect(database_path)
try:
    with conn:  # transaction scope only: commits on success, rolls back on error
        conn.execute("""CREATE TABLE IF NOT EXISTS advert
                          (Date BLOB, Name TEXT, District TEXT, Street TEXT, Price TEXT, Surface TEXT,
                           Number_of_rooms TEXT, Floor TEXT, Year_of_construction TEXT, Id_offer TEXT,
                           Date_of_publish TEXT, Distinction_date TEXT, Actualisation_date TEXT, Views TEXT,
                           Heating_type_information TEXT, Building_height TEXT, Additional_information TEXT)
                           """)
finally:
    # The connection's context manager does not close it; do so explicitly.
    conn.close()


def getting_last_page(timeout: float = 30) -> int:
    """Return the number of the last result page of the advert listing.

    Downloads ``url_list_advertises`` and looks at the first
    ``<li class="page-step">`` element. The third tag nested inside that
    element is rendered to a string, its second space-separated token is
    treated as an ``attribute="value"`` pair, and the value (with the
    surrounding double quotes removed) is converted to ``int``.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout the request could block forever on a stalled connection.

    Returns:
        The last page number as an integer.

    Raises:
        ValueError: If the page has no ``page-step`` element (previously the
            function silently returned ``None``, which only failed later with
            an unrelated ``TypeError`` in the caller), or if the extracted
            value is not a number.
        IndexError: If the ``page-step`` element does not have the expected
            structure.
        requests.exceptions.RequestException: If the download fails or times
            out.
    """
    f_page = get(url_list_advertises, timeout=timeout)
    f_bs = BeautifulSoup(f_page.content, "html.parser")

    page_step = f_bs.find('li', class_='page-step')
    if page_step is None:
        raise ValueError(f'No "page-step" pagination element found at {url_list_advertises}')

    # e.g. '<tag attr="12" ...>' -> 'attr="12"' -> '"12"' -> '12'
    attribute_token = str(page_step.findChildren()[2]).split(' ')[1]
    last_page_string = attribute_token.split('=')[1].strip('"')
    return int(last_page_string)


def url_together(url: str):
    """Lazily produce the URL of every listing page.

    The last page number is obtained from ``getting_last_page()`` when the
    generator is first advanced. One URL is yielded for each page number from
    0 up to and including that last page, built by appending
    ``?strona=<number>`` to ``url``.
    """
    page_suffixes = (f'?strona={page_number}' for page_number in range(getting_last_page() + 1))
    for page_suffix in page_suffixes:
        yield url + page_suffix


# ---------------------------------------------------------------------------
# Main scraping run.
#
# For every listing page: read the offers shown on it, open each offer's own
# page for the remaining details and store one row per offer in the ``advert``
# table. Missing optional values are stored as the string 'None'.
# ---------------------------------------------------------------------------

# Seconds to wait for the server before a request is given up. Without a
# timeout ``requests.get`` may block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 30
# How many times one offer page is requested before the offer is skipped.
# Permanent problems (e.g. a malformed URL) are RequestExceptions too and
# never heal, so the retry has to be bounded.
MAX_REQUEST_ATTEMPTS = 10


def _append_to_log(log_path, message):
    """Append ``message`` to the text file at ``log_path``."""
    with open(log_path, 'a+') as file:
        file.write(message)


def _parse_location(offers_content):
    """Return ``(district, street)`` taken from the offer's subtitle line.

    The subtitle text is split on commas. If the part before the first comma
    is exactly 'Gdynia' or 'gdynia', both values are 'None'. Otherwise the
    district is that part without its first space-separated word, and the
    street is the part after the first comma with leading spaces removed
    ('None' when that part is empty or absent).
    """
    location_parts = offers_content.find('p', class_='list__item__content__subtitle').get_text().split(',')
    if len(location_parts) == 1:
        # No comma in the subtitle: pad so that the street slot always exists.
        location_parts.append('')

    # NOTE(review): a street that follows a bare city name ('Gdynia, <street>')
    # is discarded by this rule - confirm that this is intended.
    if location_parts[0] in ('Gdynia', 'gdynia'):
        return 'None', 'None'

    district = ' '.join(location_parts[0].split(' ')[1:])
    street = location_parts[1].lstrip(' ') if location_parts[1] else 'None'
    return district, street


def _parse_price(offers_content):
    """Return the price text of the offer, or 'None' if no price block exists.

    When several price blocks are present the last one wins.
    """
    # Start from the default so that a missing price can neither raise a
    # NameError nor silently reuse the price of the previous offer.
    price_value = 'None'
    for price_detail in offers_content.find_all('div', class_='list__item__price'):
        price_value = price_detail.find('p').get_text()
    return price_value


def _parse_front_page_details(offers_content):
    """Return the icon details of the offer as ``{ascii title: value}``.

    The surface, number of rooms, floor and year of construction are optional
    on the page, so they default to 'None'.
    """
    details = {
        'Powierzchnia': 'None',
        'Liczba pokoi': 'None',
        'Pietro': 'None',
        'Rok budowy': 'None',
    }
    for detail in offers_content.find_all('div', class_='list__item__details__icons__wrap'):
        detail_name = unidecode(detail.find('p', class_='list__item__details__icons__element__title').get_text())
        details[detail_name] = detail.find('p', class_='list__item__details__icons__element__desc').get_text()
    return details


def _parse_offer_stats(bs_single_offer):
    """Return ``(stats, views_figure)`` read from the offer's statistics list.

    ``stats`` maps the label of each recognised entry to its value: the offer
    number, the date of distinction, the date of publishing and the date of
    actualisation. The two optional dates default to 'None'. ``views_figure``
    is the view count text, or 'None' when the page does not show one.

    Raises:
        AttributeError: If the page has no ``<ul class="oglStats">`` element.
        IndexError: If a recognised entry contains no ':' separator.
    """
    stats = {
        'Data wyroznienia': 'None',
        'Aktualizacja': 'None',
    }
    # Reset for every offer so that a missing entry can neither raise a
    # NameError nor silently reuse the view count of the previous offer.
    views_figure = 'None'

    for stat_item in bs_single_offer.find('ul', class_='oglStats').find_all_next('li'):
        # NOTE(review): only the text between the first and the second ':' is
        # kept as the value, so a value that itself contains ':' (e.g. a clock
        # time) is cut short - confirm against the page format.
        fields = stat_item.get_text().split(':')
        label = fields[0]
        last_word = label.split(' ')[-1]
        plain_last_word = unidecode(last_word)

        if plain_last_word == 'ogloszenia':
            # Offer ID
            stats[unidecode(label)] = fields[1]
        elif plain_last_word == 'wyroznienia':
            # Date of distinction
            stats[unidecode(label)] = unidecode(fields[1])
        elif last_word == 'wprowadzenia':
            # Date of publish
            stats[unidecode(label)] = unidecode(fields[1])
        elif label.lower() == 'aktualizacja':
            # Date of actualisation
            stats[label] = unidecode(fields[1])
        elif unidecode(label.lower()) == 'wyswietlen':
            # Views
            views_figure = fields[1]

    return stats, views_figure


def _parse_extra_information(bs_single_offer):
    """Return heating type, building height and additional information.

    Each value is stored under the ASCII form of the field name shown on the
    page. Fields that are absent keep their defaults ('None', or '' for the
    additional information list).
    """
    extra_information = {
        'Typ ogrzewania': 'None',
        'Liczba pieter w budynku': 'None',
        'Dodatkowe informacje': '',
    }

    # Fields that carry a single value.
    for field_class in ('oglField oglField--typ_ogrzewania', 'oglField oglField--l_pieter'):
        for field in bs_single_offer.find_all('div', class_=field_class):
            field_name = unidecode(field.find('div', class_='oglField__name').get_text())
            extra_information[field_name] = field.find('span', class_='oglField__value').get_text()

    # Fields that carry a list of values, stored as one comma-separated string.
    for field in bs_single_offer.find_all('div', class_='oglField oglField--array'):
        field_name = unidecode(field.find('div', class_='oglField__name').get_text())
        extra_information[field_name] = ', '.join(
            item.get_text() for item in field.find_all('li', class_='oglFieldList__item')
        )

    return extra_information


def _store_offer(row):
    """Insert one 17-value ``row`` into the ``advert`` table of today's database."""
    connection = sqlite3.connect(database_path)
    try:
        with connection:  # commits on success, rolls back on error
            connection.execute(
                "INSERT INTO advert VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
                row,
            )
    finally:
        # The context manager above does not close the connection; without
        # this a new connection would be left open for every stored offer.
        connection.close()


# Number of offers stored so far, including the one currently in progress;
# it is decremented again when an offer is finally given up.
counter = 0
for i in url_together(url_list_advertises):
    try:
        page = get(i, timeout=REQUEST_TIMEOUT)
        bs = BeautifulSoup(page.content, "html.parser")
        # Number shown in parentheses in the page heading; only used for the
        # remaining-time estimate printed after each stored offer.
        figure_of_offers = int(bs.find('h1', class_='title').findChild('span').get_text().strip().strip('()'))
        all_offers_in_page = bs.find_all('div', class_='list__item__wrap__content')

        for offers_content in all_offers_in_page:
            counter += 1

            # Data available on the listing page itself. Title is required but
            # in the location only the city is mandatory, the rest of elements
            # are optional (district; street number) on the page.
            # A failure here is not handled per offer: it propagates to the
            # page-level handler at the bottom of the outer loop.
            title = offers_content.find('h2', class_='list__item__content__title').get_text().strip()
            district, street = _parse_location(offers_content)
            price_value = _parse_price(offers_content)
            details_from_front_page_dict = _parse_front_page_details(offers_content)

            # Network errors are retried for the same offer after a pause; an
            # explicit bounded loop replaces re-inserting the offer into the
            # list that is being iterated.
            for _attempt in range(MAX_REQUEST_ATTEMPTS):
                current_time = datetime.now().isoformat(' ', 'seconds')
                start_time = datetime.now()
                try:
                    # Here the application is going to go to a single offer
                    # and take data from the page.
                    url_to_offer = offers_content.find('a')['href']
                    bs_single_offer = BeautifulSoup(
                        get(url_to_offer, timeout=REQUEST_TIMEOUT).content, "html.parser"
                    )

                    details_from_single_offer_dict, views_figure = _parse_offer_stats(bs_single_offer)
                    extra_information_dict = _parse_extra_information(bs_single_offer)

                    # The offer number and the date of publishing are expected
                    # on every page; a missing one raises KeyError and the
                    # offer is skipped by the general handler below.
                    _store_offer((
                        current_time, title, district, street, price_value,
                        details_from_front_page_dict['Powierzchnia'],
                        details_from_front_page_dict['Liczba pokoi'],
                        details_from_front_page_dict['Pietro'],
                        details_from_front_page_dict['Rok budowy'],
                        details_from_single_offer_dict['Numer ogloszenia'],
                        details_from_single_offer_dict['Data wprowadzenia'],
                        details_from_single_offer_dict['Data wyroznienia'],
                        details_from_single_offer_dict['Aktualizacja'],
                        views_figure,
                        extra_information_dict['Typ ogrzewania'],
                        extra_information_dict['Liczba pieter w budynku'],
                        extra_information_dict['Dodatkowe informacje'],
                    ))

                    # Pause between offers, then print a rough estimate of the
                    # time still needed: duration of this offer multiplied by
                    # the number of offers left.
                    Event().wait(28)
                    roughly_to_end = (datetime.now() - start_time) * (figure_of_offers - counter)
                    print(str(roughly_to_end).split('.')[0])
                    break

                except exceptions.RequestException as err:
                    # Possibly transient: log, pause and try this offer again.
                    _append_to_log('./nested_loop_error.log',
                                   f'Time: {datetime.now()}.Counter: {counter}. Error: {err}\n')
                    Event().wait(300)

                except Exception as e:
                    # Anything else (unexpected page layout, database error):
                    # log, pause and move on to the next offer.
                    _append_to_log('./nested_loop_general_error.log',
                                   f'Time: {datetime.now()}. Counter: {counter}. Error: {e}\n')
                    Event().wait(300)
                    counter -= 1
                    break
            else:
                # Every attempt ended in a network error: give the offer up.
                _append_to_log('./nested_loop_error.log',
                               f'Time: {datetime.now()}.Counter: {counter}. '
                               f'Error: offer skipped after {MAX_REQUEST_ATTEMPTS} failed requests\n')
                counter -= 1

    except Exception as e:
        # Top-level boundary for one listing page: log the problem, pause and
        # continue with the next page so that one bad page cannot end the run.
        _append_to_log('./general_error.log', f'Time: {datetime.now()}. Error: {e}\n')
        Event().wait(600)

print('The end at ' + str(datetime.now()))
