In [None]:
from bs4 import BeautifulSoup
import re
import pyodbc
import openpyxl
import requests
import urllib
from urllib.parse import quote_plus as qp
from requests.utils import dict_from_cookiejar as dfc
import pandas as pd

# ODBC connection string passed to pyodbc.connect() by DatabaseWorker.
# Raw string: the backslash in the server name must stay a literal backslash
# and "\S" is not a valid escape sequence in a normal string literal.
connection_string = r"Driver={SQL Server Native Client 11.0};Server=DESKTOP-R9VI2A2\SQLEXPRESS;Database=fedresurs;Trusted_Connection=yes;"
# Parser.send_request POSTs to url1 first to collect cookies, then POSTs the
# actual search to url2 with those cookies plus the "debtorsearch" cookie.
url1 = 'https://bankrot.fedresurs.ru/DebtorsSearch.aspx?attempt=1'
url2 = 'https://bankrot.fedresurs.ru/DebtorsSearch.aspx'

class DatabaseWorker:
    """Thin data-access layer over the ``debtor`` and ``debtorInfo`` tables.

    Every public method opens its own pyodbc connection from the module-level
    ``connection_string`` and closes it before returning, including when the
    statement raises.
    """

    def _execute(self, sql_request, *params):
        """Run one parameterized write statement and commit it.

        The connection is closed in ``finally`` so a failing statement does
        not leak it; the exception still propagates to the caller.
        """
        connection = pyodbc.connect(connection_string)
        try:
            connection.cursor().execute(sql_request, *params)
            connection.commit()
        finally:
            connection.close()

    def insert_in_debtor(self,debtor_firstname,debtor_middlename,debtor_lastname):
        """Insert a new debtor row with ``debtor_status`` 0 (not processed yet)."""
        sqlRequest = """INSERT INTO debtor (debtor_firstname,debtor_middlename,debtor_lastname,debtor_status)
                        VALUES(?,?,?,0)"""
        self._execute(sqlRequest,debtor_firstname,debtor_middlename,debtor_lastname)

    def insert_debtor_info(self,debtor_id,debtor_category,debtor_inn,debtor_ogrnip,debtor_snils,debtor_region,debtor_address,debtor_link):
        """Print the parsed fields and store them as one ``debtorInfo`` row."""
        sqlRequest = """
        INSERT INTO debtorInfo (debtor_id,debtor_category,debtor_inn,debtor_ogrnip,debtor_snils,debtor_region,debtor_address,debtor_link)
        VALUES(?,?,?, ?,?,?, ?,?)"""
        # Progress trace for the operator; the label text is kept exactly as it was.
        print("debtor_id: "+str(debtor_id))
        print("debtor_category: "+str(debtor_category))
        print("debtor_inn: "+str(debtor_inn))
        print("debtor_ogrnip: "+str(debtor_ogrnip))
        print("debtor_snils:" +str(debtor_snils))
        print("debtor_region: " + str(debtor_region))
        print("debtor_address: "+str(debtor_address))
        print("debtor_link:" +str(debtor_link))

        self._execute(sqlRequest,debtor_id,debtor_category,debtor_inn,debtor_ogrnip,debtor_snils,debtor_region,debtor_address,debtor_link)

    def update_debtor(self,debtor_id):
        """Mark the debtor as processed by setting ``debtor_status`` to 1."""
        sqlRequest = """UPDATE debtor SET debtor_status=1 WHERE debtor_id=?"""
        self._execute(sqlRequest, debtor_id)

    def search_unprocess_debtors(self):
        """Return all debtors whose ``debtor_status`` is 0.

        Returns:
            dict mapping ``debtor_id`` to ``[first_name, middle_name, last_name]``.
        """
        sqlRequest = "select debtor_id, debtor_firstname, debtor_middlename, debtor_lastname from debtor where debtor_status=0"
        connection = pyodbc.connect(connection_string)
        try:
            dbCursor = connection.cursor()
            dbCursor.execute(sqlRequest)
            # Read-only query: nothing to commit, just materialize the rows
            # before the connection is closed.
            return {
                row.debtor_id: [row.debtor_firstname, row.debtor_middlename, row.debtor_lastname]
                for row in dbCursor
            }
        finally:
            connection.close()

class StringWorker:
    """String helpers used while parsing the search-result table."""

    def search_debtor_link(self,td_with_link):
        """Build the absolute URL of the debtor card from a ``<td>`` HTML fragment.

        Args:
            td_with_link: HTML source of the table cell as a string.

        Returns:
            The site root joined with the ``href`` of the first ``<a>`` in the
            fragment. When the fragment has no such link, a notice is printed
            and the bare site root is returned.
        """
        result_url = "https://bankrot.fedresurs.ru"
        anchor = BeautifulSoup(td_with_link,"html.parser").find("a",href=True)
        if anchor is None:
            # Previously signalled through a bare except + return-in-finally,
            # which also hid unrelated errors such as KeyboardInterrupt.
            print("При работе метода search_debtor_link произошла ошибка")
            return result_url
        return result_url + anchor["href"]

    def preproc_line(self,line):
        """Normalize whitespace in ``line``.

        Every run of whitespace (spaces, tabs, newlines) is collapsed to a
        single space and leading/trailing whitespace is removed, so words that
        were separated only by a line break are not glued together.
        """
        return re.sub(r"\s+"," ",line).strip()

class Parser:
    """Load debtor names from Excel and look each of them up on the registry site.

    Uses one ``requests.Session`` for HTTP, ``StringWorker`` for text clean-up
    and ``DatabaseWorker`` for persistence.
    """

    def __init__(self):
        self.session = requests.Session()
        self.sw = StringWorker()
        self.db = DatabaseWorker()

    @staticmethod
    def _compact_snils(text):
        """Remove whitespace and dashes from a SNILS cell so it can be parsed as an int."""
        return re.sub(r"[\s-]","",text)

    def _cell_text(self,cells,index,fallback):
        """Return the cleaned text of ``cells[index]``, or ``fallback`` when the cell is missing."""
        try:
            return self.sw.preproc_line(str(cells[index].text))
        except IndexError:
            return fallback

    def _cell_int(self,cells,index,notice,clean):
        """Return ``cells[index]`` parsed as an int after ``clean`` is applied.

        When the cell is missing or not numeric, ``notice`` is printed and 0
        is returned.
        """
        try:
            return int(clean(str(cells[index].text)))
        except (IndexError, ValueError):
            print(notice)
            return 0

    def write_debtor_info(self,debtor_id,response):
        """Parse the search-result page and store every result row for ``debtor_id``.

        Args:
            debtor_id: id of the row in the ``debtor`` table being processed.
            response: HTML of the result page as a string.

        The debtor is marked as processed once per call, whether or not the
        result table contained any rows.
        """
        soup = BeautifulSoup(response,"html.parser")
        searched_table = soup.find("table", attrs={ "id" : "ctl00_cphBody_gvDebtors"})
        if searched_table is None or not searched_table.find_all("th"):
            print("Пустая таблица")
            self.db.update_debtor(debtor_id)
            return

        print("Всё ок")
        # The first <tr> is skipped as the header row.
        for tr in searched_table.find_all("tr")[1:]:
            cells = tr.find_all("td")
            # Fallback strings and notices are stored/printed as before, so
            # their wording is intentionally left untouched.
            debtor_category = self._cell_text(cells,0,"Category exists")  # debtor category
            try:
                debtor_link = self.sw.search_debtor_link(str(cells[1]))  # link to the debtor's detail page
            except IndexError:
                debtor_link = "Link exists"
            debtor_inn = self._cell_int(cells,2,"INN exists",self.sw.preproc_line)  # INN
            debtor_ogrnip = self._cell_int(cells,3,"ORGNIP exists",self.sw.preproc_line)  # OGRNIP
            # A failed SNILS parse now yields 0 like the other numeric fields
            # instead of leaving a half-cleaned string behind.
            debtor_snils = self._cell_int(cells,4,"SNILS exists",self._compact_snils)  # SNILS
            debtor_region = self._cell_text(cells,5,"Region exists")  # region
            debtor_address = self._cell_text(cells,6,"Address exists")  # full address
            self.db.insert_debtor_info(debtor_id,debtor_category,debtor_inn,debtor_ogrnip,debtor_snils,debtor_region,debtor_address,debtor_link)
        # Marked once after the loop so a table with a header but no data rows
        # is not queried again on every run.
        self.db.update_debtor(debtor_id)

    def _store_full_names(self,full_names):
        """Split each "Last First Middle" string and insert it as a debtor.

        Values that are not strings (empty Excel cells arrive as NaN/None) or
        that have fewer than three name parts are reported and skipped. Parts
        beyond the third are ignored.
        """
        for full_name in full_names:
            parts = full_name.split() if isinstance(full_name, str) else []
            if len(parts) < 3:
                print("Произошла ошибка")
                continue
            last_name, first_name, middle_name = parts[:3]
            print("Фамилия: "+last_name+" Имя: " + first_name+" Отчество: "+ middle_name)
            try:
                self.db.insert_in_debtor(first_name,middle_name,last_name)
            except pyodbc.Error:
                # Best effort, as before: one failed insert must not stop the import.
                print("Произошла ошибка")

    def write_debtors(self,filePath):
        """Read full names from sheet 'Sheet1' of the workbook and store them as debtors.

        Args:
            filePath: path to the Excel workbook; the names are taken from the
                column titled 'Табельный номер'.
        """
        excel_data_df = pd.read_excel(filePath, sheet_name='Sheet1')
        self._store_full_names(excel_data_df['Табельный номер'].tolist())

    def search_debtors(self):
        """Query the site for every debtor that has not been processed yet."""
        debtors_list = self.db.search_unprocess_debtors()
        for debtor_id, (first_name, middle_name, last_name) in debtors_list.items():
            self.send_request(debtor_id, first_name, middle_name, last_name)

    def send_request(self,debtor_id,first_name,middle_name,last_name,timeout=30):
        """Search the site for one person and hand the result page to ``write_debtor_info``.

        Args:
            debtor_id: id of the row in the ``debtor`` table.
            first_name, middle_name, last_name: name parts; ``None`` is sent as
                an empty value.
            timeout: seconds to wait for each HTTP request.

        A network error or a non-200 status is printed and the debtor is left
        unprocessed so that a later run can retry it.
        """
        # The search parameters are sent as the value of a "debtorsearch" cookie.
        debtorsearch = (
            'typeofsearch=Persons&orgname=&orgaddress=&orgregionid=&orgogrn=&orginn=&orgokpo=&OrgCategory='
            '&prslastname=' + qp(last_name or "")
            + '&prsfirstname=' + qp(first_name or "")
            + '&prsmiddlename=' + qp(middle_name or "")
            + '&prsaddress=&prsregionid=&prsinn=&prsogrn=&prssnils=&PrsCategory=&pagenumber=0'
        )
        try:
            # First request only collects the cookies the site hands out.
            n_session = self.session.post(url1, timeout=timeout)
            session_dictionary = dfc(n_session.cookies)
            session_dictionary["debtorsearch"] = debtorsearch
            headers = {"Cookie": "; ".join(str(x)+"="+str(y) for x,y in session_dictionary.items())}
            response = self.session.post(url2, headers=headers, timeout=timeout)
        except requests.RequestException as exc:
            print("Request failed: "+str(exc))
            return
        if response.status_code==200:
            self.write_debtor_info(debtor_id, str(response.text))
        else:
            print("Response code: "+str(response.status_code))
    
if __name__=="__main__":
    # Interactive entry point: 1 imports names from Excel, 2 runs the search.
    parser = Parser()
    print("Выберите желаемое действие:")
    print("1 - Считать имена сотрудников для проверки")
    print("2 - Произвести поиск сотрудников")
    try:
        choice = int(input())
    except ValueError:
        # Non-numeric input is reported like any other unknown option
        # instead of ending in a traceback.
        choice = None
    if choice==1:
        parser.write_debtors("debtors2.xlsx")
    elif choice==2:
        parser.search_debtors()
    else:
        print("Вы ввели неверный параметр")

In [None]:
from bs4 import BeautifulSoup
import re
import pyodbc
import openpyxl
import requests
import urllib
from urllib.parse import quote_plus as qp
from requests.utils import dict_from_cookiejar as dfc
import pandas as pd
import threading
from itertools import zip_longest
from math import ceil


# ODBC connection string passed to pyodbc.connect() by DatabaseWorker.
# Raw string: the backslash in the server name must stay a literal backslash
# and "\S" is not a valid escape sequence in a normal string literal.
connection_string = r"Driver={SQL Server Native Client 11.0};Server=DESKTOP-R9VI2A2\SQLEXPRESS;Database=fedresurs;Trusted_Connection=yes;"
# Parser.send_request POSTs to url1 first to collect cookies, then POSTs the
# actual search to url2 with those cookies plus the "debtorsearch" cookie.
url1 = 'https://bankrot.fedresurs.ru/DebtorsSearch.aspx?attempt=1'
url2 = 'https://bankrot.fedresurs.ru/DebtorsSearch.aspx'

class DatabaseWorker:
    """Thin data-access layer over the ``debtor`` and ``debtorInfo`` tables.

    Every public method opens its own pyodbc connection from the module-level
    ``connection_string`` and closes it before returning, including when the
    statement raises.
    """

    def _execute(self, sql_request, *params):
        """Run one parameterized write statement and commit it.

        The connection is closed in ``finally`` so a failing statement does
        not leak it; the exception still propagates to the caller.
        """
        connection = pyodbc.connect(connection_string)
        try:
            connection.cursor().execute(sql_request, *params)
            connection.commit()
        finally:
            connection.close()

    def insert_in_debtor(self,debtor_firstname,debtor_middlename,debtor_lastname):
        """Insert a new debtor row with ``debtor_status`` 0 (not processed yet)."""
        sqlRequest = """INSERT INTO debtor (debtor_firstname,debtor_middlename,debtor_lastname,debtor_status)
                        VALUES(?,?,?,0)"""
        self._execute(sqlRequest,debtor_firstname,debtor_middlename,debtor_lastname)

    def insert_debtor_info(self,debtor_id,debtor_category,debtor_inn,debtor_ogrnip,debtor_snils,debtor_region,debtor_address,debtor_link):
        """Print the parsed fields and store them as one ``debtorInfo`` row."""
        sqlRequest = """
        INSERT INTO debtorInfo (debtor_id,debtor_category,debtor_inn,debtor_ogrnip,debtor_snils,debtor_region,debtor_address,debtor_link)
        VALUES(?,?,?, ?,?,?, ?,?)"""
        # Progress trace for the operator; the label text is kept exactly as it was.
        print("debtor_id: "+str(debtor_id))
        print("debtor_category: "+str(debtor_category))
        print("debtor_inn: "+str(debtor_inn))
        print("debtor_ogrnip: "+str(debtor_ogrnip))
        print("debtor_snils:" +str(debtor_snils))
        print("debtor_region: " + str(debtor_region))
        print("debtor_address: "+str(debtor_address))
        print("debtor_link:" +str(debtor_link))

        self._execute(sqlRequest,debtor_id,debtor_category,debtor_inn,debtor_ogrnip,debtor_snils,debtor_region,debtor_address,debtor_link)

    def update_debtor(self,debtor_id):
        """Mark the debtor as processed by setting ``debtor_status`` to 1."""
        sqlRequest = """UPDATE debtor SET debtor_status=1 WHERE debtor_id=?"""
        self._execute(sqlRequest, debtor_id)

    def search_unprocess_debtors(self):
        """Return all debtors whose ``debtor_status`` is 0.

        Returns:
            dict mapping ``debtor_id`` to ``[first_name, middle_name, last_name]``.
        """
        sqlRequest = "select debtor_id, debtor_firstname, debtor_middlename, debtor_lastname from debtor where debtor_status=0"
        connection = pyodbc.connect(connection_string)
        try:
            dbCursor = connection.cursor()
            dbCursor.execute(sqlRequest)
            # Read-only query: nothing to commit, just materialize the rows
            # before the connection is closed.
            return {
                row.debtor_id: [row.debtor_firstname, row.debtor_middlename, row.debtor_lastname]
                for row in dbCursor
            }
        finally:
            connection.close()

class StringWorker:
    """String helpers used while parsing the search-result table."""

    def search_debtor_link(self,td_with_link):
        """Build the absolute URL of the debtor card from a ``<td>`` HTML fragment.

        Args:
            td_with_link: HTML source of the table cell as a string.

        Returns:
            The site root joined with the ``href`` of the first ``<a>`` in the
            fragment. When the fragment has no such link, a notice is printed
            and the bare site root is returned.
        """
        result_url = "https://bankrot.fedresurs.ru"
        anchor = BeautifulSoup(td_with_link,"html.parser").find("a",href=True)
        if anchor is None:
            # Previously signalled through a bare except + return-in-finally,
            # which also hid unrelated errors such as KeyboardInterrupt.
            print("При работе метода search_debtor_link произошла ошибка")
            return result_url
        return result_url + anchor["href"]

    def preproc_line(self,line):
        """Normalize whitespace in ``line``.

        Every run of whitespace (spaces, tabs, newlines) is collapsed to a
        single space and leading/trailing whitespace is removed, so words that
        were separated only by a line break are not glued together.
        """
        return re.sub(r"\s+"," ",line).strip()

class Parser:
    """Load debtor names from Excel and look each of them up on the registry site.

    Uses one ``requests.Session`` for HTTP, ``StringWorker`` for text clean-up
    and ``DatabaseWorker`` for persistence. The search is spread over several
    threads.
    """

    def __init__(self):
        # NOTE(review): this single session is used by every worker thread
        # started in search_debtors — confirm that sharing it is acceptable.
        self.session = requests.Session()
        self.sw = StringWorker()
        self.db = DatabaseWorker()

    @staticmethod
    def _compact_snils(text):
        """Remove whitespace and dashes from a SNILS cell so it can be parsed as an int."""
        return re.sub(r"[\s-]","",text)

    def _cell_text(self,cells,index,fallback):
        """Return the cleaned text of ``cells[index]``, or ``fallback`` when the cell is missing."""
        try:
            return self.sw.preproc_line(str(cells[index].text))
        except IndexError:
            return fallback

    def _cell_int(self,cells,index,notice,clean):
        """Return ``cells[index]`` parsed as an int after ``clean`` is applied.

        When the cell is missing or not numeric, ``notice`` is printed and 0
        is returned.
        """
        try:
            return int(clean(str(cells[index].text)))
        except (IndexError, ValueError):
            print(notice)
            return 0

    def write_debtor_info(self,debtor_id,response):
        """Parse the search-result page and store every result row for ``debtor_id``.

        Args:
            debtor_id: id of the row in the ``debtor`` table being processed.
            response: HTML of the result page as a string.

        The debtor is marked as processed once per call, whether or not the
        result table contained any rows.
        """
        soup = BeautifulSoup(response,"html.parser")
        searched_table = soup.find("table", attrs={ "id" : "ctl00_cphBody_gvDebtors"})
        if searched_table is None or not searched_table.find_all("th"):
            print("Пустая таблица")
            self.db.update_debtor(debtor_id)
            return

        print("Всё ок")
        # The first <tr> is skipped as the header row.
        for tr in searched_table.find_all("tr")[1:]:
            cells = tr.find_all("td")
            # Fallback strings and notices are stored/printed as before, so
            # their wording is intentionally left untouched.
            debtor_category = self._cell_text(cells,0,"Category exists")  # debtor category
            try:
                debtor_link = self.sw.search_debtor_link(str(cells[1]))  # link to the debtor's detail page
            except IndexError:
                debtor_link = "Link exists"
            debtor_inn = self._cell_int(cells,2,"INN exists",self.sw.preproc_line)  # INN
            debtor_ogrnip = self._cell_int(cells,3,"ORGNIP exists",self.sw.preproc_line)  # OGRNIP
            # A failed SNILS parse now yields 0 like the other numeric fields
            # instead of leaving a half-cleaned string behind.
            debtor_snils = self._cell_int(cells,4,"SNILS exists",self._compact_snils)  # SNILS
            debtor_region = self._cell_text(cells,5,"Region exists")  # region
            debtor_address = self._cell_text(cells,6,"Address exists")  # full address
            self.db.insert_debtor_info(debtor_id,debtor_category,debtor_inn,debtor_ogrnip,debtor_snils,debtor_region,debtor_address,debtor_link)
        # Marked once after the loop so a table with a header but no data rows
        # is not queried again on every run.
        self.db.update_debtor(debtor_id)

    def _store_full_names(self,full_names):
        """Split each "Last First Middle" string and insert it as a debtor.

        Values that are not strings (empty Excel cells arrive as NaN/None) or
        that have fewer than three name parts are reported and skipped. Parts
        beyond the third are ignored.
        """
        for full_name in full_names:
            parts = full_name.split() if isinstance(full_name, str) else []
            if len(parts) < 3:
                print("Произошла ошибка")
                continue
            last_name, first_name, middle_name = parts[:3]
            print("Фамилия: "+last_name+" Имя: " + first_name+" Отчество: "+ middle_name)
            try:
                self.db.insert_in_debtor(first_name,middle_name,last_name)
            except pyodbc.Error:
                # Best effort, as before: one failed insert must not stop the import.
                print("Произошла ошибка")

    def write_debtors(self,filePath):
        """Read full names from sheet 'Sheet1' of the workbook and store them as debtors.

        Args:
            filePath: path to the Excel workbook; the names are taken from the
                column titled 'Табельный номер'.
        """
        excel_data_df = pd.read_excel(filePath, sheet_name='Sheet1')
        self._store_full_names(excel_data_df['Табельный номер'].tolist())

    def search_debtors(self,thread_count=4):
        """Query the site for every unprocessed debtor, split across worker threads.

        Args:
            thread_count: maximum number of worker threads (default 4, the
                number of slices the original code built).

        The pending debtors are cut into contiguous slices that together cover
        the whole list. The previous slicing sized each slice for 8 parts but
        only built 4, so about half of the debtors were skipped on every run.
        """
        debtors_list = self.db.search_unprocess_debtors()
        # fetch_requests expects a list of single-entry {debtor_id: names} dicts.
        entries = [{debtor_id: names} for debtor_id, names in debtors_list.items()]
        if not entries:
            return
        part_len = ceil(len(entries) / max(1, thread_count))
        threads = [
            threading.Thread(target=self.fetch_requests, args=(entries[start:start + part_len],))
            for start in range(0, len(entries), part_len)
        ]
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()

    def fetch_requests(self,list_part):
        """Send one search request per debtor in ``list_part``.

        Args:
            list_part: list of dicts mapping ``debtor_id`` to
                ``[first_name, middle_name, last_name]``.
        """
        for word in list_part:
            for debtor_id, (first_name, middle_name, last_name) in word.items():
                self.send_request(debtor_id, first_name, middle_name, last_name)

    def send_request(self,debtor_id,first_name,middle_name,last_name,timeout=30):
        """Search the site for one person and hand the result page to ``write_debtor_info``.

        Args:
            debtor_id: id of the row in the ``debtor`` table.
            first_name, middle_name, last_name: name parts; ``None`` is sent as
                an empty value.
            timeout: seconds to wait for each HTTP request.

        A network error or a non-200 status is printed and the debtor is left
        unprocessed so that a later run can retry it.
        """
        # The search parameters are sent as the value of a "debtorsearch" cookie.
        debtorsearch = (
            'typeofsearch=Persons&orgname=&orgaddress=&orgregionid=&orgogrn=&orginn=&orgokpo=&OrgCategory='
            '&prslastname=' + qp(last_name or "")
            + '&prsfirstname=' + qp(first_name or "")
            + '&prsmiddlename=' + qp(middle_name or "")
            + '&prsaddress=&prsregionid=&prsinn=&prsogrn=&prssnils=&PrsCategory=&pagenumber=0'
        )
        try:
            # First request only collects the cookies the site hands out.
            n_session = self.session.post(url1, timeout=timeout)
            session_dictionary = dfc(n_session.cookies)
            session_dictionary["debtorsearch"] = debtorsearch
            headers = {"Cookie": "; ".join(str(x)+"="+str(y) for x,y in session_dictionary.items())}
            response = self.session.post(url2, headers=headers, timeout=timeout)
        except requests.RequestException as exc:
            print("Request failed: "+str(exc))
            return
        if response.status_code==200:
            self.write_debtor_info(debtor_id, str(response.text))
        else:
            print("Response code: "+str(response.status_code))
    
if __name__=="__main__":
    # Interactive entry point: 1 imports names from Excel, 2 runs the search.
    parser = Parser()
    print("Выберите желаемое действие:")
    print("1 - Считать имена сотрудников для проверки")
    print("2 - Произвести поиск сотрудников")
    try:
        choice = int(input())
    except ValueError:
        # Non-numeric input is reported like any other unknown option
        # instead of ending in a traceback.
        choice = None
    if choice==1:
        parser.write_debtors("debtors2.xlsx")
    elif choice==2:
        parser.search_debtors()
    else:
        print("Вы ввели неверный параметр")

Выберите желаемое действие:
1 - Считать имена сотрудников для проверки
2 - Произвести поиск сотрудников
2
Пустая таблица
Пустая таблица
Пустая таблица
Пустая таблица
Пустая таблица
Пустая таблица
Пустая таблица
Пустая таблица
Пустая таблица
Пустая таблица
Пустая таблица
Пустая таблица
Пустая таблица
Пустая таблица
Пустая таблица
Пустая таблица
Пустая таблица
Пустая таблица
Пустая таблица
Всё ок
ORGNIP exists
debtor_id: 104
debtor_category: Физическое лицо
debtor_inn: 744813942440
debtor_ogrnip: 0
debtor_snils:12727972889
debtor_region: Челябинская область
debtor_address: Челябинская область, г. Челябинск, пр. Победы, д. 382А, кв.24.
debtor_link:https://bankrot.fedresurs.ru/PrivatePersonCard.aspx?ID=D44432F073B051EA47447AA47309C66F
Пустая таблица
Пустая таблица
Пустая таблица
Пустая таблица
Пустая таблица
Пустая таблица
Пустая таблица
Пустая таблица
Пустая таблица
Пустая таблица
Пустая таблица
Пустая таблица
Пустая таблица
Пустая таблица
Пустая таблица
Пустая таблица
Пустая таблица
Пуст

Пустая таблица
Пустая таблица
Всё ок
ORGNIP exists
debtor_id: 26943
debtor_category: Физическое лицо
debtor_inn: 672904999045
debtor_ogrnip: 0
debtor_snils:3566264563
debtor_region: Смоленская область
debtor_address: г.Смоленск, ул.Комсомольская, д.75
debtor_link:https://bankrot.fedresurs.ru/PrivatePersonCard.aspx?ID=08E3C5B548DEF39BA1A4B76A30914669
Пустая таблица
Всё ок
ORGNIP exists
debtor_id: 53940
debtor_category: Физическое лицо
debtor_inn: 366218529079
debtor_ogrnip: 0
debtor_snils:7626637894
debtor_region: Воронежская область
debtor_address: 394077, г. Воронеж, ул. Хользунова, д. 64, кв. 26, к. 1
debtor_link:https://bankrot.fedresurs.ru/PrivatePersonCard.aspx?ID=9C291AB25E688E490314FE4463FD92A3
Пустая таблица
Пустая таблица
Пустая таблица
Пустая таблица
Пустая таблица
Всё ок
ORGNIP exists
debtor_id: 26952
debtor_category: Физическое лицо
debtor_inn: 23401493866
debtor_ogrnip: 0
debtor_snils:7676384719
debtor_region: Республика Башкортостан
debtor_address: Респ. Башкортостан, Куш

In [None]:
# Scratch check: look up the single entry of a {debtor_id: [first, middle, last]} dict.
s = {53692: ['Надежда', 'Вячеславовна', 'Алиева']}
# s.keys() is a view, not a number, so int() on it raises TypeError;
# take the only key out of the dict instead.
value = next(iter(s))
print(s[value])