In [1]:
import requests
import json
from bs4 import BeautifulSoup
# Module-level HTTP session, created once when this cell runs.
session = requests.Session()

In [2]:
# Sets how many times the attempt loop in IndianMedicalRegistry.get_output may run.
RETRY_COUNT = 1

In [3]:
class IndianMedicalRegistry:
    """Look up entries of the NMC Indian Medical Register by registration number and year.

    Typical use is ``IndianMedicalRegistry(123, 1960).get_output()``, which
    returns a JSON string: a list of objects whose keys are the column
    headings of the results table on the register's home page.
    """

    HOME_URL = "https://www.nmc.org.in/information-desk/indian-medical-register/"
    DATA_URL = "https://www.nmc.org.in/MCIRest/open/getPaginatedData"
    DEFAULT_USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36"
    # Seconds to wait for the server; without a timeout a stalled
    # connection would block the notebook indefinitely.
    REQUEST_TIMEOUT = 30
    # Number of ``columns[i][...]`` groups sent with the search request.
    COLUMN_COUNT = 7

    def __init__(self, registrationNo, year, retry_count=None):
        """Store the search criteria.

        Args:
            registrationNo: Registration number to search for.
            year: Year to search for.
            retry_count: Optional number of attempts ``get_output`` may make.
                When left as ``None`` the module-level ``RETRY_COUNT`` is used.
        """
        self.year = year
        self.registrationNo = registrationNo
        self.retry_count = retry_count
        # HTTP session used by home_page/fetch_data; created lazily.
        self.session = None
        # Optional User-Agent override; DEFAULT_USER_AGENT is used when unset.
        self.user_agent = None

    def _get_session(self):
        """Return this instance's HTTP session, creating it on first use."""
        if self.session is None:
            self.session = requests.session()
        return self.session

    def _close_session(self):
        """Close and forget the current session, if there is one."""
        if self.session is not None:
            self.session.close()
            self.session = None

    def _user_agent(self):
        """Return the User-Agent header value to send."""
        return self.user_agent or self.DEFAULT_USER_AGENT

    def home_page(self):
        """Request the register's home page.

        Returns:
            The response object when the status code is 200, otherwise the
            string ``"No Response"`` (sentinel kept for existing callers).
        """
        response = self._get_session().get(
            self.HOME_URL,
            headers={"User-Agent": self._user_agent()},
            timeout=self.REQUEST_TIMEOUT,
        )
        if response.status_code == 200:
            return response
        return "No Response"

    def parse_with_lxml(self, home_page_response):
        """Parse the ``.text`` of a response with BeautifulSoup's lxml parser."""
        soup = BeautifulSoup(home_page_response.text, 'lxml')
        return soup

    def get_table_headings(self, parsed_data):
        """Return the stripped text of every ``<th>`` in the results table.

        Args:
            parsed_data: Parsed home page.

        Raises:
            ValueError: If the ``table table-bordered`` table is not present.
        """
        table_data = parsed_data.find("table", class_="table table-bordered")
        if table_data is None:
            raise ValueError("results table not found on the register home page")
        return [th.text.strip() for th in table_data.find_all('th')]

    def fetch_data(self, registrationNo, year):
        """Send the paginated-doctor search (a GET request) and return the response.

        The query string is built from a dict and encoded by ``requests``,
        so search values containing characters such as ``&`` or spaces
        cannot corrupt the other parameters.

        Args:
            registrationNo: Registration number to search for.
            year: Year to search for.

        Returns:
            The response object; the status code is not checked here.
        """
        params = {"service": "getPaginatedDoctor", "draw": "1"}
        for index in range(self.COLUMN_COUNT):
            prefix = f"columns[{index}]"
            params[f"{prefix}[data]"] = str(index)
            params[f"{prefix}[name]"] = ""
            params[f"{prefix}[searchable]"] = "true"
            params[f"{prefix}[orderable]"] = "true"
            params[f"{prefix}[search][value]"] = ""
            params[f"{prefix}[search][regex]"] = "false"
        params.update({
            "order[0][column]": "0",
            "order[0][dir]": "asc",
            "start": "0",
            "length": "500",
            "search[value]": "",
            "search[regex]": "false",
            "name": "",
            "registrationNo": str(registrationNo),
            "smcId": "1",
            "year": str(year),
            # NOTE(review): looks like a fixed cache-busting timestamp — confirm
            # the server does not require a fresh value.
            "_": "1679142838480",
        })
        headers = {
            "User-Agent": self._user_agent(),
            "Referer": "https://www.nmc.org.in/information-desk/indian-medical-register/",
            "Host": "www.nmc.org.in",
            "X-Requested-With": "XMLHttpRequest",
        }
        response = self._get_session().get(
            self.DATA_URL,
            headers=headers,
            params=params,
            timeout=self.REQUEST_TIMEOUT,
        )
        return response

    def extract_data(self, second_soup, theadings):
        """Convert the parsed search response into a JSON string of records.

        The JSON payload is read from the text of the first ``<p>`` element
        of ``second_soup``; each row of its ``"data"`` list is zipped with
        ``theadings`` to form one record.

        NOTE(review): presumably the HTML parser wraps the bare JSON body in
        ``<p>`` and drops any markup embedded in cell values — confirm before
        replacing this with ``response.json()``.

        Args:
            second_soup: Parsed search response.
            theadings: Column headings, in the order of the row values.

        Returns:
            ``json.dumps`` of the list of records, indented by 4 spaces.

        Raises:
            ValueError: If there is no text to decode, the text is not valid
                JSON, or the decoded payload has no ``"data"`` entry.
        """
        paragraph = second_soup.p
        if paragraph is None:
            raise ValueError("search response contained no text to decode")
        data = json.loads(paragraph.text)
        if not isinstance(data, dict) or "data" not in data:
            raise ValueError("search response has no 'data' entry")
        output = [dict(zip(theadings, row)) for row in data["data"]]
        return json.dumps(output, indent=4)

    def get_output(self):
        """Run the full lookup and return the records as a JSON string.

        Each attempt uses a fresh session: it loads the home page, reads the
        table headings, performs the search and maps the rows to headings.
        An attempt that fails with a network error, a non-200 status or an
        unparseable response is retried until the attempts are used up.
        The session is closed and ``self.session`` reset to ``None`` after
        every attempt.

        Returns:
            The JSON string produced by ``extract_data``.

        Raises:
            RuntimeError: If no attempt succeeded (including when the
                configured number of attempts is zero or negative).
        """
        attempts = RETRY_COUNT if self.retry_count is None else self.retry_count
        last_error = None

        for retry_num in range(attempts):
            # Start every attempt from a clean session.
            self._close_session()
            self.session = requests.session()
            try:
                home_page_response = self.home_page()
                if isinstance(home_page_response, str):
                    # home_page signals a non-200 answer with a string sentinel.
                    raise ValueError("home page request did not return HTTP 200")

                # Headings come from the home page's results table.
                parsed_data = self.parse_with_lxml(home_page_response)
                theadings = self.get_table_headings(parsed_data)

                # GET request for the rows matching the registration number and year.
                second_response = self.fetch_data(self.registrationNo, self.year)
                if second_response.status_code != 200:
                    raise ValueError(
                        f"search request returned HTTP {second_response.status_code}"
                    )

                second_soup = self.parse_with_lxml(second_response)
                return self.extract_data(second_soup, theadings)
            except (requests.RequestException, ValueError) as exc:
                last_error = exc
            finally:
                self._close_session()

        raise RuntimeError(
            f"could not fetch register data after {attempts} attempt(s)"
        ) from last_error

In [4]:
if __name__ == "__main__":
    # Example lookup: registration number 123, year 1960; prints the JSON result.
    print(
        IndianMedicalRegistry(registrationNo=123, year=1960).get_output()
    )

[
    {
        "Sl. No.": 1,
        "Year of Info": 1960,
        "Registration Number": "1123",
        "State Medical Councils": "Andhra Pradesh Medical Council",
        "Name": "Owk Ankanna",
        "Father Name": "O P Ankanna",
        "Action": "View"
    },
    {
        "Sl. No.": 2,
        "Year of Info": 1960,
        "Registration Number": "123",
        "State Medical Councils": "Andhra Pradesh Medical Council",
        "Name": "Sripaurapudilly Mallikarjuna Rao",
        "Father Name": "S Venkatarttanam",
        "Action": "View"
    },
    {
        "Sl. No.": 3,
        "Year of Info": 1960,
        "Registration Number": "1230",
        "State Medical Councils": "Andhra Pradesh Medical Council",
        "Name": "Injamuri Ranga Rao",
        "Father Name": "I Hanumaiah",
        "Action": "View"
    },
    {
        "Sl. No.": 4,
        "Year of Info": 1960,
        "Registration Number": "1231",
        "State Medical Councils": "Andhra Pradesh Medical Council",
   