In [13]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def extract_text(tag):
    """Return the whitespace-stripped text of ``tag``, or ``''`` if ``tag`` is falsy."""
    if not tag:
        return ''
    return tag.text.strip()

def scrape_data_from_child_url(child_url, timeout=30):
    """Scrape exhibitor names and booth locations from one listing page.

    Args:
        child_url: URL of the exhibitor-list page to download.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout a stalled connection would block the run forever.

    Returns:
        A list of dicts, one per ``div.col_3.clfix`` entry found on the
        page, each with the keys ``'Exhibitor Name'`` and
        ``'Exhibitor Booth'``. The list is empty when the request fails
        or the server answers with a status other than 200.
    """
    try:
        response = requests.get(child_url, timeout=timeout)
    except requests.RequestException as exc:
        # Treat network errors like HTTP errors: report and skip this page
        # so one bad page does not abort the whole scrape.
        print(f"Failed to retrieve the webpage. Error: {exc}")
        return []

    if response.status_code != 200:
        print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
        return []

    soup = BeautifulSoup(response.content, 'html.parser')

    all_data = []
    for company_div in soup.find_all('div', class_='col_3 clfix'):
        company_name_div = company_div.find('div', class_='col col1ergebnis')
        # extract_text tolerates a missing <strong> tag (find() returns None)
        # instead of raising AttributeError on `.text`.
        company_name = (
            extract_text(company_name_div.find('strong'))
            if company_name_div
            else ''
        )

        booth_div = company_div.find('div', class_='col advertP col3ergebnis')
        company_booth = extract_text(booth_div)

        all_data.append({
            'Exhibitor Name': company_name,
            'Exhibitor Booth': company_booth,
        })

    return all_data


def main(num_pages=37, page_size=20):
    """Scrape every page of the exhibitor list into a single DataFrame.

    Args:
        num_pages: Number of listing pages to request. Defaults to 37.
        page_size: Step between the ``start`` offsets of consecutive
            pages. Defaults to 20, so the default run requests the
            offsets 0, 20, ..., 720.

    Returns:
        A DataFrame built from the scraped entries of all pages, or an
        empty DataFrame (after printing a notice) when nothing was
        scraped.
    """
    parent_url = 'https://www.imm-cologne.com/imm-cologne-exhibitors/list-of-exhibitors/'

    child_urls = []
    for page_index in range(num_pages):
        start_value = page_index * page_size
        page_url = f"{parent_url}?route=aussteller/blaettern&&start={start_value}&paginatevalues=%7B%22stichwort%22%3A%22%22%2C%22suchart%22%3A%22alle%22%7D"
        child_urls.append(page_url)

    all_data = []
    for child_url in child_urls:
        # Extending with an empty list is a no-op, so failed pages need no
        # special handling here.
        all_data.extend(scrape_data_from_child_url(child_url))

    if not all_data:
        print("No data scraped.")
        return pd.DataFrame()  # Return an empty DataFrame

    return pd.DataFrame(all_data)

# Example usage: run the scraper with its defaults and print the resulting
# DataFrame. `result_df` is kept at module level so that later notebook
# cells can reuse it.
result_df = main()
print(result_df)



                                    Exhibitor Name   Exhibitor Booth
0                                      Freund GmbH  Hall 10.1 | F042
1                                Interaktive Kunst   Hall 4.2 | B016
2                         E.S. Kluft & Company LLC   Hall 9.1 | B023
3           Christine Kröncke Interior Design GmbH   Hall 4.2 | B028
4                1 DÜNYA YATAK SAN. TIC. LTD. STI.   Hall 9.1 | A034
..                                             ...               ...
724  Zhengzhou Ledrem Network Technology Co., Ltd.   Hall 8.1 | B043
725                  ZHONGLONG FURNITURE CO., LTD.  Hall 7.1 | C060a
726  ZIBO LIYAN HOME FURNISHING TECHNOLOGY CO.,LTD   Hall 8.1 | C056
727                                  Zijlstra B.V.   Hall 7.1 | D030
728       Zoy Overseas Furniture (Zhejiang) Co,Ltd  Hall 10.1 | H009

[729 rows x 2 columns]


In [14]:
# Optional export; raw string so the backslashes in the Windows path are taken literally.
#result_df.to_excel(r'G:\ONI-5\Final\Final_Booth.xlsx', index=False)