<a href="https://colab.research.google.com/github/EkeminiImeOtu/100daysofpython/blob/main/Hitta_Sample_Details.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm

def fetch_data_from_api(api_url):
    """Fetch one company record and return its first phone entry.

    Sends a GET request to ``api_url`` with a 10-second timeout, parses the
    JSON body and reads the first element of
    ``data['result']['companies']['company']``. If that company has at least
    one ``phone`` entry, the entry's ``displayAs`` and ``callTo`` fields are
    returned.

    Args:
        api_url: Full URL of the API endpoint to query.

    Returns:
        A ``(api_url, phone_number_one, phone_number_two)`` tuple, where the
        two phone values are the ``displayAs`` and ``callTo`` fields of the
        first phone entry. Both are ``None`` when the company lists no phone,
        or when the request or parsing fails; failures are printed rather
        than raised.
    """
    try:
        response = requests.get(api_url, timeout=10)
        # Report HTTP failures (404, 429, 5xx, ...) as what they are instead
        # of letting an error page fail later as a KeyError or JSON error.
        response.raise_for_status()
        data = response.json()

        company_info = data['result']['companies']['company'][0]

        # A missing, null or empty phone list just means "no number on file";
        # it is not an error worth logging.
        phone_entries = company_info.get('phone') or []
        if not phone_entries:
            return api_url, None, None

        phone_info = phone_entries[0]  # Only the first phone entry is used
        return api_url, phone_info.get('displayAs'), phone_info.get('callTo')
    except Exception as e:
        # Deliberately broad: this runs as a best-effort worker, and one bad
        # URL or malformed response must not abort the whole batch.
        print(f"Error processing {api_url}: {e}")
        return api_url, None, None

def process_urls_from_excel(file_path):
    """Look up phone numbers for every URL listed in an Excel sheet.

    Reads the ``urls`` column of ``file_path``, builds an API URL for each
    entry by appending the URL's last ``/``-separated segment to the API base
    URL, and fetches them concurrently (10 worker threads) through
    ``fetch_data_from_api``.

    Args:
        file_path: Path to an Excel file that contains a ``urls`` column.

    Returns:
        A DataFrame with the columns ``original_url``, ``api_url``,
        ``phone_number_one`` and ``phone_number_two``; one row per input URL,
        in the same order as the spreadsheet.
    """
    df = pd.read_excel(file_path, usecols=["urls"])
    api_base_url = "https://api.hitta.se/search/v7/web/company/"

    # (original_url, api_url) pairs, in spreadsheet order
    tasks = [(url, f"{api_base_url}{url.rsplit('/', 1)[-1]}") for url in df['urls']]

    # Pre-sized so each result can be stored at its input position no matter
    # which request finishes first.
    rows = [None] * len(tasks)
    with ThreadPoolExecutor(max_workers=10) as executor:
        # as_completed() yields futures in completion order, not submission
        # order, so every future is mapped back to the task it belongs to.
        # Zipping as_completed() with the task list would attach phone
        # numbers to the wrong URLs.
        future_to_index = {
            executor.submit(fetch_data_from_api, api_url): index
            for index, (_, api_url) in enumerate(tasks)
        }

        for future in tqdm(as_completed(future_to_index), total=len(tasks), desc="Fetching data"):
            index = future_to_index[future]
            original_url, api_url = tasks[index]
            _, phone_number_one, phone_number_two = future.result()
            rows[index] = {
                "original_url": original_url,
                "api_url": api_url,
                "phone_number_one": phone_number_one,
                "phone_number_two": phone_number_two,
            }

    # Explicit columns keep the schema stable even when the sheet has no rows.
    return pd.DataFrame(
        rows,
        columns=["original_url", "api_url", "phone_number_one", "phone_number_two"],
    )

# Example usage: read the URL list, look up phone numbers, export the result.
file_path = "hitta_list.xlsx"  # Update this to the path of your Excel file
output_path = "hitta_sample_details_data.xlsx"  # Destination workbook for the results

result_df = process_urls_from_excel(file_path)

# Write the collected rows to a new Excel file without the DataFrame index
result_df.to_excel(output_path, index=False)
print("Data extraction complete and saved to hitta_sample_details_data.xlsx.")


Fetching data: 100%|███████████████████████████████████████████████████████████████████| 50/50 [00:07<00:00,  6.85it/s]

Data extraction complete and saved to hitta_sample_details_data.xlsx.



