In [1]:
import requests
from bs4 import BeautifulSoup
import json
import time
import random

In [2]:
def get_headers():
    return {
     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.9',
        'Referer': 'https://www.google.com/',
        'DNT': '1'
    }

In [3]:
def scrape_telebirr_faqs(url):
    try:
        session = requests.Session()
        
       
        response = session.get(url, headers=get_headers())
        
        if response.status_code != 200:
            print(f"Failed to fetch page. Status code: {response.status_code}")
            return []

        soup = BeautifulSoup(response.text, 'html.parser')
        
        
        accordion_lists = soup.find_all('div', class_='eael-accordion-list')
        
        if not accordion_lists:
            print("No accordion items found. The page structure may have changed.")
            return []

        faqs = []
        
        for item in accordion_lists:
            try:
                
                question_element = item.find('div', class_='elementor-tab-title')
                question = question_element.find('span').get_text(strip=True) if question_element else "No question found"
                
               
                answer_element = item.find('div', class_='eael-accordion-content')
                answer = ""
                
                if answer_element:
                    
                    for content in answer_element.find_all(['p', 'ul', 'div']):
                        if content.name == 'p':
                            text = content.get_text(strip=True)
                            if text:  
                                answer += text + "\n"
                        elif content.name == 'ul':
                            for li in content.find_all('li'):
                                answer += f"• {li.get_text(strip=True)}\n"
                        elif content.name == 'div' and 'mm-accordion-list' in content.get('class', []):
                            for li in content.find_all('li'):
                                answer += f"• {li.get_text(strip=True)}\n"
                
                if question and answer:
                    faqs.append({
                        "question": question.strip(),
                        "answer": answer.strip(),
                        "source": "Telebirr",
                        "Language" : "English"

                    })
                    
            except Exception as e:
                print(f"Error processing accordion item: {str(e)}")
                continue
                
        return faqs

    except Exception as e:
        print(f"Scraping failed: {str(e)}")
        return []

if __name__ == "__main__":
    url = "https://www.ethiotelecom.et/telebirr/" 
    faqs = scrape_telebirr_faqs(url)
    
    if faqs:
        with open('telebirr_faqs.jsonl', 'w', encoding='utf-8') as f:
            json.dump(faqs, f, indent=2, ensure_ascii=False)
        
        print(f"Successfully scraped {len(faqs)} FAQs")
        print("Sample FAQ:")
        print(json.dumps(faqs[0], indent=2, ensure_ascii=False))
    else:
        print("No FAQs found. Please check the URL or website structure.")

Successfully scraped 26 FAQs
Sample FAQ:
{
  "question": "What is telebirr service?",
  "answer": "telebirr is a digital cash that enables you access a variety of financial services at ease, with convenience at the tip of your fingers using your mobile phone without the need to have a bank account.It enables cashless payments, which reduces dependency on cash and allows tracking of transaction records. This increases financial security and reduces inherent risks of cash handling such as loss, theft or fraud.telebirr platform is accessible via most basic mobile phones.It helps you purchase goods & services online\n• telebirr is a digital cash that enables you access a variety of financial services at ease, with convenience at the tip of your fingers using your mobile phone without the need to have a bank account.\n• It enables cashless payments, which reduces dependency on cash and allows tracking of transaction records. This increases financial security and reduces inherent risks of ca