In [1]:
import requests
from bs4 import BeautifulSoup
import json

In [2]:
def scrape_awash_faqs(url):
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    
    try:
        response = requests.get(url, headers=headers)
        response.encoding = 'utf-8'
        soup = BeautifulSoup(response.text, 'html.parser')
        
       
        accordion = soup.find('div', class_='eael-adv-accordion')
        
        if not accordion:
            print("Could not find FAQ accordion container")
            return []
            
        faq_list = []
        
        
        faq_items = accordion.find_all('div', class_='eael-accordion-list')
        
        for item in faq_items:
            try:
                
                question_div = item.find('span', class_='eael-accordion-tab-title')
                question = question_div.get_text(strip=True) if question_div else ""
                
                answer_div = item.find('div', class_='eael-accordion-content')
                answer = ""
                
                if answer_div:
                    
                    for element in answer_div.find_all(['p', 'ul', 'li']):
                        if element.name == 'p':
                            answer += element.get_text(strip=True) + "\n"
                        elif element.name == 'ul':
                            for li in element.find_all('li'):
                                answer += f"• {li.get_text(strip=True)}\n"
                        elif element.name == 'li':
                            answer += f"• {element.get_text(strip=True)}\n"
                
                if question and answer:
                    faq_list.append({
                        "instruction": "Answer the following banking question",
                        "input": question,
                        "output": answer.strip(),
                        "source": url,
                        "category": "Awash Bank FAQ"
                    })
                    
            except Exception as e:
                print(f"Error processing item: {str(e)}")
                continue
                
        return faq_list
        
    except Exception as e:
        print(f"Scraping failed: {str(e)}")
        return []
url = "https://www.awashbank.com/faq"
faqs = scrape_awash_faqs(url)


if faqs:
    with open('awash_bank_faqs.json', 'w', encoding='utf-8') as f:
        json.dump(faqs, f, indent=2, ensure_ascii=False)
    
    print(f"Successfully scraped {len(faqs)} FAQs")
    print("Sample FAQ:")
    print(json.dumps(faqs[0], indent=2, ensure_ascii=False))
else:
    print("No FAQs found. Please check the URL or website structure.")

Successfully scraped 30 FAQs
Sample FAQ:
{
  "instruction": "Answer the following banking question",
  "input": "What are loyalty points and how can I convert loyalty point to airtime charge?",
  "output": "• Loyalty points are points awarded to customers per transaction using Awashbirr App or USSD. These points will be converted into airtime charge when you reach a minimum of 20 points.\n• Loyalty points are points awarded to customers per transaction using Awashbirr App or USSD. These points will be converted into airtime charge when you reach a minimum of 20 points.",
  "source": "https://www.awashbank.com/faq",
  "category": "Awash Bank FAQ"
}
