In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import pyodbc
from urllib.parse import urljoin
from datetime import datetime, timedelta
import re
import os


In [2]:
# Page that is scraped; also used later as the base for resolving relative links.
url = "https://www.arabbank.com.jo/"

# Browser-like User-Agent sent with the request.
# NOTE(review): presumably the site rejects the default python-requests agent — confirm.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

# requests has no default timeout: without one, a stalled connection would
# block this cell (and the kernel) forever.
REQUEST_TIMEOUT_SECONDS = 30

response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS)
# Fail loudly on 4xx/5xx instead of parsing an error page.
response.raise_for_status()

soup = BeautifulSoup(response.text, "html.parser")


In [3]:
# Directory (relative to the working directory) that receives the downloaded
# images; created on first run and reused on later runs.
folder_name = "Arab_Bank_images"
os.makedirs(folder_name, exist_ok=True)

# Collect every <div> whose CSS classes include "swiper-slide".
slides = soup.find_all(name="div", attrs={"class": "swiper-slide"})


In [4]:
# Per-image download timeout (seconds) so one stalled request cannot hang the loop.
IMAGE_TIMEOUT_SECONDS = 30


def build_image_filename(img_alt, img_src):
    """Build a filesystem-safe file name for an image.

    Args:
        img_alt: The image's ``alt`` text; used as the base name
            ("no_name" when it is empty).
        img_src: The image's ``src`` attribute; only used to pick the extension.

    Returns:
        ``"<name>.<ext>"``, or just ``"<name>"`` when the URL path has no extension.
    """
    # Replace whitespace and characters that are illegal in file names
    # (notably on Windows) so open() cannot fail or escape the target folder.
    img_name = re.sub(r'[\\/:*?"<>|\s]', "_", img_alt) or "no_name"
    # Strip the query string / fragment first: a dot inside them
    # (e.g. "?v=1.2") must not be mistaken for the extension separator.
    url_path = img_src.split("?", 1)[0].split("#", 1)[0]
    img_extension = os.path.splitext(url_path)[1].lstrip(".")
    return f"{img_name}.{img_extension}" if img_extension else img_name


# One dict per slide image; turned into a DataFrame in a later cell.
data = []

for slide in slides:
    img_tag = slide.find("img")
    if img_tag is None:
        continue

    img_src = img_tag.get("src", "").strip()
    img_alt = img_tag.get("alt", "").strip()
    if not img_src:
        # Nothing to download or record for an <img> without a source.
        continue

    # The article link is the href of the <a> wrapping the image, if any.
    # urljoin leaves absolute URLs untouched and resolves relative ones against `url`.
    parent_a = img_tag.find_parent("a")
    article_link = ""
    if parent_a is not None and parent_a.has_attr("href"):
        article_link = urljoin(url, parent_a["href"].strip())

    img_filename = build_image_filename(img_alt, img_src)
    img_path = os.path.join(folder_name, img_filename)
    full_img_url = urljoin(url, img_src)

    # Best effort: a failed download is reported but does not stop the loop.
    try:
        img_response = requests.get(
            full_img_url, headers=headers, timeout=IMAGE_TIMEOUT_SECONDS
        )
        img_response.raise_for_status()
        with open(img_path, "wb") as f:
            f.write(img_response.content)
        print(f"✔️ تم تحميل الصورة: {img_filename}")
    except (requests.RequestException, OSError) as e:
        print(f"❌ فشل تحميل الصورة: {full_img_url}, السبب: {e}")

    # Record the slide's metadata (kept even when the download itself failed).
    data.append({
        "Bank_Name": "Arab Bank",
        "Image_Name": img_alt,
        "Image_Link": full_img_url,
        "Article_Link": article_link  # link of the related article
    })


✔️ تم تحميل الصورة: Artboard_1_copy_12.png
✔️ تم تحميل الصورة: Artboard_1_copy_6.png
✔️ تم تحميل الصورة: Artboard_1_copy_14.png
✔️ تم تحميل الصورة: Artboard_1_copy_10.png
✔️ تم تحميل الصورة: Artboard_1_copy_4.png
✔️ تم تحميل الصورة: Bank_of_the_year-04.jpg
✔️ تم تحميل الصورة: Bank_of_the_year.jpg


In [5]:
# Explicit column list: keeps the schema (and column order) stable even when
# the scrape found no slides and `data` is empty.
result_columns = ["Bank_Name", "Image_Name", "Image_Link", "Article_Link"]
df = pd.DataFrame(data, columns=result_columns)
df

Unnamed: 0,Bank_Name,Image_Name,Image_Link,Article_Link
0,Arab Bank,Artboard 1 copy 12,https://www.arabbank.com.jo/images/default-sou...,https://www.arabbank.com.jo/mainmenu/home/Cons...
1,Arab Bank,Artboard 1 copy 6,https://www.arabbank.com.jo/images/default-sou...,https://www.arabbank.com.jo/mainmenu/home/Cons...
2,Arab Bank,Artboard 1 copy 14,https://www.arabbank.com.jo/images/default-sou...,https://www.arabbank.com.jo/mainmenu/home/sme-...
3,Arab Bank,Artboard 1 copy 10,https://www.arabbank.com.jo/images/default-sou...,https://www.arabbank.com.jo/mainmenu/home/Cons...
4,Arab Bank,Artboard 1 copy 4,https://www.arabbank.com.jo/images/default-sou...,https://www.arabbank.com.jo/mainmenu/home/corp...
5,Arab Bank,Bank of the year-04,https://www.arabbank.com.jo/images/default-sou...,https://www.arabbank.com.jo/
6,Arab Bank,Bank of the year,https://www.arabbank.com.jo/images/default-sou...,https://www.arabbank.com.jo/


In [7]:
import sys

import pyodbc

# ODBC connection string for the local SQL Server Express instance:
# Windows authentication (Trusted_Connection) with encryption turned off.
connection_string = (
    'DRIVER={ODBC Driver 17 for SQL Server};'
    'SERVER=.\\SQLEXPRESS;'
    'DATABASE=BankScraping;'
    'Trusted_Connection=yes;'
    'Encrypt=no'
)

conn = pyodbc.connect(connection_string)
cursor = conn.cursor()


In [8]:
# Reset the dataset (translated from the original Arabic note).
# NOTE(review): both statements below are commented out, so running this cell
# does nothing; uncommenting them would run TRUNCATE TABLE on BankScraping and commit it.

# cursor.execute("TRUNCATE TABLE BankScraping")
# conn.commit()

In [9]:
# Insert every scraped row into the BankScraping table in one transaction.
insert_sql = """
    INSERT INTO BankScraping (BankName, ImageName, ImageLink, ArticleLink, ArticleTitle, Content)
    VALUES (?, ?, ?, ?, ?, ?)
"""

# DataFrame columns in the same order as the INSERT's target columns.
# Columns absent from `df` (the scraping loop only fills the first four)
# resolve to None via Series.get and are stored as NULL.
source_columns = [
    "Bank_Name",
    "Image_Name",
    "Image_Link",
    "Article_Link",
    "Article_Title",
    "Content",
]

# Normalise missing values (None / NaN) to None so they are sent as SQL NULL.
insert_params = [
    tuple(
        None if pd.isna(row.get(column)) else row.get(column)
        for column in source_columns
    )
    for _, row in df.iterrows()
]

# executemany rejects an empty parameter list, so skip the call when there is nothing to insert.
if insert_params:
    try:
        cursor.executemany(insert_sql, insert_params)
        conn.commit()
    except pyodbc.Error:
        # Discard the partial batch so a later commit on this connection
        # cannot persist half of the rows.
        conn.rollback()
        raise
