In [1]:
import sqlite3
from datetime import datetime
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
def log_progress(message):
    """Append a timestamped entry for ``message`` to ``code_log.txt``.

    The log file is opened in append mode relative to the current working
    directory. Each entry is written as ``<timestamp> : <message>`` followed
    by a newline, where the timestamp is ``datetime.now()`` at write time.
    """
    with open("code_log.txt", mode="a") as log_file:
        entry = f"{datetime.now()} : {message}\n"
        log_file.write(entry)

In [4]:
def extract(url):
    """Download ``url`` and return the top rows of its first ``wikitable``.

    Parameters
    ----------
    url : str
        Address of the page that holds the table.

    Returns
    -------
    pandas.DataFrame
        At most ten rows with the columns ``Bank name`` and
        ``MC_USD_Billion`` (renamed from ``Market cap (US$ billion)``).
        The frame is an independent copy, so it can be modified safely.

    Raises
    ------
    requests.RequestException
        If the request times out, fails, or returns an HTTP error status.
    ValueError
        If the page has no ``<table class="wikitable">`` element.
    KeyError
        If the table lacks one of the expected source columns.
    """
    # A timeout keeps a stalled server from hanging the notebook indefinitely,
    # and raise_for_status stops an error page from being parsed as data.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    # Locate the table
    table = soup.find("table", {"class": "wikitable"})
    if table is None:
        raise ValueError(f"No table with class 'wikitable' found at {url}")

    # pandas deprecated passing literal HTML text to read_html; wrap the
    # markup in a file-like object instead.
    df = pd.read_html(StringIO(str(table)))[0]

    # Keep only the required columns and the first 10 banks. The explicit
    # copy lets callers add columns without SettingWithCopyWarning.
    df = df[["Bank name", "Market cap (US$ billion)"]].head(10).copy()
    df.columns = ["Bank name", "MC_USD_Billion"]

    log_progress("Data extraction completed")
    return df


In [5]:
def transform(df, exchange_rate_path):
    """Add GBP, EUR and INR market-cap columns converted from USD.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a numeric ``MC_USD_Billion`` column. It is not modified.
    exchange_rate_path : str or path-like
        CSV file with ``Currency`` and ``Rate`` columns; each rate is
        multiplied with the USD value.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with ``MC_GBP_Billion``, ``MC_EUR_Billion`` and
        ``MC_INR_Billion`` appended, each rounded to two decimals.

    Raises
    ------
    KeyError
        If the rates file has no entry for GBP, EUR or INR.
    """
    # Read the rates from the path the caller supplied rather than a
    # hard-coded file name.
    rates = pd.read_csv(exchange_rate_path)
    rate_by_currency = dict(zip(rates["Currency"], rates["Rate"]))

    # Work on a copy so the caller's frame keeps its original columns.
    result = df.copy()
    for currency in ("GBP", "EUR", "INR"):
        converted = result["MC_USD_Billion"] * rate_by_currency[currency]
        result[f"MC_{currency}_Billion"] = converted.round(2)

    log_progress("Data transformation completed")
    return result

In [6]:
def load_to_csv(df, csv_path):
    """Write ``df`` to ``csv_path`` as CSV without the index, then log it.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to save.
    csv_path : str or path-like
        Destination file passed to ``DataFrame.to_csv``.
    """
    df.to_csv(csv_path, index=False)
    saved_message = f"Data saved to CSV at {csv_path}"
    log_progress(saved_message)

In [7]:
def load_to_db(df, db_name, table_name):
    """Store ``df`` in a SQLite database table, replacing any existing table.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to store; the index is not written.
    db_name : str or path-like
        SQLite database file passed to ``sqlite3.connect``.
    table_name : str
        Name of the table to create or replace.
    """
    conn = sqlite3.connect(db_name)
    try:
        df.to_sql(table_name, conn, if_exists="replace", index=False)
    finally:
        # Release the database handle even when the write fails.
        conn.close()
    log_progress(f"Data saved to Database {db_name}, table {table_name}")

In [8]:
def main():
    """Run the ETL pipeline: extract, transform, then save to CSV and SQLite.

    Progress is recorded through ``log_progress`` at the start and end of the
    run, in addition to the entries each stage writes itself.
    """
    # The snapshot timestamp must be followed directly by "/" -- a space
    # inside the URL makes it point at a different (invalid) address.
    url = "https://web.archive.org/web/20230908091635/https://en.wikipedia.org/wiki/List_of_largest_banks"
    exchange_rate_path = "exchange_rate.csv"
    csv_path = "./Largest_banks_data.csv"
    db_name = "Banks.db"
    table_name = "Largest_banks"

    log_progress("ETL process started")

    df_extracted = extract(url)
    # Pass the configured path instead of repeating the literal.
    df_transformed = transform(df_extracted, exchange_rate_path)
    load_to_csv(df_transformed, csv_path)
    load_to_db(df_transformed, db_name, table_name)

    log_progress("ETL process completed")


# Run the ETL pipeline only when this code is executed directly, not when it is imported.
if __name__ == "__main__":
    main()

NameError: name 'df_extracted' is not defined