# Another practice Mode

In [1]:
# Import all the relevant packages 

import pandas as pd
from bs4 import BeautifulSoup
from datetime import datetime
import requests
import numpy as np
import sqlite3

## Define the URL, file paths, and database settings

In [3]:
# Archived (Wayback Machine) snapshot of the Wikipedia page to scrape.
# The snapshot timestamp must be followed directly by '/<original url>';
# a space between the two makes the request URL malformed.
url = 'https://web.archive.org/web/20230908091635/https://en.wikipedia.org/wiki/List_of_largest_banks'

# Column names produced by the extract step and by the transform step.
table_attribute = ['Name','MC_USD_Billion']
ftable_attribute =['Name','MC_USD_Billion','MC_GBP_Billion','MC_EUR_Billion','MC_INR_Billion']

# Output locations: CSV file, SQLite database/table, and the progress log.
target_file = './Largest_bank_data.csv'
db_name = 'Banks.db'
table_name='Largest_banks'
log_file = 'code_log.txt'

# Exchange rates are read from a CSV with 'Currency' and 'Rate' columns.
rate_file = pd.read_csv('./exchange_rate.csv')

# Mapping of currency code -> rate, e.g. exchange_rate['GBP'].
exchange_rate = rate_file.set_index('Currency').to_dict()['Rate']



#### Create a function to handle the progress logs

In [4]:
def log_progress(message):
    """Append a timestamped progress entry to the log file.

    Each entry is written as ``<timestamp>,<message>`` on its own line to the
    file named by the module-level ``log_file``.

    Parameters
    ----------
    message : str
        Text describing the pipeline stage that was reached.
    """
    # Year-month-day-hour:minute:second, e.g. 2023-09-08-09:16:35.
    # (The earlier '%y-%y-%d' pattern printed the two-digit year twice and
    # never the month.)
    timestamp_format = '%Y-%m-%d-%H:%M:%S'
    timestamp = datetime.now().strftime(timestamp_format)

    # Append mode keeps earlier entries; the f-string also tolerates
    # non-string messages.
    with open(log_file, 'a') as f:
        f.write(f"{timestamp},{message}\n")

### Extract the data from the web

In [5]:
def extract(url, table_attribute):
    """Scrape bank names and USD market capitalisation from a web page.

    The page is downloaded, parsed with BeautifulSoup, and the rows of the
    second ``<tbody>`` element are read. For every row with data cells, the
    text of the first link in the second cell is taken as the bank name and
    the leading text of the third cell as the market cap in USD billions.

    Parameters
    ----------
    url : str
        Address of the page to download.
    table_attribute : list of str
        Column names for the resulting frame, in the order (name, market cap).

    Returns
    -------
    pandas.DataFrame
        One row per bank, with the columns named by ``table_attribute``.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    IndexError
        If the page has fewer than two ``<tbody>`` elements, or a data row has
        no link in its second cell.
    """
    # A timeout avoids hanging forever on a stalled connection, and
    # raise_for_status stops an error page from being parsed as data.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    html = BeautifulSoup(response.text, "html.parser")

    tables = html.find_all("tbody")
    records = []
    for row in tables[1].find_all('tr'):
        cells = row.find_all('td')
        # Header rows have no <td>; rows with fewer than three cells cannot
        # hold both a name and a market-cap value, so skip them.
        if len(cells) < 3:
            continue

        # NOTE(review): this takes the first <a> in the cell. If the cell also
        # holds a flag-icon link before the bank link, the name will be empty;
        # confirm against the live page.
        name = str(cells[1].find_all('a')[0].text)

        # Drop surrounding whitespace, newlines and thousands separators
        # before converting to a number.
        mc_usd_billion_str = cells[2].contents[0].strip().replace('\n', '')
        mc_usd_billion_str = mc_usd_billion_str.replace(',', '')
        records.append((name, float(mc_usd_billion_str)))

    # Build the frame once: concatenating row by row is quadratic and
    # triggers pandas' empty-frame concat deprecation warning.
    return pd.DataFrame(records, columns=table_attribute)


#### Transform the data

In [6]:
def transform(df, csv_path):
    """Add GBP, EUR and INR market-cap columns derived from the USD column.

    Each new column is ``MC_USD_Billion`` multiplied by the matching rate in
    the module-level ``exchange_rate`` mapping, rounded to two decimals.
    The frame is modified in place and also returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing an ``MC_USD_Billion`` column.
    csv_path : Any
        Not used by this function; kept for the existing call signature.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with the three extra columns.
    """
    # Target column -> currency code used to look up the rate.
    currency_columns = {
        'MC_GBP_Billion': 'GBP',
        'MC_EUR_Billion': 'EUR',
        'MC_INR_Billion': 'INR',
    }

    for column, code in currency_columns.items():
        # The rate is looked up per value, and the map is consumed at once.
        df[column] = list(
            map(lambda usd: np.round(usd * exchange_rate[code], 2),
                df['MC_USD_Billion'])
        )

    return df

#### Load CSV

In [7]:
def load_to_csv(df, output_path):
    """Write ``df`` as CSV to ``output_path``.

    pandas' default options are used, so the frame's index is written as the
    first column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to save.
    output_path : str or file-like
        Destination passed straight to ``DataFrame.to_csv``.
    """
    df.to_csv(path_or_buf=output_path)

#### Load to Db

In [8]:
def load_to_db(df, sql_connection, table_name):
    """Store ``df`` in a database table, replacing the table if it exists.

    The frame's index is not written.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to store.
    sql_connection : database connection
        Open connection handed to ``DataFrame.to_sql``.
    table_name : str
        Name of the destination table.
    """
    df.to_sql(
        name=table_name,
        con=sql_connection,
        if_exists='replace',
        index=False,
    )
    

### Run Query

In [9]:
def run_query(query_statement, sql_connection):
    """Print a SQL statement followed by the result of running it.

    Parameters
    ----------
    query_statement : str
        SQL text to execute.
    sql_connection : database connection
        Open connection handed to ``pandas.read_sql``.
    """
    # Echo the statement first so it is shown even if execution fails.
    print(query_statement)
    print(pd.read_sql(sql=query_statement, con=sql_connection))

**Mk_Lateef**