In [1]:
import pandas as pd
from etl.help_extract_top_banks import extract
from etl.help_transform_top_banks import transform
from etl.help_load_top_banks import load_to_csv, load_to_db
from etl.query_banks_db import run_query
from sqlite_connection_manager.manage_sql_connection import execute_using_sql_connection

## **Initialize Preliminary Inputs**

In [2]:
# Wayback Machine snapshot (timestamp 20230908091635) of Wikipedia's
# "List of largest banks" page. The timestamp and the archived URL must be
# joined by a bare "/": a space between them makes the URL malformed and only
# works if the HTTP client happens to percent-encode it.
data_url = "https://web.archive.org/web/20230908091635/https://en.wikipedia.org/wiki/List_of_largest_banks"

# CSV file passed to transform() in the transform step (relative to this notebook).
exchange_rates_path = "../exchange_rates/exchange_rate.csv"

# Table name passed to load_to_db() in the load step.
table_name = "Largest_banks"

# Column labels of the extracted table, in display order.
column_names = [
    "Rank",
    "Bank_name",
    "Market_cap_(USD_Billion)",
]

# Empty frame with the expected columns and dtypes; it is handed to extract()
# as the starting DataFrame.
top_banks_df = pd.DataFrame(columns=column_names).astype(
    {
        "Rank": str,
        "Bank_name": str,
        "Market_cap_(USD_Billion)": float,
    }
)

The following ETL process extracts, transforms, and loads data on the world's top banks based on their market capitalization. Each stage of the process is logged in `logs/code_log` for tracking and debugging.

## **Extract Top Banks Table To DataFrame**

In [3]:
# Run the extract step against `data_url`, starting from the empty typed frame
# built in the inputs cell, and display the result.
# NOTE(review): this rebinds `top_banks_df`, so re-running the cell without
# re-running the inputs cell passes the already-populated frame back into
# extract(). Whether extract() appends to or replaces its input is not visible
# here -- confirm in etl.help_extract_top_banks; use Restart & Run All to reproduce.
top_banks_df = extract(data_url, top_banks_df)
top_banks_df

Unnamed: 0,Rank,Bank_name,Market_cap_(USD_Billion)
0,1,JPMorgan Chase,432.92
1,2,Bank of America,231.52
2,3,Industrial and Commercial Bank of China,194.56
3,4,Agricultural Bank of China,160.68
4,5,HDFC Bank,157.91
5,6,Wells Fargo,155.87
6,7,HSBC,148.9
7,8,Morgan Stanley,140.83
8,9,China Construction Bank,139.82
9,10,Bank of China,136.81


## **Transform Top Banks DataFrame**

In [4]:
# Run the transform step using the CSV at `exchange_rates_path` and display the
# result; the output below shows MC_GBP_Billion, MC_EUR_Billion and
# MC_INR_Billion columns alongside the extracted ones.
# NOTE(review): this overwrites the extracted frame under the same name, so the
# cell is not safely re-runnable on its own -- presumably a second run would
# transform an already-transformed frame; confirm in etl.help_transform_top_banks.
top_banks_df = transform(top_banks_df, exchange_rates_path)
top_banks_df

Unnamed: 0,Rank,Bank_name,Market_cap_(USD_Billion),MC_GBP_Billion,MC_EUR_Billion,MC_INR_Billion
0,1,JPMorgan Chase,432.92,346.34,402.62,35910.71
1,2,Bank of America,231.52,185.22,215.31,19204.58
2,3,Industrial and Commercial Bank of China,194.56,155.65,180.94,16138.75
3,4,Agricultural Bank of China,160.68,128.54,149.43,13328.41
4,5,HDFC Bank,157.91,126.33,146.86,13098.63
5,6,Wells Fargo,155.87,124.7,144.96,12929.42
6,7,HSBC,148.9,119.12,138.48,12351.26
7,8,Morgan Stanley,140.83,112.66,130.97,11681.85
8,9,China Construction Bank,139.82,111.86,130.03,11598.07
9,10,Bank of China,136.81,109.45,127.23,11348.39


## **Load Top Banks**

In [5]:
# Write the transformed frame to a CSV file at the given relative path.
load_to_csv(top_banks_df, "../largest_banks/largest_banks_data.csv")
# Store the same frame in the database under `table_name`.
# presumably execute_using_sql_connection() opens the SQLite connection and
# invokes load_to_db with it plus the remaining arguments -- confirm in
# sqlite_connection_manager.manage_sql_connection.
execute_using_sql_connection(load_to_db, top_banks_df, table_name)

- `largest_banks_data.csv` was created using `load_to_csv()`. It contains a list of top banks along with their market capitalization. To view the file, navigate to 📂 `largest_banks/largest_banks_data.csv` in the project's root directory.

- `Banks.db` was created using `load_to_db()`. It is a database storing top banks and their market capitalization. To access the database, open 📂 `banks_database/Banks.db` in the project's root directory.

## **Execute Queries**

In [6]:
# Read every row back from the table written by the load step.
query_output = execute_using_sql_connection(run_query, "SELECT * FROM Largest_banks")

# Build the header list under its own name rather than reassigning
# `column_names`. Rebinding the config-level list made this cell non-idempotent:
# a second run prepended "" and appended the currency columns again, giving
# more labels than the query has columns and making pd.DataFrame raise.
# The leading "" labels the first returned column, which is then used as the index.
output_columns = (
    [""]
    + list(column_names)
    + ["MC_GBP_Billion", "MC_EUR_Billion", "MC_INR_Billion"]
)
output_df = pd.DataFrame(query_output, columns=output_columns).set_index("")

output_df

Unnamed: 0,Rank,Bank_name,Market_cap_(USD_Billion),MC_GBP_Billion,MC_EUR_Billion,MC_INR_Billion
,,,,,,
0.0,1.0,JPMorgan Chase,432.92,346.34,402.62,35910.71
1.0,2.0,Bank of America,231.52,185.22,215.31,19204.58
2.0,3.0,Industrial and Commercial Bank of China,194.56,155.65,180.94,16138.75
3.0,4.0,Agricultural Bank of China,160.68,128.54,149.43,13328.41
4.0,5.0,HDFC Bank,157.91,126.33,146.86,13098.63
5.0,6.0,Wells Fargo,155.87,124.7,144.96,12929.42
6.0,7.0,HSBC,148.9,119.12,138.48,12351.26
7.0,8.0,Morgan Stanley,140.83,112.66,130.97,11681.85
8.0,9.0,China Construction Bank,139.82,111.86,130.03,11598.07


In [7]:
# Average market capitalisation in GBP across every row of the table.
query_output_1 = execute_using_sql_connection(
    run_query,
    "SELECT AVG(MC_GBP_Billion) FROM Largest_banks",
)
# The result is indexed as [row][column]; the aggregate is the single value at [0][0].
"Average GBP Billion: " + str(query_output_1[0][0])

'Average GBP Billion: 151.98700000000002'

In [8]:
# Fetch the names of five banks from the table.
# NOTE(review): the query has no ORDER BY, so which five rows come back depends
# on the order the database returns them in, not on Rank -- add
# an ORDER BY if "top five" is the intent.
query_output_2 = execute_using_sql_connection(run_query, "SELECT Bank_name FROM Largest_banks LIMIT 5")
query_output_2

[('JPMorgan Chase',),
 ('Bank of America',),
 ('Industrial and Commercial Bank of China',),
 ('Agricultural Bank of China',),
 ('HDFC Bank',)]