<a href="https://colab.research.google.com/github/CUOTR/SQL-BANKING/blob/main/TRANSACTIONS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

PREPARE DATA

In [98]:
import sqlite3
import os

# Thay 'bank.db' bằng tên file thực tế của bạn
db_path = 'bank.db'

# Xóa file cũ nếu tồn tại
if os.path.exists(db_path):
    os.remove(db_path)
    print("Đã xóa database cũ.")

# Kết nối lại → SQLite sẽ tự tạo file mới
conn = sqlite3.connect(db_path)
print("Database mới đã được tạo.")

Đã xóa database cũ.
Database mới đã được tạo.


In [99]:
import sqlite3
import pandas as pd

conn = sqlite3.connect('/content/bank.db')
cursor = conn.cursor()

cursor.execute("DROP TABLE IF EXISTS addresses")
cursor.execute("""
CREATE TABLE addresses (
    AddressID INT PRIMARY KEY,
    Street VARCHAR(100),
    City VARCHAR(100),
    Country VARCHAR(100)
);
""")

cursor.execute("DROP TABLE IF EXISTS customer_types")
cursor.execute("""
CREATE TABLE customer_types (
    CustomerTypeID INT PRIMARY KEY,
    TypeName VARCHAR(50)
);
""")

cursor.execute("DROP TABLE IF EXISTS customers")
cursor.execute("""
CREATE TABLE customers (
    CustomerID INT PRIMARY KEY,
    FirstName VARCHAR(50),
    LastName VARCHAR(50),
    DateOfBirth DATE,
    AddressID INT,
    CustomerTypeID INT,
    FOREIGN KEY (AddressID) REFERENCES addresses(AddressID),
    FOREIGN KEY (CustomerTypeID) REFERENCES customer_types(CustomerTypeID)
);
""")

cursor.execute("DROP TABLE IF EXISTS account_types")
cursor.execute("""
CREATE TABLE account_types (
    AccountTypeID INT PRIMARY KEY,
    TypeName VARCHAR(50)
);
""")

cursor.execute("DROP TABLE IF EXISTS account_statuses")
cursor.execute("""
CREATE TABLE account_statuses (
    AccountStatusID INT PRIMARY KEY,
    StatusName VARCHAR(50)
);
""")

cursor.execute("DROP TABLE IF EXISTS accounts")
cursor.execute("""
CREATE TABLE accounts (
    AccountID INT PRIMARY KEY,
    CustomerID INT,
    AccountTypeID INT,
    AccountStatusID INT,
    Balance DECIMAL(15,2),
    OpeningDate TEXT,
    FOREIGN KEY (CustomerID) REFERENCES customers(CustomerID),
    FOREIGN KEY (AccountTypeID) REFERENCES account_types(AccountTypeID),
    FOREIGN KEY (AccountStatusID) REFERENCES account_statuses(AccountStatusID)
);
""")

cursor.execute("DROP TABLE IF EXISTS transaction_types")
cursor.execute("""
CREATE TABLE transaction_types (
    TransactionTypeID INT PRIMARY KEY,
    TypeName VARCHAR(50)
);
""")

cursor.execute("DROP TABLE IF EXISTS branches")
cursor.execute("""
CREATE TABLE branches (
    BranchID INT PRIMARY KEY,
    BranchName VARCHAR(100),
    AddressID INT,
    FOREIGN KEY (AddressID) REFERENCES addresses(AddressID)
);
""")

cursor.execute("DROP TABLE IF EXISTS transactions")
cursor.execute("""
CREATE TABLE transactions (
    TransactionID INT PRIMARY KEY,
    AccountID INT,
    BranchID INT,
    TransactionTypeID INT,
    Amount DECIMAL(15,2),
    Description VARCHAR(255),
    TransactionDate DATETIME,
    AccountOriginID INT,
    AccountDestinationID INT,
    FOREIGN KEY (AccountID) REFERENCES accounts(AccountID),
    FOREIGN KEY (BranchID) REFERENCES branches(BranchID),
    FOREIGN KEY (TransactionTypeID) REFERENCES transaction_types(TransactionTypeID)
);
""")

cursor.execute("DROP TABLE IF EXISTS loan_statuses")
cursor.execute("""
CREATE TABLE loan_statuses (
    LoanStatusID INT PRIMARY KEY,
    StatusName VARCHAR(50)
);
""")

cursor.execute("DROP TABLE IF EXISTS loans")
cursor.execute("""
CREATE TABLE loans (
    LoanID INT PRIMARY KEY,
    AccountID INT,
    LoanStatusID INT,
    Amount DECIMAL(15,2),
    InterestRate DECIMAL(5,2),
    StartDate DATE,
    EndDate DATE,
    FOREIGN KEY (AccountID) REFERENCES accounts(AccountID),
    FOREIGN KEY (LoanStatusID) REFERENCES loan_statuses(LoanStatusID)
);
""")

conn.commit()

In [100]:
accounts = pd.read_csv('/content/accounts.csv')
account_types = pd.read_csv('/content/account_types.csv')
account_statuses = pd.read_csv('/content/account_statuses.csv')

customers = pd.read_csv('/content/customers.csv')
customer_types = pd.read_csv('/content/customer_types.csv')

loans = pd.read_csv('/content/loans.csv')
loan_statuses = pd.read_csv('/content/loan_statuses.csv')

transactions = pd.read_csv('/content/transactions.csv')
transaction_types = pd.read_csv('/content/transaction_types.csv')
transactions['TransactionDate'] = pd.to_datetime(transactions['TransactionDate'], format='%m/%d/%Y %H:%M', errors='coerce')

branches = pd.read_csv('/content/branches.csv')
addresses = pd.read_csv('/content/addresses.csv')


cursor.execute("DROP TABLE IF EXISTS accounts")
cursor.execute("DROP TABLE IF EXISTS account_types")
cursor.execute("DROP TABLE IF EXISTS account_statuses")

cursor.execute("DROP TABLE IF EXISTS customers")
cursor.execute("DROP TABLE IF EXISTS customer_types")

cursor.execute("DROP TABLE IF EXISTS loans")
cursor.execute("DROP TABLE IF EXISTS loan_statuses")

cursor.execute("DROP TABLE IF EXISTS transaction_types")
cursor.execute("DROP TABLE IF EXISTS transactions")

cursor.execute("DROP TABLE IF EXISTS branches")
cursor.execute("DROP TABLE IF EXISTS addresses")

accounts.to_sql('accounts', conn, if_exists='append', index=False)
account_types.to_sql('account_types', conn, if_exists='append', index=False)
account_statuses.to_sql('account_statuses', conn, if_exists='append', index=False)

customers.to_sql('customers', conn, if_exists='append', index=False)
customer_types.to_sql('customer_types', conn, if_exists='append', index=False)

loans.to_sql('loans', conn, if_exists='append', index=False)
loan_statuses.to_sql('loan_statuses', conn, if_exists='append', index=False)

transactions.to_sql('transactions', conn, if_exists='append', index=False)
transaction_types.to_sql('transaction_types', conn, if_exists='append', index=False)

branches.to_sql('branches', conn, if_exists='append', index=False)
addresses.to_sql('addresses', conn, if_exists='append', index=False)

conn.commit()

EDA TRANSACTION

In [106]:
sql1 = '''
SELECT
    SUM(CASE WHEN TransactionID IS NULL THEN 1 ELSE 0 END) AS null_id,
    SUM(CASE WHEN Amount IS NULL THEN 1 ELSE 0 END) AS null_amount,
    SUM(CASE WHEN TransactionDate IS NULL THEN 1 ELSE 0 END) AS null_date,
    SUM(CASE WHEN TransactionTypeID IS NULL THEN 1 ELSE 0 END) AS null_type
FROM transactions;
'''
result = pd.read_sql_query(sql1, conn)
print(result)

   null_id  null_amount  null_date  null_type
0        0            0       1000          0


In [102]:
sql2 = '''
SELECT
    MIN(t.Amount) AS min_amount,
    MAX(t.Amount) AS max_amount,
    AVG(t.Amount) AS avg_amount,
    COUNT(*) AS total_transactions,
    MIN(t.TransactionDate) AS first_transaction_date,
    MAX(t.TransactionDate) AS last_transaction_date
FROM transactions t
JOIN transaction_types tt ON t.TransactionTypeID = tt.TransactionTypeID;
'''
result = pd.read_sql_query(sql2, conn)
print(result)

   min_amount  max_amount   avg_amount  total_transactions  \
0        1.01     4999.59  2503.049865               49508   

  first_transaction_date last_transaction_date  
0    2020-01-01 00:00:00   2024-02-08 23:00:00  


In [107]:
sql3 = '''
SELECT
    tt.TypeName,
    MAX(t.TransactionDate) AS last_transaction_date
FROM transactions t
JOIN transaction_types tt ON t.TransactionTypeID = tt.TransactionTypeID
GROUP BY tt.TypeName
ORDER BY last_transaction_date DESC;
'''
result = pd.read_sql_query(sql3, conn)
print(result)

     TypeName last_transaction_date
0  Withdrawal   2024-02-08 23:00:00
1    Transfer   2024-02-08 20:00:00
2     Payment   2024-02-08 19:00:00
3     Deposit   2024-02-08 18:00:00


In [104]:
demo = '''
WITH monthly_tx AS (
    SELECT
        strftime('%Y-%m', TransactionDate) AS Month_Year,
        SUM(Amount) AS TotalAmount
    FROM transactions
    GROUP BY Month_Year
)
SELECT
    Month_Year,
    TotalAmount,
    ROUND((TotalAmount - LAG(TotalAmount, 1) OVER (ORDER BY Month_Year))
    / LAG(TotalAmount, 1) OVER (ORDER BY Month_Year) * 100, 2) AS MoM_GrowthPercent,
    ROUND((TotalAmount - LAG(TotalAmount, 12) OVER (ORDER BY Month_Year))
    / LAG(TotalAmount, 12) OVER (ORDER BY Month_Year) * 100, 2) AS YoY_GrowthPercent
FROM monthly_tx
ORDER BY Month_Year;
'''
result = pd.read_sql_query(demo, conn)
print(result)

   Month_Year  TotalAmount  MoM_GrowthPercent  YoY_GrowthPercent
0        None   2501548.69                NaN                NaN
1     2020-01   2457236.86              -1.77                NaN
2     2020-02   2478384.28               0.86                NaN
3     2020-03   2420467.90              -2.34                NaN
4     2020-04   2530154.69               4.53                NaN
5     2020-05   2656478.82               4.99                NaN
6     2020-06   2574094.48              -3.10                NaN
7     2020-07   2406126.60              -6.53                NaN
8     2020-08   2535000.85               5.36                NaN
9     2020-09   2427517.34              -4.24                NaN
10    2020-10   2377056.02              -2.08                NaN
11    2020-11   2527686.50               6.34                NaN
12    2020-12   2489931.21              -1.49              -0.46
13    2021-01   2511979.07               0.89               2.23
14    2021-02   2247787.8

In [105]:
query = '''
SELECT
    b.BranchName,
    COUNT(t.TransactionID) AS TransactionCount,
    SUM(t.Amount) AS TotalAmount
FROM transactions t
JOIN branches b ON t.BranchID = b.BranchID
GROUP BY b.BranchName
ORDER BY TotalAmount DESC, TransactionCount DESC
LIMIT 5;
 '''
result = pd.read_sql_query(query, conn)
print(result)

  BranchName  TransactionCount  TotalAmount
0  Branch 47              1046   2687234.07
1  Branch 39              1013   2630971.42
2  Branch 50              1064   2600262.96
3  Branch 46              1044   2599029.73
4  Branch 25              1029   2598472.03
