# Week 2 - International Bank Account Numbers


In [1]:
import pandas as pd
import numpy as np

## Requirements

### 1) Input the data


In [33]:
# Load both challenge inputs in one step: the SWIFT code lookup table
# and the raw transactions.
lookup, trans = (
    pd.read_csv('data/Week2_Swift_Codes.csv'),
    pd.read_csv('data/Week2_Transactions.csv'),
)

#### Inspecting data

In [34]:
# Preview the first five rows of the SWIFT lookup table
lookup.head(n=5)

Unnamed: 0,Bank,SWIFT code,Check Digits
0,Lloyds Bank,LOYD,C1
1,Barclays Bank,BARC,22
2,Halifax,HLFX,22
3,HSBC,HBUK,4B
4,Natwest,NWBK,2L


In [35]:
# Preview the first five rows of the transactions table
trans.head(n=5)

Unnamed: 0,Transaction ID,Account Number,Sort Code,Bank
0,3888,62230725,95-98-82,Data Source Bank
1,4746,83172326,42-86-38,Barclays Bank
2,5404,34302539,53-28-21,Barclays Bank
3,9013,13350031,93-87-71,Natwest
4,2535,68745993,57-14-32,Barclays Bank


### 2) In the Transactions table, there is a Sort Code field which contains dashes. We need to remove these so we just have a 6-digit string

In [36]:
# Strip the dashes from the sort code (e.g. '95-98-82' -> '959882').
# regex=False makes this a plain literal replacement regardless of the pandas
# version's default for `regex`. Re-running the cell is harmless: once the dashes
# are gone there is nothing left to replace.
trans['Sort Code'] = trans['Sort Code'].str.replace('-', '', regex=False)

### 3) Use the SWIFT Bank Code lookup table to bring in additional information about the SWIFT code and Check Digits of the receiving bank account 

In [37]:
# Attach each transaction's SWIFT code and check digits by joining on the bank name.
# how='inner' (the pd.merge default) is spelled out: a transaction whose bank is
# absent from the lookup table is dropped from the result.
# validate='one_to_many' makes pandas raise a MergeError if a bank appears more than
# once in the lookup table, instead of silently duplicating its transactions.
all_data = pd.merge(lookup, trans, on='Bank', how='inner', validate='one_to_many')
all_data.head()

Unnamed: 0,Bank,SWIFT code,Check Digits,Transaction ID,Account Number,Sort Code
0,Lloyds Bank,LOYD,C1,4870,79724968,510741
1,Lloyds Bank,LOYD,C1,1126,69969270,458255
2,Lloyds Bank,LOYD,C1,8453,30335066,649458
3,Lloyds Bank,LOYD,C1,5955,71124764,978496
4,Lloyds Bank,LOYD,C1,8500,46967961,874938


In [38]:
# Inspect the merged data set: row count, non-null counts and dtype of every column

all_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 100 entries, 0 to 99
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Bank            100 non-null    object
 1   SWIFT code      100 non-null    object
 2   Check Digits    100 non-null    object
 3   Transaction ID  100 non-null    int64 
 4   Account Number  100 non-null    int64 
 5   Sort Code       100 non-null    object
dtypes: int64(2), object(4)
memory usage: 5.5+ KB


### 4) Add a field for the Country Code (hint)
     Hint: all these transactions take place in the UK so the Country Code should be GB

In [39]:
# Every transaction takes place in the UK, so the country code is the constant 'GB'.
all_data = all_data.assign(**{'Country Code': 'GB'})

### 5) Create the IBAN as above (hint)
    Hint: watch out for trying to combine string fields with numeric fields - check data types

In [40]:
# Check the data types (already shown above with all_data.info()) to see which
# columns must be converted to string before they can be concatenated

all_data.dtypes


Bank              object
SWIFT code        object
Check Digits      object
Transaction ID     int64
Account Number     int64
Sort Code         object
Country Code      object
dtype: object

In [43]:
# Build the IBAN by concatenating, in order:
#   Country Code + Check Digits + SWIFT code + Sort Code + Account Number
#
# Converting the whole frame with all_data.astype(str) would also work, but here the
# original dtypes are kept and only the numeric 'Account Number' column is converted.
#
# 'Account Number' was read as int64, so any leading zeros were lost on load.
# A GB IBAN reserves 8 digits for the account number, so the string is left-padded
# with zeros to 8 characters; numbers that already have 8 digits are unchanged.
all_data['IBAN'] = (
    all_data['Country Code']
    + all_data['Check Digits']
    + all_data['SWIFT code']
    + all_data['Sort Code']
    + all_data['Account Number'].astype(str).str.zfill(8)
)

all_data.head()


Unnamed: 0,Bank,SWIFT code,Check Digits,Transaction ID,Account Number,Sort Code,Country Code,IBAN
0,Lloyds Bank,LOYD,C1,4870,79724968,510741,GB,GBC1LOYD51074179724968
1,Lloyds Bank,LOYD,C1,1126,69969270,458255,GB,GBC1LOYD45825569969270
2,Lloyds Bank,LOYD,C1,8453,30335066,649458,GB,GBC1LOYD64945830335066
3,Lloyds Bank,LOYD,C1,5955,71124764,978496,GB,GBC1LOYD97849671124764
4,Lloyds Bank,LOYD,C1,8500,46967961,874938,GB,GBC1LOYD87493846967961


### 6) Remove unnecessary fields 

In [55]:
# Keep only the two fields required in the final output
output_columns = ['Transaction ID', 'IBAN']
output = all_data.loc[:, output_columns]


In [56]:
# Display the final result: one row per transaction with its Transaction ID and IBAN
output

Unnamed: 0,Transaction ID,IBAN
0,4870,GBC1LOYD51074179724968
1,1126,GBC1LOYD45825569969270
2,8453,GBC1LOYD64945830335066
3,5955,GBC1LOYD97849671124764
4,8500,GBC1LOYD87493846967961
...,...,...
95,4063,GB12DSBX28178620220923
96,3770,GB12DSBX32549026013637
97,2760,GB12DSBX28066085744933
98,9530,GB12DSBX70700574379515
