In [1]:
# import essential libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

In [2]:
# load the dataset
data = pd.read_csv('data/bank_reviews3.csv')

# check the dimension of the dataset
print(data.shape)

# view the first 5 rows of the dataset
data.head()

(1000, 10)


Unnamed: 0,author,date,address,bank,rating,review_title_by_user,review,bank_image,rating_title_by_user,useful_count
0,AMRENDRA T,"Mar 21, 2020",New delhi,SBI,4.0,"""Best saving""",State Bank Of India is located nearby in our a...,https://static.bankbazaar.com/images/common/ba...,Great!,133
1,BISHWA,"Mar 20, 2020",Kolkata,SBI,5.0,"""Good service""","I have my salary account in SBI, when I applie...",https://static.bankbazaar.com/images/common/ba...,Blown Away!,89
2,SANTOSH,"Mar 20, 2020",Hooghly,Axis Bank,5.0,"""Excellent Service""",I am using Axis bank saving account for the p...,https://static.bankbazaar.com/images/common/ba...,Blown Away!,48
3,MAHADEV,"Mar 20, 2020",Pune,HDFC Bank,5.0,"""Excellent service""",I have my salary bank account in HDFC bank for...,https://static.bankbazaar.com/images/common/ba...,Blown Away!,52
4,R,"Mar 20, 2020",Bangalore,review,5.0,"""Good account""","Close to around 10 years, I am holding this Co...",https://static.bankbazaar.com/images/common/ba...,Blown Away!,22


In [3]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 10 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   author                996 non-null    object 
 1   date                  1000 non-null   object 
 2   address               1000 non-null   object 
 3   bank                  1000 non-null   object 
 4   rating                1000 non-null   float64
 5   review_title_by_user  1000 non-null   object 
 6   review                1000 non-null   object 
 7   bank_image            1000 non-null   object 
 8   rating_title_by_user  1000 non-null   object 
 9   useful_count          1000 non-null   int64  
dtypes: float64(1), int64(1), object(8)
memory usage: 78.2+ KB


In [4]:
# total number of bank and address in the dataset

numAddress = data['address'].nunique()
numBank = data['bank'].nunique()

print(f'The reviews are collected from {numBank} different banks that located around {numAddress} of places in India.')
print('These banks are: ', data.bank.unique())

The reviews are collected from 10 different banks that located around 107 of places in India.
These banks are:  ['SBI' 'Axis Bank' 'HDFC Bank' 'review' 'IDBI' 'Kotak' 'IndusInd Bank'
 'Canara Bank' 'Citibank' 'Punjab National Bank']


In [5]:
# some of the data without 'bank' name and is named as 'review' in the dataset
# convert these data to 'Unknown'

data['bank'].replace('review','Unknown', inplace=True)

In [6]:
location_mapping = {'Agartala': 'Birmingham',
'Agra': 'Liverpool',
'Ahmedabad':'Glasgow',
'Ajmer':'Oldham',
'Allahabad':'Edinburgh',
'Alleppey':'Leeds',
'Amalapuram':'Bristol',
'Anantnag':'Sheffield',
'Aurangabad':'Newcastle upon Tyne',
'Bangalore':'Cardiff',
'Bareilly':'Belfast',
'Bellary': 'Nottingham',
'Bhilwara': 'Southampton',
'Bhopal': 'Brighton',
'Bulandshahr': 'Leicester',
'Chandigarh':'Coventry',
'Chennai': 'Hull',
'Chikballapura': 'Stoke-on-Trent',
'Chittoor': 'Plymouth',
'Chittorgarh': 'Wolverhampton',
'Coimbatore': 'Derby',
'Darbhanga':'Swansea',
'Dehradun': 'Aberdeen',
'Delhi': 'Manchester',
'Dhule': 'Dundee',
'Dindigul': 'Portsmouth',
'Disabled keonjhar': 'York',
'Ernakulam': 'Cambridge',
'Faridabad': 'Oxford',
'Gadag':'Reading',
'Gandhinagar': 'Blackpool',
'Ghaziabad': 'Bournemouth',
'Goa': 'Luton',
'Guntur': 'Preston',
'Gurgaon': 'Milton Keynes',
'Guwahati':'Norwich',
'Haridwar': 'Ipswich',
'Haveri': 'Swindon',
'Hindupur': 'Wigan',
'Hooghly': 'Croydon',
'Hosur':'Bolton',
'Hubli': 'Middlesborough',
'Hyderabad':'Peterborough',
'Indore':'Huddersfield',
'Jaipur':'Slough',
'Jammu':'Southend-on-Sea',
'Jamshedpur':'Watford',
'Jhunjhunu':'Newport',
'Jodhpur': 'Exeter',
'Kalaburagi':'Gloucester',
'Kanchipuram':'Blackburn',
'Kanpur':'Maidstone',
'Karnal':'Poole',
'Kochi':'Telford',
'Kolhapur':'Dundee',
'Kolkata':'Basildon',
'Kollam':'Worthing',
'Kota':'Chelmsford',
'Krishnagiri':'Colchester',
'Lucknow':'Dagenham',
'Ludhiana':'Sutton Coldfield',
'Madurai':'Rotherham',
'Mahabubnagar':'Doncaster',
'Malegaon':'Woking',
'Mangalore':'Bedford',
'Medak':'Eastbourne',
'Meerut':'Crawley',
'Mohali':'Sutton',
'Moradabad':'Warrington',
'Mumbai':'London',
'Nagpur':'Rochdale',
'Namakkal':'Basingstoke',
'Nanded':'Solihull',
'Nasik':'Walsall',
'Navi mumbai':'Bury',
'Nellore':'Hereford',
'New delhi':'Stockport',
'Noida':'Lincoln',
'Palwal':'Chesterfield',
'Panchkula':'Gillingham',
'Panipat':'Grimsby',
'Parvathipuram':'Barnsley',
'Pondicherry':'Halifax',
'Pune':'Stevenage',
'Raichur':'Harrogate',
'Raipur':'Hartlepool',
'Rajahmundry':'Burton upon Trent',
'Rangareddy':'Ashford',
'Salem':'Lowestoft',
'Shillong':'Kettering',
'Srikakulam':'Hastings',
'Surat':'St Albans',
'Thane':'Weston-super-Mare',
'Thanjavur':'St Helens',
'Thiruvananthapuram':'Chester',
'Thrissur':'Nuneaton',
'Tinsukia':'Rugby',
'Tiruchirapalli':'Accrington',
'Tirunelveli':'Lancaster',
'Tirupur':'Kings Lynn',
'Tuticorin':'Hove',
'Udhampur':'Torquay',
'Vadodara':'Guildford',
'Vijayawada':'Aylesbury',
'Visakhapatnam':'Scunthorpe',
'Vizianagaram':'Stafford',
'Warangal':'Folkestone'}

In [7]:
data = data.replace(location_mapping)

In [8]:
data

Unnamed: 0,author,date,address,bank,rating,review_title_by_user,review,bank_image,rating_title_by_user,useful_count
0,AMRENDRA T,"Mar 21, 2020",Stockport,SBI,4.0,"""Best saving""",State Bank Of India is located nearby in our a...,https://static.bankbazaar.com/images/common/ba...,Great!,133
1,BISHWA,"Mar 20, 2020",Basildon,SBI,5.0,"""Good service""","I have my salary account in SBI, when I applie...",https://static.bankbazaar.com/images/common/ba...,Blown Away!,89
2,SANTOSH,"Mar 20, 2020",Croydon,Axis Bank,5.0,"""Excellent Service""",I am using Axis bank saving account for the p...,https://static.bankbazaar.com/images/common/ba...,Blown Away!,48
3,MAHADEV,"Mar 20, 2020",Stevenage,HDFC Bank,5.0,"""Excellent service""",I have my salary bank account in HDFC bank for...,https://static.bankbazaar.com/images/common/ba...,Blown Away!,52
4,R,"Mar 20, 2020",Cardiff,Unknown,5.0,"""Good account""","Close to around 10 years, I am holding this Co...",https://static.bankbazaar.com/images/common/ba...,Blown Away!,22
...,...,...,...,...,...,...,...,...,...,...
995,A L,"Nov 26, 2019",Cambridge,Unknown,3.5,"""Good network of ATM""",I am holding a saving account with FEDERAL ban...,https://static.bankbazaar.com/images/common/ba...,Pretty good,3
996,A L,"Nov 26, 2019",Cambridge,Axis Bank,4.0,"""Mobile app is good""",There is no mandatory balance to keep in my Ax...,https://static.bankbazaar.com/images/common/ba...,Great!,0
997,ANONYMS,"Nov 26, 2019",Cardiff,Axis Bank,4.0,"""Unhappy with the charges""","In Axis bank, every month they are charging me...",https://static.bankbazaar.com/images/common/ba...,Great!,0
998,SAJJANOJU,"Nov 26, 2019",Peterborough,Axis Bank,5.0,"""Good Bank""",I have a salary account with AXIS bank and I h...,https://static.bankbazaar.com/images/common/ba...,Blown Away!,0


In [9]:
words_to_be_replaced = {'Axis Bank': 'Lloyds Bank', 'Axis bank':'Lloyds Bank', 'Axis':'Lloyds','Canara Bank':'Metro Bank', 'Citibank':'Halifax','HDFC bank':'HSBC','HDFC':'HSBC','HDFC Bank':'HSBC','IDBI':'Monzo','Indusind Bank':'NatWest','Indusind':'NatWest', 'IndusInd Bank':'NatWest','Kotak':'Barclays','Punjab National Bank': 'Nationwide','SBI':'Santander', 'rupees': 'pounds', 'rupee':'pound', 'Punjab':'England','Maharastra':'Scotland', 'India':'UK', 'State Bank Of India':'UK National Bank', 'AXIS bank':'Lloyds Bank', 'FEDERAL':'Royal Bank of Scotland'}

In [10]:
data = data.replace(words_to_be_replaced)

In [11]:
data.head()

Unnamed: 0,author,date,address,bank,rating,review_title_by_user,review,bank_image,rating_title_by_user,useful_count
0,AMRENDRA T,"Mar 21, 2020",Stockport,Santander,4.0,"""Best saving""",State Bank Of India is located nearby in our a...,https://static.bankbazaar.com/images/common/ba...,Great!,133
1,BISHWA,"Mar 20, 2020",Basildon,Santander,5.0,"""Good service""","I have my salary account in SBI, when I applie...",https://static.bankbazaar.com/images/common/ba...,Blown Away!,89
2,SANTOSH,"Mar 20, 2020",Croydon,Lloyds Bank,5.0,"""Excellent Service""",I am using Axis bank saving account for the p...,https://static.bankbazaar.com/images/common/ba...,Blown Away!,48
3,MAHADEV,"Mar 20, 2020",Stevenage,HSBC,5.0,"""Excellent service""",I have my salary bank account in HDFC bank for...,https://static.bankbazaar.com/images/common/ba...,Blown Away!,52
4,R,"Mar 20, 2020",Cardiff,Unknown,5.0,"""Good account""","Close to around 10 years, I am holding this Co...",https://static.bankbazaar.com/images/common/ba...,Blown Away!,22


In [12]:
for old_word, new_word in words_to_be_replaced.items():
    data['review'] = data['review'].str.replace(old_word, new_word)

In [13]:
data['review']

0      State Bank Of UK is located nearby in our area...
1      I have my salary account in Santander, when I ...
2      I am using Lloyds Bank saving account for the ...
3      I have my salary bank account in HSBC for many...
4      Close to around 10 years, I am holding this Co...
                             ...                        
995    I am holding a saving account with Royal Bank ...
996    There is no mandatory balance to keep in my Ll...
997    In Lloyds Bank, every month they are charging ...
998    I have a salary account with Lloyds Bank and I...
999    Opened the savings account with Union bank of ...
Name: review, Length: 1000, dtype: object

In [14]:
data['bank'].unique()

array(['Santander', 'Lloyds Bank', 'HSBC', 'Unknown', 'Monzo', 'Barclays',
       'NatWest', 'Metro Bank', 'Halifax', 'Nationwide'], dtype=object)

In [15]:
data.to_csv('data_new.csv', index = False)