In [134]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
import pickle
from sklearn.metrics import accuracy_score
from sklearn.metrics.pairwise import cosine_similarity


In [135]:
bldata = pd.read_csv("bloodbank.csv", encoding = "latin1")
bldata.head()

Unnamed: 0,Sr No,Blood Bank Name,State,District,City,Address,Pincode,Contact No,Mobile,Helpline,...,Qualification Nodal Officer,Category,Blood Component Available,Apheresis,Service Time,License #,Date License Obtained,Date of Renewal,Latitude,Longitude
0,1,G.B. Pant Hospital Blood bank,Andaman And Nicobar Islands,SOUTH ANDAMAN,Port Blair,Atlanta Point\r\nNear Cellular Jail\r\nP.O. Ab...,744104,03192 230628,9434266561,102,...,,Government,YES,NO,24X7,DL1/CLAA/ANI,14.6.1996,01.01.2012,11.675442,92.747338
1,2,I.N.H.S. Dhanvantri,Andaman And Nicobar Islands,SOUTH ANDAMAN,Port Blair,"Ashvini Nagar, Port Blair, Andaman and Nicobar...",744103,03192 248759,8001452200,,...,"MBBS, MD (Pathology)",Government,NO,NO,24X7,DL2/CLAA/ANI,14.06.1996,31.12.2016,11.649693,92.717418
2,3,Pillar Health Centre Blood Bank,Andaman And Nicobar Islands,SOUTH ANDAMAN,Port Blair,"Lamba Line,\r\nP.B. 526,\r\nP.O. Junglighat, \...",744104,"03192 233193, 03192 233993",9474247585,03192 233193,...,"MBBS, MD (Medicine)",Charity,NO,NO,24X7,DL3/CLAA/ANI,08.09.2010,10.06.2015,11.653229,92.730714
3,4,"Indian Red Cross Society Blood Bank, District ...",Andhra Pradesh,ANANTAPUR,Anantapur,"Near JNTU Engeneering College, Sarada Nagar, A...",515002,08554 246344,9441192365,,...,"MBBS, MD, DCP",Charity,YES,NO,24x7,13/ATP/AP/2005/BB/R,22.11.2005,21.11.2015,14.654936,77.609915
4,5,Government General Hospital Blood Bank,Andhra Pradesh,ANANTAPUR,Ananthapur,"Government General Hospital, Ananthapur",515001,08554 275024,9866695858,,...,"MBBS, MD (Pathology)",Government,YES,NO,24x7,50/ATP/AP/97/BB/R,28.01.1997,01.01.2013,14.671459,77.596684


# Step first is Data Cleaning

In [136]:
bldata.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2823 entries, 0 to 2822
Data columns (total 27 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Sr No                         2823 non-null   int64  
 1    Blood Bank Name              2823 non-null   object 
 2    State                        2823 non-null   object 
 3    District                     2822 non-null   object 
 4    City                         2803 non-null   object 
 5    Address                      2754 non-null   object 
 6   Pincode                       2651 non-null   object 
 7    Contact No                   2524 non-null   object 
 8    Mobile                       2149 non-null   object 
 9    Helpline                     209 non-null    object 
 10   Fax                          494 non-null    object 
 11   Email                        1557 non-null   object 
 12   Website                      584 non-null    object 
 13   No

In [137]:
bldata.isna().sum()

Sr No                              0
 Blood Bank Name                   0
 State                             0
 District                          1
 City                             20
 Address                          69
Pincode                          172
 Contact No                      299
 Mobile                          674
 Helpline                       2614
 Fax                            2329
 Email                          1266
 Website                        2239
 Nodal Officer                   586
 Contact Nodal Officer          1501
 Mobile Nodal Officer            683
 Email Nodal Officer             832
 Qualification Nodal Officer     972
 Category                         79
 Blood Component Available       418
 Apheresis                       768
 Service Time                    402
 License #                       594
 Date License Obtained           698
 Date of Renewal                 680
 Latitude                          0
 Longitude                         0
d

In [138]:
bldata.shape

(2823, 27)

In [139]:
# copy data for modification
data = bldata.copy()

In [140]:
# remove white spaces from columns 
data.columns = data.columns.str.strip()
data.columns.tolist()

['Sr No',
 'Blood Bank Name',
 'State',
 'District',
 'City',
 'Address',
 'Pincode',
 'Contact No',
 'Mobile',
 'Helpline',
 'Fax',
 'Email',
 'Website',
 'Nodal Officer',
 'Contact Nodal Officer',
 'Mobile Nodal Officer',
 'Email Nodal Officer',
 'Qualification Nodal Officer',
 'Category',
 'Blood Component Available',
 'Apheresis',
 'Service Time',
 'License #',
 'Date License Obtained',
 'Date of Renewal',
 'Latitude',
 'Longitude']

In [141]:
data.drop(["Date of Renewal","Qualification Nodal Officer","Category","Blood Component Available","Apheresis","Service Time","License #","Date License Obtained","Date of Renewal"], axis = 1, inplace = True)
data.columns.tolist()

['Sr No',
 'Blood Bank Name',
 'State',
 'District',
 'City',
 'Address',
 'Pincode',
 'Contact No',
 'Mobile',
 'Helpline',
 'Fax',
 'Email',
 'Website',
 'Nodal Officer',
 'Contact Nodal Officer',
 'Mobile Nodal Officer',
 'Email Nodal Officer',
 'Latitude',
 'Longitude']

In [142]:
# remove rows which does not have any location
data.dropna(subset=['City','District'], inplace = True)
data.isna().sum()

Sr No                       0
Blood Bank Name             0
State                       0
District                    0
City                        0
Address                    67
Pincode                   166
Contact No                297
Mobile                    662
Helpline                 2593
Fax                      2309
Email                    1248
Website                  2218
Nodal Officer             573
Contact Nodal Officer    1488
Mobile Nodal Officer      669
Email Nodal Officer       818
Latitude                    0
Longitude                   0
dtype: int64

In [143]:
# remove rows where user can't contact
missing_data = data[data['Mobile'].isna() & data['Helpline'].isna() & data['Contact No'].isna()]
data.drop(missing_data.index, inplace = True)
data.drop(columns=['Fax'], axis = 1, inplace=True)

In [144]:
data.drop(['Website'],axis = 1, inplace = True)
data.head(10)

Unnamed: 0,Sr No,Blood Bank Name,State,District,City,Address,Pincode,Contact No,Mobile,Helpline,Email,Nodal Officer,Contact Nodal Officer,Mobile Nodal Officer,Email Nodal Officer,Latitude,Longitude
0,1,G.B. Pant Hospital Blood bank,Andaman And Nicobar Islands,SOUTH ANDAMAN,Port Blair,Atlanta Point\r\nNear Cellular Jail\r\nP.O. Ab...,744104,03192 230628,9434266561,102,bbgbpant@gmail.com,Dr.Ganesh Samaddar,03192 230628,9434266561,ganeshsamadder7@gmail.com,11.675442,92.747338
1,2,I.N.H.S. Dhanvantri,Andaman And Nicobar Islands,SOUTH ANDAMAN,Port Blair,"Ashvini Nagar, Port Blair, Andaman and Nicobar...",744103,03192 248759,8001452200,,,Dr.Smiriti Mathur,03192 248759,8001452200,mesmriti@yahoo.com,11.649693,92.717418
2,3,Pillar Health Centre Blood Bank,Andaman And Nicobar Islands,SOUTH ANDAMAN,Port Blair,"Lamba Line,\r\nP.B. 526,\r\nP.O. Junglighat, \...",744104,"03192 233193, 03192 233993",9474247585,03192 233193,pillarbloodbank2016@gmail.com,Dr. Sr. Cross Mary,"03192 233193, 03192 233993",9474247585,crossgracy2003@yahoo.co.in,11.653229,92.730714
3,4,"Indian Red Cross Society Blood Bank, District ...",Andhra Pradesh,ANANTAPUR,Anantapur,"Near JNTU Engeneering College, Sarada Nagar, A...",515002,08554 246344,9441192365,,,Dr.G. Parvathi & Dr.G. Ramanjaneyulu Naidu,,"09441125444, 09440286555",ircsbloodbank.anantapur@gmail.com,14.654936,77.609915
4,5,Government General Hospital Blood Bank,Andhra Pradesh,ANANTAPUR,Ananthapur,"Government General Hospital, Ananthapur",515001,08554 275024,9866695858,,bloodbankgghatp@gmail.com,Dr. Shiva kumar & Dr. Swapna,08554 275024,"09866695858, 09912938380",bloodbankgghatp@gmail.com,14.671459,77.596684
5,6,Indian Red Cross Society,Andhra Pradesh,ANANTAPUR,Ananthapur,"Blood Bank Medical Officer,Blood Bank, Indian ...",515001,8554246344,Dist.Secretary 9866023293,,ircsbloodbank.anantapur@gmail.com,,,,,14.681888,77.600591
6,7,Rural Development Trust Hospital Blood Bank,Andhra Pradesh,ANANTAPUR,Bathalapalli,"Kadiri Road, Bathalapalli, Ananthapur",515661,08559 244259,9493268861,,,Dr. Hariharanadha Sharma,,9177155517,syanakishore@yahoo.co.in,14.652907,77.618826
7,8,South Central Railway Hospital Blood Bank,Andhra Pradesh,ANANTAPUR,Guntakal,"S.C. Railway Hospital, Guntakal, Ananthapuramu",515801,08552 227166,9701374501,,,,,,scrhbloodbank@gmail.com\t\t\t\t\t,15.172889,77.366103
8,9,Indian Red Cross Society Blood Bank,Andhra Pradesh,ANANTAPUR,Hindupur,"Government General Hospital Blood Bank, Hindup...",515201,08556 225900,,,ircshup@gmail.com,Dr. M. Satyam,9490978739,9490978739,ircshup@gmail.com,13.832533,77.492214
9,10,Indian Red Cross Society Blood bank RCH -II KA...,Andhra Pradesh,ANANTAPUR,Kadiri,"Ground Floor, APVVP (Government General Hospit...",515591,08494 221544,9989738689,,kadiriircs@gmail.com,Dr. G. P. Subbrayudu\t,,9989738689,kadiriircs@gmail.com,14.113089,78.157366


In [145]:
df1 = pd.DataFrame(columns= ['name','latitude','longitude'])
df1['name'] = data['Blood Bank Name']
df1['latitude'] = data['Latitude']
df1['longitude'] = data['Longitude']
df1.head()

Unnamed: 0,name,latitude,longitude
0,G.B. Pant Hospital Blood bank,11.675442,92.747338
1,I.N.H.S. Dhanvantri,11.649693,92.717418
2,Pillar Health Centre Blood Bank,11.653229,92.730714
3,"Indian Red Cross Society Blood Bank, District ...",14.654936,77.609915
4,Government General Hospital Blood Bank,14.671459,77.596684


# Step 2 Data Manupulation 

In [146]:
data['contact info'] = data['Contact No'].fillna('') + ', ' + data['Mobile'].fillna('') + ', ' + data['Helpline'].fillna('') + ', ' + data['Email'].fillna('')

data.drop(["Contact No","Mobile","Helpline","Email","Sr No"], axis = 1, inplace = True)

In [147]:
data.dropna(subset=['Pincode'], inplace = True)
data.shape

(2584, 13)

In [148]:
data['loc'] = data['State'].fillna('') +' ' + data['District'].fillna('') + ' ' + data['City'].fillna('') + ' ' + data['Pincode'].fillna('')
# df['address'] = df['Address'].fillna('') + ', ' + df['City'].fillna('') + '- ' + df['Pincode'].fillna('') + ', ' + df['District'].fillna('')

In [149]:
data['loc'] = data['loc'].str.lower()

In [150]:
# data['loc'] = data['loc'].str.replace(' ', '')

In [151]:
data.head(5)

Unnamed: 0,Blood Bank Name,State,District,City,Address,Pincode,Nodal Officer,Contact Nodal Officer,Mobile Nodal Officer,Email Nodal Officer,Latitude,Longitude,contact info,loc
0,G.B. Pant Hospital Blood bank,Andaman And Nicobar Islands,SOUTH ANDAMAN,Port Blair,Atlanta Point\r\nNear Cellular Jail\r\nP.O. Ab...,744104,Dr.Ganesh Samaddar,03192 230628,9434266561,ganeshsamadder7@gmail.com,11.675442,92.747338,"03192 230628, 9434266561, 102, bbgbpant@gmail.com",andaman and nicobar islands south andaman port...
1,I.N.H.S. Dhanvantri,Andaman And Nicobar Islands,SOUTH ANDAMAN,Port Blair,"Ashvini Nagar, Port Blair, Andaman and Nicobar...",744103,Dr.Smiriti Mathur,03192 248759,8001452200,mesmriti@yahoo.com,11.649693,92.717418,"03192 248759, 8001452200, ,",andaman and nicobar islands south andaman port...
2,Pillar Health Centre Blood Bank,Andaman And Nicobar Islands,SOUTH ANDAMAN,Port Blair,"Lamba Line,\r\nP.B. 526,\r\nP.O. Junglighat, \...",744104,Dr. Sr. Cross Mary,"03192 233193, 03192 233993",9474247585,crossgracy2003@yahoo.co.in,11.653229,92.730714,"03192 233193, 03192 233993, 9474247585, 03192 ...",andaman and nicobar islands south andaman port...
3,"Indian Red Cross Society Blood Bank, District ...",Andhra Pradesh,ANANTAPUR,Anantapur,"Near JNTU Engeneering College, Sarada Nagar, A...",515002,Dr.G. Parvathi & Dr.G. Ramanjaneyulu Naidu,,"09441125444, 09440286555",ircsbloodbank.anantapur@gmail.com,14.654936,77.609915,"08554 246344, 9441192365, ,",andhra pradesh anantapur anantapur 515002
4,Government General Hospital Blood Bank,Andhra Pradesh,ANANTAPUR,Ananthapur,"Government General Hospital, Ananthapur",515001,Dr. Shiva kumar & Dr. Swapna,08554 275024,"09866695858, 09912938380",bloodbankgghatp@gmail.com,14.671459,77.596684,"08554 275024, 9866695858, , bloodbankgghatp@gm...",andhra pradesh anantapur ananthapur 515001


# model Logic Building 

In [152]:
cv = CountVectorizer(max_features = 3000)

In [153]:
vectors = cv.fit_transform(data['loc']).toarray()

In [154]:
similarity = cosine_similarity(vectors)

In [155]:
sorted(list(enumerate(similarity[0])), reverse = True, key = lambda x:x[1])[:3]

[(0, 1.0), (2, 1.0), (1, 0.9090909090909093)]

In [156]:
def nearest_bank_recommendation(user_query):
    bank_index = data[data['loc'] == user_query].index[0]
    distance = similarity[bank_index]
    bank_list = sorted(list(enumerate(distance)), reverse = True, key = lambda x:x[1])[:3]
    
    for i in bank_list:
        print("- ", data.iloc[i[0]]['Blood Bank Name'])
        print("  State:", data.iloc[i[0]]['State'])
        print("  City:", data.iloc[i[0]]['City'])
        print("  Pincode:", data.iloc[i[0]]['Pincode'])
        print("  Contact Info:", data.iloc[i[0]]['contact info'])
        print("  Nodal Officer:", data.iloc[i[0]]['Contact Nodal Officer'])
        print("  Nodal officer Mobile:", data.iloc[i[0]]['Mobile Nodal Officer'])
        print("  Nodal Officer Email:", data.iloc[i[0]]['Email Nodal Officer'])
        print()

    
#     recommended = bank_list['Blood Bank Name','State', 'City']

In [157]:
nearest_bank_recommendation('andhra pradesh anantapur anantapur 515002')
# data['loc'][3]

-  Indian Red Cross Society Blood Bank, District Branch
  State: Andhra Pradesh
  City: Anantapur
  Pincode: 515002
  Contact Info: 08554 246344, 9441192365, , 
  Nodal Officer: nan
  Nodal officer Mobile: 09441125444, 09440286555
  Nodal Officer Email: ircsbloodbank.anantapur@gmail.com

-  Government General Hospital Blood Bank
  State: Andhra Pradesh
  City: Ananthapur
  Pincode: 515001
  Contact Info: 08554 275024, 9866695858, , bloodbankgghatp@gmail.com
  Nodal Officer: 08554 275024
  Nodal officer Mobile: 09866695858, 09912938380
  Nodal Officer Email: bloodbankgghatp@gmail.com

-  Indian Red Cross Society
  State: Andhra Pradesh
  City: Ananthapur
  Pincode: 515001
  Contact Info: 8554246344, Dist.Secretary 9866023293, , ircsbloodbank.anantapur@gmail.com
  Nodal Officer: nan
  Nodal officer Mobile: nan
  Nodal Officer Email: nan



In [158]:
pickle.dump(data.to_dict(), open('bank.pkl','wb'))

In [159]:
pickle.dump(similarity,open('similarity.pkl','wb'))