In [134]:
import pandas as pd
import numpy as np

In [135]:
# Load the source table from helpers.csv (path is relative to the working directory).
data = pd.read_csv("helpers.csv")

In [136]:
# Columns kept from the loaded table, in display order.
columns = [
    "Name",
    "Organisation",
    "Contact",
    "Interest",
]

In [137]:
# Keep every row, but only the columns named in `columns`.
data = data.loc[:, columns]

In [138]:
# Discard rows in which every kept column is missing (rows are the default axis).
data = data.dropna(how='all')

In [139]:
# Renumber the remaining rows 0..n-1, discarding the old index labels.
data = data.reset_index(drop=True)

In [140]:
# Raw interest strings with "/" removed and surrounding whitespace trimmed.
features = (
    data["Interest"]
    .str.replace("/", '')
    .str.strip()
)

In [141]:
# Interest categories recognised by the matcher (one per line, alphabetical).
selected_features = [
    'Access to Information',
    'Citizenship',
    'Corruption',
    'Criminal Justice',
    'Economic Empowerment',
    'Education',
    'Environmental Justice',
    'Family',
    'Gender-based violence',
    'Generalist Legal Services',
    'Governance',
    'Health',
    "Women's Rights",
]

In [142]:
def transform(feature, allowed=None):
    """Reduce a comma-separated interest string to the recognised categories.

    Parameters
    ----------
    feature : str
        Comma-separated category names. ``None``, ``NaN`` and any other
        non-string value are treated as "no interests".
    allowed : iterable of str, optional
        Category names to keep. Defaults to the notebook-level
        ``selected_features`` list.

    Returns
    -------
    str
        The recognised categories, whitespace-trimmed, in their original
        order and joined with ``','`` (empty string when nothing matches).
    """
    if allowed is None:
        allowed = selected_features
    # A missing "Interest" value can reach this function as NaN (a float),
    # which has no .split(); return the empty selection instead of raising.
    if not isinstance(feature, str):
        return ''
    names = (part.strip() for part in feature.split(","))
    return ','.join(name for name in names if name in allowed)

In [143]:
# Filter every interest string down to the recognised categories.
features = features.map(transform)

In [144]:
# One 0/1 indicator column per category. reindex pins the column set and order
# to selected_features, so a category that no row mentions still gets an
# all-zero column and query vectors built from selected_features line up.
features = features.str.get_dummies(sep=',').reindex(columns=selected_features, fill_value=0)

In [145]:
# Inspect the indicator column labels.
features.columns

Index(['Access to Information', 'Citizenship', 'Corruption',
       'Criminal Justice', 'Economic Empowerment', 'Education',
       'Environmental Justice', 'Family', 'Gender-based violence',
       'Generalist Legal Services', 'Governance', 'Health', 'Women's Rights'],
      dtype='object')

In [146]:
# (rows, columns) of the indicator matrix.
features.shape

(18, 13)

In [147]:
from sklearn.neighbors import NearestNeighbors

In [148]:
# Nearest-neighbour searcher configured with n_neighbors=6.
knn = NearestNeighbors(n_neighbors=6)

In [149]:
# Fit the searcher on the indicator matrix.
knn.fit(features)

NearestNeighbors(algorithm='auto', leaf_size=30, metric='minkowski',
                 metric_params=None, n_jobs=None, n_neighbors=6, p=2,
                 radius=1.0)

In [150]:
# Display the estimator's repr.
knn

NearestNeighbors(algorithm='auto', leaf_size=30, metric='minkowski',
                 metric_params=None, n_jobs=None, n_neighbors=6, p=2,
                 radius=1.0)

In [151]:
# All-zero query with one slot per indicator column (length taken from the
# feature matrix rather than a hard-coded 13).
query = [0] * features.shape[1]

In [152]:
# Switch on positions 3, 5 and 8 (Criminal Justice, Education and
# Gender-based violence in the column listing shown above).
query[3] = query[5] = query[8] = 1

In [153]:
# Turn the flat list into a single-row 2-D array.
query = np.reshape(np.array(query), (1, -1))

In [154]:
# Look up the neighbours of the query; the first returned value is ignored
# and the second is kept as `indices`.
_ , indices = knn.kneighbors(query)

In [155]:
# First entry of the first (only) result row.
top_match = indices[0,0]

In [156]:
# Show the best match by its computed position instead of a hard-coded row
# number, so the cell stays correct when the data or query changes.
data.iloc[top_match]

Name                                              Md. Tajul Islam
Organisation      Bangladesh Legal Aid and Services Trust (BLAST)
Contact         https://www.linkedin.com/company/bangladesh-le...
Interest        Access to Information, Citizenship, Criminal J...
Name: 16, dtype: object

In [157]:
# sklearn.externals.joblib was deprecated in scikit-learn 0.21 and removed in
# 0.23; joblib is a standalone package, so import it directly and fall back to
# the vendored copy only on old installations.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib


In [158]:
# Persist the fitted searcher to knn.pickle in the working directory.
joblib.dump(knn,'knn.pickle')

['knn.pickle']

In [159]:
# Blank out missing organisation/contact values. Assigning the result back
# avoids calling fillna(inplace=True) on a column selection, which can act on
# a temporary object (pandas warns about it, and under Copy-on-Write it no
# longer updates `data`).
data = data.fillna({'Organisation': '', 'Contact': ''})

In [160]:
def make_description(lawyer_id):
    """Build a one-line description for the row at position ``lawyer_id``.

    The row is looked up positionally in the notebook-level ``data`` frame;
    its Name, Organisation and Contact values are each converted with ``str``
    and joined with '.'.
    """
    record = data.iloc[lawyer_id]
    parts = [str(record[field]) for field in ("Name", "Organisation", 'Contact')]
    return '.'.join(parts)
    

In [161]:
# Spot-check the formatter on row 16.
make_description(16)

'Md. Tajul Islam.Bangladesh Legal Aid and Services Trust (BLAST).https://www.linkedin.com/company/bangladesh-legal-aid-and-services-trust'

In [162]:
# Example free-text request used to exercise make_query below.
user_input = "I want a lawyer who works on family and gender-based violence"

In [171]:
def make_query(text, categories=None):
    """Encode free text as a 0/1 vector over the known categories.

    Parameters
    ----------
    text : str
        The request to encode. Matching is case-insensitive.
    categories : iterable of str, optional
        Category names to test for, in output order. Defaults to the
        notebook-level ``selected_features`` list.

    Returns
    -------
    list of int
        One entry per category: 1 if the category name occurs as a substring
        of ``text``, otherwise 0.
    """
    if categories is None:
        categories = selected_features
    # Match against the argument (not the global `user_input`), lower-casing
    # both sides so capitalised input still matches.
    haystack = text.lower()
    return [1 if category.lower() in haystack else 0 for category in categories]

In [172]:
# Encode the example request as a list of 0/1 flags.
query = make_query(user_input)

In [173]:
# Reshape the encoded request into a single-row 2-D array. `result` only
# exists inside make_query, so the value returned above (`query`) is used.
query = np.array(query).reshape(1,-1)

In [174]:
# Confirm the query is a single row.
query.shape

(1, 13)

In [175]:
# Look up the neighbours of the encoded request; only the second returned
# value is kept.
_ , indices = knn.kneighbors(query)

In [176]:
# First entry of the first (only) result row.
indices[0][0]

9

In [177]:
# Describe the best match using the computed index rather than a number
# copied from the previous cell's output.
make_description(indices[0][0])

'Saila Yesmin.Bangladesh Legal Aid and Services Trust (BLAST).'

In [178]:
# Reload the searcher from knn.pickle (the file written by joblib.dump above).
# NOTE(review): only load pickle files from a trusted source.
knn2 = joblib.load("knn.pickle")

In [179]:
# Repeat the lookup with the reloaded searcher.
_ , indices = knn2.kneighbors(query)

In [180]:
# Display the full index array returned by the reloaded searcher.
indices

array([[ 9, 11,  5,  3,  8, 10]], dtype=int64)