# Fuzzy matching algorithm

* https://towardsdatascience.com/fuzzy-matching-at-scale-84f2bfd0c536
* https://bergvca.github.io/2017/10/14/super-fast-string-matching.html

In [54]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors
import re

In [55]:
import numpy as np
from scipy.sparse import csr_matrix
import sparse_dot_topn.sparse_dot_topn as ct

In [56]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [57]:
import pandas as pd
import numpy as np
import os
import pickle #optional - for saving outputs => best for large dataset
import re
from tqdm import tqdm # used for progress bars (optional)
import time

#ngram best result

import re

from ftfy import fix_text #  text cleaning for decode issues..
#transforms company names with assumptions taken from: http://www.legislation.gov.uk/uksi/2015/17/regulation/2/made
def ngrams(string, n=3):
    """Takes an input string, cleans it and converts to ngrams. 
    This script is focussed on cleaning UK company names but can be made generic by removing lines below"""
    string = str(string)
    string = string.lower() # lower case
    string = fix_text(string) # fix text
    string = string.split('t/a')[0] # split on 'trading as' and return first name only
    #string = string.split('trading as')[0] # split on 'trading as' and return first name only
    string = string.encode("ascii", errors="ignore").decode() #remove non ascii chars
    chars_to_remove = [")","(",".","|","[","]","{","}","'","-"]
    rx = '[' + re.escape(''.join(chars_to_remove)) + ']' #remove punc, brackets etc...
    string = re.sub(rx, '', string)
    string = string.replace('&', 'and')
    #string = string.replace('limited', 'ltd')
    #string = string.replace('public limited company', 'plc')
    #string = string.replace('united kingdom', 'uk')
    #string = string.replace('community interest company', 'cic')
    string = string.title() # normalise case - capital at start of each word
    string = re.sub(' +',' ',string).strip() # get rid of multiple spaces and replace with a single
    string = ' '+ string +' ' # pad names for ngrams...
    ngrams = zip(*[string[i:] for i in range(n)])
    return [''.join(ngram) for ngram in ngrams]

Collecting ftfy
  Using cached https://files.pythonhosted.org/packages/af/da/d215a091986e5f01b80f5145cff6f22e2dc57c6b048aab2e882a07018473/ftfy-6.0.3.tar.gz
Building wheels for collected packages: ftfy
  Building wheel for ftfy (setup.py) ... [?25ldone
[?25h  Created wheel for ftfy: filename=ftfy-6.0.3-cp38-none-any.whl size=41933 sha256=5e5d5a0a2f8d5b4f9ca0ea3e3d662d59edbc37cef2112f439c433314e9c61758
  Stored in directory: /Users/guillaume/Library/Caches/pip/wheels/99/2c/e6/109c8a28fef7a443f67ba58df21fe1d0067ac3322e75e6b0b7
Successfully built ftfy
Installing collected packages: ftfy
Successfully installed ftfy-6.0.3
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [58]:
#output data
#df = pd.read_csv('./data/training_data.csv',error_bad_lines=False,encoding='cp1252',sep="\t")
df_x = pd.read_csv('./data/input_x.csv',error_bad_lines=False,encoding='utf-8',sep="\t").iloc[1:,:]
df_y = pd.read_csv('./data/input_y.csv',error_bad_lines=False,encoding='ISO 8859-1',sep="\t",header=None)

df_x['author'] = df_x['author'].astype('string')
df_y['author'] = df_y[1].astype('string')

del df_x['id']
del df_y[0]
del df_y[1]


df_merge = df_x.append(df_y, ignore_index=True)
df_merge = df_merge.dropna()

In [79]:
df_y.shape

(8302, 1)

In [68]:
artist_name = list(df_x['author'].dropna().unique())
vectorizer = TfidfVectorizer(min_df=1, analyzer=ngrams)
tf_idf_matrix = vectorizer.fit_transform(artist_name)
tf_idf_matrix

<50745x11912 sparse matrix of type '<class 'numpy.float64'>'
	with 672201 stored elements in Compressed Sparse Row format>

In [73]:
messy_artist = list(df_y['author'].unique())
messy_tf_idf_matrix = vectorizer.transform(messy_artist)

In [74]:
!pip install nmslib
import nmslib
from scipy.sparse import csr_matrix # may not be required 
from scipy.sparse import rand # may not be required


# create a random matrix to index
data_matrix = tf_idf_matrix#[0:1000000]

# Set index parameters
# These are the most important ones
M = 80
efC = 1000

num_threads = 4 # adjust for the number of threads
# Intitialize the library, specify the space, the type of the vector and add data points 
index = nmslib.init(method='simple_invindx', space='negdotprod_sparse_fast', data_type=nmslib.DataType.SPARSE_VECTOR) 

index.addDataPointBatch(data_matrix)
# Create an index
start = time.time()
index.createIndex() 
end = time.time() 
print('Indexing time = %f' % (end-start))

You should consider upgrading via the 'pip install --upgrade pip' command.[0m
Indexing time = 0.061519


In [75]:
# Number of neighbors => use Knn
num_threads = 4
K=1
query_matrix = messy_tf_idf_matrix
start = time.time() 
query_qty = query_matrix.shape[0]
nbrs = index.knnQueryBatch(query_matrix, k = K, num_threads = num_threads)
end = time.time() 
print('kNN time total=%f (sec), per query=%f (sec), per query adjusted for thread number=%f (sec)' % 
      (end-start, float(end-start)/query_qty, num_threads*float(end-start)/query_qty))

kNN time total=1.304228 (sec), per query=0.000180 (sec), per query adjusted for thread number=0.000719 (sec)


In [80]:
mts =[]
for i in range(len(nbrs)):
  origional_nm = messy_artist[i]
  try:
    matched_nm   = artist_name[nbrs[i][0][0]]
    conf         = nbrs[i][1][0]
  except:
    matched_nm   = "no match found"
    conf         = None
  mts.append([origional_nm,matched_nm,conf])

mts = pd.DataFrame(mts,columns=['origional_name','matched_name','conf'])
#results = df_CF.merge(mts,left_on='buyer',right_on='origional_name')
mts['conf'] = -mts['conf']
mts

Unnamed: 0,origional_name,matched_name,conf
0,Allen Eskens,Allen Eskens,1.000000
1,C.D. Reiss,C.D. Rose,0.668106
2,Catherine Gayle,Catherine,0.704948
3,Daniel J. Siegel,Daniel J. Siegel,1.000000
4,Debbie Macomber,Debbie Macomber,1.000000
...,...,...,...
7254,Zondervan,Zondervan,1.000000
7255,Zora Neale Hurston,Zora Neale Hurston,1.000000
7256,edited by Brian Kram,Brian Kreb,0.326442
7257,âConsumer Reportsâ,Bleacher Report,0.472838


In [81]:
mts.sort_values(["conf"], ascending=False).head(20)

Unnamed: 0,origional_name,matched_name,conf
1289,Ciji Ware,Ciji Ware,1.0
1541,Dav Pilkey,Dav Pilkey,1.0
2207,Eric Schlosser,Eric Schlosser,1.0
364,Angela Liddon,Angela Liddon,1.0
745,Bill OâReilly,Bill O'Reilly,1.0
744,Bill OReilly,Bill O'Reilly,1.0
741,Bill O'Reilly,Bill O'Reilly,1.0
3122,Jefferson Bass,Jefferson Bass,1.0
4329,Lindsay McKenna,Lindsay McKenna,1.0
1008,Carole Mortimer,Carole Mortimer,1.0


In [21]:
import numpy as np
from scipy.sparse import csr_matrix
!pip install sparse_dot_topn #uncomment to install
import sparse_dot_topn.sparse_dot_topn as ct


def awesome_cossim_top(A, B, ntop, lower_bound=0):
    # force A and B as a CSR matrix.
    # If they have already been CSR, there is no overhead
    A = A.tocsr()
    B = B.tocsr()
    M, _ = A.shape
    _, N = B.shape
 
    idx_dtype = np.int32
 
    nnz_max = M*ntop
 
    indptr = np.zeros(M+1, dtype=idx_dtype)
    indices = np.zeros(nnz_max, dtype=idx_dtype)
    data = np.zeros(nnz_max, dtype=A.dtype)

    ct.sparse_dot_topn(
        M, N, np.asarray(A.indptr, dtype=idx_dtype),
        np.asarray(A.indices, dtype=idx_dtype),
        A.data,
        np.asarray(B.indptr, dtype=idx_dtype),
        np.asarray(B.indices, dtype=idx_dtype),
        B.data,
        ntop,
        lower_bound,
        indptr, indices, data)

    return csr_matrix((data,indices,indptr),shape=(M,N))



In [25]:
from sklearn.feature_extraction.text import TfidfVectorizer

artist_names = df_y[1].dropna().unique()
vectorizer = TfidfVectorizer(min_df=1, analyzer=ngrams)
tf_idf_matrix = vectorizer.fit_transform(artist_names)

matches = awesome_cossim_top(tf_idf_matrix, tf_idf_matrix.transpose(), 10, 0.85)

In [27]:
def get_matches_df(sparse_matrix, name_vector, top=100):
    non_zeros = sparse_matrix.nonzero()
    
    sparserows = non_zeros[0]
    sparsecols = non_zeros[1]
    
    if top:
        nr_matches = top
    else:
        nr_matches = sparsecols.size
    
    left_side = np.empty([nr_matches], dtype=object)
    right_side = np.empty([nr_matches], dtype=object)
    similairity = np.zeros(nr_matches)
    
    for index in range(0, nr_matches):
        left_side[index] = name_vector[sparserows[index]]
        right_side[index] = name_vector[sparsecols[index]]
        similairity[index] = sparse_matrix.data[index]
    
    return pd.DataFrame({'left_side': left_side,
                          'right_side': right_side,
                           'similairity': similairity})

In [29]:

matches_df = get_matches_df(matches, artist_name, top=1000)
matches_df = matches_df[matches_df['similairity'] < 0.99999] # Remove all exact matches
matches_df.sample(20)
matches_df.sort_values(['similairity'], ascending=False).head(20)

Unnamed: 0,left_side,right_side,similairity
256,Christie Craig,David Clarke,0.937906
258,David Clarke,Christie Craig,0.937906
875,Elizabeth Swados,Lecia Cornwall,0.927152
870,Lecia Cornwall,Elizabeth Swados,0.927152
657,Maria Goldverg,Andrea Schulz,0.920448
14,Faith Hunter,Rod Duncan,0.911747
505,Ally Condie,Mussolini,0.910818
507,Mussolini,Ally Condie,0.910818
181,Chloe Caldwell,Brain Fitness Lab Lisa Mosconi,0.905992
183,Brain Fitness Lab Lisa Mosconi,Chloe Caldwell,0.905992


In [37]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
import re



artist_name_clean = df_x['author'].dropna().unique()

print('Vectorizing the data - this could take a few minutes for large datasets...')
vectorizer = TfidfVectorizer(min_df=1, analyzer=ngrams, lowercase=False)
tfidf = vectorizer.fit_transform(artist_name_clean)
print('Vectorizing completed...')

from sklearn.neighbors import NearestNeighbors
nbrs = NearestNeighbors(n_neighbors=1, n_jobs=-1).fit(tfidf)

org_column = 'buyer' #column to match against in the messy data
unique_artist = set(df_y['author'].values) # set used for increased performance


###matching query:
def getNearestN(query):
  queryTFIDF_ = vectorizer.transform(query)
  distances, indices = nbrs.kneighbors(queryTFIDF_)
  return distances, indices

import time
t1 = time.time()
print('getting nearest n...')
distances, indices = getNearestN(unique_org)
t = time.time()-t1
print("COMPLETED IN:", t)

unique_org = list(unique_org) #need to convert back to a list
print('finding matches...')
matches = []
for i,j in enumerate(indices):
  temp = [round(distances[i][0],2), artist_name_clean.values[j][0][0],unique_artist[i]]
  matches.append(temp)

print('Building data frame...')  
matches = pd.DataFrame(matches, columns=['Match confidence (lower is better)','Matched name','Origional name'])
print('Done') 

Vectorizing the data - this could take a few minutes for large datasets...
Vectorizing completed...
getting nearest n...


KeyboardInterrupt: 

In [35]:
df_y = df_y.rename(columns={1: "author"})
df_y

Unnamed: 0,0,author
0,1,Allen Eskens
1,2,C.D. Reiss
2,3,Catherine Gayle
3,4,Daniel J. Siegel
4,5,Debbie Macomber
...,...,...
8297,8298,Zondervan
8298,8299,Zora Neale Hurston
8299,8300,edited by Brian Kram
8300,8301,âConsumer Reportsâ


# Last try Guillaume
* https://towardsdatascience.com/fuzzy-string-match-with-python-on-large-dataset-and-why-you-should-not-use-fuzzywuzzy-4ec9f0defcd

In [1]:
import numpy as np
import pandas as pd
import random
import gc
import timeit
import sys
from datetime import datetime as dt
import re

import matplotlib.pyplot as plt

In [2]:
df_x = pd.read_csv('./data/input_x.csv',error_bad_lines=False,encoding='utf-8',sep="\t").iloc[1:,:]
df_y = pd.read_csv('./data/input_y.csv',error_bad_lines=False,encoding='ISO 8859-1',sep="\t",header=None)

df_x = df_x.iloc[:1000,:]
df_y = df_y.iloc[:1000,:]

In [3]:
df_y.head(5)

Unnamed: 0,0,1
0,1,Allen Eskens
1,2,C.D. Reiss
2,3,Catherine Gayle
3,4,Daniel J. Siegel
4,5,Debbie Macomber


In [4]:
def text_prepare(text):
    """
        text: a string
        
        return: a clean string
    """
    REPLACE_BY_SPACE_RE = re.compile('[\n\"\'/(){}\[\]\|@,;#]')
    text = re.sub(REPLACE_BY_SPACE_RE, ' ', text)
    text = re.sub(' +', ' ', text)
    text = text.lower()

    # delete stopwords from text
    text = ' '.join([word for word in text.split()]) 
    text = text.strip()
    return text

In [5]:
df_x['author'] = df_x['author'].astype('string')
df_x.dtypes

id         int64
author    string
dtype: object

In [6]:
df_y.head(10)

Unnamed: 0,0,1
0,1,Allen Eskens
1,2,C.D. Reiss
2,3,Catherine Gayle
3,4,Daniel J. Siegel
4,5,Debbie Macomber
5,6,Dick Morris
6,7,Dolen Perkins-Valdez
7,8,Donna Woolfolk Cross
8,9,Douglas Preston and Lincoln Child
9,10,Emily Giffin


In [7]:
df_x = df_x.fillna("none")

In [8]:
df_x['Author'] = df_x['author'].apply(lambda x: text_prepare(x))
df_y['author'] = df_y[1].apply(lambda x: text_prepare(x))
df_x.head(5)

Unnamed: 0,id,author,Author
1,1,Charles Sykes,charles sykes
2,2,Yogi Berra,yogi berra
3,3,Michael Crichton,michael crichton
4,4,Hegar,hegar
5,5,Ken Bensinger,ken bensinger


In [9]:
#transform text to vectors with TF-IDF

from sklearn.feature_extraction.text import TfidfVectorizer
tfidf_vectorizer = TfidfVectorizer(ngram_range=(1,2), max_df=0.9, min_df=5, token_pattern='(\S+)')
tf_idf_matrix_x = tfidf_vectorizer.fit_transform(df_x['Author'])
tf_idf_matrix_y = tfidf_vectorizer.fit_transform(df_y['author'])

In [10]:
from scipy.sparse import csr_matrix
!pip install sparse_dot_topn 
import sparse_dot_topn.sparse_dot_topn as ct

def awesome_cossim_top(A, B, ntop, lower_bound=0):
    # force A and B as a CSR matrix.
    # If they have already been CSR, there is no overhead
    A = A.tocsr()
    B = B.tocsr()
    M, _ = A.shape
    _, N = B.shape
 
    idx_dtype = np.int32
 
    nnz_max = M*ntop
 
    indptr = np.zeros(M+1, dtype=idx_dtype)
    indices = np.zeros(nnz_max, dtype=idx_dtype)
    data = np.zeros(nnz_max, dtype=A.dtype)
    ct.sparse_dot_topn(
            M, N, np.asarray(A.indptr, dtype=idx_dtype),
            np.asarray(A.indices, dtype=idx_dtype),
            A.data,
            np.asarray(B.indptr, dtype=idx_dtype),
            np.asarray(B.indices, dtype=idx_dtype),
            B.data,
            ntop,
            lower_bound,
            indptr, indices, data)
    return csr_matrix((data,indices,indptr),shape=(M,N))


import time
t1 = time.time()

# adjust lower bound: 0.8
# keep top 10 similar results
matches = awesome_cossim_top(tf_idf_matrix_x, tf_idf_matrix_y.transpose(), 10, 0.8)

t = time.time()-t1
print("finished in:", t)

You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.8/bin/python3.8 -m pip install --upgrade pip' command.[0m
finished in: 0.002249002456665039


In [24]:
def get_matches_df(sparse_matrix, name_vector, top=100):
    non_zeros = sparse_matrix.nonzero()
    
    sparserows = non_zeros[0]
    sparsecols = non_zeros[1]
    
    if top:
        nr_matches = top
    else:
        nr_matches = sparsecols.size
    
    left_side = np.empty([nr_matches], dtype=object)
    right_side = np.empty([nr_matches], dtype=object)
    similairity = np.zeros(nr_matches)
    
    for index in range(0, nr_matches):
        left_side[index] = name_vector[sparserows[index]]
        right_side[index] = name_vector[sparsecols[index]]
        similairity[index] = sparse_matrix.data[index]
    
    return pd.DataFrame({'TITLE': left_side,
                          'SIMILAR_TITLE': right_side,
                           'similairity_score': similairity})
  
  
  
matches_df = pd.DataFrame()
matches_df = get_matches_df(matches, df_x['Author'], top=10000)
# Remove all exact matches
matches_df = matches_df[matches_df['similairity_score'] < 0.99999] 
matches_df.sample(10)

KeyError: 'Author'

In [52]:
matches_df.sort_values(["similairity_score"], ascending=False).head(20)

Unnamed: 0,TITLE,SIMILAR_TITLE,similairity_score
9708,nebula awards,nebula awards awards,0.952539
3658,arthur ellis unhanged arthur award,arthur ellis award,0.947048
3657,arthur ellis unhanged arthur award,arthur ellis award,0.947048
3656,arthur ellis unhanged arthur award,arthur ellis award,0.947048
7281,nino ricci,award nino ricci,0.944778
1949,charlotte zolotow,charlotte zolotow award,0.933302
8574,tom o donnell,sunshine o donnell,0.921246
9507,wallace stegner fellow abigail ulman,wallace stegner fellow,0.919925
9508,wallace stegner fellow abigail ulman,wallace stegner fellow,0.919925
9509,wallace stegner fellow abigail ulman,wallace stegner fellow,0.919925


### tester cette méthode: https://github.com/Christopher-Thornton/hmni
* Deezy match (https://github.com/Living-with-machines/DeezyMatch)

In [16]:
# test avec append

In [48]:
import pandas as pd
import fuzzy_pandas as fpd
import re
def text_prepare(text):
    """
        text: a string
        
        return: a clean string
    """
    REPLACE_BY_SPACE_RE = re.compile('[\n\"\'/(){}\[\]\|@,;#]')
    text = re.sub(REPLACE_BY_SPACE_RE, ' ', text)
    text = re.sub(' +', ' ', text)
    text = text.lower()

    # delete stopwords from text
    text = ' '.join([word for word in text.split()]) 
    text = text.strip()
    return text

df_x = pd.read_csv('./data/input_x.csv',error_bad_lines=False,encoding='utf-8',sep="\t").iloc[1:,:]
df_y = pd.read_csv('./data/input_y.csv',error_bad_lines=False,encoding='ISO 8859-1',sep="\t",header=None)

df_x['author'] = df_x['author'].astype('string')
df_y['author'] = df_y[1].astype('string')

del df_x['id']
del df_y[0]
del df_y[1]


df_merge = df_x.append(df_y, ignore_index=True)
df_merge = df_merge.dropna()
df_merge['author'] = df_merge['author'].apply(lambda x: text_prepare(x))
df_merge.head(5)

Unnamed: 0,author
0,charles sykes
1,yogi berra
2,michael crichton
3,hegar
4,ken bensinger


In [49]:
#transform text to vectors with TF-IDF

from sklearn.feature_extraction.text import TfidfVectorizer
tfidf_vectorizer = TfidfVectorizer(ngram_range=(1,2), max_df=0.9, min_df=5, token_pattern='(\S+)')
tf_idf_matrix = tfidf_vectorizer.fit_transform(df_merge['author'])

In [50]:
from scipy.sparse import csr_matrix
import sparse_dot_topn.sparse_dot_topn as ct
import numpy as np

def awesome_cossim_top(A, B, ntop, lower_bound=0):
    # force A and B as a CSR matrix.
    # If they have already been CSR, there is no overhead
    A = A.tocsr()
    B = B.tocsr()
    M, _ = A.shape
    _, N = B.shape
 
    idx_dtype = np.int32
 
    nnz_max = M*ntop
 
    indptr = np.zeros(M+1, dtype=idx_dtype)
    indices = np.zeros(nnz_max, dtype=idx_dtype)
    data = np.zeros(nnz_max, dtype=A.dtype)
    ct.sparse_dot_topn(
            M, N, np.asarray(A.indptr, dtype=idx_dtype),
            np.asarray(A.indices, dtype=idx_dtype),
            A.data,
            np.asarray(B.indptr, dtype=idx_dtype),
            np.asarray(B.indices, dtype=idx_dtype),
            B.data,
            ntop,
            lower_bound,
            indptr, indices, data)
    return csr_matrix((data,indices,indptr),shape=(M,N))


import time
t1 = time.time()

# adjust lower bound: 0.8
# keep top 10 similar results
matches = awesome_cossim_top(tf_idf_matrix, tf_idf_matrix.transpose(), 10, 0.8)

t = time.time()-t1
print("finished in:", t)

finished in: 0.6329741477966309


In [51]:
def get_matches_df(sparse_matrix, name_vector, top=100):
    non_zeros = sparse_matrix.nonzero()
    
    sparserows = non_zeros[0]
    sparsecols = non_zeros[1]
    
    if top:
        nr_matches = top
    else:
        nr_matches = sparsecols.size
    
    print(nr_matches)
    left_side = np.empty([nr_matches], dtype=object)
    right_side = np.empty([nr_matches], dtype=object)
    similairity = np.zeros(nr_matches)
    
    for index in range(0, nr_matches):
        #print(index)
        try:
            left_side[index] = name_vector[sparserows[index]]
            right_side[index] = name_vector[sparsecols[index]]
            similairity[index] = sparse_matrix.data[index]
        except Exception:
            continue
    
    return pd.DataFrame({'TITLE': left_side,
                          'SIMILAR_TITLE': right_side,
                           'similairity_score': similairity})
  
  
  
matches_df = pd.DataFrame()
matches_df = get_matches_df(matches, df_merge['author'], top=10000)
# Remove all exact matches
matches_df = matches_df[matches_df['similairity_score'] < 0.99999] 
matches_df.sample(10)

10000


Unnamed: 0,TITLE,SIMILAR_TITLE,similairity_score
2733,marta elva,leslie cerier,0.84017
7694,gill lewis,anita albus,0.816033
6146,yaa gyasi,guinness world records staff,0.839506
4795,joya ryan,stephen walker,0.819333
4537,richard moskovitz,isabel losada,0.917485
461,geraldine brooks,jodi daynard,0.83851
5840,chris struyk-bonn,jeffrey,0.80397
2832,brandi dougherty,robert mcgovern,0.826604
8162,michael harvey,karyn marcus,0.854737
170,john harvey,stead,0.845525


In [52]:
matches_df.shape

(686, 3)

In [53]:
matches_df.sort_values(["similairity_score"], ascending=False).head(20)

Unnamed: 0,TITLE,SIMILAR_TITLE,similairity_score
9735,megan abbott,liza donnelly,0.952504
6285,kelley armstrong,anita mills,0.95112
6686,peter straub,anita mills,0.95112
3642,history,isabel losada,0.947379
3643,history,liza marklund,0.947379
3644,history,paulette kouffman sherman,0.947379
7268,twitter,john wasik,0.94347
1930,skila brown,james altucher,0.932588
8452,daniel,max allan collins,0.926822
8594,mindy meja,david lynch,0.922086
