# Thefuzz library experiment: fuzzy string matching in action

In [45]:
import numpy as np 
import pandas as pd 

In [46]:
from thefuzz import fuzz
from thefuzz import process

## Some important concepts of the library used

## Simple examples

In [47]:
# Two sample strings that differ only in letter case.
word1, word2 = 'HELLO FUZZYWUZZY', 'hello fuzzywuzzy'

In [48]:
# Plain ratio on the raw strings; no case normalisation is applied beforehand.
fuzz.ratio(word1, word2)

6

In [49]:
# Partial ratio on the same raw, case-differing strings.
fuzz.partial_ratio(word1, word2)

6

In [50]:
# Token-sort ratio on the same pair of strings.
fuzz.token_sort_ratio(word1, word2)

100

In [51]:
# Token-set ratio on the same pair of strings.
fuzz.token_set_ratio(word1, word2)

100

## Creating our dataset

In [52]:
# Previous reports (free text; misspellings are kept exactly as written).
prelist = [
    'There is a hurt person nearby to the Santa Lucia school, in the 67 street',
    'A traffic accident in the 96 avenue, with direction to the 45 street',
    'There is a kid who fall in a hole in the 75 stret , sec 23, near to the Alfred Cafe',
    'there is a collision between vehicules at the traffic line of the 45 section, 67 street',
    'A taxi crashed against a school bus in the San Juan Avenue, with direction to the 67 streeet',
]

# New reports, one per entry of the previous list.
newlist = [
    'in the 45 street there is a person blackout, at the 96 avenue',
    'at the section 23 of the 75 street there is a oldman that needs medical attention',
    'collision in the 67 street, with direction to the school',
    'a motorcicle crash in the 67 streett, 45 sec, medical support required',
    'At the 67th street, a collision between vehicules around the San Juan Ave.',
]

In [53]:
# Pair the two lists positionally: row i holds prelist[i] and newlist[i].
df_fuzzy = pd.DataFrame(
    list(zip(prelist, newlist)),
    columns=['previous', 'new'],
)
df_fuzzy.head()

Unnamed: 0,previous,new
0,There is a hurt person nearby to the Santa Luc...,"in the 45 street there is a person blackout, a..."
1,"A traffic accident in the 96 avenue, with dire...",at the section 23 of the 75 street there is a ...
2,There is a kid who fall in a hole in the 75 st...,"collision in the 67 street, with direction to ..."
3,there is a collision between vehicules at the ...,"a motorcicle crash in the 67 streett, 45 sec, ..."
4,A taxi crashed against a school bus in the San...,"At the 67th street, a collision between vehicu..."


## Using 'Fuzz' to find Ratios

In [54]:
def measureRatio(row):
    """Score a row's 'previous' and 'new' values with ``fuzz.ratio``.

    Both values are converted with ``str`` and lower-cased before scoring.
    """
    previous_text = str(row['previous']).lower()
    new_text = str(row['new']).lower()
    return fuzz.ratio(previous_text, new_text)

def measurePartialRatio(row):
    """Score a row's 'previous' and 'new' values with ``fuzz.partial_ratio``.

    Both values are converted with ``str`` and lower-cased before scoring.
    """
    previous_text = str(row['previous']).lower()
    new_text = str(row['new']).lower()
    return fuzz.partial_ratio(previous_text, new_text)

def measureTokenSortRatio(row):
    """Score a row's 'previous' and 'new' values with ``fuzz.token_sort_ratio``.

    The values are handed to the scorer as-is; no explicit ``str`` conversion
    or lower-casing is done here.
    """
    previous_value = row['previous']
    new_value = row['new']
    return fuzz.token_sort_ratio(previous_value, new_value)

def measureTokenSetRatio(row):
    """Score a row's 'previous' and 'new' values with ``fuzz.token_set_ratio``.

    The values are handed to the scorer as-is; no explicit ``str`` conversion
    or lower-casing is done here.
    """
    previous_value = row['previous']
    new_value = row['new']
    return fuzz.token_set_ratio(previous_value, new_value)

In [55]:
# Row-wise scoring: with axis=1 each row is passed to measureRatio directly,
# so no lambda wrapper is needed.
df_fuzzy['measureRatio'] = df_fuzzy.apply(measureRatio, axis=1)

In [56]:
# Row-wise scoring: with axis=1 each row is passed to measurePartialRatio
# directly, so no lambda wrapper is needed.
df_fuzzy['measurePartialRatio'] = df_fuzzy.apply(measurePartialRatio, axis=1)

In [57]:
# Row-wise scoring: with axis=1 each row is passed to measureTokenSortRatio
# directly, so no lambda wrapper is needed.
df_fuzzy['measureTokenSortRatio'] = df_fuzzy.apply(measureTokenSortRatio, axis=1)

In [58]:
# Row-wise scoring: with axis=1 each row is passed to measureTokenSetRatio
# directly, so no lambda wrapper is needed.
df_fuzzy['measureTokenSetRatio'] = df_fuzzy.apply(measureTokenSetRatio, axis=1)

In [59]:
# Display the whole frame (all rows and columns).
df_fuzzy

Unnamed: 0,previous,new,measureRatio,measurePartialRatio,measureTokenSortRatio,measureTokenSetRatio
0,There is a hurt person nearby to the Santa Luc...,"in the 45 street there is a person blackout, a...",51,48,61,71
1,"A traffic accident in the 96 avenue, with dire...",at the section 23 of the 75 street there is a ...,44,44,54,54
2,There is a kid who fall in a hole in the 75 st...,"collision in the 67 street, with direction to ...",49,59,53,52
3,there is a collision between vehicules at the ...,"a motorcicle crash in the 67 streett, 45 sec, ...",43,46,48,49
4,A taxi crashed against a school bus in the San...,"At the 67th street, a collision between vehicu...",45,50,58,56


## Using 'Process' to find better matches

In [60]:
# Rank the entries of the 'new' column against the query string,
# scoring each candidate with token_sort_ratio.
process.extract(
    'collision in the 67 street, with direction to the school',
    df_fuzzy['new'],
    scorer=fuzz.token_sort_ratio,
)

[('collision in the 67 street, with direction to the school', 100, 2),
 ('in the 45 street there is a person blackout, at the 96 avenue', 54, 0),
 ('At the 67th street, a collision between vehicules around the San Juan Ave.',
  52,
  4),
 ('at the section 23 of the 75 street there is a oldman that needs medical attention',
  51,
  1),
 ('a motorcicle crash in the 67 streett, 45 sec, medical support required',
  50,
  3)]

In this case the best match is the first result, which scored 100 in similarity: it is the query string itself, found at row 2 of the `new` column.