/
Levenshtein.cs
93 lines (79 loc) · 3 KB
/
Levenshtein.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
namespace Fastenshtein
{
/// <summary>
/// Measures the difference between two strings.
/// Uses the Levenshtein string difference algorithm.
/// </summary>
public partial class Levenshtein
{
/*
* WARRING this class is performance critical (Speed).
*/
private readonly string storedValue;
private readonly int[] costs;
/// <summary>
/// Creates a new instance with a value to test other values against
/// </summary>
/// <param Name="value">Value to compare other values to.</param>
public Levenshtein(string value)
{
this.storedValue = value;
// Create matrix row
this.costs = new int[this.storedValue.Length];
}
/// <summary>
/// gets the length of the stored value that is tested against
/// </summary>
public int StoredLength => this.storedValue.Length;
/// <summary>
/// Compares a value to the stored value.
/// Not thread safe.
/// </summary>
/// <returns>Difference. 0 complete match.</returns>
public int DistanceFrom(string value)
{
if (costs.Length == 0)
{
return value.Length;
}
// Add indexing for insertion to first row
for (int i = 0; i < this.costs.Length;)
{
this.costs[i] = ++i;
}
for (int i = 0; i < value.Length; i++)
{
// cost of the first index
int cost = i;
int previousCost = i;
// cache value for inner loop to avoid index lookup and bonds checking, profiled this is quicker
char value1Char = value[i];
for (int j = 0; j < this.storedValue.Length; j++)
{
int currentCost = cost;
// assigning this here reduces the array reads we do, improvement of the old version
cost = costs[j];
if (value1Char != this.storedValue[j])
{
if (previousCost < currentCost)
{
currentCost = previousCost;
}
if (cost < currentCost)
{
currentCost = cost;
}
++currentCost;
}
/*
* Improvement on the older versions.
* Swapping the variables here results in a performance improvement for modern intel CPU’s, but I have no idea why?
*/
costs[j] = currentCost;
previousCost = currentCost;
}
}
return this.costs[this.costs.Length - 1];
}
}
}