Skip to content

Commit

Permalink
Bharat 🙏
Browse files Browse the repository at this point in the history
Signed-off-by: Finbarrs Oketunji <f@finbarrs.eu>
  • Loading branch information
0xnu committed Apr 20, 2024
1 parent 7405e2e commit 83db080
Show file tree
Hide file tree
Showing 4 changed files with 118 additions and 54 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# Changelog

## 1.0.2 - 2024-04-20
* Added Hindi phonetic patterns (Bh, Dh, Gh, Jh, Kh, Ph, Th digraphs)

## 1.0.1 - 2024-04-19
* Enhanced for Yoruba, Igbo and Hausa names

Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ func main() {
"Olanrewaju Akinyele", // Yoruba
"Obinwanne Obiora", // Igbo
"Abdussalamu Abubakar", // Hausa
"Virat Kohli", // Hindi
}

// Encode each name using NYSIIS
Expand Down
163 changes: 109 additions & 54 deletions nysiis.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,20 @@ func (n *Nysiis) translateFirstCharacters(name string) string {
name = "S" + name[2:] // Yoruba: 'Ts' -> 'S'
case strings.HasPrefix(name, "SH"):
name = "S" + name[2:] // Hausa: 'Sh' -> 'S'
case strings.HasPrefix(name, "BH"):
name = "B" + name[2:] // Hindi: 'Bh' -> 'B'
case strings.HasPrefix(name, "DH"):
name = "D" + name[2:] // Hindi: 'Dh' -> 'D'
case strings.HasPrefix(name, "GH"):
name = "G" + name[2:] // Hindi: 'Gh' -> 'G'
case strings.HasPrefix(name, "JH"):
name = "J" + name[2:] // Hindi: 'Jh' -> 'J'
case strings.HasPrefix(name, "KH"):
name = "K" + name[2:] // Hindi: 'Kh' -> 'K'
case strings.HasPrefix(name, "PH"):
name = "F" + name[2:] // Hindi: 'Ph' -> 'F'
case strings.HasPrefix(name, "TH"):
name = "T" + name[2:] // Hindi: 'Th' -> 'T'
}
return name
}
Expand All @@ -74,60 +88,10 @@ func (n *Nysiis) generateKey(name string) string {
if n.vowels[char] {
char = 'A'
}
if char == 'E' && i+1 < len(name) && name[i+1] == 'V' {
char = 'A'
i++
} else if char == 'Q' {
char = 'G'
} else if char == 'Z' {
char = 'S'
} else if char == 'M' {
char = 'N'
} else if char == 'K' {
if i+1 < len(name) && name[i+1] == 'N' {
continue
} else {
char = 'C'
}
} else if char == 'S' && i+2 < len(name) && name[i:i+3] == "SCH" {
char = 'S'
i += 2
} else if char == 'P' && i+1 < len(name) && name[i+1] == 'H' {
char = 'F'
i++
} else if char == 'H' && (prevChar != 'A' && prevChar != 'E' && prevChar != 'I' && prevChar != 'O' && prevChar != 'U' || (i+1 < len(name) && rune(name[i+1]) != 'A' && rune(name[i+1]) != 'E' && rune(name[i+1]) != 'I' && rune(name[i+1]) != 'O' && rune(name[i+1]) != 'U')) {
char = prevChar
} else if char == 'W' && (prevChar == 'A' || prevChar == 'E' || prevChar == 'I' || prevChar == 'O' || prevChar == 'U') {
char = prevChar
} else if char == 'G' && i+1 < len(name) && name[i+1] == 'B' {
char = 'J' // Igbo: 'Gb' -> 'J'
} else if char == 'K' && i+1 < len(name) && name[i+1] == 'P' {
char = 'P' // Igbo: 'Kp' -> 'P'
} else if char == 'N' && i+1 < len(name) && name[i+1] == 'W' {
char = 'W' // Igbo: 'Nw' -> 'W'
} else if char == 'T' && i+1 < len(name) && name[i+1] == 'S' {
char = 'S' // Yoruba: 'Ts' -> 'S'
} else if char == 'S' && i+1 < len(name) && name[i+1] == 'H' {
char = 'S' // Hausa: 'Sh' -> 'S'
}

// Handle vowel harmony and tonal differences
if n.vowels[char] && i > 0 && n.vowels[prevChar] {
if prevChar == 'A' || prevChar == 'O' || prevChar == 'U' {
if char == 'E' || char == 'I' {
char = 'A'
}
} else if prevChar == 'E' || prevChar == 'I' {
if char == 'A' || char == 'O' || char == 'U' {
char = 'E'
}
}
}

// Ignore tonal differences
if char >= 'A' && char <= 'Z' {
char = rune(strings.ToUpper(string(char))[0])
}
char = n.translateChar(char, name, i)
char = n.handleVowelHarmony(char, prevChar)
char = n.ignoreTonalDifferences(char)

if char != prevChar {
key += string(char)
Expand All @@ -136,22 +100,113 @@ func (n *Nysiis) generateKey(name string) string {
prevChar = char
}

key = n.removeTrailingS(key)
key = n.translateAY(key)
key = n.removeTrailingA(key)
key = n.truncateKey(key)

return key
}

// translateChar returns the phonetic replacement for char, the character at
// byte index i of name. name and i are used only to inspect the neighbouring
// bytes; n.vowels decides whether a neighbour counts as a vowel.
//
// Rules, by current character:
//
//	E followed by V      -> A
//	Q                    -> G
//	Z                    -> S
//	M                    -> N
//	K followed by N or H -> K (Hindi: 'Kh' -> 'K')
//	K followed by P      -> P (Igbo: 'Kp' -> 'P')
//	K otherwise          -> C
//	P followed by H      -> F (also Hindi: 'Ph' -> 'F')
//	G followed by B      -> J (Igbo: 'Gb' -> 'J')
//	N followed by W      -> W (Igbo: 'Nw' -> 'W')
//	T followed by S      -> S (Yoruba: 'Ts' -> 'S')
//	H not between vowels -> the preceding byte of name
//	W after a vowel      -> the preceding byte of name
//
// Every other character, including the first letter of SCH, SH, BH, DH, GH,
// JH and TH, is returned unchanged; the H that follows those letters is
// turned into the preceding letter by the H rule above.
//
// An H at index 0 has no preceding byte and is returned unchanged.
func (n *Nysiis) translateChar(char rune, name string, i int) rune {
	// next is the byte after position i, or 0 when i is the last index.
	var next byte
	if i+1 < len(name) {
		next = name[i+1]
	}

	switch char {
	case 'E':
		if next == 'V' {
			return 'A'
		}
	case 'Q':
		return 'G'
	case 'Z':
		return 'S'
	case 'M':
		return 'N'
	case 'K':
		// The digraph cases must be tested before the generic K -> C
		// fallback, otherwise they can never match.
		switch next {
		case 'N', 'H':
			return char // 'Kn' keeps K; Hindi: 'Kh' -> 'K'
		case 'P':
			return 'P' // Igbo: 'Kp' -> 'P'
		}
		return 'C'
	case 'P':
		if next == 'H' {
			return 'F' // 'Ph' -> 'F'
		}
	case 'H':
		if i == 0 {
			// Nothing precedes a leading H, so there is no letter to
			// fall back on; indexing name[i-1] here would panic.
			return char
		}
		prev := rune(name[i-1])
		if i+1 >= len(name) || !n.vowels[prev] || !n.vowels[rune(next)] {
			return prev
		}
	case 'W':
		if i > 0 {
			if prev := rune(name[i-1]); n.vowels[prev] {
				return prev
			}
		}
	case 'G':
		if next == 'B' {
			return 'J' // Igbo: 'Gb' -> 'J'
		}
	case 'N':
		if next == 'W' {
			return 'W' // Igbo: 'Nw' -> 'W'
		}
	case 'T':
		if next == 'S' {
			return 'S' // Yoruba: 'Ts' -> 'S'
		}
	}

	return char
}

// handleVowelHarmony adjusts char when both it and prevChar are listed in
// n.vowels:
//
//   - after A, O or U, an E or I becomes A;
//   - after E or I, an A, O or U becomes E.
//
// In every other case char is returned unchanged.
func (n *Nysiis) handleVowelHarmony(char, prevChar rune) rune {
	// Only a vowel that directly follows another vowel is ever rewritten.
	if !n.vowels[char] || !n.vowels[prevChar] {
		return char
	}

	switch prevChar {
	case 'A', 'O', 'U':
		switch char {
		case 'E', 'I':
			return 'A'
		}
	case 'E', 'I':
		switch char {
		case 'A', 'O', 'U':
			return 'E'
		}
	}
	return char
}

// ignoreTonalDifferences returns char unchanged.
//
// The previous body upper-cased char only when it was already in 'A'..'Z',
// via a rune -> string -> rune round trip. That conversion could never change
// the value, so it has been removed; the result is the same for every input
// without the per-character string allocations.
//
// NOTE(review): despite the name, no diacritic or tone-mark folding happens
// here. If that is wanted it needs to be implemented; confirm the intent.
func (n *Nysiis) ignoreTonalDifferences(char rune) rune {
	return char
}

// removeTrailingS drops a single trailing "S" from key. Keys of one byte or
// less are returned as they are, so a key consisting only of "S" survives.
func (n *Nysiis) removeTrailingS(key string) string {
	if len(key) <= 1 {
		return key
	}
	// TrimSuffix removes at most one occurrence of the suffix.
	return strings.TrimSuffix(key, "S")
}

// translateAY rewrites a trailing "AY" in key to "Y". Keys that do not end in
// "AY" are returned unchanged.
func (n *Nysiis) translateAY(key string) string {
	stem := strings.TrimSuffix(key, "AY")
	if len(stem) == len(key) {
		// Nothing was trimmed, so key does not end in "AY".
		return key
	}
	return stem + "Y"
}

// removeTrailingA drops a single trailing "A" from key, unless doing so would
// leave nothing: a key that is just "A" (or empty) is returned unchanged.
func (n *Nysiis) removeTrailingA(key string) string {
	trimmed := strings.TrimSuffix(key, "A")
	if trimmed == "" {
		// key was "" or exactly "A"; keep it rather than produce an empty key.
		return key
	}
	return trimmed
}

// truncateKey limits key to its first six bytes. Shorter keys are returned
// unchanged.
func (n *Nysiis) truncateKey(key string) string {
	const maxKeyLen = 6
	if len(key) <= maxKeyLen {
		return key
	}
	return key[:maxKeyLen]
}

Expand Down
5 changes: 5 additions & 0 deletions nysiis_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,16 @@ func TestNysiis_Encode(t *testing.T) {
encodedName6 := nysiis.NewNysiis().Encode(name6)
fmt.Printf("Encoded name for %q: %s\n", name6, encodedName6)

name7 := "Virat Kohli"
encodedName7 := nysiis.NewNysiis().Encode(name7)
fmt.Printf("Encoded name for %q: %s\n", name7, encodedName7)

// Output:
// Encoded name for "Watkins": WATCAN
// Encoded name for "Robert Johnson": RABART
// Encoded name for "Samantha Williams": SANANT
// Encoded name for "Olanrewaju Akinyele": OLANRA
// Encoded name for "Obinwanne Obiora": OBAWAN
// Encoded name for "Abdussalamu Abubakar": ABDASA
// Encoded name for "Virat Kohli": VARATC
}

0 comments on commit 83db080

Please sign in to comment.