AbdullahDiaa · hmmftg · Jul 4, 2023 · Sep 9, 2023 · Sep 9, 2023 · Sep 9, 2023
diff --git a/cases_test.go b/cases_test.go
@@ -1,6 +1,6 @@
 package garabic
 
-//removeHarakatTestCases contains all test cases for TestRemoveHarakat function
+// removeHarakatTestCases contains all test cases for TestRemoveHarakat function
 var removeHarakatTestCases = []struct {
 	description string
 	input       string
@@ -34,7 +34,7 @@ var removeHarakatTestCases = []struct {
 	},
 }
 
-//normalizeTestCases contains all test cases for TestNormalize function
+// normalizeTestCases contains all test cases for TestNormalize function
 var normalizeTestCases = []struct {
 	description string
 	input       string
@@ -97,7 +97,7 @@ var normalizeTestCases = []struct {
 	},
 }
 
-//spellNumberTestCases contains all test cases for reading a number in arabic
+// spellNumberTestCases contains all test cases for reading a number in arabic
 var spellNumberTestCases = []struct {
 	input    int
 	expected string
@@ -208,7 +208,7 @@ var spellNumberTestCases = []struct {
 	},
 }
 
-//tashkeelTestCases contains all test cases for adding tashkeel to arabic text
+// tashkeelTestCases contains all test cases for adding tashkeel to arabic text
 var tashkeelTestCases = []struct {
 	description string
 	input       string
@@ -226,12 +226,17 @@ var tashkeelTestCases = []struct {
 	},
 }
 
-//shapingTestCases contains all test cases for shaping arabic text
+// shapingTestCases contains all test cases for shaping arabic text
 var shapingTestCases = []struct {
 	description string
 	input       string
 	expected    string
 }{
+	{
+		"Shaping lam alef",
+		"السلام علیکم",
+		"ﻢﮑﻴﻠﻋ ﻡﻼﺴﻟا",
+	},
 	{
 		"Shaping 1 word without tashkeel",
 		"بالعربي",
@@ -252,15 +257,14 @@ var shapingTestCases = []struct {
 		"قِفا نَبكِ مِن ذِكرى حَبيبٍ وَمَنزِلِ   ****   بِسِقطِ اللِوى بَينَ الدَخولِ فَحَومَلِ",
 		"ِﻞَﻣﻮَﺤَﻓ ِﻝﻮﺧَﺪﻟا َﻦﻴَﺑ ﻯﻮِﻠﻟا ِﻂﻘِﺴِﺑ **** ِﻝِﺰﻨَﻣَو ٍﺐﻴﺒَﺣ ﻯﺮﻛِذ ﻦِﻣ ِﻚﺒَﻧ ﺎﻔِﻗ",
 	},
-
 	{
 		"Shaping 1 word without tashkeel",
 		"المصفوفة (Multidimentional Array) هي",
 		"ﻲﻫ (Multidimentional Array) ﺔﻓﻮﻔﺼﻤﻟا",
 	},
 }
 
-//arabicLetterTestCases
+// arabicLetterTestCases
 var arabicLetterTestCases = []struct {
 	description string
 	input       rune

diff --git a/garabic.go b/garabic.go
@@ -1,4 +1,4 @@
-//Package garabic provides a set of functions for Arabic text processing in golang
+// Package garabic provides a set of functions for Arabic text processing in golang
 package garabic
 
 import (
@@ -11,20 +11,20 @@ import (
 	"golang.org/x/text/transform"
 )
 
-//letterGroup represents the letter and bounding letters
+// letterGroup holds a letter together with the letters directly before (backLetter) and after (frontLetter) it
 type letterGroup struct {
 	backLetter  rune
 	letter      rune
 	frontLetter rune
 }
 
-//letterShape represents all shapes of arabic letters in a word
+// letterShape holds the rune used for an arabic letter in each position: standalone, start, middle and end of a word
 // https://web.stanford.edu/dept/lc/arabic/alphabet/incontextletters.html
 type letterShape struct {
 	Independent, Initial, Medial, Final rune
 }
 
-//Map of different shapes of arabic alphabet
+// Map of different shapes of arabic alphabet
 var arabicAlphabetShapes = map[rune]letterShape{
 	// Letter (ﺃ)
 	'\u0623': {Independent: '\uFE83', Initial: '\u0623', Medial: '\uFE84', Final: '\uFE84'},
@@ -42,6 +42,8 @@ var arabicAlphabetShapes = map[rune]letterShape{
 	'\u0626': {Independent: '\uFE89', Initial: '\uFE8B', Medial: '\uFE8C', Final: '\uFE8A'},
 	// Letter (ﺏ)
 	'\u0628': {Independent: '\uFE8F', Initial: '\uFE91', Medial: '\uFE92', Final: '\uFE90'},
+	// Letter (پ)
+	'\u067e': {Independent: '\uFB56', Initial: '\uFB58', Medial: '\uFB59', Final: '\uFB57'},
 	// Letter (ﺕ)
 	'\u062A': {Independent: '\uFE95', Initial: '\uFE97', Medial: '\uFE98', Final: '\uFE96'},
 	// Letter (ﺓ)
@@ -50,6 +52,8 @@ var arabicAlphabetShapes = map[rune]letterShape{
 	'\u062B': {Independent: '\uFE99', Initial: '\uFE9B', Medial: '\uFE9C', Final: '\uFE9A'},
 	// Letter (ﺝ)
 	'\u062C': {Independent: '\uFE9D', Initial: '\uFE9F', Medial: '\uFEA0', Final: '\uFE9E'},
+	// Letter (چ)
+	'\u0686': {Independent: '\uFB7A', Initial: '\uFB7C', Medial: '\uFB7D', Final: '\uFB7B'},
 	// Letter (ﺡ)
 	'\u062D': {Independent: '\uFEA1', Initial: '\uFEA3', Medial: '\uFEA4', Final: '\uFEA2'},
 	// Letter (ﺥ)
@@ -62,6 +66,8 @@ var arabicAlphabetShapes = map[rune]letterShape{
 	'\u0631': {Independent: '\uFEAD', Initial: '\u0631', Medial: '\uFEAE', Final: '\uFEAE'},
 	// Letter (ﺯ)
 	'\u0632': {Independent: '\uFEAF', Initial: '\u0632', Medial: '\uFEB0', Final: '\uFEB0'},
+	// Letter (ژ)
+	'\u0698': {Independent: '\uFB8A', Initial: '\uFB8A', Medial: '\uFB8B', Final: '\uFB8B'},
 	// Letter (ﺱ)
 	'\u0633': {Independent: '\uFEB1', Initial: '\uFEB3', Medial: '\uFEB4', Final: '\uFEB2'},
 	// Letter (ﺵ)
@@ -84,6 +90,10 @@ var arabicAlphabetShapes = map[rune]letterShape{
 	'\u0642': {Independent: '\uFED5', Initial: '\uFED7', Medial: '\uFED8', Final: '\uFED6'},
 	// Letter (ﻙ)
 	'\u0643': {Independent: '\uFED9', Initial: '\uFEDB', Medial: '\uFEDC', Final: '\uFEDA'},
+	// Letter (ک)
+	'\u06a9': {Independent: '\ufb8e', Initial: '\ufb90', Medial: '\ufb91', Final: '\ufb8f'},
+	// Letter (گ)
+	'\u06AF': {Independent: '\ufb92', Initial: '\ufb94', Medial: '\ufb95', Final: '\ufb93'},
 	// Letter (ﻝ)
 	'\u0644': {Independent: '\uFEDD', Initial: '\uFEDF', Medial: '\uFEE0', Final: '\uFEDE'},
 	// Letter (ﻡ)
@@ -98,6 +108,8 @@ var arabicAlphabetShapes = map[rune]letterShape{
 	'\u064A': {Independent: '\uFEF1', Initial: '\uFEF3', Medial: '\uFEF4', Final: '\uFEF2'},
 	// Letter (ﻯ)
 	'\u0649': {Independent: '\uFEEF', Initial: '\u0649', Medial: '\uFEF0', Final: '\uFEF0'},
+	// Letter (ی) Farsi yeh
+	'\u06cc': {Independent: '\ufeef', Initial: '\ufef3', Medial: '\ufef4', Final: '\ufef0'},
 	// Letter (ـ)
 	'\u0640': {Independent: '\u0640', Initial: '\u0640', Medial: '\u0640', Final: '\u0640'},
 	// Letter (ﻻ)
@@ -151,6 +163,8 @@ const (
 	AlefHamzaBelow = '\u0625'
 	//Yae =>  ي
 	Yae = '\u064A'
+	//Yeh =>  ی (Farsi yeh, U+06CC)
+	Yeh = '\u06cc'
 	//DotlessYae =>  ى
 	DotlessYae = '\u0649'
 	//TehMarbuta => ة
@@ -161,7 +175,7 @@ const (
 	AlefWaslah = '\u0671'
 )
 
-//Number groups in Arabic
+// Number groups in Arabic
 var _zeroToNine = []string{
 	"صفر", "واحد", "اثنان", "ثلاثة", "أربعة",
 	"خمسة", "ستة", "سبعة", "ثمانية", "تسعة",
@@ -183,7 +197,7 @@ var _scaleNumbers = []string{
 	"", "ألف", "مليون", "مليار",
 }
 
-//RemoveHarakat will remove harakat from arabic text
+// RemoveHarakat will remove harakat from arabic text
 func RemoveHarakat(input string) string {
 	input = normalizeTransform(input)
 	runes := bytes.Runes([]byte(input))
@@ -198,7 +212,7 @@ func RemoveHarakat(input string) string {
 	return string(runes)
 }
 
-//Normalize will prepare an arabic text to search and index
+// Normalize will prepare an arabic text to search and index
 func Normalize(input string) string {
 	input = normalizeTransform(input)
 	runes := bytes.Runes([]byte(input))
@@ -226,7 +240,7 @@ func normalizeTransform(input string) string {
 	return input
 }
 
-//deleteRune will delete a rune from the slice while keeping the order of runes
+// deleteRune will delete a rune from the slice while keeping the order of runes
 func deleteRune(runes []rune, i int) []rune {
 	if i >= len(runes) {
 		return runes
@@ -291,7 +305,6 @@ func SpellNumber(input int) string {
 			}
 		case 1:
 			stringOfNum = append(stringOfNum, _elevenToNineteen[zeros])
-			break
 		default:
 			if zeros > 0 {
 				word := fmt.Sprintf("و %s و %s", _zeroToNine[zeros], _tens[tens])
@@ -303,7 +316,6 @@ func SpellNumber(input int) string {
 					stringOfNum = append(stringOfNum, _tens[tens])
 				}
 			}
-			break
 		}
 
 		// Scale position
@@ -340,7 +352,7 @@ func contains(s []string, str string) bool {
 	return false
 }
 
-//Shape will reconstruct arabic text to be connected correctly
+// Shape will reconstruct arabic text to be connected correctly
 func Shape(input string) string {
 	var langSections []string
 	var continousLangAr string
@@ -362,11 +374,11 @@ func Shape(input string) string {
 		}
 	}
 	if len(continousLangLt) > 0 {
-		fmt.Println(continousLangLt)
+		// fmt.Println(continousLangLt)
 		langSections = append(langSections, strings.TrimSpace(continousLangLt))
 	}
 	if len(continousLangAr) > 0 {
-		fmt.Printf("\"%s\"\n", continousLangAr)
+		// fmt.Printf("\"%s\"\n", continousLangAr)
 		langSections = append(langSections, strings.TrimSpace(continousLangAr))
 	}
 
@@ -388,7 +400,28 @@ func Shape(input string) string {
 	return strings.Join(shapedSentence, " ")
 }
 
-//shapeWord will reconstruct an arabic word to be connected correctly
+// fixLamAlef folds a lam that is followed by an alef variant into a single
+// lam-alef ligature. For the lam it returns the ligature rune, for an alef
+// variant directly preceded by a lam it returns 0, and for every other
+// combination it returns group.letter unchanged.
+func fixLamAlef(group letterGroup) rune {
+	const lam = '\u0644'
+
+	if group.letter == lam {
+		// Pick the ligature that matches the alef variant following the lam.
+		switch group.frontLetter {
+		case '\u0627': // alef
+			return '\uFEFB'
+		case '\u0623': // alef with hamza above
+			return '\uFEF7'
+		case '\u0625': // alef with hamza below
+			return '\uFEF9'
+		case '\u0622': // alef with madda
+			return '\uFEF5'
+		}
+		return group.letter
+	}
+
+	isAlef := group.letter == '\u0627' || group.letter == '\u0623' ||
+		group.letter == '\u0625' || group.letter == '\u0622'
+	if isAlef && group.backLetter == lam {
+		// The preceding lam already produced the ligature for this alef.
+		return 0
+	}
+	return group.letter
+}
+
+// shapeWord will reconstruct an arabic word to be connected correctly
 func shapeWord(input string) string {
 	if !IsArabic(input) {
 		return input
@@ -398,6 +431,7 @@ func shapeWord(input string) string {
 
 	//Convert input into runes
 	inputRunes := []rune(RemoveHarakat(input))
+	countIgnored := 0
 	for i := range inputRunes {
 		//Get Bounding back and front letters
 		var backLetter, frontLetter rune
@@ -410,14 +444,18 @@ func shapeWord(input string) string {
 		//Fix the letter based on bounding letters
 		if _, ok := arabicAlphabetShapes[inputRunes[i]]; ok {
 			adjustedLetter := adjustLetter(letterGroup{backLetter, inputRunes[i], frontLetter})
-			shapedInput.WriteRune(adjustedLetter)
+			if adjustedLetter != 0 {
+				shapedInput.WriteRune(adjustedLetter)
+			} else {
+				countIgnored++
+			}
 		} else {
 			shapedInput.WriteRune(inputRunes[i])
 		}
 	}
 
 	//In case no Tashkeel deteted, same size of runes
-	if len([]rune(shapedInput.String())) == len([]rune(input)) {
+	if len([]rune(shapedInput.String())) == len([]rune(input))-countIgnored {
 		return reverse(shapedInput.String())
 	}
 
@@ -439,7 +477,7 @@ func shapeWord(input string) string {
 
 }
 
-//reverse the arabic string for RTL support in rendering
+// reverse the arabic string for RTL support in rendering
 func reverse(s string) string {
 	runes := []rune(s)
 	for i, j := 0, len(runes)-1; i < j; i, j = i+1, j-1 {
@@ -448,8 +486,13 @@ func reverse(s string) string {
 	return string(runes)
 }
 
-//adjustLetter will adjust the arabic letter depending on its position
+// adjustLetter will adjust the arabic letter depending on its position
 func adjustLetter(g letterGroup) rune {
+	g.letter = fixLamAlef(g)
+
+	if g.letter == 0 {
+		return 0
+	}
 
 	switch {
 	//Inbetween 2 letters
@@ -475,9 +518,9 @@ func adjustLetter(g letterGroup) rune {
 	}
 }
 
-//Check if the letter is always .Initial
+// Check if the letter is always .Initial
 func isAlwaysInitial(letter rune) bool {
-	alwaysInitial := [13]rune{'\u0627', '\u0623', '\u0622', '\u0625', '\u0649', '\u0621', '\u0624', '\u0629', '\u062f', '\u0630', '\u0631', '\u0632', '\u0648'}
+	alwaysInitial := [14]rune{'\u0627', '\u0623', '\u0622', '\u0625', '\u0649', '\u0621', '\u0624', '\u0629', '\u062f', '\u0630', '\u0631', '\u0632', '\u0648', '\u0698'}
 	for _, item := range alwaysInitial {
 		if item == letter {
 			return true
@@ -486,12 +529,12 @@ func isAlwaysInitial(letter rune) bool {
 	return false
 }
 
-//IsArabicLetter checks if the letter is arabic
+// IsArabicLetter reports whether ch is inside the Unicode range U+0600..U+06FF (the Arabic block)
 func IsArabicLetter(ch rune) bool {
 	return (ch >= 0x600 && ch <= 0x6FF)
 }
 
-//IsArabic checks if the input string contains arabic unicode only
+// IsArabic checks if the input string contains arabic unicode only
 func IsArabic(input string) bool {
 
 	var isArabic = true
@@ -503,7 +546,7 @@ func IsArabic(input string) bool {
 	return isArabic
 }
 
-//ToArabicDigits will convert english numbers to arabic numbers in text
+// ToArabicDigits will convert english numbers to arabic numbers in text
 func ToArabicDigits(input string) string {
 	return strings.NewReplacer(
 		"0", "٠",
@@ -519,7 +562,23 @@ func ToArabicDigits(input string) string {
 	).Replace(input)
 }
 
-//ToEnglishDigits will convert arabic numbers to english numbers in text
+// ToPersianDigits will convert english numbers to persian numbers in text.
+// Every ASCII digit 0-9 is replaced by its Extended Arabic-Indic (Persian)
+// counterpart U+06F0..U+06F9; all other characters are returned unchanged.
+// The digits are written as escapes because several Persian digits are
+// visually identical to the Arabic-Indic ones (U+0660..U+0669).
+func ToPersianDigits(input string) string {
+	return strings.NewReplacer(
+		"0", "\u06F0", // ۰
+		"1", "\u06F1", // ۱
+		"2", "\u06F2", // ۲
+		"3", "\u06F3", // ۳
+		"4", "\u06F4", // ۴
+		"5", "\u06F5", // ۵
+		"6", "\u06F6", // ۶
+		"7", "\u06F7", // ۷
+		"8", "\u06F8", // ۸
+		"9", "\u06F9", // ۹
+	).Replace(input)
+}
+
+// ToEnglishDigits will convert arabic numbers to english numbers in text
 func ToEnglishDigits(input string) string {
 	return strings.NewReplacer(
 		"٠", "0",