In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import  confusion_matrix
from sklearn.metrics import classification_report
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_validate

# X-Vector

In [21]:
def report_classifer(y_test, predicts, target_names):
    """Print a confusion matrix and a per-class classification report.

    Parameters
    ----------
    y_test : array-like
        Ground-truth class labels.
    predicts : array-like
        Predicted class labels, aligned element-wise with ``y_test``.
    target_names : list
        The class labels themselves, in the order in which the rows/columns
        of the confusion matrix and the rows of the report should appear.

    Returns
    -------
    None
        Everything is written to standard output.
    """
    banner = "-----------------------------"
    title = "Report Classifier"

    print(banner)
    print(title)
    # Rows/columns of the matrix follow the order of ``target_names``.
    cm = confusion_matrix(y_test, predicts, labels=target_names)
    print(cm)
    # ``labels`` is passed explicitly: with ``target_names`` alone scikit-learn
    # pairs the names with the *sorted* labels, so an unsorted list would make
    # the report rows disagree with the confusion matrix above.
    print(
        classification_report(
            y_test,
            predicts,
            labels=target_names,
            target_names=target_names,
        )
    )
    print(title)
    print(banner)

In [9]:
def read_data_set(csv_vectore, number_feature=513):
    """Load a header-less CSV of feature vectors whose last column is the label.

    Parameters
    ----------
    csv_vectore : str, path or file-like
        Passed straight to ``pandas.read_csv``.
    number_feature : int or None, default 513
        Total number of columns expected in the file, i.e. the feature
        columns *plus* the trailing label column.  The default of 513 therefore
        names 512 feature columns ``f1`` .. ``f512``.  Pass ``None`` to take
        the column count from the file itself.

    Returns
    -------
    features : pandas.DataFrame
        Every column except the last, named ``f1`` .. ``f{number_feature-1}``.
    dialects : pandas.Series
        The last column, named ``class``.

    Raises
    ------
    ValueError
        Raised by pandas when ``number_feature`` does not match the number of
        columns actually present in the file.
    """
    vdf = pd.read_csv(csv_vectore, header=None)

    # Allow callers to skip the hard-coded width and follow the file instead.
    if number_feature is None:
        number_feature = vdf.shape[1]

    # range(1, n) yields n-1 feature names; the label column brings it to n.
    columns = ["f{}".format(i) for i in range(1, number_feature)]
    columns.append('class')
    vdf.columns = columns

    features = vdf.iloc[:, 0:-1]
    dialects = vdf.iloc[:, -1]
    return features, dialects

In [10]:
# Load the x-vector file; read_data_set's default column count is used.
features, dialects = read_data_set(
    csv_vectore='kurdish_dialect_vectors/x-vectors.csv',
)

In [None]:
# Scorer names handed to cross_validate via ``scoring=``; each one shows up in
# the result dict under the key "test_<name>".
metrics = [
    'precision_macro',
    'recall_macro',
    'f1_macro',
    'accuracy',
]

# Evaluation on SVM

In [11]:
from sklearn.svm import SVC


In [14]:
# SVC with a fixed seed, scored with 5-fold cross-validation on every entry
# of ``metrics``.
clf = SVC(
    gamma='auto',
    random_state=42,
)
scores_svc = cross_validate(
    clf,
    features,
    dialects,
    cv=5,
    verbose=1,
    scoring=metrics,
)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:  2.9min finished


In [30]:
# Show the raw cross_validate result for the SVC: per-fold fit/score times
# and the per-fold test value of each requested metric.
scores_svc

{'fit_time': array([21.92436886, 27.82718873, 25.40692639, 26.72578549, 27.32287526]),
 'score_time': array([8.37715936, 9.64869094, 8.93532491, 9.10164309, 9.41169715]),
 'test_precision_macro': array([0.75629476, 0.92402513, 0.84106935, 0.91252354, 0.92649355]),
 'test_recall_macro': array([0.77174641, 0.88604648, 0.79784286, 0.90938627, 0.92513816]),
 'test_f1_macro': array([0.75039506, 0.89693276, 0.80596344, 0.90883399, 0.92552472]),
 'test_accuracy': array([0.7549588 , 0.91211474, 0.83307904, 0.90357034, 0.92246642])}

In [31]:
# Mean of each metric over the cross-validation folds, rounded to two decimals.
# The printed labels previously read "CSV" (and "Acuracy"); these scores belong
# to the SVC model.
for metric_label, score_key in (
    ("Precision", "test_precision_macro"),
    ("Recall", "test_recall_macro"),
    ("F1_score", "test_f1_macro"),
    ("Accuracy", "test_accuracy"),
):
    print("{} on SVC: ".format(metric_label), round(scores_svc[score_key].mean(), 2))

Precision on CSV:  0.87
Recall on CSV :  0.86
F1_score  on CSV:  0.86
Acuracy  on CSV:  0.87


# Evaluation On Decision Tree

In [17]:
from sklearn.tree import DecisionTreeClassifier

In [18]:
# Entropy-based decision tree with a fixed seed, scored with 5-fold
# cross-validation on every entry of ``metrics``.
dt = DecisionTreeClassifier(
    criterion='entropy',
    random_state=42,
)
scores_dt = cross_validate(
    dt,
    features,
    dialects,
    cv=5,
    verbose=1,
    scoring=metrics,
)


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:  1.6min finished


In [19]:
# Show the raw cross_validate result for the decision tree: per-fold
# fit/score times and the per-fold test value of each requested metric.
scores_dt

{'fit_time': array([19.30527163, 18.89547205, 18.76223326, 20.17893291, 18.53920627]),
 'score_time': array([0.04593062, 0.04519272, 0.04411554, 0.04695487, 0.04546976]),
 'test_precision_macro': array([0.55347717, 0.63650537, 0.63393178, 0.70664886, 0.70429903]),
 'test_recall_macro': array([0.56215631, 0.61529657, 0.6118164 , 0.69584829, 0.70414667]),
 'test_f1_macro': array([0.55395708, 0.61896913, 0.61715293, 0.69558588, 0.70361734]),
 'test_accuracy': array([0.5627098 , 0.64418676, 0.6396094 , 0.69209643, 0.7014652 ])}

In [20]:
# Mean of each metric over the cross-validation folds, rounded to two decimals.
# ("Acuracy" in the printed label was a typo for "Accuracy".)
for metric_label, score_key in (
    ("Precision", "test_precision_macro"),
    ("Recall", "test_recall_macro"),
    ("F1_score", "test_f1_macro"),
    ("Accuracy", "test_accuracy"),
):
    print("{} on DT: ".format(metric_label), round(scores_dt[score_key].mean(), 2))

Precision on DT:  0.65
Recall on DT:  0.64
F1_score on DT:  0.64
Acuracy on DT:  0.65


---------------

# Confusion Matrix on SVM

In [22]:
from sklearn.model_selection import cross_val_predict


In [24]:
# Dialect class labels, in the order used for the confusion-matrix
# rows/columns and the report rows.
labels = [
    "hawrami",
    "kalhori",
    "kurmanji",
    "sorani",
    "zazaki",
]

In [26]:
# Fresh SVC with the same settings as the scoring run; cross_val_predict
# returns one out-of-fold prediction per sample (5 folds).
clf = SVC(
    gamma='auto',
    random_state=42,
)
y_pred = cross_val_predict(
    clf,
    features,
    dialects,
    cv=5,
    verbose=1,
)


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:  2.9min finished


In [27]:
# Confusion matrix + per-class report for the predictions currently in y_pred.
report_classifer(dialects, y_pred, labels)

-----------------------------
Report Classifier
[[1508   50   71  201   46]
 [  95 3294  100   72  119]
 [ 117   47 2907  371  161]
 [  49   26  407 2869   35]
 [  50   35  131   25 3598]]
              precision    recall  f1-score   support

     hawrami       0.83      0.80      0.82      1876
     kalhori       0.95      0.90      0.92      3680
    kurmanji       0.80      0.81      0.81      3603
      sorani       0.81      0.85      0.83      3386
      zazaki       0.91      0.94      0.92      3839

    accuracy                           0.87     16384
   macro avg       0.86      0.86      0.86     16384
weighted avg       0.87      0.87      0.87     16384

Report Classifier
-----------------------------


------------------------------

# Confusion Matrix on DT

In [28]:
# Fresh decision tree with the same settings as the scoring run.
# Note: this rebinds ``y_pred``, replacing the predictions computed earlier.
dt = DecisionTreeClassifier(
    criterion='entropy',
    random_state=42,
)
y_pred = cross_val_predict(
    dt,
    features,
    dialects,
    cv=5,
    verbose=1,
)


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:  1.6min finished


In [29]:
# Confusion matrix + per-class report for the predictions currently in y_pred.
report_classifer(dialects, y_pred, labels)

-----------------------------
Report Classifier
[[1055  194  205  234  188]
 [ 246 2435  290  383  326]
 [ 131  212 2135  801  324]
 [ 145  155  811 2077  198]
 [ 140  230  321  233 2915]]
              precision    recall  f1-score   support

     hawrami       0.61      0.56      0.59      1876
     kalhori       0.75      0.66      0.71      3680
    kurmanji       0.57      0.59      0.58      3603
      sorani       0.56      0.61      0.58      3386
      zazaki       0.74      0.76      0.75      3839

    accuracy                           0.65     16384
   macro avg       0.65      0.64      0.64     16384
weighted avg       0.65      0.65      0.65     16384

Report Classifier
-----------------------------
