In [1]:
import pandas as pd

#importing libraries for different classifiers
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, f1_score, recall_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, NuSVC



In [2]:
#loading Data
voiceDataSet = pd.read_csv("voice.csv", encoding="utf-8")
voiceDataSet.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3168 entries, 0 to 3167
Data columns (total 21 columns):
meanfreq    3168 non-null float64
sd          3168 non-null float64
median      3168 non-null float64
Q25         3168 non-null float64
Q75         3168 non-null float64
IQR         3168 non-null float64
skew        3168 non-null float64
kurt        3168 non-null float64
sp.ent      3168 non-null float64
sfm         3168 non-null float64
mode        3168 non-null float64
centroid    3168 non-null float64
meanfun     3168 non-null float64
minfun      3168 non-null float64
maxfun      3168 non-null float64
meandom     3168 non-null float64
mindom      3168 non-null float64
maxdom      3168 non-null float64
dfrange     3168 non-null float64
modindx     3168 non-null float64
label       3168 non-null object
dtypes: float64(20), object(1)
memory usage: 507.4+ KB


In [3]:
#converting label column from object to float/integer
voiceDataSet.label=[1 if each =="female" else 0 for each in voiceDataSet.label]
voiceDataSet.label.values

array([0, 0, 0, ..., 1, 1, 1], dtype=int64)

In [8]:

#creating Module and factor
train = voiceDataSet.drop("label", axis=1)
test = voiceDataSet["label"]


#columnModuleTest = testData[["Pclass", "Sex", "Age", "SibSp", "Parch", "Fare", "Title", "Family Size", "Child", "Mother"]]

#data spliting
X_train, X_test, y_train, y_test = train_test_split(
     train, test, test_size=0.3, random_state=42)


#different classifiers
classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="rbf", C=0.025, probability=True),
    NuSVC(probability=True),
    LogisticRegression(),
    DecisionTreeClassifier(),
    RandomForestClassifier(),
    AdaBoostClassifier(),
    GradientBoostingClassifier(),
    GaussianNB(),
    LinearDiscriminantAnalysis(),
    QuadraticDiscriminantAnalysis()
    ]


#result of classifiers in precision, accuracy, recall, fmeasure
for clf in classifiers:
    clf.fit(X_train, y_train)
    Y_pred = clf.predict(X_test)
    accuracy = accuracy_score(Y_pred, y_test)
    precision = precision_score(Y_pred, y_test)
    recall = recall_score(Y_pred, y_test)
    f_measure = f1_score(Y_pred, y_test)
    print('On',clf.__class__.__name__,'Accuracy is: ',accuracy*100,'%')
    print('On',clf.__class__.__name__,'Precision is: ',precision*100,'%')
    print('On',clf.__class__.__name__,'Recall is: ',recall*100,'%')
    print('On',clf.__class__.__name__,'F_Measure is: ',f_measure*100,'%')
    


On KNeighborsClassifier Accuracy is:  70.13669821240799 %
On KNeighborsClassifier Precision is:  68.58407079646017 %
On KNeighborsClassifier Recall is:  68.58407079646017 %
On KNeighborsClassifier F_Measure is:  68.58407079646017 %




On SVC Accuracy is:  61.51419558359621 %
On SVC Precision is:  78.76106194690266 %
On SVC Recall is:  56.86900958466453 %
On SVC F_Measure is:  66.04823747680891 %




On NuSVC Accuracy is:  83.59621451104101 %
On NuSVC Precision is:  80.97345132743364 %
On NuSVC Recall is:  83.94495412844036 %
On NuSVC F_Measure is:  82.43243243243244 %
On LogisticRegression Accuracy is:  90.22082018927445 %
On LogisticRegression Precision is:  84.2920353982301 %
On LogisticRegression Recall is:  94.54094292803971 %
On LogisticRegression F_Measure is:  89.12280701754386 %
On DecisionTreeClassifier Accuracy is:  96.21451104100946 %
On DecisionTreeClassifier Precision is:  96.68141592920354 %
On DecisionTreeClassifier Recall is:  95.41484716157204 %
On DecisionTreeClassifier F_Measure is:  96.04395604395603 %




On RandomForestClassifier Accuracy is:  97.89695057833859 %
On RandomForestClassifier Precision is:  97.78761061946902 %
On RandomForestClassifier Recall is:  97.78761061946902 %
On RandomForestClassifier F_Measure is:  97.78761061946902 %
On AdaBoostClassifier Accuracy is:  97.47634069400631 %
On AdaBoostClassifier Precision is:  96.68141592920354 %
On AdaBoostClassifier Recall is:  97.98206278026906 %
On AdaBoostClassifier F_Measure is:  97.32739420935413 %
On GradientBoostingClassifier Accuracy is:  97.58149316508938 %
On GradientBoostingClassifier Precision is:  98.00884955752213 %
On GradientBoostingClassifier Recall is:  96.93654266958424 %
On GradientBoostingClassifier F_Measure is:  97.46974697469749 %
On GaussianNB Accuracy is:  89.37960042060989 %
On GaussianNB Precision is:  88.05309734513274 %
On GaussianNB Recall is:  89.43820224719101 %
On GaussianNB F_Measure is:  88.74024526198438 %
On LinearDiscriminantAnalysis Accuracy is:  96.84542586750788 %
On LinearDiscriminantAna

