In [35]:
import librosa
import mysql.connector
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import joblib
import time
import os

#Veri tabanına bağlanma
db = mysql.connector.connect(user=os.getenv("db_user"), password=os.getenv("db_password"), host='localhost', database=os.getenv("db_name"))

#Veri çekme    
cursor = db.cursor()
cursor.execute("select * from kayitlar")
results=cursor.fetchall()

labels=[]
features=[]

#Verilerden kişi isimleri ve ses özellikleri alınır
for sound_file in results:
    labels.append(sound_file[0])
    labels.append(sound_file[0])
    labels.append(sound_file[0])
    audio, sample_rate=librosa.load(sound_file[1])
    mfcc=librosa.feature.mfcc(y=audio,sr=sample_rate,n_mfcc=20)
    mfcc_mean = np.mean(mfcc.T, axis=0)  
    features.append(mfcc_mean)
    features.append(mfcc_mean)
    features.append(mfcc_mean)
        
db.close()

#Kişiler sayısallaştırılır
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)
joblib.dump(label_encoder, "label_encoder.pkl")

#Veriler normalize edilir
scaler = StandardScaler()
features_normalized = scaler.fit_transform(features)
joblib.dump(scaler, "scaler.pkl")

# Özellikler ve etiketleri pandas DataFrame'ine dönüştürüyoruz
df = pd.DataFrame(features_normalized)  # Özellikleri DataFrame'e dönüştürüyoruz
df['label'] = encoded_labels  # Etiketleri ekliyoruz

df.to_csv("ses.csv", index=False)

#Farklı modeller denenecek
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier

models = [
    ("KNeighborsClassifier", KNeighborsClassifier()),
    ("SVC", SVC()),
    ("DecisionTreeClassifier", DecisionTreeClassifier()),
    ("RandomForestClassifier", RandomForestClassifier()),
    ("GaussianNB", GaussianNB()),
    ("LogisticRegression", LogisticRegression(max_iter=1000)),
    ("MLPClassifier", MLPClassifier(max_iter=1000)),
]

x_train, x_test, y_train, y_test = train_test_split(features_normalized, encoded_labels, test_size=0.2, random_state=25)

#Modeller eğitilip test ediliyor ve doğruluk oranları yazdırılıyor
for name, model in models:
    start_time = time.time()

    model.fit(x_train, y_train)  
    y_pred = model.predict(x_test)
    print(y_pred)
    accuracy = accuracy_score(y_test, y_pred)

    end_Time=time.time()-start_time

    results=[]

    results.append({
        "Algoritma":name,
        'Doğruluk': accuracy,
        'Zaman (s)': end_Time
    })
    
    print (results)

[1 4 5 4 5]
[{'Algoritma': 'KNeighborsClassifier', 'Doğruluk': 0.2, 'Zaman (s)': 0.0067861080169677734}]
[0 3 5 3 7]
[{'Algoritma': 'SVC', 'Doğruluk': 1.0, 'Zaman (s)': 0.0}]
[0 3 5 3 7]
[{'Algoritma': 'DecisionTreeClassifier', 'Doğruluk': 1.0, 'Zaman (s)': 0.0}]
[0 3 5 3 7]
[{'Algoritma': 'RandomForestClassifier', 'Doğruluk': 1.0, 'Zaman (s)': 0.06965279579162598}]
[0 3 5 3 7]
[{'Algoritma': 'GaussianNB', 'Doğruluk': 1.0, 'Zaman (s)': 0.0}]
[0 3 5 3 7]
[{'Algoritma': 'LogisticRegression', 'Doğruluk': 1.0, 'Zaman (s)': 0.007750511169433594}]
[0 3 5 3 7]
[{'Algoritma': 'MLPClassifier', 'Doğruluk': 1.0, 'Zaman (s)': 0.040810346603393555}]


In [42]:
#Daha iyi sonuçlar alabilmek ve sütun sayısını azaltmak için feature selection yapılıyor

import pandas as pd
from sklearn.ensemble import RandomForestClassifier  

df = pd.read_csv('ses.csv')

x=df[["0","1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19"]]
y=df["label"]

model = RandomForestClassifier  ()
model.fit(x, y)

#Özellik önem sıralaması
feature_importances = pd.DataFrame({
    "Feature": x.columns,
    "Importance": model.feature_importances_
}).sort_values(by="Importance", ascending=False)

print(feature_importances)

   Feature  Importance
17      17    0.062200
7        7    0.059026
6        6    0.058207
8        8    0.054749
2        2    0.054743
5        5    0.054069
11      11    0.052937
4        4    0.052731
16      16    0.052368
9        9    0.050873
0        0    0.050209
10      10    0.049226
3        3    0.047647
14      14    0.046962
15      15    0.046080
12      12    0.045439
18      18    0.045243
19      19    0.043488
1        1    0.038086
13      13    0.035716


In [43]:
#Feature selection yapıldı. Bundan sonra 1 ile 13. kolonlar kullanılmayacak
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier

models = [
    ("KNeighborsClassifier", KNeighborsClassifier()),
    ("SVC", SVC()),
    ("DecisionTreeClassifier", DecisionTreeClassifier()),
    ("RandomForestClassifier", RandomForestClassifier()),
    ("GaussianNB", GaussianNB()),
    ("LogisticRegression", LogisticRegression(max_iter=1000)),
    ("MLPClassifier", MLPClassifier(max_iter=1000)),
]

#1 ve 13 yok
x=df[["0","2","3","4","5","6","7","8","9","10","11","12","14","15","16","17","18","19"]]
y=df["label"]

x_train, x_test, y_train, y_test = train_test_split(features_normalized, encoded_labels, test_size=0.2, random_state=25)

#Bir kere daha modeller deneniyor
for name, model in models:
    start_time = time.time()

    model.fit(x_train, y_train)  
    y_pred = model.predict(x_test)
    print(y_pred)
    accuracy = accuracy_score(y_test, y_pred)

    end_Time=time.time()-start_time

    results=[]

    results.append({
        "Algoritma":name,
        'Doğruluk': accuracy,
        'Zaman (s)': end_Time
    })
    
    print (results)

[1 4 5 4 5]
[{'Algoritma': 'KNeighborsClassifier', 'Doğruluk': 0.2, 'Zaman (s)': 0.012702703475952148}]
[0 3 5 3 7]
[{'Algoritma': 'SVC', 'Doğruluk': 1.0, 'Zaman (s)': 0.002000570297241211}]
[0 3 5 3 7]
[{'Algoritma': 'DecisionTreeClassifier', 'Doğruluk': 1.0, 'Zaman (s)': 0.0010023117065429688}]
[0 3 5 3 7]
[{'Algoritma': 'RandomForestClassifier', 'Doğruluk': 1.0, 'Zaman (s)': 0.08002662658691406}]
[0 3 5 3 7]
[{'Algoritma': 'GaussianNB', 'Doğruluk': 1.0, 'Zaman (s)': 0.0019774436950683594}]
[0 3 5 3 7]
[{'Algoritma': 'LogisticRegression', 'Doğruluk': 1.0, 'Zaman (s)': 0.0035610198974609375}]
[0 3 5 3 7]
[{'Algoritma': 'MLPClassifier', 'Doğruluk': 1.0, 'Zaman (s)': 0.049480438232421875}]


In [6]:
import joblib 
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
import pandas as pd

df=pd.read_csv("ses.csv")

x=df[["0","2","3","4","5","6","7","8","9","10","11","12","14","15","16","17","18","19"]]
y=df["label"]

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=25)

#En iyi model
model=DecisionTreeClassifier()
model.fit(x_train, y_train) 

#Model .joblib formatında kaydedilir
joblib.dump(model, "best_model.joblib")

['best_model.joblib']