In [2]:
import librosa
import mysql.connector
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
import numpy as np
import pandas as pd
import joblib
import time
import os
import random
from dotenv import load_dotenv

#Veri tabanına bağlanma
load_dotenv()
db = mysql.connector.connect(user=os.getenv("db_user"), password=os.getenv("db_password"), host='localhost', database=os.getenv("db_name"))

#Veri çekme    
cursor = db.cursor()
cursor.execute("select * from kayitlar")
results=cursor.fetchall()

labels=[]
features=[]

#Verilerden kişi isimleri ve ses özellikleri alınır
for sound_file in results:
    labels.append(sound_file[0])
    audio, sample_rate=librosa.load(sound_file[1])
    mfcc=librosa.feature.mfcc(y=audio,sr=sample_rate,n_mfcc=20)
    mfcc_mean = np.mean(mfcc.T, axis=0)  
    features.append(mfcc_mean)
        
db.close()

#Kişiler sayısallaştırılır
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)
joblib.dump(label_encoder, "label_encoder.pkl")

#Veriler normalize edilir
scaler = StandardScaler()
features_normalized = scaler.fit_transform(features)
joblib.dump(scaler, "scaler.pkl")

# Özellikler ve etiketleri pandas DataFrame'ine dönüştürüyoruz
df = pd.DataFrame(features_normalized)  # Özellikleri DataFrame'e dönüştürüyoruz
df['label'] = encoded_labels  # Etiketleri ekliyoruz

df.to_csv("ses.csv", index=False)

#Daha iyi sonuçlar alabilmek ve sütun sayısını azaltmak için feature selection yapılıyor

import pandas as pd
from sklearn.ensemble import RandomForestClassifier  

df = pd.read_csv('ses.csv')

x=df[["0","1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19"]]
y=df["label"]

model = RandomForestClassifier  ()
model.fit(x, y)

#Özellik önem sıralaması
feature_importances = pd.DataFrame({
    "Feature": x.columns,
    "Importance": model.feature_importances_
}).sort_values(by="Importance", ascending=False)

print(feature_importances)

   Feature  Importance
2        2    0.062915
9        9    0.062226
10      10    0.060363
0        0    0.057269
1        1    0.056806
11      11    0.055739
15      15    0.054871
16      16    0.054817
5        5    0.054757
17      17    0.051471
4        4    0.048664
13      13    0.047049
6        6    0.046495
18      18    0.046465
3        3    0.044777
12      12    0.040202
14      14    0.040130
7        7    0.039164
8        8    0.038777
19      19    0.037042


In [None]:
#Feature selection yapıldı. Bundan sonra 1 ile 13. kolonlar kullanılmayacak
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
import joblib 

models = [
    ("KNeighborsClassifier", KNeighborsClassifier()),
    ("SVC", SVC()),
    ("DecisionTreeClassifier", DecisionTreeClassifier()),
    ("RandomForestClassifier", RandomForestClassifier()),
    ("GaussianNB", GaussianNB()),
    ("LogisticRegression", LogisticRegression(max_iter=1000)),
    ("MLPClassifier", MLPClassifier(max_iter=1000)),
]

#1 ve 13 yok
x=df[["0","2","3","4","5","6","7","8","9","10","11","12","14","15","16","17","18","19"]]
y=df["label"]

x=x.to_numpy()
y=y.to_numpy()
train=random.randint(0,len(x)-4)

x_train=x
x_test=x[train:train+4]
y_train=y
y_test=y[train:train+4]

best_accuracy=0
best_time=10

#En iyi model seçiliyor
for name, model in models:
    start_time = time.time()

    model.fit(x_train, y_train)  
    y_pred = model.predict(x_test)
    print(y_pred)
    accuracy = accuracy_score(y_test, y_pred)

    end_Time=time.time()-start_time

    results=[]

    results.append({
        "Algoritma":name,
        'Doğruluk': accuracy,
        'Zaman (s)': end_Time
    })

    if best_accuracy<accuracy:
        if best_time>end_Time:
            #Model .joblib formatında kaydedilir
            joblib.dump(model, "best_model.joblib")
    
    print(results)

[1 3 1 4]
[{'Algoritma': 'KNeighborsClassifier', 'Doğruluk': 0.0, 'Zaman (s)': 0.24727725982666016}]
[13 10 11 12]
[{'Algoritma': 'SVC', 'Doğruluk': 1.0, 'Zaman (s)': 0.0}]
[13 10 11 12]
[{'Algoritma': 'DecisionTreeClassifier', 'Doğruluk': 1.0, 'Zaman (s)': 0.0}]
[13 10 11 12]
[{'Algoritma': 'RandomForestClassifier', 'Doğruluk': 1.0, 'Zaman (s)': 0.06473612785339355}]
[13 10 11 12]
[{'Algoritma': 'GaussianNB', 'Doğruluk': 1.0, 'Zaman (s)': 0.0}]
[13 10 11 12]
[{'Algoritma': 'LogisticRegression', 'Doğruluk': 1.0, 'Zaman (s)': 0.017269372940063477}]
[13 10 11 12]
[{'Algoritma': 'MLPClassifier', 'Doğruluk': 1.0, 'Zaman (s)': 0.0629119873046875}]
