In [1]:
!pip install opendatasets librosa gradio resampy

Collecting opendatasets
  Downloading opendatasets-0.1.22-py3-none-any.whl.metadata (9.2 kB)
Collecting resampy
  Downloading resampy-0.4.3-py3-none-any.whl.metadata (3.0 kB)
Downloading opendatasets-0.1.22-py3-none-any.whl (15 kB)
Downloading resampy-0.4.3-py3-none-any.whl (3.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m36.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: resampy, opendatasets
Successfully installed opendatasets-0.1.22 resampy-0.4.3


In [2]:
# model.py

%%writefile model.py

import tensorflow as tf
from tensorflow.keras.models import Sequential # <--- Unutulan kısım burasıydı!
from tensorflow.keras.layers import Dense, Dropout, Conv1D, MaxPooling1D, Flatten, LSTM, BatchNormalization

def create_hybrid_model(input_shape, num_classes):
    """Build and compile the Conv1D + LSTM emotion classifier.

    Args:
        input_shape: Shape of one sample, handed to the first Conv1D layer
            (train.py passes ``(40, 1)``).
        num_classes: Number of units in the final softmax layer.

    Returns:
        A compiled ``Sequential`` model (categorical cross-entropy loss,
        'adam' optimizer, accuracy metric).
    """
    layer_stack = [
        # Convolutional feature extractor; only the first conv is followed by
        # batch normalisation.
        Conv1D(128, kernel_size=5, padding='same', activation='relu', input_shape=input_shape),
        BatchNormalization(),
        MaxPooling1D(pool_size=2),
        Conv1D(64, kernel_size=5, padding='same', activation='relu'),
        MaxPooling1D(pool_size=2),
        # Recurrent part: the conv output is fed to the LSTMs as a sequence
        # (no Flatten). The first LSTM returns the full sequence because
        # another LSTM follows; the second returns a single vector.
        LSTM(64, return_sequences=True),
        LSTM(32),
        # Classification head.
        Dense(64, activation='relu'),
        Dropout(0.3),
        Dense(num_classes, activation='softmax'),
    ]

    network = Sequential(layer_stack)
    network.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

Writing model.py


In [3]:
# train.py (GÜNCELLENMİŞ - NORMALİZASYON EKLENDİ)

%%writefile train.py

import os
import librosa
import numpy as np
import pandas as pd
import opendatasets as od
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from model import create_hybrid_model

# --- 1. DOWNLOAD THE DATASETS ---
# od.download is called without a target directory, and the existence checks
# assume each dataset folder appears under the current working directory.
# Derive the data paths from those same folders so the "already downloaded"
# check and the loaders can never point at different places (the previous
# hard-coded "/content/..." paths only matched when cwd was /content).
TESS_DIR = os.path.abspath("toronto-emotional-speech-set-tess")
RAVDESS_DIR = os.path.abspath("ravdess-emotional-speech-audio")

# TESS
if not os.path.exists(TESS_DIR):
    od.download("https://www.kaggle.com/datasets/ejlok1/toronto-emotional-speech-set-tess")

# RAVDESS
if not os.path.exists(RAVDESS_DIR):
    od.download("https://www.kaggle.com/datasets/uwrfkaggler/ravdess-emotional-speech-audio")

# Folders that are walked for .wav files.
DATA_PATH_TESS = os.path.join(TESS_DIR, "TESS Toronto emotional speech set data")
DATA_PATH_RAVDESS = RAVDESS_DIR

# Artifacts written by this script and read back by serve.py.
MODEL_SAVE_PATH = "duygu_modeli.h5"
LABELS_SAVE_PATH = "etiketler.npy"
SCALER_SAVE_PATH = "scaler.save"

# --- AUGMENTATION ---
def noise(data):
    """Return ``data`` plus Gaussian noise; the input array is not modified.

    The noise amplitude is ``0.035 * u * max(data)`` with ``u`` drawn from
    ``np.random.uniform()``, so it changes on every call.

    Args:
        data: 1-D numpy array of audio samples.

    Returns:
        A new array with the same length as ``data``.
    """
    amplitude = 0.035 * np.random.uniform() * np.amax(data)
    jitter = np.random.normal(size=data.shape[0])
    return data + amplitude * jitter

def pitch(data, sampling_rate, pitch_factor=0.7):
    """Pitch-shift an audio signal with ``librosa.effects.pitch_shift``.

    Args:
        data: Audio samples, passed to librosa as ``y``.
        sampling_rate: Sampling rate of ``data``, passed as ``sr``.
        pitch_factor: Shift amount, passed as ``n_steps`` (default 0.7).

    Returns:
        Whatever ``librosa.effects.pitch_shift`` returns for these arguments.
    """
    shifted = librosa.effects.pitch_shift(
        y=data,
        sr=sampling_rate,
        n_steps=pitch_factor,
    )
    return shifted

# --- DATA LOADING AND MERGING ---

# The TESS label is the last "_"-separated part of the file name. TESS file
# names use the short tag "ps" for pleasant surprise (e.g. "OAF_back_ps.wav"),
# so it is mapped onto the label RAVDESS clips receive; otherwise the same
# emotion would end up as two separate classes.
TESS_LABEL_FIXES = {'ps': 'pleasant_surprise'}

# RAVDESS emotion code (third "-"-separated field of the file name) -> label.
# Example: 03-01-06-01-01-01-01.wav carries code '06', i.e. fear.
ravdess_map = {
    '01': 'neutral', '02': 'neutral',  # calm is folded into neutral
    '03': 'happy', '04': 'sad', '05': 'angry',
    '06': 'fear', '07': 'disgust', '08': 'pleasant_surprise'
}


def _iter_wav_files(root):
    """Yield ``(file_name, full_path)`` for each ``.wav`` file below ``root``.

    Directories and files are visited in sorted order so every run sees the
    clips in the same sequence. A file name that was already yielded is
    skipped: NOTE(review) the Kaggle RAVDESS download is reported to contain
    the same clips under more than one folder - confirm; skipping repeats is
    harmless either way because the file names identify the recording.
    """
    seen = set()
    for dizin, alt_dizinler, dosyalar in os.walk(root):
        alt_dizinler.sort()  # in-place sort steers os.walk's traversal order
        for dosya in sorted(dosyalar):
            if not dosya.endswith(".wav") or dosya in seen:
                continue
            seen.add(dosya)
            yield dosya, os.path.join(dizin, dosya)


def _mean_mfcc(signal, sr):
    """Return the 40 MFCCs of ``signal`` averaged over time (one vector per clip)."""
    return np.mean(librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=40).T, axis=0)


def _append_clip(rows, path, label):
    """Load ``path`` and append two rows to ``rows``: clean and noise-augmented.

    Returns True on success. A file that cannot be loaded or processed is
    reported and skipped (returns False) instead of being silently ignored;
    nothing is appended for it.
    """
    try:
        audio, sr = librosa.load(path, res_type='kaiser_fast')
        clean = _mean_mfcc(audio, sr)
        noisy = _mean_mfcc(noise(audio), sr)
    except Exception as exc:  # best effort: one bad file must not stop the run
        print(f"UYARI: {path} atlandı ({exc})")
        return False
    rows.append([clean, label])
    rows.append([noisy, label])
    return True


data = []
skipped = 0
print("Veriler taranıyor (TESS + RAVDESS)...")

# 1. TESS
print("TESS işleniyor...")
for dosya, dosya_tam_yolu in _iter_wav_files(DATA_PATH_TESS):
    # e.g. OAF_back_happy.wav -> 'happy'
    duygu = dosya.split('_')[-1].split('.')[0].lower()
    duygu = TESS_LABEL_FIXES.get(duygu, duygu)
    if not _append_clip(data, dosya_tam_yolu, duygu):
        skipped += 1

# 2. RAVDESS
print("RAVDESS işleniyor...")
for dosya, dosya_tam_yolu in _iter_wav_files(DATA_PATH_RAVDESS):
    parcalar = dosya.split('-')
    if len(parcalar) <= 2:
        continue  # not a RAVDESS-style file name
    duygu = ravdess_map.get(parcalar[2])
    if duygu is None:
        continue  # unknown emotion code
    if not _append_clip(data, dosya_tam_yolu, duygu):
        skipped += 1

if skipped:
    print(f"UYARI: {skipped} dosya işlenemedi ve atlandı.")

# Fail here with a clear message rather than later inside the scaler/model.
if not data:
    raise RuntimeError(
        "Hiç ses dosyası işlenemedi; veri seti yollarını kontrol edin: "
        f"{DATA_PATH_TESS} | {DATA_PATH_RAVDESS}"
    )

df = pd.DataFrame(data, columns=['Ozellikler', 'Duygu_Etiketi'])
print(f"Toplam Birleştirilmiş Veri Sayısı: {len(df)}")

# --- PREPARATION ---
X = np.array(df['Ozellikler'].tolist())
y = np.array(df['Duygu_Etiketi'].tolist())

# Labels: string -> integer -> one-hot. The class names are saved so serve.py
# can turn a predicted index back into a label.
le = LabelEncoder()
y_encoded = to_categorical(le.fit_transform(y))
np.save(LABELS_SAVE_PATH, le.classes_)

# Split BEFORE scaling so the scaler never sees the held-out rows.
# NOTE(review): every clip contributes a clean and a noisy row; a row-level
# random split can place the two on opposite sides, which makes the held-out
# score optimistic. A clip-level (grouped) split would remove that.
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Scaler: mean/std are estimated from the training rows only and then applied
# unchanged to the held-out rows (and, via the saved file, at serving time).
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
joblib.dump(scaler, SCALER_SAVE_PATH)

# Add a trailing channel axis: (samples, features) -> (samples, features, 1).
X_train_cnn = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test_cnn = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# --- TRAINING ---
print("Model iki veri setiyle eğitiliyor...")
n_classes = y_encoded.shape[1]
model = create_hybrid_model(input_shape=(40, 1), num_classes=n_classes)

# Compile again so Adam runs with an explicit learning rate of 0.0005 instead
# of the optimizer configured inside create_hybrid_model.
from tensorflow.keras.optimizers import Adam
model.compile(
    optimizer=Adam(learning_rate=0.0005),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

training_callbacks = [
    # Stop after 8 epochs without a better val_loss and roll back to the best weights.
    EarlyStopping(monitor='val_loss', patience=8, restore_best_weights=True),
    # Write the model to disk whenever val_accuracy reaches a new maximum.
    ModelCheckpoint(MODEL_SAVE_PATH, monitor='val_accuracy', save_best_only=True, mode='max', verbose=1),
]

history = model.fit(
    X_train_cnn,
    y_train,
    validation_data=(X_test_cnn, y_test),
    epochs=50,
    batch_size=64,
    callbacks=training_callbacks,
    verbose=1,
)

# --- PLOTS AND EVALUATION ---
from tensorflow.keras.models import load_model

# Accuracy per epoch on the training and held-out data.
plt.figure(figsize=(10,4))
plt.plot(history.history['accuracy'], label='Eğitim')
plt.plot(history.history['val_accuracy'], label='Test')
plt.title(f"Model Başarısı (Veri Sayısı: {len(df)})")
plt.legend()
plt.savefig("basari_grafigi.png")
plt.show()

# Evaluate the model file that was actually written to disk: the checkpoint
# keeps the epoch with the best val_accuracy, while EarlyStopping restores the
# epoch with the best val_loss into `model`, so the in-memory weights are not
# necessarily the ones serve.py will load.
best_model = load_model(MODEL_SAVE_PATH)
y_pred = np.argmax(best_model.predict(X_test_cnn), axis=1)
y_true = np.argmax(y_test, axis=1)

# Confusion matrix: rows are true classes, columns are predicted classes.
cm = confusion_matrix(y_true, y_pred)
plt.figure(figsize=(10,8))
sns.heatmap(cm, annot=True, fmt='d', xticklabels=le.classes_, yticklabels=le.classes_, cmap='Blues')
plt.savefig("confusion_matrix.png")
plt.show()

Writing train.py


In [4]:
# serve.py (SCALER EKLENDİ)

%%writefile serve.py

import gradio as gr
import librosa
import numpy as np
import tensorflow as tf
import joblib # Scaler'ı yüklemek için

# Artifacts produced by train.py.
MODEL_PATH = "duygu_modeli.h5"
LABELS_PATH = "etiketler.npy"
SCALER_PATH = "scaler.save"

print("Sistem yükleniyor...")
model = tf.keras.models.load_model(MODEL_PATH)
# The labels file is a plain array of strings (LabelEncoder.classes_ saved with
# np.save), so it loads without allow_pickle; leaving pickling disabled means a
# tampered .npy file cannot execute code on load.
etiketler = np.load(LABELS_PATH)
# joblib.load unpickles its input - only point this at a scaler file you
# produced yourself. It restores the StandardScaler fitted during training.
scaler = joblib.load(SCALER_PATH)

def tahmin_et(ses_dosyasi):
    """Predict the emotion label for one audio file.

    Args:
        ses_dosyasi: Path to the audio file (the Gradio input is configured
            with ``type="filepath"``), or ``None`` when nothing was uploaded.

    Returns:
        A message string: the upper-cased predicted label on success, or a
        short explanation when no file was given, the file could not be
        decoded, or it contains no samples.
    """
    if ses_dosyasi is None:
        return "Lütfen ses dosyası yükleyin."

    # A corrupt or unsupported upload should produce a readable message in the
    # UI rather than an exception inside the Gradio callback.
    try:
        audio, sample_rate = librosa.load(ses_dosyasi, res_type='kaiser_fast')
    except Exception as exc:
        return f"Ses dosyası okunamadı: {exc}"

    if audio.size == 0:
        return "Ses dosyası boş görünüyor."

    # Same features as in training: 40 MFCCs averaged over time.
    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
    mfccs_mean = np.mean(mfccs.T, axis=0)

    # Apply the scaler saved by train.py; reshape(1, -1) because this is a
    # single sample.
    mfccs_std = scaler.transform(mfccs_mean.reshape(1, -1))

    # Shape expected by the model: (batch, 40, 1).
    veri = mfccs_std.reshape(1, 40, 1)

    # verbose=0 keeps the per-request progress bar out of the server log.
    olasiliklar = model.predict(veri, verbose=0)
    tahmin_index = np.argmax(olasiliklar, axis=1)[0]
    sonuc = str(etiketler[tahmin_index])

    return f"Sonuç: {sonuc.upper()}"

# Gradio UI: one audio input handed to tahmin_et as a file path, plain-text output.
ses_girisi = gr.Audio(type="filepath", label="Test Et")

interface = gr.Interface(
    title="Hibrit DeepTone Analizi",
    fn=tahmin_et,
    inputs=ses_girisi,
    outputs="text",
)

if __name__ == "__main__":
    # share=True asks Gradio for a public share link in addition to the local server.
    interface.launch(share=True)

Writing serve.py


In [5]:
!python train.py

2025-12-26 09:48:10.427926: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1766742490.473761     302 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1766742490.494437     302 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1766742490.561922     302 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1766742490.561956     302 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1766742490.561961     302 computation_placer.cc:177] computation placer alr

In [6]:
!python serve.py

2025-12-26 09:57:52.997963: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1766743073.042570    3527 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1766743073.062024    3527 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1766743073.086165    3527 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1766743073.086204    3527 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1766743073.086213    3527 computation_placer.cc:177] computation placer alr