In [4]:
import os
import glob
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam

# --- GPU setup: enable memory growth so TF doesn’t grab all GPU RAM ---
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
print("Num GPUs Available:", len(gpus))

def load_magnitude_data(base_dir):
    """
    Scans base_dir/{fake,real} for CSVs, reads only the 'magnitude' column,
    and returns (X, y) where y=0 for fake, 1 for real.
    """
    X, y = [], []
    counts = {'fake': 0, 'real': 0}
    for label, cls in enumerate(['fake', 'real']):
        cls_path = os.path.join(base_dir, cls)
        print(f"→ Scanning {cls_path!r} for .csv files...")
        for csv_file in glob.glob(os.path.join(cls_path, '*.csv')):
            df = pd.read_csv(csv_file, usecols=['magnitude'])
            X.append(df['magnitude'].values)
            y.append(label)
            counts[cls] += 1
        print(f"   Loaded {counts[cls]} '{cls}' samples")
    X = np.array(X)
    y = np.array(y)
    print(f"Total loaded from {base_dir!r}: {len(X)} samples\n")
    return X, y

def build_model(input_dim):
    model = Sequential([
        Dense(1024, activation='relu', input_shape=(input_dim,)),
        Dropout(0.5),
        Dense(256, activation='relu'),
        Dropout(0.5),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ])
    model.compile(
        optimizer=Adam(1e-3),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )
    return model

if __name__ == "__main__":
    # --- Load data ---
    print("=== LOADING TRAINING DATA ===")
    X_train, y_train = load_magnitude_data('./FFT_Audio/for-norm/for-norm/training/')
    print("=== LOADING VALIDATION DATA ===")
    X_val,   y_val   = load_magnitude_data('./FFT_Audio//for-norm/for-norm/validation/')
    print("=== LOADING TEST DATA ===")
    X_test,  y_test  = load_magnitude_data('./FFT_Audio//for-norm/for-norm/testing/')

    # --- Inspect shapes before scaling ---
    print(f"Shapes before scaling: X_train={X_train.shape}, X_val={X_val.shape}, X_test={X_test.shape}\n")

    # --- Scale features ---
    print("=== SCALING FEATURES ===")
    scaler  = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_val   = scaler.transform(X_val)
    X_test  = scaler.transform(X_test)
    print("Scaling complete.\n")

    # --- Build & summarize model ---
    print("=== BUILDING MODEL ===")
    model = build_model(X_train.shape[1])
    model.summary()
    print()

    # --- Train on GPU if available ---
    print("=== STARTING TRAINING ===")
    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=20,
        batch_size=32,
        verbose=1  # show per-epoch progress bar
    )

    # --- Evaluate ---
    print("\n=== EVALUATING ON TEST SET ===")
    loss, acc = model.evaluate(X_test, y_test, verbose=1)
    print(f"Test Loss: {loss:.4f}, Test Accuracy: {acc:.4f}\n")

    # --- Save ---
    print("=== SAVING MODEL ===")
    model.save('voice_magnitude_classifier.h5')
    print("Model saved to voice_magnitude_classifier.h5")

Num GPUs Available: 1
=== LOADING TRAINING DATA ===
→ Scanning './FFT_Audio/for-norm/for-norm/training/fake' for .csv files...


KeyboardInterrupt: 

In [1]:
import os
import glob
import numpy as np
import pandas as pd
import tensorflow as tf
from concurrent.futures import ThreadPoolExecutor, as_completed
from sklearn.preprocessing import StandardScaler
from tensorflow.keras import mixed_precision
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam

# --- GPU setup: memory growth + mixed precision ---
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
print("Num GPUs Available:", len(gpus))

mixed_precision.set_global_policy('mixed_float16')
print("Mixed precision policy:", mixed_precision.global_policy().name)

def _read_csv_file(path_label):
    path, label = path_label
    df = pd.read_csv(path, usecols=['magnitude'])
    vec = df['magnitude'].values.astype(np.float32)
    return vec, label

def load_magnitude_data(base_dir):
    # Find alle filer og tæll hvor mange
    files_and_labels = []
    for label, cls in enumerate(['fake', 'real']):
        cls_path = os.path.join(base_dir, cls)
        for csv_file in glob.glob(os.path.join(cls_path, '*.csv')):
            files_and_labels.append((csv_file, label))

    total_files = len(files_and_labels)
    print(f"→ Found {total_files} CSV files in {base_dir!r}")

    X_list, y_list = [], []
    with ThreadPoolExecutor(max_workers=os.cpu_count()) as exe:
        futures = {exe.submit(_read_csv_file, pl): pl for pl in files_and_labels}
        for i, fut in enumerate(as_completed(futures), start=1):
            vec, lbl = fut.result()
            X_list.append(vec)
            y_list.append(lbl)
            # Du kan fjerne linjen herunder, hvis du ikke ønsker live-status:
            print(f"   Read file {i}/{total_files}")

    print(f"→ Completed reading {len(X_list)} files\n")

    # Byg arrays
    X = np.vstack([x[np.newaxis, :] for x in X_list])
    y = np.array(y_list, dtype=np.int32)
    return X, y

def build_model(input_dim):
    model = Sequential([
        Dense(1024, activation='relu', input_shape=(input_dim,)),
        Dropout(0.5),
        Dense(256, activation='relu'),
        Dropout(0.5),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid', dtype='float32'),
    ])
    model.compile(
        optimizer=Adam(1e-3),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )
    return model

2025-05-03 15:20:35.504389: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1746285635.564213    8212 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746285635.581293    8212 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1746285635.732859    8212 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1746285635.732887    8212 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1746285635.732889    8212 computation_placer.cc:177] computation placer alr

Num GPUs Available: 1
Mixed precision policy: mixed_float16


In [2]:
print("=== LOADING TRAINING DATA ===")
X_train, y_train = load_magnitude_data('./FFT_Audio/for-norm/for-norm/training/')
print("=== LOADING VALIDATION DATA ===")
X_val,   y_val   = load_magnitude_data('./FFT_Audio/for-norm/for-norm/validation/')
print("=== LOADING TEST DATA ===")
X_test,  y_test  = load_magnitude_data('./FFT_Audio/for-norm/for-norm/testing/')

=== LOADING TRAINING DATA ===
→ Found 53868 CSV files in './FFT_Audio/for-norm/for-norm/training/'
   Read file 1/53868
   Read file 2/53868
   Read file 3/53868
   Read file 4/53868
   Read file 5/53868
   Read file 6/53868
   Read file 7/53868
   Read file 8/53868
   Read file 9/53868
   Read file 10/53868
   Read file 11/53868
   Read file 12/53868
   Read file 13/53868
   Read file 14/53868
   Read file 15/53868
   Read file 16/53868
   Read file 17/53868
   Read file 18/53868
   Read file 19/53868
   Read file 20/53868
   Read file 21/53868
   Read file 22/53868
   Read file 23/53868
   Read file 24/53868
   Read file 25/53868
   Read file 26/53868
   Read file 27/53868
   Read file 28/53868
   Read file 29/53868
   Read file 30/53868
   Read file 31/53868
   Read file 32/53868
   Read file 33/53868
   Read file 34/53868
   Read file 35/53868
   Read file 36/53868
   Read file 37/53868
   Read file 38/53868
   Read file 39/53868
   Read file 40/53868
   Read file 41/53868
   Read 

In [1]:
print(f"Shapes before scaling: X_train={X_train.shape}, X_val={X_val.shape}, X_test={X_test.shape}\n")

print("=== SCALING FEATURES ===")
scaler  = StandardScaler()
X_train = scaler.fit_transform(X_train).astype(np.float32)
X_val   = scaler.transform(X_val).astype(np.float32)
X_test  = scaler.transform(X_test).astype(np.float32)
# Labels som int32 (eller float32 hvis du foretrækker)
# y_train = y_train.astype(np.int32)
# y_val   = y_val.astype(np.int32)
# y_test  = y_test.astype(np.int32)
print("Scaling + casting complete.\n")

NameError: name 'X_train' is not defined

In [None]:
strategy = tf.distribute.MirroredStrategy() if len(gpus) > 1 else tf.distribute.get_strategy()
with strategy.scope():
    model = build_model(X_train.shape[1])
model.summary()

batch_size = 32
train_ds = (tf.data.Dataset.from_tensor_slices((X_train, y_train))
            .shuffle(1024).batch(batch_size).prefetch(tf.data.AUTOTUNE))
val_ds   = (tf.data.Dataset.from_tensor_slices((X_val, y_val))
            .batch(batch_size).prefetch(tf.data.AUTOTUNE))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
I0000 00:00:1746286280.152147    8212 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 1768 MB memory:  -> device: 0, name: NVIDIA RTX A1000 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6


2025-05-03 15:31:35.622660: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 8610476592 exceeds 10% of free system memory.
2025-05-03 15:31:49.295080: W external/local_xla/xla/tsl/framework/bfc_allocator.cc:501] Allocator (GPU_0_bfc) ran out of memory trying to allocate 8.02GiB (rounded to 8610476800)requested by op _EagerConst
If the cause is memory fragmentation maybe the environment variable 'TF_GPU_ALLOCATOR=cuda_malloc_async' will improve the situation. 
Current allocation summary follows.
Current allocation summary follows.
2025-05-03 15:31:49.295116: I external/local_xla/xla/tsl/framework/bfc_allocator.cc:1058] BFCAllocator dump for GPU_0_bfc
2025-05-03 15:31:49.295128: I external/local_xla/xla/tsl/framework/bfc_allocator.cc:1065] Bin (256): 	Total Chunks: 23, Chunks in use: 23. 5.8KiB allocated for chunks. 5.8KiB in use in bin. 644B client-requested in use in bin.
2025-05-03 15:31:49.295133: I external/local_xla/xla/tsl/framework/bfc_allocator.cc:1

InternalError: Failed copying input tensor from /job:localhost/replica:0/task:0/device:CPU:0 to /job:localhost/replica:0/task:0/device:GPU:0 in order to run _EagerConst: Dst tensor is not initialized.

In [None]:

print("=== STARTING TRAINING ===")
history = model.fit(train_ds, validation_data=val_ds, epochs=20, verbose=1)

print("\n=== EVALUATING ON TEST SET ===")
test_ds = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(batch_size)
loss, acc = model.evaluate(test_ds, verbose=1)
print(f"Test Loss: {loss:.4f}, Test Accuracy: {acc:.4f}\n")

print("=== SAVING MODEL ===")
model.save('voice_magnitude_classifier.h5')
print("Model saved to voice_magnitude_classifier.h5")


In [4]:
import os
import glob
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam

# --- GPU setup: enable memory growth so TF doesn’t grab all GPU RAM ---
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
print("Num GPUs Available:", len(gpus))

from concurrent.futures import ThreadPoolExecutor, as_completed
import os
import glob
import numpy as np
import pandas as pd

def load_magnitude_data(base_dir):
    """
    Scans base_dir/{fake,real} for CSVs in parallel, reads only the 'magnitude' column,
    casts to float32 for GPU-venlighed, og returnerer (X, y) som NumPy-arrays.
    Printer hvor mange filer der er fundet og læst.
    """
    # 1) Find alle CSV-filer med label
    files_and_labels = []
    for label, cls in enumerate(['fake', 'real']):
        cls_path = os.path.join(base_dir, cls)
        files = glob.glob(os.path.join(cls_path, '*.csv'))
        print(f"→ Found {len(files)} files in '{cls_path}'")
        for f in files:
            files_and_labels.append((f, label))
    total_files = len(files_and_labels)
    print(f"→ Total files to read: {total_files}\n")

    # 2) Parallelt I/O: læs hver fil i sin egen tråd
    X_list, y_list = [], []
    with ThreadPoolExecutor(max_workers=os.cpu_count()) as executor:
        future_to_path = {
            executor.submit(pd.read_csv, path, usecols=['magnitude']): (path, lbl)
            for path, lbl in files_and_labels
        }
        for i, fut in enumerate(as_completed(future_to_path), start=1):
            path, lbl = future_to_path[fut]
            df = fut.result()
            vec = df['magnitude'].values.astype(np.float32)   # GPU-venlig dtype
            X_list.append(vec)
            y_list.append(lbl)
            # print(f"   [{i}/{total_files}] Read")

    # 3) Sammensæt til arrays
    X = np.vstack([x[np.newaxis, :] for x in X_list])
    y = np.array(y_list, dtype=np.int32)
    print(f"\n→ Completed loading {len(X)} samples ({np.sum(y==0)} fake, {np.sum(y==1)} real)\n")
    return X, y


def build_model(input_dim):
    model = Sequential([
        Dense(1024, activation='relu', input_shape=(input_dim,)),
        Dropout(0.5),
        Dense(256, activation='relu'),
        Dropout(0.5),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ])
    model.compile(
        optimizer=Adam(1e-3),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )
    return model

2025-05-03 19:13:11.797334: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1746299591.853524   31244 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746299591.872747   31244 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1746299592.024593   31244 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1746299592.024621   31244 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1746299592.024622   31244 computation_placer.cc:177] computation placer alr

Num GPUs Available: 1


In [5]:

# --- Load data ---
print("=== LOADING TRAINING DATA ===")
X_train, y_train = load_magnitude_data('./FFT_Audio/for-norm/for-norm/training')
print("=== LOADING VALIDATION DATA ===")
X_val,   y_val   = load_magnitude_data('./FFT_Audio/for-norm/for-norm/validation')
print("=== LOADING TEST DATA ===")
X_test,  y_test  = load_magnitude_data('./FFT_Audio/for-norm/for-norm/testing')

# --- Inspect shapes before scaling ---
print(f"Shapes before scaling: X_train={X_train.shape}, X_val={X_val.shape}, X_test={X_test.shape}\n")

=== LOADING TRAINING DATA ===
→ Found 26927 files in './FFT_Audio/for-norm/for-norm/training/fake'
→ Found 26941 files in './FFT_Audio/for-norm/for-norm/training/real'
→ Total files to read: 53868


→ Completed loading 53868 samples (26927 fake, 26941 real)

=== LOADING VALIDATION DATA ===
→ Found 5398 files in './FFT_Audio/for-norm/for-norm/validation/fake'
→ Found 5400 files in './FFT_Audio/for-norm/for-norm/validation/real'
→ Total files to read: 10798


→ Completed loading 10798 samples (5398 fake, 5400 real)

=== LOADING TEST DATA ===
→ Found 2370 files in './FFT_Audio/for-norm/for-norm/testing/fake'
→ Found 2264 files in './FFT_Audio/for-norm/for-norm/testing/real'
→ Total files to read: 4634


→ Completed loading 4634 samples (2370 fake, 2264 real)

Shapes before scaling: X_train=(53868, 39961), X_val=(10798, 39961), X_test=(4634, 39961)



In [None]:

# --- Scale features ---
# print("=== SCALING FEATURES ===")
# scaler  = StandardScaler()
# X_train = scaler.fit_transform(X_train)
# X_val   = scaler.transform(X_val)
# X_test  = scaler.transform(X_test)
# print("Scaling complete.\n")
print("=== SCALING FEATURES (float32 + in-place) ===")

# 1) Cast everything to float32 up front
X_train = X_train.astype(np.float32)
X_val   = X_val.astype(np.float32)
X_test  = X_test.astype(np.float32)

# 2) Instruct StandardScaler _not_ to make a copy
scaler = StandardScaler(copy=False)
X_train = scaler.fit_transform(X_train)
X_val   = scaler.transform(X_val)
X_test  = scaler.transform(X_test)

print("Scaling + casting complete.\n")

=== SCALING FEATURES (float32 + in-place) ===


: 

: 

: 

In [None]:

# --- Build & summarize model ---
print("=== BUILDING MODEL ===")
model = build_model(X_train.shape[1])
model.summary()
print()

In [None]:

# --- Train on GPU if available ---
print("=== STARTING TRAINING ===")
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=20,
    batch_size=32,
    verbose=1  # show per-epoch progress bar
)



In [None]:
# --- Evaluate ---
print("\n=== EVALUATING ON TEST SET ===")
loss, acc = model.evaluate(X_test, y_test, verbose=1)
print(f"Test Loss: {loss:.4f}, Test Accuracy: {acc:.4f}\n")



In [None]:
# --- Save ---
print("=== SAVING MODEL ===")
model.save('voice_magnitude_classifier.h5')
print("Model saved to voice_magnitude_classifier.h5")