In [1]:
pip install tensorflow

^C
Note: you may need to restart the kernel to use updated packages.
Defaulting to user installation because normal site-packages is not writeable
Collecting tensorflow
  Downloading tensorflow-2.19.0-cp312-cp312-win_amd64.whl.metadata (4.1 kB)
Collecting absl-py>=1.0.0 (from tensorflow)
  Downloading absl_py-2.2.2-py3-none-any.whl.metadata (2.6 kB)
Collecting astunparse>=1.6.0 (from tensorflow)
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=24.3.25 (from tensorflow)
  Downloading flatbuffers-25.2.10-py2.py3-none-any.whl.metadata (875 bytes)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow)
  Downloading gast-0.6.0-py3-none-any.whl.metadata (1.3 kB)
Collecting google-pasta>=0.1.1 (from tensorflow)
  Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting libclang>=13.0.0 (from tensorflow)
  Downloading libclang-18.1.1-py2.py3-none-win_amd64.whl.metadata (5.3 kB)
Collecting opt-einsum>=2.3.2 (from tens



In [2]:
import os
import glob
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam

# --- GPU setup: enable memory growth so TF doesn’t grab all GPU RAM ---
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
print("Num GPUs Available:", len(gpus))

def load_magnitude_data(base_dir):
    """
    Scans base_dir/{fake,real} for CSVs, reads only the 'magnitude' column,
    and returns (X, y) where y=0 for fake, 1 for real.
    """
    X, y = [], []
    for label, cls in enumerate(['fake', 'real']):
        cls_path = os.path.join(base_dir, cls)
        for csv_file in glob.glob(os.path.join(cls_path, '*.csv')):
            df = pd.read_csv(csv_file, usecols=['magnitude'])
            X.append(df['magnitude'].values)
            y.append(label)
    return np.array(X), np.array(y)

def build_model(input_dim):
    model = Sequential([
        Dense(128, activation='relu', input_shape=(input_dim,)),
        Dropout(0.5),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ])
    model.compile(
        optimizer=Adam(1e-3),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )
    return model

if __name__ == "__main__":
    # --- Load data ---
    X_train, y_train = load_magnitude_data('./for-2sec/for-2seconds/training')
    X_val,   y_val   = load_magnitude_data('./for-2sec/for-2seconds/validation')
    X_test,  y_test  = load_magnitude_data('./for-2sec/for-2seconds/testing')

    # --- Scale features ---
    scaler  = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_val   = scaler.transform(X_val)
    X_test  = scaler.transform(X_test)

    # --- Build & summarize ---
    model = build_model(X_train.shape[1])
    model.summary()

    # --- Train on GPU if available ---
    model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=20,
        batch_size=32
    )

    # --- Evaluate ---
    loss, acc = model.evaluate(X_test, y_test, verbose=0)
    print(f"Test Loss: {loss:.4f}, Test Accuracy: {acc:.4f}")

    # --- Save ---
    model.save('voice_magnitude_classifier.h5')
    print("Saved model to voice_magnitude_classifier.h5")

Num GPUs Available: 0


KeyboardInterrupt: 

In [5]:
import os
import glob
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam

# --- GPU setup: enable memory growth so TF doesn’t grab all GPU RAM ---
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
print("Num GPUs Available:", len(gpus))

def load_magnitude_data(base_dir):
    """
    Scans base_dir/{fake,real} for CSVs, reads only the 'magnitude' column,
    and returns (X, y) where y=0 for fake, 1 for real.
    """
    X, y = [], []
    counts = {'fake': 0, 'real': 0}
    for label, cls in enumerate(['fake', 'real']):
        cls_path = os.path.join(base_dir, cls)
        print(f"→ Scanning {cls_path!r} for .csv files...")
        for csv_file in glob.glob(os.path.join(cls_path, '*.csv')):
            df = pd.read_csv(csv_file, usecols=['magnitude'])
            X.append(df['magnitude'].values)
            y.append(label)
            counts[cls] += 1
        print(f"   Loaded {counts[cls]} '{cls}' samples")
    X = np.array(X)
    y = np.array(y)
    print(f"Total loaded from {base_dir!r}: {len(X)} samples\n")
    return X, y

def build_model(input_dim):
    model = Sequential([
        Dense(1024, activation='relu', input_shape=(input_dim,)),
        Dropout(0.5),
        Dense(256, activation='relu'),
        Dropout(0.5),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ])
    model.compile(
        optimizer=Adam(1e-3),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )
    return model

if __name__ == "__main__":
    # --- Load data ---
    print("=== LOADING TRAINING DATA ===")
    X_train, y_train = load_magnitude_data('./for-2sec/for-2seconds/training')
    print("=== LOADING VALIDATION DATA ===")
    X_val,   y_val   = load_magnitude_data('./for-2sec/for-2seconds/validation')
    print("=== LOADING TEST DATA ===")
    X_test,  y_test  = load_magnitude_data('./for-2sec/for-2seconds/testing')

    # --- Inspect shapes before scaling ---
    print(f"Shapes before scaling: X_train={X_train.shape}, X_val={X_val.shape}, X_test={X_test.shape}\n")

    # --- Scale features ---
    print("=== SCALING FEATURES ===")
    scaler  = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_val   = scaler.transform(X_val)
    X_test  = scaler.transform(X_test)
    print("Scaling complete.\n")

    # --- Build & summarize model ---
    print("=== BUILDING MODEL ===")
    model = build_model(X_train.shape[1])
    model.summary()
    print()

    # --- Train on GPU if available ---
    print("=== STARTING TRAINING ===")
    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=20,
        batch_size=32,
        verbose=1  # show per-epoch progress bar
    )

    # --- Evaluate ---
    print("\n=== EVALUATING ON TEST SET ===")
    loss, acc = model.evaluate(X_test, y_test, verbose=1)
    print(f"Test Loss: {loss:.4f}, Test Accuracy: {acc:.4f}\n")

    # --- Save ---
    print("=== SAVING MODEL ===")
    model.save('voice_magnitude_classifier.h5')
    print("Model saved to voice_magnitude_classifier.h5")

Num GPUs Available: 0
=== LOADING TRAINING DATA ===
→ Scanning './for-2sec/for-2seconds/training\\fake' for .csv files...
   Loaded 6978 'fake' samples
→ Scanning './for-2sec/for-2seconds/training\\real' for .csv files...
   Loaded 6978 'real' samples
Total loaded from './for-2sec/for-2seconds/training': 13956 samples

=== LOADING VALIDATION DATA ===
→ Scanning './for-2sec/for-2seconds/validation\\fake' for .csv files...
   Loaded 1413 'fake' samples
→ Scanning './for-2sec/for-2seconds/validation\\real' for .csv files...
   Loaded 1413 'real' samples
Total loaded from './for-2sec/for-2seconds/validation': 2826 samples

=== LOADING TEST DATA ===
→ Scanning './for-2sec/for-2seconds/testing\\fake' for .csv files...
   Loaded 544 'fake' samples
→ Scanning './for-2sec/for-2seconds/testing\\real' for .csv files...
   Loaded 544 'real' samples
Total loaded from './for-2sec/for-2seconds/testing': 1088 samples

Shapes before scaling: X_train=(13956, 16000), X_val=(2826, 16000), X_test=(1088, 16

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



=== STARTING TRAINING ===
Epoch 1/20
[1m437/437[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 54ms/step - accuracy: 0.6109 - loss: 3.6287 - val_accuracy: 0.7972 - val_loss: 0.5607
Epoch 2/20
[1m437/437[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 52ms/step - accuracy: 0.7309 - loss: 0.5971 - val_accuracy: 0.8638 - val_loss: 0.3894
Epoch 3/20
[1m437/437[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 53ms/step - accuracy: 0.8138 - loss: 0.4478 - val_accuracy: 0.8878 - val_loss: 0.2767
Epoch 4/20
[1m437/437[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 53ms/step - accuracy: 0.8731 - loss: 0.3299 - val_accuracy: 0.9321 - val_loss: 0.2023
Epoch 5/20
[1m437/437[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 55ms/step - accuracy: 0.8987 - loss: 0.2804 - val_accuracy: 0.9009 - val_loss: 0.2263
Epoch 6/20
[1m437/437[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 55ms/step - accuracy: 0.9112 - loss: 0.2415 - val_accuracy: 0.9105 - val_lo



Test Loss: 0.5729, Test Accuracy: 0.8309

=== SAVING MODEL ===
Model saved to voice_magnitude_classifier.h5


TypeError: Model.save() missing 1 required positional argument: 'filepath'

In [3]:
import os
import glob
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    classification_report,
    confusion_matrix
)
from tensorflow.keras.models import load_model

def load_magnitude_data(base_dir):
    X, y = [], []
    for label, cls in enumerate(['fake', 'real']):
        cls_path = os.path.join(base_dir, cls)
        for csv_file in glob.glob(os.path.join(cls_path, '*.csv')):
            df = pd.read_csv(csv_file, usecols=['magnitude'])
            X.append(df['magnitude'].values)
            y.append(label)
    return np.array(X), np.array(y)

if __name__ == "__main__":
    # 1. Load test data (make sure this matches how you processed it before)
    X_test, y_test = load_magnitude_data('./for-2sec/for-2seconds/testing')

    # 2. Reload the same scaler you used during training
    scaler = StandardScaler()
    # If you saved your scaler via joblib/pickle, load it here; otherwise re-fit on train.
    # For example:
    #   from joblib import load
    #   scaler = load('scaler.joblib')
    #
    # Here we’ll assume you refit it on train again:
    X_train_dummy, _ = load_magnitude_data('./for-2sec/for-2seconds/training')
    scaler.fit(X_train_dummy)
    X_test = scaler.transform(X_test)

    # 3. Load your trained model
    model = load_model('voice_magnitude_classifier.h5')

    # 4. Predict probabilities and classes
    y_prob = model.predict(X_test, verbose=0).ravel()
    y_pred = (y_prob >= 0.5).astype(int)

    # 5. Compute metrics
    acc   = accuracy_score(y_test, y_pred)
    prec  = precision_score(y_test, y_pred)
    rec   = recall_score(y_test, y_pred)
    f1    = f1_score(y_test, y_pred)
    cm    = confusion_matrix(y_test, y_pred)
    report = classification_report(y_test, y_pred, target_names=['fake','real'])

    # 6. Print results
    print(f"Accuracy : {acc:.4f}")
    print(f"Precision: {prec:.4f}")
    print(f"Recall   : {rec:.4f}")
    print(f"F1-Score : {f1:.4f}\n")
    print("Confusion Matrix:")
    print(cm, "\n")
    print("Full Classification Report:")
    print(report)


KeyboardInterrupt: 

In [4]:
#!/usr/bin/env python3
import os
import glob
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    classification_report,
    confusion_matrix
)
from tensorflow.keras.models import load_model

def load_magnitude_data(base_dir):
    X, y = [], []
    counts = {'fake': 0, 'real': 0}
    for label, cls in enumerate(['fake', 'real']):
        cls_path = os.path.join(base_dir, cls)
        print(f">>> Scanning for CSVs in: {cls_path}")
        for csv_file in glob.glob(os.path.join(cls_path, '*.csv')):
            df = pd.read_csv(csv_file, usecols=['magnitude'])
            X.append(df['magnitude'].values)
            y.append(label)
            counts[cls] += 1
        print(f"    Loaded {counts[cls]} samples of class '{cls}'")
    X = np.array(X)
    y = np.array(y)
    print(f"<<< Finished loading from {base_dir}: total samples = {len(y)}\n")
    return X, y

if __name__ == "__main__":
    # 1. Load test data 
    print("=== STEP 1: LOAD TEST DATA ===")
    X_test, y_test = load_magnitude_data('./for-2sec/for-2seconds/testing')
    print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}\n")

    # 2. Reload and fit scaler on training data
    print("=== STEP 2: FIT SCALER ON TRAINING DATA ===")
    scaler = StandardScaler()
    X_train_dummy, _ = load_magnitude_data('./for-2sec/for-2seconds/training')
    print(f"X_train_dummy shape: {X_train_dummy.shape}")
    scaler.fit(X_train_dummy)
    print("Scaler mean (first 5):", np.round(scaler.mean_[:5], 5))
    X_test = scaler.transform(X_test)
    print("First row of X_test after scaling (first 5 features):", np.round(X_test[0, :5], 5), "\n")

    # 3. Load your trained model
    print("=== STEP 3: LOAD MODEL ===")
    model = load_model('voice_magnitude_classifier.h5')
    print(model.summary(), "\n")

    # 4. Predict probabilities and classes
    print("=== STEP 4: PREDICT ON TEST SET ===")
    y_prob = model.predict(X_test, verbose=1).ravel()
    y_pred = (y_prob >= 0.5).astype(int)
    print("Sample probabilities (first 10):", np.round(y_prob[:10], 4))
    print("Sample predictions  (first 10):", y_pred[:10], "\n")

    # 5. Compute metrics
    print("=== STEP 5: COMPUTE METRICS ===")
    acc   = accuracy_score(y_test, y_pred)
    prec  = precision_score(y_test, y_pred)
    rec   = recall_score(y_test, y_pred)
    f1    = f1_score(y_test, y_pred)
    cm    = confusion_matrix(y_test, y_pred)
    report = classification_report(y_test, y_pred, target_names=['fake','real'])

    # 6. Print results
    print(f"Accuracy  : {acc:.4f}")
    print(f"Precision : {prec:.4f}")
    print(f"Recall    : {rec:.4f}")
    print(f"F1-Score  : {f1:.4f}\n")

    print("Confusion Matrix:")
    print(cm, "\n")

    print("Full Classification Report:")
    print(report)

=== STEP 1: LOAD TEST DATA ===
>>> Scanning for CSVs in: ./for-2sec/for-2seconds/testing\fake
    Loaded 544 samples of class 'fake'
>>> Scanning for CSVs in: ./for-2sec/for-2seconds/testing\real
    Loaded 544 samples of class 'real'
<<< Finished loading from ./for-2sec/for-2seconds/testing: total samples = 1088

X_test shape: (1088, 16000), y_test shape: (1088,)

=== STEP 2: FIT SCALER ON TRAINING DATA ===
>>> Scanning for CSVs in: ./for-2sec/for-2seconds/training\fake
    Loaded 6978 samples of class 'fake'
>>> Scanning for CSVs in: ./for-2sec/for-2seconds/training\real
    Loaded 6978 samples of class 'real'
<<< Finished loading from ./for-2sec/for-2seconds/training: total samples = 13956

X_train_dummy shape: (13956, 16000)
Scaler mean (first 5): [7922714.51691  852276.64066  795753.57982  790667.69626  804376.5369 ]
First row of X_test after scaling (first 5 features): [-0.20899 -0.28763 -0.28234 -0.27971 -0.22428] 

=== STEP 3: LOAD MODEL ===




None 

=== STEP 4: PREDICT ON TEST SET ===
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
Sample probabilities (first 10): [0.000e+00 0.000e+00 0.000e+00 0.000e+00 1.000e-04 9.563e-01 0.000e+00
 2.246e-01 5.200e-03 5.035e-01]
Sample predictions  (first 10): [0 0 0 0 0 1 0 0 0 1] 

=== STEP 5: COMPUTE METRICS ===
Accuracy  : 0.8309
Precision : 0.7639
Recall    : 0.9577
F1-Score  : 0.8499

Confusion Matrix:
[[383 161]
 [ 23 521]] 

Full Classification Report:
              precision    recall  f1-score   support

        fake       0.94      0.70      0.81       544
        real       0.76      0.96      0.85       544

    accuracy                           0.83      1088
   macro avg       0.85      0.83      0.83      1088
weighted avg       0.85      0.83      0.83      1088

