In [None]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.
import kagglehub
# Download the dataset through kagglehub and keep whatever the call returns
# (presumably the local download directory, going by the variable name).
# NOTE(review): the cells below read from a hard-coded /kaggle/input/... path
# rather than from this variable - confirm that is intended.
shoaib2580_truthfull_path = kagglehub.dataset_download(
    'shoaib2580/truthfull'
)

print('Data source import complete.')


In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the input directory tree and print the full path of every file found.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import os
from moviepy.editor import VideoFileClip

# Source folders holding the video clips (read-only input)
base_path = "/kaggle/input/truthfull/Real-life_Deception_Detection_2016/Clips"
deceptive_path, truthful_path = (
    os.path.join(base_path, label) for label in ("Deceptive", "Truthful")
)

# Destination folders for the extracted audio (writable working directory)
output_base = "/kaggle/working/audio"
deceptive_output, truthful_output = (
    os.path.join(output_base, label) for label in ("Deceptive", "Truthful")
)

# Make sure both destination folders exist before extraction starts
for folder in (deceptive_output, truthful_output):
    os.makedirs(folder, exist_ok=True)

def extract_audio(video_folder, output_folder):
    """Extract the audio track of every MP4 in ``video_folder`` to a WAV file.

    Each ``<name>.mp4`` found directly inside ``video_folder`` (extension
    matched case-insensitively) is opened with ``VideoFileClip`` and its audio
    is written to ``<name>.wav`` inside ``output_folder``. One success or
    failure line is printed per file; a failure on one file does not stop the
    remaining files from being processed.

    Args:
        video_folder: Directory containing the source video files.
        output_folder: Directory that receives the WAV files. It is not
            created here, so it must already exist.

    Returns:
        None.
    """
    # Sorted so the log order is deterministic (os.listdir order is arbitrary).
    for filename in sorted(os.listdir(video_folder)):
        stem, extension = os.path.splitext(filename)
        if extension.lower() != ".mp4":
            continue

        video_path = os.path.join(video_folder, filename)
        # Swap only the extension; str.replace would rewrite every ".mp4"
        # occurring anywhere in the name.
        audio_path = os.path.join(output_folder, stem + ".wav")
        try:
            clip = VideoFileClip(video_path)
            try:
                if clip.audio is None:
                    raise ValueError("video has no audio track")
                clip.audio.write_audiofile(audio_path, verbose=False, logger=None)
            finally:
                # Always close the clip so the resources it holds open are
                # released even when writing fails.
                clip.close()
            print(f"✅ Extracted: {filename}")
        except Exception as e:
            print(f"❌ Failed to extract {filename}: {e}")

# Run the extraction for the Deceptive folder first, then the Truthful one
for source_dir, target_dir in (
    (deceptive_path, deceptive_output),
    (truthful_path, truthful_output),
):
    extract_audio(source_dir, target_dir)


In [None]:
from IPython.display import Audio, display

# Example: listen to one of the extracted Truthful clips
audio_path = "/kaggle/working/audio/Truthful/trial_truth_002.wav"
sample_player = Audio(audio_path)
display(sample_player)


# Normalization

In [None]:
import os
import librosa
import soundfile as sf

# Folders holding the extracted (un-normalized) audio
input_base = "/kaggle/working/audio"
deceptive_folder, truthful_folder = (
    os.path.join(input_base, label) for label in ("Deceptive", "Truthful")
)

# Folders that receive the processed copies; created if missing
processed_base = "/kaggle/working/processed_audio"
deceptive_out, truthful_out = (
    os.path.join(processed_base, label) for label in ("Deceptive", "Truthful")
)
for folder in (processed_base, deceptive_out, truthful_out):
    os.makedirs(folder, exist_ok=True)

def preprocess_audio(input_path, output_path, sr=16000):
    """Load one audio file, normalize it and write the result to disk.

    The file is read with ``librosa.load`` (requesting sample rate ``sr`` and
    ``mono=True``), passed through ``librosa.util.normalize`` with its default
    settings, and saved to ``output_path`` with ``soundfile.write``.

    Args:
        input_path: Path of the audio file to read.
        output_path: Path the processed audio is written to.
        sr: Sample rate passed to both the loader and the writer.

    Returns:
        True when the file was written; False when any step raised, in which
        case the error is printed instead of propagated.
    """
    try:
        waveform, _loaded_rate = librosa.load(input_path, sr=sr, mono=True)
        sf.write(output_path, librosa.util.normalize(waveform), sr)
    except Exception as e:
        print(f"❌ Error: {e} for {input_path}")
        return False
    else:
        return True

def process_folder(input_folder, output_folder):
    """Run ``preprocess_audio`` on every ``.wav`` file in ``input_folder``.

    Each output keeps the input's file name and is placed in
    ``output_folder``. A confirmation line is printed for every file that
    ``preprocess_audio`` reports as successful.

    Args:
        input_folder: Directory scanned (non-recursively) for ``.wav`` files.
        output_folder: Directory the processed files are written to.

    Returns:
        None.
    """
    wav_files = [name for name in os.listdir(input_folder) if name.endswith(".wav")]
    for file in wav_files:
        source = os.path.join(input_folder, file)
        target = os.path.join(output_folder, file)
        if not preprocess_audio(source, target):
            continue
        print(f"✅ Processed: {file}")

# Preprocess both classes, announcing each folder before it is handled
for banner, source_dir, target_dir in (
    ("🔄 Processing Deceptive audio...", deceptive_folder, deceptive_out),
    ("🔄 Processing Truthful audio...", truthful_folder, truthful_out),
):
    print(banner)
    process_folder(source_dir, target_dir)


In [None]:
import os
import librosa
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Folders the MFCC features are read from.
# NOTE(review): this is the raw extracted audio, not the
# /kaggle/working/processed_audio output written by the normalization cell -
# confirm that is intended.
base_audio = "/kaggle/working/audio"
deceptive_audio, truthful_audio = (
    os.path.join(base_audio, label) for label in ("Deceptive", "Truthful")
)

def extract_mfcc(file_path, n_mfcc=13):
    """Return the mean MFCC vector of an audio file.

    The file is loaded with ``librosa.load(..., sr=None)``, ``n_mfcc``
    coefficients are computed with ``librosa.feature.mfcc``, and the
    transposed coefficient matrix is averaged along its first axis
    (presumably the frame axis, giving one value per coefficient).

    Args:
        file_path: Path of the audio file to analyse.
        n_mfcc: Number of MFCC coefficients requested from librosa.

    Returns:
        The averaged coefficients as returned by ``np.mean``.
    """
    signal, rate = librosa.load(file_path, sr=None)
    coefficients = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=n_mfcc)
    per_frame = coefficients.T
    return np.mean(per_frame, axis=0)

# Build the feature matrix: one mean-MFCC vector per clip, labelled by folder
data = []
labels = []

# Label convention: 1 = deceptive (lie), 0 = truthful
for folder, label in ((deceptive_audio, 1), (truthful_audio, 0)):
    # os.listdir order is arbitrary; sorting fixes the row order so the seeded
    # split below selects the same clips on every run.
    for file in sorted(os.listdir(folder)):
        if file.endswith(".wav"):
            path = os.path.join(folder, file)
            mfcc_feat = extract_mfcc(path)
            data.append(mfcc_feat)
            labels.append(label)

# Convert to NumPy arrays
X = np.array(data)
y = np.array(labels)

# Train/test split; stratified so both classes keep their proportions in the
# small held-out set
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Train a classifier (seeded so repeated runs give the same forest)
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)

# Evaluation
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))


In [None]:
import os
import librosa
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import classification_report

# Folders the MFCC features are read from.
# NOTE(review): this is the raw extracted audio, not the
# /kaggle/working/processed_audio output written by the normalization cell -
# confirm that is intended.
base_audio = "/kaggle/working/audio"
deceptive_audio, truthful_audio = (
    os.path.join(base_audio, label) for label in ("Deceptive", "Truthful")
)

# MFCC feature extraction.
# NOTE(review): this repeats the extract_mfcc defined in the RandomForest cell
# above and shadows it once this cell runs.
def extract_mfcc(file_path, n_mfcc=13):
    """Return the mean MFCC vector of an audio file.

    The file is loaded with ``librosa.load(..., sr=None)``, ``n_mfcc``
    coefficients are computed with ``librosa.feature.mfcc``, and the
    transposed coefficient matrix is averaged along its first axis
    (presumably the frame axis, giving one value per coefficient).

    Args:
        file_path: Path of the audio file to analyse.
        n_mfcc: Number of MFCC coefficients requested from librosa.

    Returns:
        The averaged coefficients as returned by ``np.mean``.
    """
    signal, rate = librosa.load(file_path, sr=None)
    coefficients = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=n_mfcc)
    per_frame = coefficients.T
    return np.mean(per_frame, axis=0)

# Build the feature matrix: one mean-MFCC vector per clip, labelled by folder
data = []
labels = []

# Label convention: 1 = deceptive (lie), 0 = truthful
for folder, label in ((deceptive_audio, 1), (truthful_audio, 0)):
    # os.listdir order is arbitrary; sorting fixes the row order so the seeded
    # split below selects the same clips on every run.
    for file in sorted(os.listdir(folder)):
        if file.endswith(".wav"):
            path = os.path.join(folder, file)
            mfcc_feat = extract_mfcc(path)
            data.append(mfcc_feat)
            labels.append(label)

# Convert to NumPy arrays
X = np.array(data)
y = np.array(labels)

# Train/test split; stratified so both classes keep their proportions in the
# small held-out set
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Train an XGBoost classifier. The task is binary (labels 0/1), so the binary
# 'logloss' metric is used; 'mlogloss' is the multiclass variant. Seeded for
# repeatable results.
clf = XGBClassifier(eval_metric='logloss', use_label_encoder=False, random_state=42)
clf.fit(X_train, y_train)

# Evaluation
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))


# textual