# Importing libraries

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import shutil

# Extracting the dataset

In [None]:
# Extract the dataset archive into /kaggle/working, then list that directory
!unzip /kaggle/input/in-the-wild-dataset/download -d /kaggle/working
!ls /kaggle/working/


# Finding the metadata file and moving it out of the dataset folder

In [None]:
def move_file_outside_folder(folder_path, file_name, destination_path):
    """
    Moves a file out of a folder into a specified destination directory.

    Parameters:
    - folder_path (str): The path to the folder containing the file.
    - file_name (str): The name of the file to move.
    - destination_path (str): The path to the destination directory.

    Returns:
    - bool: True if the file was moved successfully, False otherwise
      (missing source, missing destination directory, or any other
      OS-level failure reported by shutil.move).
    """
    # Construct full paths (the destination keeps the original file name)
    source_path = os.path.join(folder_path, file_name)
    target_path = os.path.join(destination_path, file_name)

    try:
        shutil.move(source_path, target_path)
    except FileNotFoundError:
        # shutil.move raises this both for a missing source and for a
        # missing destination directory; report whichever actually applies.
        if os.path.lexists(source_path):
            print(f"Destination directory not found: {destination_path}")
        else:
            print(f"File not found: {source_path}")
        return False
    except OSError as exc:
        # Permission problems, destination parent is not a directory, etc.
        # The documented contract is "False otherwise", so do not propagate.
        print(f"Could not move {source_path} to {target_path}: {exc}")
        return False

    return True

# Example usage: pull meta.csv out of the extracted dataset folder
folder_path, file_name, destination_path = (
    '/kaggle/working/release_in_the_wild/',
    'meta.csv',
    '/kaggle/working',
)
success = move_file_outside_folder(folder_path, file_name, destination_path)

# Only report the move when the helper confirmed it
if success:
    print(f"File '{file_name}' moved successfully to '{destination_path}'")

# Create new modified metadata file
### The original metadata file has an extra column with the speaker's name; this step drops that column and renames the labels to `real` and `fake`

In [None]:
# Location of the metadata file moved out of the dataset folder
csv_path = '/kaggle/working/meta.csv'

# Load the metadata and discard the 'speaker' column in one step
df = pd.read_csv(csv_path).drop(columns=['speaker'])

# Rename the label values: 'spoof' becomes 'fake', 'bona-fide' becomes 'real'
df['label'] = df['label'].replace({'spoof': 'fake', 'bona-fide': 'real'})

# Write the result to a new CSV file, leaving out the DataFrame index
output_csv_path = '/kaggle/working/modified_meta.csv'
df.to_csv(output_csv_path, index=False)


# Moving the files into separate folders for each label

In [None]:
def process_files(folder_path, csv_path):
    """
    Moves every .wav file in a folder into a sub-folder named after its label.

    Parameters:
    - folder_path (str): The folder containing the .wav files; the label
      sub-folders are created inside it.
    - csv_path (str): Path to a CSV file with a 'file' column (file name
      including extension) and a 'label' column.

    Files that have no usable label in the CSV are reported and left where
    they are instead of aborting the whole run.
    """
    # Read CSV file into a DataFrame
    df = pd.read_csv(csv_path)

    # Build the file name -> label lookup once instead of scanning the
    # DataFrame for every file. On duplicate file names the first row wins.
    label_by_file = (
        df.drop_duplicates(subset='file')
        .set_index('file')['label']
        .to_dict()
    )

    skipped = 0

    # Iterate over sorted files
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith('.wav'):
            continue

        label = label_by_file.get(filename)
        if not isinstance(label, str):
            # Covers both "file not listed" (None) and an empty label cell (NaN)
            print(f"No label found for {filename} in {csv_path}; file left in place")
            skipped += 1
            continue

        # Create folder for label if it doesn't exist
        label_folder = os.path.join(folder_path, label)
        os.makedirs(label_folder, exist_ok=True)

        # Move file to corresponding label folder
        source_file = os.path.join(folder_path, filename)
        destination_file = os.path.join(label_folder, filename)
        shutil.move(source_file, destination_file)
        print(f"Moved {filename} to {label_folder}")

    if skipped:
        print(f"Skipped {skipped} file(s) without a label")
    print("Data split completed successfully!")

# Split the extracted audio into per-label folders using the rewritten metadata
csv_path = '/kaggle/working/modified_meta.csv'
folder_path = '/kaggle/working/release_in_the_wild'
process_files(folder_path=folder_path, csv_path=csv_path)