In [1]:
import os
import pandas as pd
import soundfile as sf
import numpy as np
import matplotlib.pyplot as plt

# Path to the CSV file

csv_file_path = "C:\\Users\\djc426\\Documents\\data\\KSWSFilesCombined_gibbons.csv"

# Output directory
output_base_path = "C:\\Users\\djc426\\Desktop\\KSWS_WA_trial\\gibbons\\segments\\"

# Set random seed for reproducibility
np.random.seed(3)

# Total detections limit
total_detections = 150

# Read the CSV into a DataFrame
KSWSFilesCombined = pd.read_csv(csv_file_path)

# Remove rows with 'nocall' in the 'Common.Name' column
KSWSFilesCombined = KSWSFilesCombined[KSWSFilesCombined["Common.Name"] != 'nocall']

# Normalize file paths to handle backslashes
KSWSFilesCombined["Begin.Path"] = KSWSFilesCombined["Begin.Path"].str.replace("//", "\\", regex=False)

# Unique classes
unique_classes = KSWSFilesCombined["Species.Code"].unique()

# Initialize an empty DataFrame for random selections
random_selections_df = pd.DataFrame()

# Process each unique species
for unique_class in unique_classes:
    
# Filter based on species
    sample_table_sub = KSWSFilesCombined[KSWSFilesCombined["Species.Code"] == unique_class]

# Determine number of detections to select
    n_detections = min(len(sample_table_sub), total_detections)

# Randomly sample rows
    sampled_rows = sample_table_sub.sample(n=n_detections, random_state=42)  # Set random_state for reproducibility

    for _, temp_row in sampled_rows.iterrows():
        try:
            # Load FLAC audio
            flac_path = temp_row["Begin.Path"]
            print(temp_row["Begin.Path"])
            data, samplerate = sf.read(flac_path)
            
            # Extract a short audio segment
            begin_time = temp_row["Begin.Time..s."]
            end_time = temp_row["End.Time..s."]
            start_sample = max(0, int((begin_time - 3) * samplerate))
            end_sample = min(len(data), int((end_time + 3) * samplerate))
            temp_wav_short = data[start_sample:end_sample]

            # Prepare output directories
            spectro_dir = os.path.join(output_base_path, unique_class, "Spectro")
            wav_dir = os.path.join(output_base_path, unique_class, "Wav")
            os.makedirs(spectro_dir, exist_ok=True)
            os.makedirs(wav_dir, exist_ok=True)

            # Generate filenames
            base_name = f"{temp_row['Confidence']}_{temp_row['Species.Code']}_{temp_row['Begin.Time..s.']}_{temp_row['End.Time..s.']}_{temp_row['TempName']}"
            spectro_filename = os.path.join(spectro_dir, f"{base_name}.jpg")
            wav_filename = os.path.join(wav_dir, f"{base_name}.flac")

            # Save the spectrogram
            plt.figure(figsize=(10, 4))
            plt.specgram(temp_wav_short, Fs=samplerate, NFFT=1024, noverlap=512, cmap="viridis")
            plt.axis('off')
            plt.savefig(spectro_filename, bbox_inches='tight', pad_inches=0, dpi=50)
            plt.close()

            # Save the short FLAC file
            sf.write(wav_filename, temp_wav_short, samplerate)

            # Append the row to the random selections DataFrame
            random_selections_df = pd.concat([random_selections_df, pd.DataFrame([temp_row])])

        except Exception as e:
            print(f"Error processing file {flac_path}: {e}")

# Save the random selections to a CSV file
random_selections_csv = os.path.join(output_base_path, "RandomSelectionsDF.csv")
random_selections_df.to_csv(random_selections_csv, index=False)

print(f"Processing complete. Random selections saved to {random_selections_csv}")


S:\projects\2024_WCS_Cambodia_S1139\2024_WCS_Cambodia_S1139\S1139_Dep01_FLAC\S1139KS01_032K_WA-T05_R1054-SD3833\S1139KS01_R1054-SD3833_20240513\S1139KS01_032K_WA-T05_R1054-SD3833_20240513_040003+0700.flac
S:\projects\2024_WCS_Cambodia_S1139\2024_WCS_Cambodia_S1139\S1139_Dep01_FLAC\S1139KS01_032K_WA-T01_R1443-SD3814\S1139KS01_R1443-SD3814_20240428\S1139KS01_032K_WA-T01_R1443-SD3814_20240428_160007+0700.flac
S:\projects\2024_WCS_Cambodia_S1139\2024_WCS_Cambodia_S1139\S1139_Dep01_FLAC\S1139KS01_032K_WA-T01_R1443-SD3814\S1139KS01_R1443-SD3814_20240428\S1139KS01_032K_WA-T01_R1443-SD3814_20240428_220009+0700.flac
S:\projects\2024_WCS_Cambodia_S1139\2024_WCS_Cambodia_S1139\S1139_Dep01_FLAC\S1139KS01_032K_WA-T04_R1453-SD3837\S1139KS01_R1453-SD3837_20240602\S1139KS01_032K_WA-T04_R1453-SD3837_20240602_130005+0700.flac
S:\projects\2024_WCS_Cambodia_S1139\2024_WCS_Cambodia_S1139\S1139_Dep01_FLAC\S1139KS01_032K_WA-T01_R1443-SD3814\S1139KS01_R1443-SD3814_20240807\S1139KS01_032K_WA-T01_R1443-SD3814_2

  Z = 10. * np.log10(spec)


S:\projects\2024_WCS_Cambodia_S1139\2024_WCS_Cambodia_S1139\S1139_Dep01_FLAC\S1139KS01_032K_WA-T05_R1054-SD3833\S1139KS01_R1054-SD3833_20240511\S1139KS01_032K_WA-T05_R1054-SD3833_20240511_120005+0700.flac
S:\projects\2024_WCS_Cambodia_S1139\2024_WCS_Cambodia_S1139\S1139_Dep01_FLAC\S1139KS01_032K_WA-T01_R1443-SD3814\S1139KS01_R1443-SD3814_20240428\S1139KS01_032K_WA-T01_R1443-SD3814_20240428_090004+0700.flac


  Z = 10. * np.log10(spec)


S:\projects\2024_WCS_Cambodia_S1139\2024_WCS_Cambodia_S1139\S1139_Dep01_FLAC\S1139KS01_032K_WA-T04_R1453-SD3837\S1139KS01_R1453-SD3837_20240602\S1139KS01_032K_WA-T04_R1453-SD3837_20240602_140006+0700.flac
S:\projects\2024_WCS_Cambodia_S1139\2024_WCS_Cambodia_S1139\S1139_Dep01_FLAC\S1139KS01_032K_WA-T08_R1460-SD3816\S1139KS01_R1460-SD3816_20240505\S1139KS01_032K_WA-T08_R1460-SD3816_20240505_160006+0700.flac
S:\projects\2024_WCS_Cambodia_S1139\2024_WCS_Cambodia_S1139\S1139_Dep01_FLAC\S1139KS01_032K_WA-T01_R1443-SD3814\S1139KS01_R1443-SD3814_20240423\S1139KS01_032K_WA-T01_R1443-SD3814_20240423_133206+0700.flac
S:\projects\2024_WCS_Cambodia_S1139\2024_WCS_Cambodia_S1139\S1139_Dep01_FLAC\S1139KS01_032K_WA-T01_R1443-SD3814\S1139KS01_R1443-SD3814_20240429\S1139KS01_032K_WA-T01_R1443-SD3814_20240429_160007+0700.flac
S:\projects\2024_WCS_Cambodia_S1139\2024_WCS_Cambodia_S1139\S1139_Dep01_FLAC\S1139KS01_032K_WA-T04_R1453-SD3837\S1139KS01_R1453-SD3837_20240601\S1139KS01_032K_WA-T04_R1453-SD3837_2

  Z = 10. * np.log10(spec)


S:\projects\2024_WCS_Cambodia_S1139\2024_WCS_Cambodia_S1139\S1139_Dep01_FLAC\S1139KS01_032K_WA-T01_R1443-SD3814\S1139KS01_R1443-SD3814_20240429\S1139KS01_032K_WA-T01_R1443-SD3814_20240429_130006+0700.flac
S:\projects\2024_WCS_Cambodia_S1139\2024_WCS_Cambodia_S1139\S1139_Dep01_FLAC\S1139KS01_032K_WA-T01_R1443-SD3814\S1139KS01_R1443-SD3814_20240807\S1139KS01_032K_WA-T01_R1443-SD3814_20240807_180007+0700.flac
S:\projects\2024_WCS_Cambodia_S1139\2024_WCS_Cambodia_S1139\S1139_Dep01_FLAC\S1139KS01_032K_WA-T04_R1453-SD3837\S1139KS01_R1453-SD3837_20240603\S1139KS01_032K_WA-T04_R1453-SD3837_20240603_070004+0700.flac
S:\projects\2024_WCS_Cambodia_S1139\2024_WCS_Cambodia_S1139\S1139_Dep01_FLAC\S1139KS01_032K_WA-T08_R1460-SD3816\S1139KS01_R1460-SD3816_20240504\S1139KS01_032K_WA-T08_R1460-SD3816_20240504_170007+0700.flac
S:\projects\2024_WCS_Cambodia_S1139\2024_WCS_Cambodia_S1139\S1139_Dep01_FLAC\S1139KS01_032K_WA-T01_R1443-SD3814\S1139KS01_R1443-SD3814_20240426\S1139KS01_032K_WA-T01_R1443-SD3814_2

  Z = 10. * np.log10(spec)


S:\projects\2024_WCS_Cambodia_S1139\2024_WCS_Cambodia_S1139\S1139_Dep01_FLAC\S1139KS01_032K_WA-T01_R1443-SD3814\S1139KS01_R1443-SD3814_20240426\S1139KS01_032K_WA-T01_R1443-SD3814_20240426_170007+0700.flac
S:\projects\2024_WCS_Cambodia_S1139\2024_WCS_Cambodia_S1139\S1139_Dep01_FLAC\S1139KS01_032K_WA-T01_R1443-SD3814\S1139KS01_R1443-SD3814_20240424\S1139KS01_032K_WA-T01_R1443-SD3814_20240424_100004+0700.flac
S:\projects\2024_WCS_Cambodia_S1139\2024_WCS_Cambodia_S1139\S1139_Dep01_FLAC\S1139KS01_032K_WA-T02_R1463-SD3841\S1139KS01_R1463-SD3841_20240725\S1139KS01_032K_WA-T02_R1463-SD3841_20240725_040003+0700.flac
S:\projects\2024_WCS_Cambodia_S1139\2024_WCS_Cambodia_S1139\S1139_Dep01_FLAC\S1139KS01_032K_WA-T05_R1054-SD3833\S1139KS01_R1054-SD3833_20240511\S1139KS01_032K_WA-T05_R1054-SD3833_20240511_110005+0700.flac
S:\projects\2024_WCS_Cambodia_S1139\2024_WCS_Cambodia_S1139\S1139_Dep01_FLAC\S1139KS01_032K_WA-T01_R1443-SD3814\S1139KS01_R1443-SD3814_20240428\S1139KS01_032K_WA-T01_R1443-SD3814_2

KeyboardInterrupt: 

In [2]:
unique_classes

array(['GIB'], dtype=object)

In [2]:
# Normalize file paths to handle backslashes
KSWSFilesCombined["Begin.Path"] = KSWSFilesCombined["Begin.Path"].str.replace("\\", "\", regex=False)


SyntaxError: unterminated string literal (detected at line 2) (284041759.py, line 2)

In [8]:
flac_path = temp_row["Begin.Path"]
data, samplerate = sf.read(flac_path)