# 1. Dataset Loading and Settings

In [None]:
!pip install praat-parselmouth

Collecting praat-parselmouth
  Downloading praat_parselmouth-0.4.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.9 kB)
Downloading praat_parselmouth-0.4.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (10.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.8/10.8 MB[0m [31m45.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: praat-parselmouth
Successfully installed praat-parselmouth-0.4.5


In [None]:
import parselmouth
from parselmouth.praat import call
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import google.colab.drive as drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [None]:
# Input data locations (under the Google Drive mount point /content/drive/)
input_folder = "/content/drive/MyDrive/AIFFELthon/Data/Sample/literature_100/sample" # Speech corpus (Raw data)
drive_csv_path = "/content/drive/MyDrive/AIFFELthon/Data/Literature/label_lit.csv" # Labeling data
# Load the labeling CSV into a DataFrame
full_data = pd.read_csv(drive_csv_path)

In [None]:
# Output locations (passed to the processing functions by entry_point)
output_contour = "/content/pitch_contour_data.csv"  # CSV for the thresholded pitch contour
output_movement = "/content/pitch_movement_data.csv"  # CSV for the pitch movement (distance / slope) data
output_stylization_folder = "/content/processed_results_res2.0"  # Folder that receives the close-copy stylization result files

# 2. Extract Pitch Contour

## Extract Pitch or PitchTier Object

In [None]:
# Pitch value extraction

def extract_pitch_values(audio_file):
    """
    Return the voiced pitch values of an audio file and their frame times.

    The sound is analysed with ``Sound.to_pitch_ac`` using a 0.01 s time step
    and a 50-500 Hz pitch range. Frames whose frequency is not positive
    (unvoiced segments) are dropped from both returned arrays.

    Returns:
        (pitch_values, time_stamps): two arrays of equal length.
    """
    pitch = parselmouth.Sound(audio_file).to_pitch_ac(
        time_step=0.01, pitch_floor=50, pitch_ceiling=500
    )

    # Raw per-frame frequencies and the matching frame times
    frequencies = pitch.selected_array['frequency']
    frame_times = pitch.xs()

    # Boolean mask selecting the voiced frames only
    voiced = frequencies > 0
    return frequencies[voiced], frame_times[voiced]

In [None]:
# Build a PitchTier and derive pitch values from it (for close-copy stylization).
# Values cannot be read directly from a PitchTier object (no API is provided for it),
# so the tier is converted back into a Pitch object with Praat's Synthesize > "To Pitch...".

def stylize_pitch_tier_from_sound(audio_file, frequency_resolution=2.0, pitch_floor=50, pitch_ceiling=500):
    """
    Create a PitchTier from an audio file and stylize it.

    Args:
        audio_file: Path of the sound file to load.
        frequency_resolution: Value passed to Praat's "Stylize..." command
            together with the unit "semitones".
        pitch_floor, pitch_ceiling: Pitch range passed to the
            "To Manipulation" and "To Pitch..." commands.

    Returns:
        (times, pitch): two NumPy arrays taken from the re-created Pitch object.
        On any failure (including an empty result) the error is printed and
        two empty arrays are returned instead.
    """
    try:
        manipulation = call(parselmouth.Sound(audio_file), "To Manipulation", 0.01, pitch_floor, pitch_ceiling)
        tier = call(manipulation, "Extract pitch tier")

        # Stylize the tier at the requested resolution (unit: semitones)
        call(tier, "Stylize...", frequency_resolution, "semitones")

        # Convert the tier back to a Pitch object so that values can be read out
        resampled = call(tier, "To Pitch...", 0.01, pitch_floor, pitch_ceiling)
        pitch_track = resampled.selected_array['frequency']
        frame_times = resampled.xs()

        if len(frame_times) == 0 or len(pitch_track) == 0:
            raise ValueError("Stylized time or pitch is empty.")

        return np.array(frame_times), np.array(pitch_track)

    except Exception as e:
        # Best effort: report the problem and hand back empty arrays
        print(f"Failed to process {audio_file}: {e}")
        return np.array([]), np.array([])

## 2-1) Extract Pitch Values and Pitch Movement Slope
1. Extract and Modulate Pitch Contour: Using Threshold
2. Generate Pitch Movement Slope Data 

In [None]:
def get_pitch_over_threshold(audio_file, threshold=10.0):  # threshold is fixed at 10
    """
    Reduce the pitch contour of an audio file to points that differ by at least ``threshold``.

    The first voiced frame is always kept. Every later frame is kept only when
    its pitch differs from the most recently kept pitch by ``threshold`` or more.

    Args:
        audio_file: Path handed to ``extract_pitch_values``.
        threshold: Minimum absolute pitch difference (in the unit of the
            extracted pitch values) required to keep a frame.

    Returns:
        A list of ``{'time': ..., 'pitch': ...}`` dicts, one per kept point,
        starting with the first voiced frame. An empty list is returned (and a
        message printed) when the file has no voiced frames.
    """
    pitch_values, time_stamps = extract_pitch_values(audio_file)

    if len(pitch_values) == 0:
        print(f"No valid pitch values found in {audio_file}")
        return []

    applied_pitch = [pitch_values[0]]
    applied_time = [time_stamps[0]]

    for pitch, time in zip(pitch_values[1:], time_stamps[1:]):
        # Compare against the last KEPT point, not the previous frame
        if abs(pitch - applied_pitch[-1]) >= threshold:
            applied_pitch.append(pitch)
            applied_time.append(time)

    # Emit every kept point, including the first anchor point (index 0),
    # so that a contour with a single kept point is not reported as empty.
    return [{'time': time, 'pitch': pitch} for time, pitch in zip(applied_time, applied_pitch)]

In [None]:
def _scale_to_percent(values):
    """Min-max scale ``values`` to the 0-100 range; a constant series maps to all zeros."""
    lowest = np.min(values)
    span = np.max(values) - lowest
    if span == 0:
        # Avoid 0/0 (NaN) when every value is identical
        return np.zeros_like(values, dtype=float)
    return (values - lowest) / span * 100


def calculate_movement_distance_and_slope(pitch_values, time_stamps):
    """
    Compute the travel distance and slope of each pitch movement.

    Pitch and time are each min-max normalized to the 0-100 range, then every
    pair of consecutive points yields one movement record.

    Args:
        pitch_values: Sequence of pitch values.
        time_stamps: Sequence of time stamps, aligned with ``pitch_values``.

    Returns:
        A list of dicts with the keys ``start_time``, ``end_time``,
        ``start_pitch``, ``end_pitch``, ``distance`` (Euclidean distance in the
        normalized plane) and ``slope`` (0 when the two points share the same
        normalized time). The list is empty when fewer than two points are given.
    """
    pitch_values = np.asarray(pitch_values, dtype=float)
    time_stamps = np.asarray(time_stamps, dtype=float)

    # Fewer than two points means there is no movement to describe
    # (this also keeps np.min/np.max away from empty input).
    if len(pitch_values) < 2:
        return []

    # Normalize pitch and time to 0-100
    normalized_pitch = _scale_to_percent(pitch_values)
    normalized_time = _scale_to_percent(time_stamps)

    movement_data = []
    for i in range(1, len(normalized_pitch)):
        delta_pitch = normalized_pitch[i] - normalized_pitch[i - 1]
        delta_time = normalized_time[i] - normalized_time[i - 1]
        distance = np.sqrt(delta_pitch**2 + delta_time**2)
        slope = delta_pitch / delta_time if delta_time != 0 else 0
        movement_data.append({
            'start_time': normalized_time[i - 1],
            'end_time': normalized_time[i],
            'start_pitch': normalized_pitch[i - 1],
            'end_pitch': normalized_pitch[i],
            'distance': distance,
            'slope': slope
        })

    return movement_data

def extract_pitch_features_with_threshold_and_distance(audio_file, threshold=10.0):  # includes the threshold-based pitch point reduction
    """
    Threshold the pitch contour of an audio file and describe the movements between the kept points.

    The first voiced frame is always kept; a later frame is kept when its pitch
    differs from the last kept pitch by at least ``threshold``. The kept points
    are then passed to ``calculate_movement_distance_and_slope``, whose result
    is returned. When the file has no voiced frames a message is printed and an
    empty list is returned.
    """
    pitch_values, time_stamps = extract_pitch_values(audio_file)

    if len(pitch_values) == 0:
        print(f"No valid pitch values found in {audio_file}")
        return []

    kept_pitch, kept_time = [pitch_values[0]], [time_stamps[0]]
    for idx in range(1, len(pitch_values)):
        candidate = pitch_values[idx]
        # Keep the frame only if it moved far enough from the last kept point
        if abs(candidate - kept_pitch[-1]) >= threshold:
            kept_pitch.append(candidate)
            kept_time.append(time_stamps[idx])

    return calculate_movement_distance_and_slope(np.array(kept_pitch), np.array(kept_time))

## 2-2) Intonation Curve Standardization
3. Close-Copy Stylization


In [None]:
def calculate_slopes(stylized_time, stylized_pitch):
    """
    Compute the slope of every segment between consecutive stylized pitch points.

    Returns a NumPy array with one entry per pair of neighbouring points
    (pitch difference divided by time difference); a segment whose two points
    share the same time gets slope 0.
    """
    def segment_slope(right):
        left = right - 1
        rise = stylized_pitch[right] - stylized_pitch[left]
        run = stylized_time[right] - stylized_time[left]
        return 0 if run == 0 else rise / run

    return np.array([segment_slope(i) for i in range(1, len(stylized_time))])

def save_stylized_pitch(stylized_time, stylized_pitch, slopes, output_file):
    """
    Write stylized pitch data to a CSV file and return it as a DataFrame.

    The table has the columns "Time (s)", "Pitch (Hz)" and "Slope". When there
    are fewer slopes than time points, a single ``None`` is appended to the
    slopes (the last point has no following segment).
    """
    needs_padding = len(slopes) < len(stylized_time)
    slope_column = (list(slopes) + [None]) if needs_padding else slopes

    table = pd.DataFrame({
        "Time (s)": stylized_time,
        "Pitch (Hz)": stylized_pitch,
        "Slope": slope_column,
    })
    table.to_csv(output_file, index=False)
    print(f"Stylized pitch data saved to {output_file}")
    return table



# 3. Apply methods and Pre-process Data

In [None]:
def get_pitch_contour_threshold_process(input_folder, output_file, threshold=10):
    """
    Build the thresholded pitch contour for every .wav file in a folder and save it as one CSV.

    - input_folder: Folder that is scanned for files ending in ".wav"
    - output_file: Path of the combined CSV file
    - threshold: Threshold forwarded to ``get_pitch_over_threshold``

    Returns the combined DataFrame (with an added ``file_name`` column), or an
    empty DataFrame when no file produced any contour data.
    """
    per_file_frames = []

    wav_names = (name for name in os.listdir(input_folder) if name.endswith(".wav"))
    for file_name in wav_names:
        contour = get_pitch_over_threshold(os.path.join(input_folder, file_name), threshold)

        if isinstance(contour, list):
            contour = pd.DataFrame(contour)

        # Files without contour data contribute nothing
        if contour.empty:
            continue

        contour['file_name'] = file_name
        per_file_frames.append(contour)

    if not per_file_frames:
        print("No pitch data was extracted.")
        return pd.DataFrame()

    combine = pd.concat(per_file_frames, ignore_index=True)
    save_to_csv(combine, output_file)
    print(f"All features saved to {output_file}.")
    return combine

def get_pitch_movement_process(input_folder, threshold=10, output_csv="/content/pitch_movement_data.csv"):
    """
    Extract pitch movement data for every .wav file in a folder and save it as one CSV.

    - input_folder: Folder that is scanned for files ending in ".wav"
    - threshold: Threshold forwarded to
      ``extract_pitch_features_with_threshold_and_distance``
    - output_csv: Output file name for the processed data

    Returns a DataFrame with one row per pitch movement plus a ``file_name``
    column, or an empty DataFrame when nothing could be extracted. A file that
    raises an error is reported and skipped so the remaining files are still
    processed.
    """
    pitch_movement_data = []

    for file_name in os.listdir(input_folder):
        if not file_name.endswith(".wav"):
            continue

        audio_file = os.path.join(input_folder, file_name)
        try:
            # Call the extractor that is actually defined in this notebook; the
            # previously referenced "..._with_stylization_and_distance" name does
            # not exist, so every file failed with a swallowed NameError.
            utterance_movement = extract_pitch_features_with_threshold_and_distance(audio_file, threshold)

            if not utterance_movement:
                print(f"No pitch data for {file_name}, skipping.")
                continue

            # Tag every movement record with the file it came from
            for entry in utterance_movement:
                entry['file_name'] = file_name

            pitch_movement_data.extend(utterance_movement)

            print(f"Processed {file_name}.")
        except Exception as e:
            print(f"Error processing {file_name}: {e}")

    df_movement = pd.DataFrame(pitch_movement_data)

    if df_movement.empty:
        print("No valid data to save.")
        return pd.DataFrame()

    df_movement.to_csv(output_csv, index=False)
    print(f"Saved pitch movement data to {output_csv}")
    return df_movement

def get_pitch_stylized_data(input_folder, output_folder):
    """
    Stylize the pitch of every .wav file in a folder and save one CSV per file.

    - input_folder: Folder that is scanned for files ending in ".wav"
    - output_folder: Folder (created if missing) that receives
      "<base name>_pitch.csv" for each processed file

    Returns a DataFrame that concatenates the per-file tables (columns as
    produced by ``save_stylized_pitch`` plus a ``file_name`` column), or an
    empty DataFrame when no file could be processed. Files whose stylization
    comes back empty or raises an error are reported and skipped.
    """
    os.makedirs(output_folder, exist_ok=True)

    stylized_frames = []

    for file_name in os.listdir(input_folder):
        if not file_name.endswith(".wav"):
            continue

        sound_file = os.path.join(input_folder, file_name)
        base_name = os.path.splitext(file_name)[0]
        output_csv = os.path.join(output_folder, f"{base_name}_pitch.csv")

        try:
            stylized_time, stylized_pitch = stylize_pitch_tier_from_sound(sound_file, frequency_resolution=2.0)

            # The stylization helper reports failure by returning empty arrays;
            # do not write an empty CSV or report such a file as processed.
            if len(stylized_time) == 0:
                print(f"No stylized pitch data for {file_name}, skipping.")
                continue

            slopes = calculate_slopes(stylized_time, stylized_pitch)
            df_t_s_slope = save_stylized_pitch(stylized_time, stylized_pitch, slopes, output_csv)
            # Keep every file's table (not only the last one), tagged with its source file
            stylized_frames.append(df_t_s_slope.assign(file_name=file_name))
            print(f"Processed {file_name}: CSV saved to {output_csv}.")
        except Exception as e:
            print(f"Failed to process {file_name}: {e}")

    # Previously an unbound local was returned when no file was processed
    if not stylized_frames:
        print("No stylized pitch data was produced.")
        return pd.DataFrame()

    return pd.concat(stylized_frames, ignore_index=True)

In [None]:
def visualize_pitch_values(pitch_values, time_stamps, title="Pitch Values"):
    """
    Draw a pitch curve (pitch over time) on a 10x5 figure and show it.

    The y-axis is fixed to 50-500 so that it matches the pitch range used
    elsewhere in this notebook.
    """
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(time_stamps, pitch_values, label='Pitch Curve')
    ax.set_title(title)
    ax.set_xlabel('Time (s)')
    ax.set_ylabel('Pitch (Hz or Normalized)')
    ax.set_ylim(50, 500)  # match the pitch range
    ax.legend()
    ax.grid(True)
    plt.show()

def save_to_csv(data, output_file):
    """Write ``data`` (anything ``pd.DataFrame`` accepts) to ``output_file`` as CSV without the index."""
    pd.DataFrame(data).to_csv(output_file, index=False)

In [None]:
def entry_point():
    """
    Run the three processing stages on ``input_folder`` and return their results.

    Returns a tuple ``(contour, movement, stylized)`` holding, in order, the
    results of the thresholded contour extraction, the pitch movement
    extraction and the close-copy stylization.
    """
    # Data 1: thresholded F0 values
    contour_result = get_pitch_contour_threshold_process(input_folder, output_contour, threshold=10)
    # Data 2: pitch movement slope
    movement_result = get_pitch_movement_process(input_folder, threshold=10, output_csv=output_movement)
    # Data 3: stylized data
    stylized_result = get_pitch_stylized_data(input_folder, output_stylization_folder)

    return contour_result, movement_result, stylized_result

In [None]:
# Run the whole pipeline when this cell/script is executed directly.
# The tuple returned by entry_point() is not kept here.
if __name__ == "__main__":
    entry_point()

No valid pitch values found in /content/drive/MyDrive/AIFFELthon/Data/Sample/literature_100/sample/S0171-10-06-09.wav
All features saved to /content/pitch_contour_data.csv.
Processed S0171-09-11-05.wav.
Processed S0171-09-11-08.wav.
Processed S0171-09-11-07.wav.
Processed S0171-09-11-10.wav.
Processed S0171-09-11-09.wav.
Processed S0171-09-11-04.wav.
Processed S0171-09-11-03.wav.
Processed S0171-09-11-01.wav.
Processed S0171-09-11-06.wav.
Processed S0171-09-11-02.wav.
Processed S0171-09-11-00.wav.
Processed S0171-10-14-03.wav.
Processed S0171-10-14-06.wav.
Processed S0171-10-14-00.wav.
Processed S0171-10-14-02.wav.
Processed S0171-10-14-08.wav.
Processed S0171-10-14-07.wav.
Processed S0171-10-14-01.wav.
Processed S0171-10-14-04.wav.
Processed S0171-10-14-09.wav.
Processed S0171-10-14-05.wav.
Processed S0171-10-06-01.wav.
Processed S0171-10-06-03.wav.
Processed S0171-10-06-00.wav.
Processed S0171-10-06-05.wav.
Processed S0171-10-06-02.wav.
Processed S0171-10-06-04.wav.
Processed S0171-1

# 4. etc.

**Reference**
  - Jeahyuk Oh. (2014). *A Study of Methods of Standardization for Korean Intonation Curve*. 한국어학, 62, 395-420.
  - Jeahyuk Oh. (2024). *Improving the objectivity of intonation transcription*. 한말연구, 65(25), 1-20.