In [None]:
# Refresh the apt package index, then install ffmpeg non-interactively (-y).
# The clip-splitting cell further down invokes the `ffmpeg` binary via subprocess.
!apt update
!apt install -y ffmpeg


In [None]:
# Create ./wavs relative to the current working directory; -p makes this a no-op if it already exists.
!mkdir -p wavs

In [None]:
# Create ./wavs_1 relative to the current working directory (no-op if it already exists).
# NOTE(review): none of the visible cells read from or write to wavs_1 — confirm it is still needed.
!mkdir -p wavs_1

In [None]:
import xml.etree.ElementTree as ET
import os
import subprocess

# Make /content the working directory; other cells open files such as
# "metadata.csv" by relative path and therefore resolve against it.
os.chdir("/content")

# SMIL_PATH:  timing document whose <par>/<audio> elements give clipBegin/clipEnd.
# AUDIO_PATH: the single source recording that every clip is cut from.
#             NOTE(review): "fulll" has three l's; the trailing comment says this
#             spelling was set on purpose — verify against the actual file name.
# OUT_DIR:    directory that receives the numbered clip files.
SMIL_PATH  = "/content/book.smil"
AUDIO_PATH = "/content/fulll.wav" # Corrected filename
OUT_DIR    = "/content/wavs"

# Create the output directory up front; exist_ok avoids an error on re-runs.
os.makedirs(OUT_DIR, exist_ok=True)

def parse_time(t):
    """Convert a SMIL clock value into a number of seconds.

    Accepted forms (a decimal comma is treated as a decimal point):
      * full clock value     "h:mm:ss.fff"   -> h*3600 + m*60 + s
      * partial clock value  "mm:ss.fff"     -> m*60 + s
      * timecount value      "12.5", "12.5s", "500ms", "2min", "1h"
      * any of the above prefixed with "npt=" (e.g. "npt=3.250s")

    Args:
        t: The clock value as a string, e.g. the content of a clipBegin or
           clipEnd attribute.

    Returns:
        The time in seconds as a float.

    Raises:
        ValueError: If the value has more than three ':'-separated fields or
            a field is not a valid number.
    """
    value = t.strip().replace(',', '.')

    # Some SMIL producers write "npt=<value>"; the prefix carries no timing
    # information of its own.
    if value.lower().startswith("npt="):
        value = value[4:].strip()

    if ':' in value:
        # Clock values have no unit suffix, but a stray trailing "s" is
        # tolerated and simply dropped.
        parts = value.rstrip('s').strip().split(':')

        if len(parts) == 2:
            m = int(parts[0])
            s = float(parts[1])
            return m * 60 + s
        if len(parts) == 3:
            h = int(parts[0])
            m = int(parts[1])
            s = float(parts[2])
            return h * 3600 + m * 60 + s
        raise ValueError(f"Invalid time format: {t}")

    # Timecount value: a number with an optional metric suffix.
    # "ms" must be tested before "s" because it also ends in "s".
    if value.endswith("ms"):
        return float(value[:-2]) / 1000.0
    if value.endswith("min"):
        return float(value[:-3]) * 60.0
    if value.endswith("h"):
        return float(value[:-1]) * 3600.0
    if value.endswith("s"):
        return float(value[:-1])
    return float(value)

# Load the SMIL timing document; each <par> element is expected to carry one
# <audio> child whose clipBegin/clipEnd attributes delimit a clip.
tree = ET.parse(SMIL_PATH)
root = tree.getroot()

NS = {"smil": "http://www.w3.org/2001/SMIL20/"}

# Output files are numbered 000001.wav, 000002.wav, ... in document order.
# The counter advances only after a clip has been written, so a skipped <par>
# leaves no gap in the numbering. Any cell that maps clip numbers back to text
# has to apply the same skip rules, otherwise its numbering drifts away from
# the files on disk.
index = 1
skipped = 0

for par in root.findall(".//smil:par", NS):
    par_id = par.attrib.get("id", "UNKNOWN")
    audio = par.find("smil:audio", NS)

    if audio is None:
        print(f"Skipping {par_id}: No audio element found.")
        skipped += 1
        continue

    try:
        start = parse_time(audio.attrib["clipBegin"])
        end   = parse_time(audio.attrib["clipEnd"])
    except (KeyError, ValueError) as e:
        # KeyError: attribute missing; ValueError: unparsable clock value.
        # Anything else is a programming error and should surface.
        print(f"❌ Error parsing time for {par_id}: {e}")
        skipped += 1
        continue

    duration = end - start
    out_path = os.path.join(OUT_DIR, f"{index:06d}.wav")

    cmd = [
        "ffmpeg",
        "-nostdin",             # never read from the notebook's stdin
        "-hide_banner",
        "-loglevel", "error",   # keep captured stderr down to the actual failure
        "-y",                   # overwrite clips left over from an earlier run
        "-ss", str(start),      # seek on the input side, before decoding
        "-i", AUDIO_PATH,
        "-t", str(duration),
        "-ar", "22050",         # resample to 22.05 kHz
        "-ac", "1",             # downmix to mono
        "-sample_fmt", "s16",   # 16-bit samples
        out_path,
    ]

    print(f"🎧 Cắt {par_id} | start={start}s | dur={duration}s")
    try:
        subprocess.run(cmd, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        print(f"❌ FFMPEG command failed for {par_id}:")
        print(f"    Command: {' '.join(e.cmd)}")
        print(f"    Return code: {e.returncode}")
        print(f"    STDOUT: {e.stdout.strip()}")
        print(f"    STDERR: {e.stderr.strip()}")
        print("-" * 50)

        # Abort the run: continuing would silently shift every later clip number.
        raise

    index += 1

# Make skipped elements visible, since each one shifts the clip numbering.
print(f"Done: {index - 1} clips written, {skipped} <par> elements skipped.")

In [None]:
import re
import csv

# INPUT is the pipe-delimited metadata file to clean; OUTPUT receives the cleaned copy.
# Both are relative paths, so they resolve against the current working directory.
INPUT = "metadata.csv"
OUTPUT = "metadata_clean.csv"

def clean_text(text):
    """Normalize one transcript line.

    Steps, in order:
      1. Remove a chapter marker: the word "Chương" (any letter case),
         whitespace, one following word/number, and an optional ':' or '.'.
      2. Remove every double-quote character.
      3. Replace " - " (dash surrounded by spaces) with ", ".
      4. Collapse every run of whitespace to a single space and trim the ends.

    Args:
        text: The raw transcript string.

    Returns:
        The cleaned transcript string.
    """
    # "Chương" is spelled with \u escapes (U+01B0 = ư, U+01A1 = ơ) so the
    # pattern still matches if this file is ever re-saved in the wrong encoding.
    text = re.sub(r'Ch\u01b0\u01a1ng\s+\w+[:.]?', '', text, flags=re.IGNORECASE)

    text = text.replace('"', '')

    text = text.replace(' - ', ', ')

    text = re.sub(r'\s+', ' ', text)

    return text.strip()

# Read the pipe-delimited metadata, clean the text column of every data row,
# and write the result to OUTPUT. The header row is copied through unchanged.
rows = []
# newline="" hands newline handling to the csv module, which it needs in order
# to read quoted fields that contain line breaks correctly.
with open(INPUT, encoding="utf-8", newline="") as f:
    reader = csv.reader(f, delimiter='|')
    header = next(reader, None)
    if header is None:
        raise ValueError(f"{INPUT} is empty: expected a header row")
    rows.append(header)

    for row in reader:
        if not row:
            # csv.reader yields an empty list for a blank line; nothing to clean.
            continue
        if len(row) != 2:
            # Fail with the line number instead of an opaque unpacking error.
            raise ValueError(
                f"{INPUT}, line {reader.line_num}: expected 2 columns, "
                f"got {len(row)}: {row!r}"
            )
        wav, text = row
        rows.append([wav, clean_text(text)])

with open(OUTPUT, "w", encoding="utf-8", newline="") as f:
    writer = csv.writer(f, delimiter='|')
    writer.writerows(rows)

print(f"Saved to {OUTPUT}")


In [None]:
import xml.etree.ElementTree as ET

# XML document that holds the book text.
TEXT_XML = "/content/main.xml"

tree = ET.parse(TEXT_XML)
root = tree.getroot()

# Maps an element id beginning with "sent_" to that element's concatenated,
# stripped text. Elements whose text is empty after stripping are left out.
sent_text = {}

for node in root.iter():
    node_id = node.attrib.get("id")
    if not node_id or not node_id.startswith("sent_"):
        continue

    content = "".join(node.itertext()).strip()
    if not content:
        continue

    sent_text[node_id] = content

print("Loaded", len(sent_text), "sentences from main.xml")


In [None]:
import xml.etree.ElementTree as ET
import csv

# Inputs: the SMIL timing document and the XML document with the book text.
# Output: a pipe-delimited file pairing each clip file name with its sentence.
# NOTE: the cleaning cell that reads "metadata.csv" appears earlier in this
# notebook; this cell has to run before it.
SMIL_PATH = "/content/book.smil"
TEXT_XML  = "/content/main.xml"
OUT_CSV   = "/content/metadata.csv"


tree = ET.parse(TEXT_XML)
root = tree.getroot()

# Maps an element id beginning with "sent_" to that element's stripped text.
sent_text = {}

for elem in root.iter():
    sid = elem.attrib.get("id")
    if sid and sid.startswith("sent_"):
        text = "".join(elem.itertext()).strip()
        if text:
            sent_text[sid] = text

print(f"Loaded {len(sent_text)} sentences from main.xml")

NS = {"smil": "http://www.w3.org/2001/SMIL20/"}

tree_smil = ET.parse(SMIL_PATH)
root_smil = tree_smil.getroot()

rows = []
index = 1
missing_text = 0

for par in root_smil.findall(".//smil:par", NS):
    # Clip numbers are handed out by the splitting cell, which writes one file
    # per <par> that has an <audio> child with clipBegin and clipEnd. Follow
    # the same rule here so that NNNNNN.wav always refers to the same <par>.
    # (A clock value that the splitter cannot parse is also skipped there and
    # is not detected here.)
    audio = par.find("smil:audio", NS)
    if audio is None or "clipBegin" not in audio.attrib or "clipEnd" not in audio.attrib:
        continue

    # This <par> owns the current clip number whether or not text is found
    # for it; consuming the number unconditionally keeps later rows aligned.
    wav_name = f"{index:06d}.wav"
    index += 1

    par_id = par.attrib.get("id")
    sentence = sent_text.get(par_id)

    if sentence is None:
        # Fall back to the fragment of <text src="file.xml#sent_..."/>, for
        # documents where the <par> id is not itself the sentence id.
        text_ref = par.find("smil:text", NS)
        if text_ref is not None:
            fragment = text_ref.attrib.get("src", "").partition("#")[2]
            sentence = sent_text.get(fragment)

    if sentence is None:
        missing_text += 1
        continue

    rows.append((f"wavs/{wav_name}", sentence))


with open(OUT_CSV, "w", encoding="utf-8", newline="") as f:
    writer = csv.writer(f, delimiter="|")
    writer.writerow(["wav_filename", "text"])
    writer.writerows(rows)

print(f"✔ metadata.csv generated with {len(rows)} entries")
if missing_text:
    print(f"{missing_text} clips have no matching sentence text and were left out")


In [None]:
import csv

# Print the first six rows of the pipe-delimited file as a quick sanity check.
# NOTE(review): no visible cell writes meta1.csv (they produce metadata.csv and
# metadata_clean.csv) — confirm this is the intended path.
with open("/content/meta1.csv", encoding="utf-8") as f:
    preview = csv.reader(f, delimiter="|")
    # zip() stops once range(6) is exhausted, so no seventh row is ever read.
    for _, row in zip(range(6), preview):
        print(row)