In [3]:
import random
import os
import joblib
import pandas as pd

# === CONFIG ===
# Tempo (beats per minute) and sequencer resolution (ticks per beat).
bpm, ppq = 175.0, 480
# Length of one beat in seconds at this tempo.
seconds_per_beat = 60 / bpm
# Length of one tick in milliseconds; used to turn ms jitter into ticks.
msec_per_tick = 60_000 / bpm / ppq

# Load your trained pitch model
# NOTE(review): joblib files are pickle-based, so only load model files
# that come from a trusted source.
model = joblib.load("teto_model.joblib")

# Load whole-word exceptions: one entry per line, whitespace-trimmed and
# lower-cased; blank lines are ignored.
with open("Monosyllabic.txt", "r", encoding="utf-8") as f:
    whole_word_exceptions = {
        entry.lower() for entry in map(str.strip, f) if entry
    }

# Lower-case letters the syllable splitter treats as vowels ("y" excluded).
vowels = set("aeiou")

def count_vowels(w):
    """Return how many characters of ``w`` are members of ``vowels``.

    The check is case-sensitive: upper-case letters are not counted.
    """
    return len([ch for ch in w if ch in vowels])

def split_into_syllables(word):
    """Split ``word`` into rough, spelling-based syllable chunks.

    The word is lower-cased first. Words listed in
    ``whole_word_exceptions`` come back as a single chunk. Otherwise the
    longest matching ending from a fixed suffix list is peeled off and kept
    as its own chunk, and the remaining stem is cut after every vowel (a
    stem with at most one vowel stays whole).

    Args:
        word: The word to split.

    Returns:
        A list of non-empty lower-case strings whose concatenation equals
        the lower-cased word. An empty word gives an empty list.
    """
    word_l = word.lower()
    if word_l in whole_word_exceptions:
        return [word_l]
    # suffix list
    suffixes = (
        "tion", "sion", "cian", "ding", "ling", "nion", "terred",
        "vous", "toire", "cent", "cause", "cient", "stand",
        "fone", "phone", "nia", "tched", "ted", "kov", "phane", "quence",
        "one", "thing", "mson",
    )
    # detect suffix: always take the LONGEST match, so a word such as
    # "telephone" reliably ends in "phone" rather than sometimes in "one".
    # (Iterating an unordered set made that choice vary between runs.)
    suffix = max(
        (sfx for sfx in suffixes if word_l.endswith(sfx)),
        key=len,
        default=None,
    )
    stem = word_l[:-len(suffix)] if suffix else word_l

    # split stem; an empty stem (the word IS the suffix) contributes nothing,
    # so callers never receive an empty syllable.
    parts = []
    if stem and count_vowels(stem) <= 1:
        parts.append(stem)
    elif stem:
        start = 0
        for i, c in enumerate(stem):
            if c in vowels:
                # close the current chunk right after each vowel
                parts.append(stem[start:i + 1])
                start = i + 1
        if start < len(stem):
            # trailing consonants after the last vowel
            parts.append(stem[start:])
    if suffix:
        parts.append(suffix)
    return parts

# 1) Read user text
def _strip_punctuation(token):
    """Trim non-alphanumeric characters from both ends of ``token``.

    Characters inside the token (e.g. the apostrophe in "don't") are kept.
    Returns an empty string when the token has no alphanumeric character.
    """
    start, end = 0, len(token)
    while start < end and not token[start].isalnum():
        start += 1
    while end > start and not token[end - 1].isalnum():
        end -= 1
    return token[start:end]


text = input("Write something for Teto to say: ")
# Quotes, commas and periods typed next to a word would otherwise become part
# of the lyric and of the model's "syllable" feature; tokens that are pure
# punctuation are dropped entirely.
words = [w for w in map(_strip_punctuation, text.split()) if w]

# 2) Precompute totals for contours
# Empty syllables and words without any syllable are filtered out so the
# total matches the number of notes that will actually be generated.
syllable_list = [
    sylls
    for sylls in ([s for s in split_into_syllables(w) if s] for w in words)
    if sylls
]
num_syllables = sum(len(sylls) for sylls in syllable_list)

# 3) Prepare generation
notes = []              # note dicts, in the order they are written out
syllable_counter = 0    # index of the current syllable within the phrase
prev_pitch = None       # last smoothed pitch; None until the first syllable
cumulative_offset = 0   # running sum of the random pitch steps

# 4) Optional breath at start
# "micro_t" is the value written as the note's Length (in ticks), so it must
# reflect the 0.25-beat duration; a value of 0 produced a zero-length note.
notes.append({
    "lyric":"br","note":48,"beats":0.25,
    "intensity":60,"modulation":0,
    "pre":0,"overlap":0,
    "envelope":"5,20,60,30","micro_t":round(0.25*ppq)
})

# 5) Random base-note between C4–Bb4
base_note = random.choice([60,62,63,65,67,68,70])

# 6) Build each syllable
def _humanized_ticks(ideal_ticks):
    """Return ``ideal_ticks`` nudged by a random ±30 ms, never below 1 tick."""
    shift_ms = random.uniform(-30, 30)
    shift_ticks = round(shift_ms / msec_per_tick)
    return max(1, ideal_ticks + shift_ticks)


for word_sylls in syllable_list:
    for idx_in_word, syl in enumerate(word_sylls):
        if not syl:
            # An empty syllable has no first/last character to inspect and
            # nothing to sing; skip it instead of raising IndexError.
            continue

        # duration (in beats): syllables ending in a vowel are held longer
        if syl[-1] in vowels:
            dur = random.uniform(0.75, 1.0)
        else:
            dur = random.uniform(0.45, 0.65)

        # model prediction from the syllable text and its index in the word
        df = pd.DataFrame({"syllable": [syl], "position": [idx_in_word]})
        base_p = model.predict(df)[0]

        # jitter ±0.3
        jitter = random.uniform(-0.3, 0.3)

        # phrase contour: arch shape, 0 at both phrase edges and +1 at the
        # midpoint (pos runs from 0 to 1 across the phrase)
        pos = syllable_counter / max(1, num_syllables - 1)
        contour = -4 * (pos - 0.5) ** 2 + 1

        # combine
        pitch = round(base_p + jitter + contour)

        # offset around base_note
        pitch = pitch + (base_note - 64)

        # small step every 3 syllables; steps accumulate across the phrase
        if syllable_counter % 3 == 0:
            step = random.choice([-1, 0, 1])
        else:
            step = 0
        cumulative_offset += step
        pitch += cumulative_offset

        # smooth with previous
        if prev_pitch is not None:
            pitch = round(0.7 * prev_pitch + 0.3 * pitch)
        prev_pitch = pitch

        # occasional human pitch slip 5%; applied after prev_pitch is stored,
        # so the slip does not feed into the next syllable's smoothing
        if random.random() < 0.05:
            pitch += random.choice([-1, 1])

        # intensity crescendo: starts at 80 and rises through the phrase,
        # staying below 120
        intensity = 80 + int((syllable_counter / num_syllables) * 40)

        # vibrato modulation 5–15
        modulation = random.randint(5, 15)

        # PreUtterance by consonant type
        first = syl[0]
        if first in ("t", "k", "p", "d", "g", "b"):
            pre = 80
        elif first in ("s", "f", "h", "l", "r"):
            pre = 60
        else:
            pre = 40
        # overlap coarticulation
        overlap = random.randint(15, 30)

        # dynamic envelope: louder notes get a different envelope string
        if intensity > 100:
            env = "0,30,70,40"
        else:
            env = "40,40,50,60"

        # micro-timing shift ±30ms around the nominal length in ticks
        ticks = _humanized_ticks(int(dur * ppq))

        notes.append({
            "lyric": syl,
            "note": int(pitch),
            "beats": dur,
            "intensity": intensity,
            "modulation": modulation,
            "pre": pre,
            "overlap": overlap,
            "envelope": env,
            "micro_t": ticks,
        })

        syllable_counter += 1

    # rest after every word (including the last), with the same micro-timing
    rest_ticks = _humanized_ticks(round(0.125 * ppq))
    notes.append({
        "lyric": "R", "note": 60, "beats": 0.125,
        "intensity": 100, "modulation": 0,
        "pre": 0, "overlap": 0,
        "envelope": "40,40,50,60",
        "micro_t": rest_ticks,
    })

# 7) Write out UST
# (UST key, note-dict key) pairs, in the order they appear in each section.
ust_fields = (
    ("Length", "micro_t"),
    ("Lyric", "lyric"),
    ("NoteNum", "note"),
    ("PreUtterance", "pre"),
    ("VoiceOverlap", "overlap"),
    ("Intensity", "intensity"),
    ("Modulation", "modulation"),
    ("Envelope", "envelope"),
)

with open("output.ust", "w", encoding="utf-8") as f:
    # project header
    f.write("[#SETTING]\n")
    f.write(f"Tempo={bpm:.2f}\nTracks=1\nProjectName=Teto Ultimate\n\n")
    # one numbered section per note, zero-padded to four digits
    for index, entry in enumerate(notes):
        section = [f"[#{index:04d}]\n"]
        section.extend(f"{key}={entry[src]}\n" for key, src in ust_fields)
        section.append("StartPoint=0\n\n")
        f.write("".join(section))
    f.write("[#TRACKEND]\n")

print("✅ UST created: ", os.path.abspath("output.ust"))


Write something for Teto to say:  “Amidst the mists and coldest frosts, with stoutest wrists and loudest boasts, he thrusts his fists against the posts and still insists he sees the ghosts”. 


✅ UST created:  /ztank/scratch/user/u.ng315068/output.ust
