<a href="https://colab.research.google.com/github/Eratofee/WaterMusicGeneration/blob/main/WaterMusicGeneration.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the two MIDI libraries this notebook imports further down:
# mido (used by render_rhythmic_midi) and midiutil (used by the export cells).
!pip -q install mido
!pip install midiutil

In [None]:
import numpy as np
import pandas as pd
from IPython.display import Audio

In [None]:
file_path = "DataSet_20230217-20231231.csv"

# Read the raw CSV, then turn the day-first "date" strings into real timestamps.
df = pd.read_csv(file_path)
df = df.assign(date=pd.to_datetime(df["date"], dayfirst=True))

df.head()
print(df.columns)
# df

In [None]:
# Time stamps plus the readings of one station, rows with missing values removed.
station = "P343"
data = df.loc[:, ["date", station]].dropna()

In [None]:
# Same selection for the "GN-LRu" column; note that this rebinds `station`.
station = "GN-LRu"
data_power = df.loc[:, ["date", station]].dropna()

In [None]:
import numpy as np

# `data` holds exactly one measurement column next to "date" (plus "pitch" once
# this cell has run). Look that column up from `data` itself rather than through
# the global `station`: the preceding cell rebinds `station` to "GN-LRu", a
# column `data` does not contain, which made `data[station]` raise KeyError.
level_col = next(c for c in data.columns if c not in ("date", "pitch"))

min_val = data[level_col].min()
max_val = data[level_col].max()

# Linearly map the observed value range onto MIDI pitches 48..72 (C3..C5).
data["pitch"] = np.interp(
    data[level_col],
    (min_val, max_val),
    (48, 72)
)

In [None]:
import matplotlib.pyplot as plt

import matplotlib.pyplot as plt

# Split the time axis from the measurement columns ("date" column if present,
# otherwise the index).
has_date_column = "date" in df.columns
x = df["date"] if has_date_column else df.index
y_df = df.drop(columns=["date"]) if has_date_column else df

# Smoothed alternative to y_df: 100-sample rolling mean per column.
y_df_smooth = y_df.rolling(100, min_periods=1).mean()

fig, ax = plt.subplots(figsize=(14, 6))
for col_name, smoothed in y_df_smooth.items():
    ax.plot(x, smoothed, label=col_name, alpha=0.6)

ax.set_xlabel("Date")
ax.set_ylabel("Water Level")
ax.set_title("Overview of All Stations")
ax.grid(True)

# Legend sits outside the axes so it does not cover the curves.
ax.legend(loc="upper left", bbox_to_anchor=(1.02, 1.0))
fig.tight_layout()
plt.show()


In [None]:
import pandas as pd

# --- 0) Ensure the input has the expected shape: a sorted copy with a datetime "date" column ---
time_col = "date"
df2 = df.copy()

if time_col in df2.columns:
    # Time lives in a column: parse it (unparseable entries become NaT) and sort.
    df2[time_col] = pd.to_datetime(df2[time_col], dayfirst=True, errors="coerce")
    df2 = df2.sort_values(time_col)
else:
    # No "date" column: assume the index is datetime-like. rename_axis() makes
    # reset_index() emit a "date" column whatever the index was called before;
    # renaming a column called "index" only works for an unnamed index.
    df2.index = pd.to_datetime(df2.index, errors="coerce")
    df2 = df2.sort_index().rename_axis(time_col).reset_index()

# --- 1) Station -> Instrument mapping (GM program numbers 1..128) ---
# GM reference (common):
# Violin 41, Clarinet 72, Cello 43, Trombone 58, Contrabass 44,
# Bassoon 71, Church Organ 20, Trumpet 57, Oboe 69, Synth Pad 1 89
mapping = pd.DataFrame([
    {"station": "P343",  "instrument_name": "Violin",        "midi_program_gm_1_128": 41},
    {"station": "P33",   "instrument_name": "Clarinet",      "midi_program_gm_1_128": 72},
    {"station": "P501",  "instrument_name": "Cello",         "midi_program_gm_1_128": 43},
    {"station": "P37",   "instrument_name": "Trombone",      "midi_program_gm_1_128": 58},
    {"station": "P349",  "instrument_name": "Contrabass",    "midi_program_gm_1_128": 44},
    {"station": "P38",   "instrument_name": "Bassoon",       "midi_program_gm_1_128": 71},
    {"station": "P9",    "instrument_name": "Church Organ",  "midi_program_gm_1_128": 20},
    {"station": "P9",    "instrument_name": "Trumpet",       "midi_program_gm_1_128": 57},  # duplicate station on purpose
    {"station": "P618",  "instrument_name": "Oboe",          "midi_program_gm_1_128": 69},
    {"station": "GN-LRu","instrument_name": "Synth Pad 1",   "midi_program_gm_1_128": 89},
])

# --- 2) Build long/tidy df: time, station, value ---
# Keep only mapped stations that actually exist as columns in the data.
stations = mapping["station"].unique().tolist()
available = [c for c in stations if c in df2.columns]

long_df = df2[[time_col] + available].melt(
    id_vars=[time_col],
    var_name="station",
    value_name="value"
).dropna(subset=["value"])

# --- 3) Attach instrument info (the two P9 mapping rows duplicate the P9 readings intentionally) ---
assigned_df = long_df.merge(mapping, on="station", how="left")

# --- 4) Rename the time column to exactly 'time' and fix the column order ---
assigned_df = assigned_df.rename(columns={time_col: "time"})
assigned_df = assigned_df[["time", "station", "value", "instrument_name", "midi_program_gm_1_128"]]

assigned_df.head(10), assigned_df.shape


In [None]:
# ---------- helpers: note name <-> MIDI ----------
# Semitone offset above C for every supported pitch-class spelling.
_NOTE2SEMI = {
    "C": 0,
    "C#": 1, "Db": 1,
    "D": 2,
    "D#": 3, "Eb": 3,
    "E": 4,
    "F": 5,
    "F#": 6, "Gb": 6,
    "G": 7,
    "G#": 8, "Ab": 8,
    "A": 9,
    "A#": 10, "Bb": 10,
    "B": 11,
}

def note_to_midi(note: str) -> int:
    """Convert a note name such as "G3", "Bb1" or "F#5" to its MIDI number (C4 = 60).

    Surrounding whitespace is ignored. Raises ValueError for names shorter than
    two characters or with a non-integer octave, and KeyError for a pitch class
    that is not listed in _NOTE2SEMI.
    """
    name = note.strip()
    if len(name) < 2:
        raise ValueError(f"Bad note: {name}")
    # An accidental, when present, is always the second character.
    split_at = 2 if name[1] in ("b", "#") else 1
    octave = int(name[split_at:])
    pitch_class = name[:split_at]
    return 12 * (octave + 1) + _NOTE2SEMI[pitch_class]

def build_dorian_scale(root_midi: int) -> np.ndarray:
    """Return the seven Dorian-mode pitches starting at `root_midi` as an integer array."""
    # Semitone steps of the Dorian mode relative to its root.
    dorian_offsets = (0, 2, 3, 5, 7, 9, 10)
    return np.array([root_midi + step for step in dorian_offsets], dtype=int)

def build_scale_in_range(root_note: str = "D3", low: int = 48, high: int = 72) -> np.ndarray:
    """Return all Dorian pitches on `root_note` that lie in [low, high] (inclusive), ascending.

    `low` and `high` are MIDI numbers. Raises ValueError when fewer than two
    scale pitches fall inside the range.
    """
    root = note_to_midi(root_note)
    # Stack the mode over octave shifts -6..+9 around the root; the set removes
    # any pitch that occurs more than once.
    candidates = {
        int(pitch)
        for shift in range(-6, 10)
        for pitch in build_dorian_scale(root + 12 * shift)
    }
    ordered = np.array(sorted(candidates))
    inside = ordered[(ordered >= low) & (ordered <= high)]
    if len(inside) < 2:
        raise ValueError(f"Scale too small in range low={low}, high={high}.")
    return inside

def quantize_values_to_scale(values: np.ndarray, scale: np.ndarray) -> np.ndarray:
    """Map values linearly onto a scale and snap each one to the nearest scale step.

    min(values) maps to scale[0] and max(values) to scale[-1]; everything in
    between is rounded to the nearest scale index.

    Parameters
    ----------
    values : 1-D array-like of numbers (NaN allowed).
    scale : 1-D array-like of MIDI pitches, ascending, at least one element.

    Returns
    -------
    Integer array with one pitch per input value.
    - Empty input gives an empty array.
    - Constant, all-NaN or non-finite-range input gives the middle scale pitch.
    - Individual NaN entries map to scale[0].
    """
    v = np.asarray(values, dtype=float)
    scale = np.asarray(scale)
    if v.size == 0:
        # np.nanmin/np.nanmax raise on empty input; there is simply nothing to map.
        return np.empty(0, dtype=int)

    middle_pitch = int(scale[len(scale) // 2])
    if np.isnan(v).all():
        # Avoids the "All-NaN slice" warning; same result as the invalid-range branch.
        return np.full(len(v), middle_pitch, dtype=int)

    vmin = np.nanmin(v)
    vmax = np.nanmax(v)
    if not np.isfinite(vmin) or not np.isfinite(vmax) or abs(vmax - vmin) < 1e-12:
        # constant or invalid -> middle pitch
        return np.full(len(v), middle_pitch, dtype=int)

    t = (v - vmin) / (vmax - vmin)
    # Casting NaN to int is undefined; pin NaN entries to the lowest step explicitly.
    t = np.where(np.isnan(t), 0.0, t)
    idx = np.rint(t * (len(scale) - 1)).astype(int)
    idx = np.clip(idx, 0, len(scale) - 1)
    return scale[idx].astype(int)

# ---------- instrument config ----------
# One row per voice: station column, instrument name, GM program (1..128),
# lowest and highest playable note.
_INSTRUMENT_FIELDS = ("station", "instrument_name", "gm_program", "low", "high")
INSTRUMENTS = [
    dict(zip(_INSTRUMENT_FIELDS, row))
    for row in (
        ("P343",   "Violin",       41, "G3",  "E7"),
        ("P33",    "Clarinet",     72, "E3",  "C7"),
        ("P501",   "Cello",        43, "C2",  "A5"),   # practical playing range
        ("P37",    "Trombone",     58, "E2",  "F5"),
        ("P349",   "Contrabass",   44, "E1",  "C5"),
        ("P38",    "Bassoon",      71, "Bb1", "Eb5"),
        ("P9",     "Church Organ", 20, "C2",  "C7"),
        ("P9",     "Trumpet",      57, "F#3", "E6"),
        ("P618",   "Oboe",         69, "Bb3", "A6"),
        ("GN-LRu", "Synth Pad 1",  89, "C2",  "C7"),
    )
]

def map_data_to_midi(df: pd.DataFrame,
                     instruments=INSTRUMENTS,
                     time_col: str = "date",
                     scale_root: str = "D3") -> pd.DataFrame:
    """
    Quantise every configured station onto a Dorian scale within its instrument's range.

    Parameters
    ----------
    df : wide frame with one column per station; time is either the column
        `time_col` or, if that column is absent, the index.
    instruments : iterable of dicts with keys "station", "instrument_name",
        "gm_program", "low", "high" (note names). A station may be listed
        more than once; each entry produces its own track.
    time_col : name of the time column.
    scale_root : root note of the Dorian scale, e.g. "D3".

    Returns
    -------
    Tidy DF with columns:
    time, station, instrument_name, gm_program, value, midi_pitch, track_name

    Raises
    ------
    ValueError if none of the configured stations provides any usable data.
    """
    d = df.copy()
    if time_col in d.columns:
        d[time_col] = pd.to_datetime(d[time_col], dayfirst=True, errors="coerce")
        d = d.sort_values(time_col)
    else:
        # Assume the index is time. rename_axis() guarantees that reset_index()
        # produces a column called `time_col` whatever the index was named;
        # renaming a column called "index" only works for an unnamed index.
        d.index = pd.to_datetime(d.index, errors="coerce")
        d = d.sort_index().rename_axis(time_col).reset_index()

    out_rows = []
    for inst in instruments:
        st = inst["station"]
        if st not in d.columns:
            continue

        series = d[[time_col, st]].rename(columns={st: "value"}).copy()
        series["value"] = pd.to_numeric(series["value"], errors="coerce")
        # fill missing values (needed to define min/max robustly across the piece)
        series["value"] = series["value"].interpolate(limit_direction="both")
        series = series.dropna(subset=["value"])
        if series.empty:
            # Column exists but holds no numeric data at all: nothing to sonify.
            continue

        low_m = note_to_midi(inst["low"])
        high_m = note_to_midi(inst["high"])
        scale = build_scale_in_range(root_note=scale_root, low=low_m, high=high_m)

        midi_pitch = quantize_values_to_scale(series["value"].to_numpy(), scale)

        out_rows.append(pd.DataFrame({
            "time": series[time_col].to_numpy(),
            "station": st,
            "instrument_name": inst["instrument_name"],
            "gm_program": inst["gm_program"],
            "value": series["value"].to_numpy(),
            "midi_pitch": midi_pitch
        }))

    if not out_rows:
        # pd.concat([]) would raise an opaque "No objects to concatenate".
        raise ValueError(
            "None of the configured stations has usable data in df; "
            "check the station names in `instruments` against df.columns."
        )

    mapped = pd.concat(out_rows, ignore_index=True)
    # track label for notation tools
    mapped["track_name"] = mapped["instrument_name"] + " (" + mapped["station"] + ")"
    return mapped

# ---------- rhythm + MIDI rendering ----------
def render_rhythmic_midi(mapped_df: pd.DataFrame,
                         bars: int = 64,
                         tempo_bpm: int = 80,
                         time_signature=(4,4),
                         quarter_weight: float = 0.55,
                         seed: int = 7,
                         ticks_per_beat: int = 480,
                         outfile: str = "water_piece_64bars.mid"):
    """
    Create a General MIDI file (type 1) with one named track per `track_name` group.

    Each track's pitch stream is resampled to the piece length on an eighth-note
    grid. Runs of equal pitch are then split into notes lasting an eighth (0.5),
    quarter (1), half (2) or whole (4) beats, drawn at random with the quarter
    note most likely by default.

    Parameters
    ----------
    mapped_df : frame with columns "track_name", "time", "midi_pitch", "gm_program".
    bars : length of the piece in bars.
    tempo_bpm : tempo in beats per minute.
    time_signature : (numerator, denominator).
    quarter_weight : probability mass given to quarter notes; the rest is split
        25/40/35 % over eighth/half/whole notes.
    seed : seed for the random duration choice, so output is reproducible.
    ticks_per_beat : MIDI resolution.
    outfile : path of the MIDI file to write.

    Returns
    -------
    `outfile`, after the file has been saved.

    Raises
    ------
    ImportError if mido cannot be imported.
    """
    # lazy import / install hint
    try:
        import mido
        from mido import Message, MetaMessage
    except Exception as e:
        raise ImportError("Please install mido first (in Colab: !pip -q install mido).") from e

    rng = np.random.default_rng(seed)
    beats_per_bar = time_signature[0] * (4 / time_signature[1])
    total_beats = int(round(bars * beats_per_bar))

    # Base grid: eighth notes => 2 steps per beat
    steps_per_beat = 2
    total_steps = total_beats * steps_per_beat  # each step = 1/8
    step_ticks = int(ticks_per_beat / steps_per_beat)

    # Allowed durations in steps (1=8th, 2=quarter, 4=half, 8=whole)
    dur_steps = np.array([1, 2, 4, 8], dtype=int)

    # Weights: quarter_weight goes to the quarter note, the remaining
    # probability is distributed over the other three durations.
    rem = 1.0 - quarter_weight
    weights = np.array([rem*0.25, quarter_weight, rem*0.40, rem*0.35], dtype=float)
    weights = weights / weights.sum()

    # Resample each track's pitch stream to exactly total_steps points
    tracks = []
    for track_name, g in mapped_df.groupby("track_name", sort=False):
        g = g.sort_values("time")
        pitches = g["midi_pitch"].to_numpy().astype(int)
        if len(pitches) == 0:
            continue

        # Nearest-neighbour pick in index space keeps the global contour. It only
        # ever emits pitches that are already in the stream, so the result stays
        # on the quantised scale (linear interpolation would create in-between
        # pitches that are not scale tones).
        src_idx = np.rint(np.linspace(0, len(pitches) - 1, num=total_steps)).astype(int)
        pitches_rs = pitches[src_idx]

        tracks.append({
            "track_name": track_name,
            "gm_program": int(g["gm_program"].iloc[0]),
            "pitches_steps": pitches_rs
        })

    def steps_to_events(pitches_steps: np.ndarray):
        """Turn stepwise pitches into (pitch, duration_in_steps) note events."""
        events = []
        i = 0
        while i < len(pitches_steps):
            # run-length encode identical pitches
            p = int(pitches_steps[i])
            j = i + 1
            while j < len(pitches_steps) and int(pitches_steps[j]) == p:
                j += 1

            # split this run into allowed durations with probabilistic preference
            remaining = j - i  # in steps
            while remaining > 0:
                possible = dur_steps[dur_steps <= remaining]
                w = weights[:len(possible)]
                w_sum = w.sum()
                if w_sum > 0:
                    d = int(rng.choice(possible, p=w / w_sum))
                else:
                    # All fitting durations have zero weight (e.g. quarter_weight=1.0
                    # with a single step left): take the longest one that fits
                    # instead of dividing by zero.
                    d = int(possible[-1])
                events.append((p, d))
                remaining -= d

            i = j
        return events

    # Build MIDI
    mid = mido.MidiFile(type=1, ticks_per_beat=ticks_per_beat)

    # Global meta track
    meta = mido.MidiTrack()
    mid.tracks.append(meta)
    meta.append(MetaMessage("track_name", name="Score", time=0))
    meta.append(MetaMessage("time_signature", numerator=time_signature[0], denominator=time_signature[1], time=0))
    meta.append(MetaMessage("set_tempo", tempo=mido.bpm2tempo(tempo_bpm), time=0))

    # A program change applies to a channel, not to a track. Give every track its
    # own channel so the instruments do not overwrite each other; channel 9 is
    # skipped because General MIDI reserves it for percussion. With more than 15
    # tracks the channels wrap around and are shared again.
    melodic_channels = [c for c in range(16) if c != 9]
    velocity = 90

    # One track per instrument
    for k, tr in enumerate(tracks):
        channel = melodic_channels[k % len(melodic_channels)]
        t = mido.MidiTrack()
        mid.tracks.append(t)
        t.append(MetaMessage("track_name", name=tr["track_name"], time=0))
        # GM program change: mido uses 0..127 internally
        t.append(Message("program_change", program=tr["gm_program"]-1, time=0, channel=channel))

        # Notes follow each other back to back: note_on with delta 0, note_off
        # after the note's duration in ticks.
        for pitch, d_steps in steps_to_events(tr["pitches_steps"]):
            dur_ticks = d_steps * step_ticks
            t.append(Message("note_on", note=int(pitch), velocity=velocity, time=0, channel=channel))
            t.append(Message("note_off", note=int(pitch), velocity=0, time=dur_ticks, channel=channel))

    mid.save(outfile)
    return outfile


In [None]:
# --------- Example usage ----------
# Map every configured station onto D Dorian, then render 64 bars of 4/4 at 80 bpm.
mapped = map_data_to_midi(df, scale_root="D3")
midi_path = render_rhythmic_midi(
    mapped,
    bars=64,
    tempo_bpm=80,
    time_signature=(4, 4),
)
print("Wrote:", midi_path)

In [None]:
import numpy as np
import pandas as pd
from scipy.io import wavfile

FILE = "DataSet_20230217-20231231.csv"

# Reload the CSV; from here on `df` is indexed by the parsed, sorted timestamps.
df = pd.read_csv(FILE)
df = (
    df.assign(date=pd.to_datetime(df["date"], dayfirst=True))
      .sort_values("date")
      .set_index("date")
)

stations = ["P343", "P349", "P618", "P619", "P501", "P33", "P37", "P38", "P9"]



In [None]:
station = "P343"

# Daily mean (other rules worth trying: "3H", "6H", "12H", "1D", "7D");
# days without any reading are dropped.
daily_mean = df[station].resample("1D").mean()
x = daily_mean.dropna()


In [None]:
def midi_to_hz(midi):
    """Convert MIDI note number(s) to frequency in Hz, with MIDI 69 = 440 Hz and 12 steps per octave."""
    semitones_from_a4 = midi - 69
    return 440.0 * (2.0 ** (semitones_from_a4 / 12.0))

# C natural minor as semitone offsets from the root: C D Eb F G Ab Bb
scale_degrees = np.array([0, 2, 3, 5, 7, 8, 10])

# Root note C3 = MIDI 48
root = 48
octaves = 2

# Stack the scale once per octave (optionally extend with a few higher notes
# here), then order ascending.
per_octave = [root + 12*o + scale_degrees for o in range(octaves)]
scale_midi = np.sort(np.concatenate(per_octave))
scale_midi[:10], len(scale_midi)


In [None]:
def quantize_to_scale(values, scale_midi):
    """Map values linearly onto `scale_midi` and return the nearest scale pitch for each.

    The smallest value maps to scale_midi[0] and the largest to scale_midi[-1].
    A tiny epsilon in the denominator avoids division by zero, so constant
    input maps to scale_midi[0].
    """
    v = np.asarray(values, dtype=float)
    lowest = np.nanmin(v)
    span = np.nanmax(v) - lowest + 1e-12
    # Normalise to 0..1
    unit = (v - lowest) / span
    # Turn the normalised position into a scale index
    top = len(scale_midi) - 1
    positions = np.round(unit * top).astype(int)
    return scale_midi[np.clip(positions, 0, top)]

# Quantise the resampled series onto the scale, then convert the MIDI notes to Hz.
melody_midi = quantize_to_scale(x.to_numpy(), scale_midi)
melody_hz = midi_to_hz(melody_midi)

In [None]:
def adsr_env(n, sr, a=0.02, d=0.08, s=0.7, r=0.10):
    """Return an attack/decay/sustain/release amplitude envelope of exactly `n` samples.

    Parameters
    ----------
    n : envelope length in samples.
    sr : sample rate in Hz.
    a, d, r : attack, decay and release times in seconds.
    s : sustain level.

    When the note is too short to hold the requested attack, decay and release,
    the three segments are shrunk proportionally so the envelope keeps its
    shape. A plain rising ramp would end at full amplitude and click at every
    note boundary.
    """
    n = int(n)
    if n <= 0:
        return np.zeros(0)

    aN = int(a*sr); dN = int(d*sr); rN = int(r*sr)
    total = aN + dN + rN
    if total > n:
        # Very short note: scale the segments down (integer arithmetic, rounding
        # down); the few samples lost to rounding end up in the sustain part.
        aN = aN * n // total
        dN = dN * n // total
        rN = rN * n // total

    sustainN = n - (aN + dN + rN)
    A = np.linspace(0, 1, aN, endpoint=False)
    D = np.linspace(1, s, dN, endpoint=False)
    S = np.full(sustainN, s)
    R = np.linspace(s, 0, rN, endpoint=True)
    return np.concatenate([A, D, S, R])

def render_melody(freqs_hz, sr=44100, note_duration=0.18, amp=0.2):
    """Render one enveloped tone per frequency, back to back, as a float32 signal.

    Every note lasts `note_duration` seconds at sample rate `sr` and consists of
    a sine plus a quieter second partial, shaped by adsr_env. The result is
    peak-normalised at the end, so `amp` scales the notes before normalisation
    but does not set the final level.
    """
    noteN = int(note_duration * sr)
    t = np.arange(noteN) / sr
    env = adsr_env(noteN, sr)
    y = np.zeros(noteN * len(freqs_hz), dtype=np.float32)
    for k, f in enumerate(freqs_hz):
        # Timbre knob: fundamental plus a light second partial for more "body".
        fundamental = np.sin(2*np.pi*f*t)
        second_partial = 0.25*np.sin(2*np.pi*2*f*t)
        voiced = (fundamental + second_partial) * env
        y[k*noteN:(k+1)*noteN] = amp * voiced
    # Normalise to peak level
    mx = np.max(np.abs(y)) + 1e-12
    return (y / mx).astype(np.float32)

In [None]:
# Render with very short notes (0.02 s each); note_duration=0.20 is the slower alternative.
audio = render_melody(melody_hz, note_duration=0.02, amp=0.9)

# Store as 16-bit PCM at 44.1 kHz.
out = "water_P343_daily_cminor.wav"
pcm16 = (audio * 32767).astype(np.int16)
wavfile.write(out, 44100, pcm16)
out

In [None]:
from scipy.io import wavfile

def render_continuous(freqs_hz, sr=44100, note_duration=0.20, amp=0.2, glide_ms=20):
    """
    Render a phase-continuous sine melody with portamento between notes.

    Parameters
    ----------
    freqs_hz : 1-D array-like, one frequency in Hz per note frame.
    sr : sample rate in Hz.
    note_duration : length of every note in seconds.
    amp : gain applied before the final peak normalisation (so it does not set
        the final level).
    glide_ms : glide time in milliseconds; it sets the smoothing constant of the
        one-pole low-pass applied to the frequency curve.

    Returns
    -------
    float32 array, peak-normalised; empty if there are no notes or the note
    length rounds down to zero samples.
    """
    from scipy.signal import lfilter  # local import keeps this cell self-contained

    freqs = np.asarray(freqs_hz, dtype=float)  # also accepts lists / Series
    noteN = int(note_duration * sr)
    N = noteN * len(freqs)
    if N == 0:
        return np.zeros(0, dtype=np.float32)

    # Frequency per sample (piecewise constant)
    f = np.repeat(freqs, noteN)

    # Glide as a first-order low-pass on the frequency (portamento):
    #   f_s[n] = f_s[n-1] + alpha * (f[n] - f_s[n-1]),  f_s[0] = f[0]
    # Written as an IIR filter so it runs vectorised instead of looping over
    # every sample in Python. zi is chosen so that the first output equals f[0].
    glideN = max(1, int((glide_ms/1000.0)*sr))
    alpha = 1.0 / glideN
    f_s, _ = lfilter([alpha], [1.0, alpha - 1.0], f,
                     zi=np.array([(1.0 - alpha) * f[0]]))

    # Integrate the frequency to get a continuous phase
    phase = 2*np.pi * np.cumsum(f_s) / sr
    y = np.sin(phase)

    # Very light amplitude envelope per note (against residual clicks):
    # 10 ms fade in/out, limited to the note length so notes shorter than the
    # fade cannot index outside their own frame.
    fadeN = min(int(0.01*sr), noteN)
    note_env = np.ones(noteN, dtype=float)
    if fadeN > 0:
        w = np.linspace(0, 1, fadeN)
        note_env[:fadeN] *= w
        note_env[noteN-fadeN:] *= w[::-1]
    env = np.tile(note_env, len(freqs))

    y = amp * y * env
    y = y / (np.max(np.abs(y)) + 1e-12)
    return y.astype(np.float32)

# Same melody, rendered phase-continuously with a 25 ms glide and saved as 16-bit PCM.
freqs_hz = midi_to_hz(melody_midi)
audio = render_continuous(freqs_hz, note_duration=0.20, amp=0.9, glide_ms=25)
pcm16 = (audio*32767).astype(np.int16)
wavfile.write("water_smooth.wav", 44100, pcm16)

In [None]:
# Inline player for the current `audio` array at 44.1 kHz.
Audio(audio, rate=44100)

In [None]:
station = "P343"

# Daily means, with gaps filled by interpolation in both directions.
x = df[station].resample("1D").mean().interpolate(limit_direction="both")

# Only the first 50 data points are shown.
x50 = x.iloc[:50]

fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(x50.index, x50.values, marker="o")
ax.set_title(f"Water Level at Station {station} – First 50 Days")
ax.set_xlabel("Date")
ax.set_ylabel("Water Level")
ax.grid(True)
fig.tight_layout()
plt.show()

In [None]:
# --- Load df only if it is missing from the session (adjust FILE if needed) ---
try:
    df
except NameError:
    FILE = "DataSet_20230217-20231231.csv"
    df = (
        pd.read_csv(FILE)
        .assign(date=lambda frame: pd.to_datetime(frame["date"], dayfirst=True))
        .sort_values("date")
        .set_index("date")
    )

# --- Settings: station to sonify; every data point becomes one quarter note in D Dorian ---
station = "P343"          # change to: "P37", "P349", ...
tempo_bpm = 120           # only used when a MIDI file is written
quarter_note_per_point = True

# 1) Time grid: one data point = one quarter note.
#    The raw data is roughly 3-hourly; resample to "1D" or "6H" for fewer notes.
#    Gaps are interpolated first, then a centred 5-point rolling mean smooths
#    the curve (optional; often helps musically).
x = (
    df[station]
    .copy()
    .interpolate(limit_direction="both")
    .dropna()
    .rolling(window=5, center=True, min_periods=1)
    .mean()
)
values = x.to_numpy(dtype=float)

# 2) D Dorian (D E F G A B C) as semitone offsets from D
d_dorian_degrees = np.array([0, 2, 3, 5, 7, 9, 10])

# Register: D3 = MIDI 50 (D4 = 62, D2 = 38, ...), spanning two octaves.
root_midi = 50
octaves = 2

scale_midi = np.sort(
    np.concatenate([root_midi + 12*o + d_dorian_degrees for o in range(octaves)])
)

# 3) Map data values -> scale degrees (quantized).
#    Clipping to the 2 %..98 % quantiles keeps outliers from dominating the range.
lo, hi = np.quantile(values, [0.02, 0.98])
values_clip = np.clip(values, lo, hi)

clip_min = values_clip.min()
clip_span = values_clip.max() - clip_min + 1e-12
t = (values_clip - clip_min) / clip_span  # 0..1
top_index = len(scale_midi) - 1
idx = np.clip(np.round(t * top_index).astype(int), 0, top_index)

midi_notes = scale_midi[idx].astype(int)

# 4) Result: one MIDI note per data point = one quarter note
print(f"Station: {station}")
print(f"Data points (quarters): {len(midi_notes)}")
print("First 50 MIDI notes:", midi_notes[:50].tolist())
print("First 10 with timestamps:")
for ts, n in zip(x.index[:10], midi_notes[:10]):
    print(ts, "->", int(n))

# (Optional) Export a .mid file when midiutil is available; any failure only
# prints a notice instead of stopping the notebook.
try:
    from midiutil import MIDIFile

    out_mid = f"{station}_D_dorian_quarters.mid"
    midi = MIDIFile(1)
    midi.addTempo(0, 0, tempo_bpm)

    duration_beats = 1.0   # quarter note = 1 beat
    velocity = 90
    time_beats = 0.0

    for n in midi_notes:
        midi.addNote(0, 0, int(n), time_beats, duration_beats, velocity)
        time_beats += duration_beats

    with open(out_mid, "wb") as f:
        midi.writeFile(f)

    print("\nWrote MIDI file:", out_mid)

except Exception as e:
    print("\n(MIDI export skipped) Reason:", repr(e))


In [None]:
# If df is not defined yet, load it. The path is the same relative file name the
# other cells use (an absolute /mnt/data/... path exists only on one machine).
try:
    df
except NameError:
    FILE = "DataSet_20230217-20231231.csv"
    df = pd.read_csv(FILE)
    df["date"] = pd.to_datetime(df["date"], dayfirst=True)
    df = df.sort_values("date").set_index("date")

station = "P343"  # change as needed

# --- 1) Use raw data points (one quarter note per data point) ---
x = df[station].copy()
x = x.interpolate(limit_direction="both").astype(float)

# Optional mild smoothing (centred 3-point mean) to reduce noise but keep detail
x = x.rolling(window=3, center=True, min_periods=1).mean()

values = x.values

# --- 2) Define D Dorian (D E F G A B C) in MIDI across several octaves ---
# Semitone offsets from D: 0,2,3,5,7,9,10
d_dorian_degrees = np.array([0, 2, 3, 5, 7, 9, 10])

root_midi = 50   # D3
octaves = 5      # increase for more pitch resolution (try 4..6)

scale_midi = np.concatenate([root_midi + 12*o + d_dorian_degrees for o in range(octaves)])
scale_midi = np.sort(scale_midi)

# --- 3) Rolling (local) normalization: makes small changes audible as pitch motion ---
# Window chosen in "number of samples". With ~3-hour data: 8 samples/day.
# Example: 7 days -> 56 samples. Try 3 days (24) up to 30 days (240).
win = 56

s = pd.Series(values)
roll_min = s.rolling(win, center=True, min_periods=1).min().to_numpy()
roll_max = s.rolling(win, center=True, min_periods=1).max().to_numpy()

# Avoid division by zero in flat windows (there the numerator is 0 as well,
# so those points land on the lowest scale step)
den = (roll_max - roll_min)
den = np.where(den < 1e-12, 1.0, den)

t = (values - roll_min) / den  # local 0..1
t = np.clip(t, 0.0, 1.0)

idx = np.round(t * (len(scale_midi) - 1)).astype(int)
midi_notes = scale_midi[idx].astype(int)

# --- 4) Dynamics (optional but recommended): map change-rate to velocity ---
dv = np.abs(np.diff(values, prepend=values[0]))
# Robust scaling for velocity: the 95 % quantile of the change rate maps to full scale
v95 = np.quantile(dv, 0.95) + 1e-12
vel = np.clip(dv / v95, 0, 1)
velocity = (30 + vel * 90).astype(int)  # MIDI velocity 30..120

print(f"Station: {station}")
print(f"Quarter notes (one per data point): {len(midi_notes)}")
print("First 50 MIDI notes:", midi_notes[:50].tolist())
print("First 50 velocities:", velocity[:50].tolist())
print("First 10 with timestamps:")
for ts, n, v in list(zip(x.index, midi_notes, velocity))[:10]:
    print(ts, "->", int(n), "vel", int(v))

# If you want a compact "score list" to compose from:
score = pd.DataFrame({"date": x.index, "midi": midi_notes, "velocity": velocity})
score.to_csv(f"{station}_D_dorian_quarters.csv", index=False)  # optional


In [None]:
# (Optional) Export a .mid file when midiutil is available; any failure only
# prints a notice instead of stopping the notebook.
try:
    from midiutil import MIDIFile

    out_mid = f"{station}_D_dorian_quarters_more.mid"
    midi = MIDIFile(1)
    midi.addTempo(0, 0, tempo_bpm)

    duration_beats = 1.0   # quarter note = 1 beat

    # Use the velocities already in `velocity` instead of overwriting them with
    # a constant. `velocity` is a per-note array after the dynamics step and a
    # single number after the simpler export cell; broadcasting covers both.
    # A length mismatch raises here and is reported below.
    note_velocities = np.broadcast_to(np.asarray(velocity, dtype=int), np.shape(midi_notes))

    time_beats = 0.0
    for n, v in zip(midi_notes, note_velocities):
        midi.addNote(0, 0, int(n), time_beats, duration_beats, int(v))
        time_beats += duration_beats

    with open(out_mid, "wb") as f:
        midi.writeFile(f)

    print("\nWrote MIDI file:", out_mid)

except Exception as e:
    print("\n(MIDI export skipped) Reason:", repr(e))