# Inference

In [1]:
from peft import PeftConfig, PeftModel
from transformers import AutoModelForTextToWaveform, AutoProcessor
import torch
from IPython.display import Audio

# Run on the first CUDA GPU when PyTorch reports one as usable; otherwise stay on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

  from .autonotebook import tqdm as notebook_tqdm


### Music like the 1975

In [2]:
# Local directory that holds the fine-tuned LoRA adapter
local_model_path = "/mnt/f/SMC/CMC/musicgen-dreamboothing/the1975"

# The adapter config records which base checkpoint the adapter was trained on
config = PeftConfig.from_pretrained(local_model_path)

# Load the base model named in the adapter config.
# Half precision is only requested when running on a GPU: `device` may fall
# back to CPU, where float16 inference is poorly supported, so use float32 there.
model = AutoModelForTextToWaveform.from_pretrained(
    config.base_model_name_or_path,
    torch_dtype=torch.float16 if device.type == "cuda" else torch.float32,
)

# Attach the fine-tuned LoRA weights from the local directory and move to `device`
model = PeftModel.from_pretrained(model, local_model_path).to(device)

# The processor comes from the base model, not from the adapter directory
processor = AutoProcessor.from_pretrained(config.base_model_name_or_path)


  self.register_buffer("padding_total", torch.tensor(kernel_size - stride, dtype=torch.int64), persistent=False)
Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00,  1.33s/it]


In [None]:
# Sample one clip from the adapter fine-tuned on the 1975
prompt_text = "slow melody music like the 1975"
inputs = processor(text=[prompt_text], padding=True, return_tensors="pt").to(device)

# Sampled generation with guidance_scale 3, capped at 1024 new tokens
audio_values = model.generate(
    **inputs,
    do_sample=True,
    guidance_scale=3,
    max_new_tokens=1024,
)

# The trailing expression renders an inline audio player (32 kHz playback rate)
waveform = audio_values.cpu().numpy().squeeze()
Audio(waveform, rate=32000)

### Music like Indian classical fusion

In [2]:
# Local directory that holds the fine-tuned LoRA adapter
local_model_path = "/mnt/f/SMC/CMC/musicgen-dreamboothing/as-ch3"

# The adapter config records which base checkpoint the adapter was trained on
config = PeftConfig.from_pretrained(local_model_path)

# Load the base model named in the adapter config.
# Half precision is only requested when running on a GPU: `device` may fall
# back to CPU, where float16 inference is poorly supported, so use float32 there.
model = AutoModelForTextToWaveform.from_pretrained(
    config.base_model_name_or_path,
    torch_dtype=torch.float16 if device.type == "cuda" else torch.float32,
)

# Attach the fine-tuned LoRA weights from the local directory and move to `device`
model = PeftModel.from_pretrained(model, local_model_path).to(device)

# The processor comes from the base model, not from the adapter directory
processor = AutoProcessor.from_pretrained(config.base_model_name_or_path)

  self.register_buffer("padding_total", torch.tensor(kernel_size - stride, dtype=torch.int64), persistent=False)
Loading checkpoint shards: 100%|██████████| 2/2 [00:04<00:00,  2.38s/it]


In [31]:
# Sample one clip from the Indian classical fusion adapter
prompt_text = "Indian classical fusion music with violin playing melodies with different notes dynamic tempo shifts where it goes fast and slow and piano playing the chords"
inputs = processor(text=[prompt_text], padding=True, return_tensors="pt").to(device)

# Sampled generation with guidance_scale 3, capped at 1024 new tokens
audio_values = model.generate(
    **inputs,
    do_sample=True,
    guidance_scale=3,
    max_new_tokens=1024,
)

# The trailing expression renders an inline audio player (32 kHz playback rate)
waveform = audio_values.cpu().numpy().squeeze()
Audio(waveform, rate=32000)

In [26]:
import os

import numpy as np
import scipy.io.wavfile

# Move the most recent generation (`audio_values`) to host memory, drop
# singleton dimensions and cast to float32 before handing it to the WAV writer.
audio_np = audio_values.cpu().numpy().squeeze().astype(np.float32)

# Save as WAV file
output_filename = "sitar_beautiful.wav"
output_path = os.path.join("generated_music", output_filename)
# wavfile.write does not create missing parent directories, so make sure the
# target folder exists first (no-op when it is already there).
os.makedirs(os.path.dirname(output_path), exist_ok=True)
scipy.io.wavfile.write(output_path, rate=32000, data=audio_np)

## Indian classical fusion music: training run 2

In [2]:
# Local directory that holds the fine-tuned LoRA adapter
local_model_path = "/mnt/f/SMC/CMC/musicgen-dreamboothing/as-traes"

# The adapter config records which base checkpoint the adapter was trained on
config = PeftConfig.from_pretrained(local_model_path)

# Load the base model named in the adapter config.
# Half precision is only requested when running on a GPU: `device` may fall
# back to CPU, where float16 inference is poorly supported, so use float32 there.
model = AutoModelForTextToWaveform.from_pretrained(
    config.base_model_name_or_path,
    torch_dtype=torch.float16 if device.type == "cuda" else torch.float32,
)

# Attach the fine-tuned LoRA weights from the local directory and move to `device`
model = PeftModel.from_pretrained(model, local_model_path).to(device)

# The processor comes from the base model, not from the adapter directory
processor = AutoProcessor.from_pretrained(config.base_model_name_or_path)

  self.register_buffer("padding_total", torch.tensor(kernel_size - stride, dtype=torch.int64), persistent=False)
Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00,  1.41s/it]


In [None]:
# Sample one clip from the second Indian classical fusion adapter
prompt_text = "Indian classical fusion music with acousticbassguitar playing melody with subtle tabla accompanying it."
inputs = processor(text=[prompt_text], padding=True, return_tensors="pt").to(device)

# Sampled generation with guidance_scale 3, capped at 1024 new tokens
audio_values = model.generate(
    **inputs,
    do_sample=True,
    guidance_scale=3,
    max_new_tokens=1024,
)

# The trailing expression renders an inline audio player (32 kHz playback rate)
waveform = audio_values.cpu().numpy().squeeze()
Audio(waveform, rate=32000)

In [22]:
import os

import numpy as np
import scipy.io.wavfile

# Move the most recent generation (`audio_values`) to host memory, drop
# singleton dimensions and cast to float32 before handing it to the WAV writer.
audio_np = audio_values.cpu().numpy().squeeze().astype(np.float32)

# Save as WAV file
output_filename = "sitar_traditional_raga.wav"
output_path = os.path.join("generated_music", output_filename)
# wavfile.write does not create missing parent directories, so make sure the
# target folder exists first (no-op when it is already there).
os.makedirs(os.path.dirname(output_path), exist_ok=True)
scipy.io.wavfile.write(output_path, rate=32000, data=audio_np)

In [None]:
# Imported here so this cell does not depend on the optional "save as WAV"
# cells above having been executed first.
import os
import time

import numpy as np
import scipy.io.wavfile

# Create a directory to save the generated audio files
output_dir = "generated_indian_fusion"
os.makedirs(output_dir, exist_ok=True)

# Each entry pairs an output file stem ("name") with the text prompt ("text")
prompts = [
    {
        "name": "01_solo_sitar_improvisation",
        "text": "Indian classical fusion | solo sitar in Raga Yaman | alap-jhala progression | organic studio warmth | mood: meditative, introspective | minimalist production."
    },
    {
        "name": "02_tabla_driven_fusion",
        "text": "Sitar melodies over syncopated teental tabla rhythms | ambient synth undertones | dynamic tempo shifts | mood: melancholic, restless | cross-cultural harmony."
    },
    {
        "name": "03_cinematic_orchestral_fusion",
        "text": "Sitar concerto with swelling cinematic strings | 16-beat tabla cycle | minimalist piano accents | mood: dramatic, yearning | organic-meets-studio production."
    },
    {
        "name": "04_ambient_electronic_raga",
        "text": "Sitar processed with reverb/delay | glitchy tabla breaks | ambient sub-bass drones | mood: haunting, meditative | raga-inspired motifs | modern studio textures."
    },
    {
        "name": "05_acoustic_folk_fusion",
        "text": "Sitar and acoustic guitar duet | 6/8 folk rhythm | earthy tanpura drone | mood: nostalgic, uplifting | organic production."
    },
    {
        "name": "06_collaborative_vocal_fusion",
        "text": "Wordless female vocals harmonizing with sitar | handpan rhythms | sparse sarangi layers | mood: nomadic, spiritual | cross-cultural harmony."
    },
    {
        "name": "07_microtonal_experiment",
        "text": "Sitar exploring 22-shruti microtones | free-time alap | dissonant prepared piano | mood: abstract, introspective | minimalist cross-cultural layers."
    },
    {
        "name": "08_rhythmic_ritual_fusion",
        "text": "Polyrhythmic tabla and dholak | sitar jhala strumming | fiery tempo shifts | mood: primal, ecstatic | organic raw production."
    },
    {
        "name": "09_neo_classical_fusion",
        "text": "Sitar with minimalist string quartet | cyclic teental rhythm | mood: bittersweet, cinematic | studio clarity."
    },
    {
        "name": "10_desert_blues_fusion",
        "text": "Sitar meets slide guitar bendir drum | 5/4 polymeter | desert wind field recordings | mood: sweltering, trance-like | organic-meets-electronic."
    }
]

# Progress total is derived from the list so the counter stays correct
# when prompts are added or removed.
num_prompts = len(prompts)

# Generate audio for each prompt
for i, prompt in enumerate(prompts):
    print(f"\n[{i+1}/{num_prompts}] Generating: {prompt['name']}")
    print(f"Prompt: {prompt['text']}")

    # Process the text prompt
    inputs = processor(
        text=[prompt['text']],
        padding=True,
        return_tensors="pt",
    ).to(device)

    # Generate audio; perf_counter is a monotonic clock meant for measuring intervals
    start_time = time.perf_counter()
    audio_values = model.generate(
        **inputs,
        do_sample=True,
        guidance_scale=3,
        max_new_tokens=1024
    )
    generation_time = time.perf_counter() - start_time

    # Convert to a numpy array and cast to float32 before writing the WAV file
    audio_np = audio_values.cpu().numpy().squeeze().astype(np.float32)

    # Save as WAV file named after the prompt
    output_path = os.path.join(output_dir, f"{prompt['name']}.wav")
    scipy.io.wavfile.write(output_path, rate=32000, data=audio_np)

    # Play the audio (in notebook)
    display(Audio(audio_np, rate=32000))

    print(f"✓ Saved to {output_path}")
    print(f"  Generation time: {generation_time:.2f} seconds")

    # Optional: clear GPU memory between generations
    torch.cuda.empty_cache()

    # Optional: add a small delay between generations
    time.sleep(1)

print("\n✓ Generation complete! All files saved to:", output_dir)

### Natural-language prompts

In [None]:

# Imported here so this cell does not depend on earlier cells (the previous
# batch cell or the optional "save as WAV" cells) having been executed first.
import os
import time

import numpy as np
import scipy.io.wavfile

# Create a directory to save the generated audio files
output_dir = "generated_music/indian_fusion_set3"
os.makedirs(output_dir, exist_ok=True)

# Each entry pairs an output file stem ("name") with the text prompt ("text")
prompts = [
    {
        "name": "01_core_fusion_elements",
        "text": "Indian classical fusion blending sitar-driven melodies and tabla rhythms, layered with minimalist cross-cultural textures. Introspective and meditative mood with subtle electronic production and organic raga improvisation."
    },
    {
        "name": "02_orchestral_melancholic",
        "text": "Contemporary orchestral textures merging with sitar-driven melodies and tabla grooves, set against a melancholic and reflective backdrop. Features dynamic tempo shifts and studio-polished production."
    },
    {
        "name": "03_minimalist_meditative",
        "text": "Minimalist Indian classical fusion with slow, meditative sitar lines and subtle raga improvisation. Sparse tabla rhythms and electronic textures create a serene, organic-meets-studio atmosphere."
    },
    {
        "name": "04_bittersweet_evolving",
        "text": "Sitar-driven melodies with tabla rhythms and cross-cultural orchestral textures. A bittersweet, introspective mood with evolving tempo shifts and a fusion of traditional and electronic sounds."
    },
    {
        "name": "05_contemporary_contemplative",
        "text": "Contemporary electronic textures meet Indian raga improvisation, fused with minimalist tabla patterns. The mood is melancholic and contemplative, with subtle orchestral layers and studio refinement."
    },
    {
        "name": "06_cinematic_soulful",
        "text": "Fusion of Indian classical sitar and tabla rhythms, enriched by cinematic orchestral textures. Mood: meditative and soulful, featuring organic production with touches of modern electronic elements."
    },
    {
        "name": "07_raga_ambient",
        "text": "Sitar melodies exploring raga improvisation, paired with electronic textures and minimal percussion. Introspective and ambient, blending Indian classical roots with a modern, studio-driven sound."
    },
    {
        "name": "08_dynamic_minimalist",
        "text": "Dynamic tempo shifts between tabla and sitar, layered with minimalist electronic textures. Indian classical fusion with a melancholic and meditative mood, mixing organic instrumentation and studio techniques."
    },
    {
        "name": "09_calm_organic",
        "text": "Organic-meets-studio production with tabla-driven rhythms and introspective sitar improvisations. Mood: calm yet dynamic, blending Indian classical elements with subtle electronic textures."
    },
    {
        "name": "10_meditative_fiery",
        "text": "Cross-cultural fusion of sitar melodies, tabla grooves, and modern orchestral textures. Meditative yet fiery, featuring raga improvisation and minimalist layers with shifting tempos and electronic flourishes."
    }
]

# Progress total is derived from the list so the counter stays correct
# when prompts are added or removed.
num_prompts = len(prompts)

# Generate audio for each prompt
for i, prompt in enumerate(prompts):
    print(f"\n[{i+1}/{num_prompts}] Generating: {prompt['name']}")
    print(f"Prompt: {prompt['text']}")

    # Process the text prompt
    inputs = processor(
        text=[prompt['text']],
        padding=True,
        return_tensors="pt",
    ).to(device)

    # Generate audio; perf_counter is a monotonic clock meant for measuring intervals
    start_time = time.perf_counter()
    audio_values = model.generate(
        **inputs,
        do_sample=True,
        guidance_scale=3,
        max_new_tokens=1024
    )
    generation_time = time.perf_counter() - start_time

    # Convert to a numpy array and cast to float32 before writing the WAV file
    audio_np = audio_values.cpu().numpy().squeeze().astype(np.float32)

    # Save as WAV file named after the prompt
    output_path = os.path.join(output_dir, f"{prompt['name']}.wav")
    scipy.io.wavfile.write(output_path, rate=32000, data=audio_np)

    # Play the audio (in notebook)
    display(Audio(audio_np, rate=32000))

    print(f"✓ Saved to {output_path}")
    print(f"  Generation time: {generation_time:.2f} seconds")

    # Optional: clear GPU memory between generations
    torch.cuda.empty_cache()

    # Optional: add a small delay between generations
    time.sleep(1)

print("\n✓ Generation complete! All files saved to:", output_dir)

In [2]:
# Local directory that holds the fine-tuned LoRA adapter
local_model_path = "/mnt/f/SMC/CMC/musicgen-dreamboothing/as-new-updated"

# The adapter config records which base checkpoint the adapter was trained on
config = PeftConfig.from_pretrained(local_model_path)

# Load the base model named in the adapter config.
# Half precision is only requested when running on a GPU: `device` may fall
# back to CPU, where float16 inference is poorly supported, so use float32 there.
model = AutoModelForTextToWaveform.from_pretrained(
    config.base_model_name_or_path,
    torch_dtype=torch.float16 if device.type == "cuda" else torch.float32,
)

# Attach the fine-tuned LoRA weights from the local directory and move to `device`
model = PeftModel.from_pretrained(model, local_model_path).to(device)

# The processor comes from the base model, not from the adapter directory
processor = AutoProcessor.from_pretrained(config.base_model_name_or_path)


  self.register_buffer("padding_total", torch.tensor(kernel_size - stride, dtype=torch.int64), persistent=False)
Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00,  1.42s/it]


In [7]:
# Sample one clip from the updated Indian classical fusion adapter
prompt_text = "Indian classical fusion music with sitar playing melody with different notes, violin in the background as orchestra, with some percussion"
inputs = processor(text=[prompt_text], padding=True, return_tensors="pt").to(device)

# Sampled generation with guidance_scale 3, capped at 1024 new tokens
audio_values = model.generate(
    **inputs,
    do_sample=True,
    guidance_scale=3,
    max_new_tokens=1024,
)

# The trailing expression renders an inline audio player (32 kHz playback rate)
waveform = audio_values.cpu().numpy().squeeze()
Audio(waveform, rate=32000)