In [1]:
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model
import numpy as np

In [2]:
# Load VGG16 with its 'imagenet' weights, then build a second model that
# shares the same input but stops at the 'fc2' layer, so that calling
# predict() on it yields that layer's activations instead of class scores.
base_model = VGG16(weights='imagenet')
fc2_output = base_model.get_layer('fc2').output
model = Model(inputs=base_model.input, outputs=fc2_output)

In [3]:
def extract_features(img_path):
    """Return the flattened prediction of `model` for the image at `img_path`.

    The image is loaded at 224x224, converted to an array, given a leading
    batch axis, and run through `preprocess_input` before being passed to
    `model.predict`. The prediction is flattened to one dimension.
    """
    loaded_img = image.load_img(img_path, target_size=(224, 224))

    # Add a leading axis of size one so the single image forms a batch.
    batch = np.expand_dims(image.img_to_array(loaded_img), axis=0)
    batch = preprocess_input(batch)

    return model.predict(batch).flatten()

In [4]:
# Extract features from a sample image and report the vector's shape.
# NOTE(review): this is an absolute, machine-specific path; point it at a
# local image before re-running the notebook elsewhere.
sample_img_path = "C:\\Users\\adhik\\Downloads\\test.jpg"
img_features = extract_features(sample_img_path)
print(img_features.shape)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 770ms/step
(4096,)


In [28]:
# Normalize image features to fit the musical range (e.g., 60-80 for MIDI pitch)
def normalize_features(features, min_val, max_val):
    """Linearly rescale `features` into the range [min_val, max_val].

    Args:
        features: NumPy array or plain sequence of numbers.
        min_val: Value the smallest feature is mapped to.
        max_val: Value the largest feature is mapped to.

    Returns:
        NumPy array with the same shape as `features`. Empty input yields an
        empty array. If all features are equal, every entry is the midpoint
        of the target range.
    """
    features = np.asarray(features)

    # np.min / np.max raise ValueError on zero-size input (e.g. an empty
    # slice of a short feature vector), so return an empty result up front.
    if features.size == 0:
        return np.zeros(features.shape, dtype=float)

    # Compute the extremes once instead of on every use.
    lowest = np.min(features)
    highest = np.max(features)

    # Prevent division by zero if features have constant values
    if lowest == highest:
        return np.full(features.shape, (min_val + max_val) / 2)

    normalized = (features - lowest) / (highest - lowest)

    # Replace NaN with 0. If the input itself contains NaN, the min and max
    # are NaN as well, so every entry ends up at min_val.
    normalized = np.nan_to_num(normalized)
    return normalized * (max_val - min_val) + min_val


In [29]:
# Mapping image features to MIDI notes and durations
def map_features_to_music(features, num_notes=10):
    """Map image features to MIDI pitches and note durations.

    The first `num_notes` features become pitches and the following
    `num_notes` features become durations.

    Args:
        features: 1-D feature array. If it holds fewer than
            `2 * num_notes` values, the two returned arrays are shorter
            and may differ in length.
        num_notes: Number of notes to generate (default 10).

    Returns:
        Tuple `(pitches, durations)`:
        pitches: integer array scaled to 60-80 and truncated by
            `astype(int)`.
        durations: float array scaled to 0.5-2. When passed to
            `MIDIFile.addNote` (as `create_midi` does), MIDIUtil interprets
            these as beats by default, not seconds.
    """
    pitch_features = features[:num_notes]
    duration_features = features[num_notes:2 * num_notes]

    pitches = normalize_features(pitch_features, 60, 80)
    durations = normalize_features(duration_features, 0.5, 2)

    # Convert to integers (for pitches) and ensure no NaN values
    pitches = np.nan_to_num(pitches).astype(int)
    durations = np.nan_to_num(durations)

    return pitches, durations


In [30]:
from midiutil import MIDIFile
def create_midi(pitches, durations, file_name="output_music.mid"):
    """Write `pitches` as a sequence of back-to-back notes to a MIDI file.

    Each note starts where the previous one ended and lasts `durations[i]`.
    The file uses track 0 (named "Generated Track"), tempo 120, channel 0
    and volume 100 for every note, and is written to `file_name`.
    """
    track, channel, volume = 0, 0, 100

    midi = MIDIFile(1)
    midi.addTrackName(track, 0, "Generated Track")
    midi.addTempo(track, 0, 120)  # Set the tempo

    start = 0
    for idx, pitch in enumerate(pitches):
        length = durations[idx]
        midi.addNote(track, channel, pitch, start, length, volume)
        start += length  # The next note begins when this one ends

    with open(file_name, "wb") as out:
        midi.writeFile(out)

# Convert the features in `img_features` into notes and save them as MIDI.
pitches, durations = map_features_to_music(img_features)
create_midi(pitches, durations, file_name="image_to_music.mid")

In [31]:
def image_to_music(img_path, output_midi="output_music.mid"):
    """Generate a MIDI file from an image in a single call.

    Extracts features from `img_path`, maps them to pitches and durations,
    writes the notes to `output_midi`, and prints a confirmation message.
    """
    feature_vector = extract_features(img_path)
    note_pitches, note_durations = map_features_to_music(feature_vector)
    create_midi(note_pitches, note_durations, file_name=output_midi)
    print(f"Music generated and saved to {output_midi}")


In [36]:

# Run the full image-to-MIDI pipeline on a second sample image.
# NOTE(review): absolute, machine-specific input path.
image_to_music(
    "C:\\Users\\adhik\\Downloads\\test2.jpg",
    output_midi='generated_music.mid',
)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 376ms/step
Music generated and saved to generated_music.mid
