In [1]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import random

import pygame
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input, decode_predictions
from tensorflow.keras.preprocessing.image import img_to_array

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.cluster import KMeans

from midiutil import MIDIFile
from pydub import AudioSegment
import base64

import streamlit as st
from music21 import stream, note, converter, metadata
import os

pygame 2.5.2 (SDL 2.28.3, Python 3.11.0)
Hello from the pygame community. https://www.pygame.org/contribute.html





In [18]:
# Sound categories and their keyword descriptions.
# Each value is a comma-separated list of keywords; `trouver_categorie`
# vectorises these descriptions with TF-IDF and compares them with the labels
# predicted for the image, so the spelling of every keyword matters.
categories = {
    "Aircraft": "plane, airplane, airport",
    "Animals": "farm, donkey, pig, cow, duck, goose, pigeon, horse, ox, ram, buffalo, sheep, ice_bear",
    "Applause": "applause, show, performance, scene, cabaret",
    "Atmosphere": "atmosphere",
    "Bells": "bell, church, altar",
    "Birds": "bird, goose",
    "Clocks": "clock, ticker, time",
    "Crowds": "crowd, people",
    "Daily Life": "chill, coffee_shop, coffee, library, book, restaurant, tray",
    "Destruction": "destruction",
    "Electronics": "machine, computer, electronic",
    "Events": "firework, festival, party, concert",
    "Fire": "fire, wood, campfire, tents, camping",
    # Keyword was misspelt "footsetps", which could never match "footsteps".
    "Footsteps": "footsteps, run, walk",
    "Machines": "machine",
    "Medical": "hospital, sick, invalid, ill, unhealthy",
    "Military": "soldier, war, military, weapon, gun, battle, fog, volcano",
    "Nature": "tree, bench, park_bench, sun, water, sea, sunset, seaside, valley, forest, fountain, lakeside, sand, cliff, ice floe, palm, cascade, flower",
    "Sports": "tennis, basketball, ball, football, swimming-pool, swimming, horse, horse racing, boat",
    "Toys": "toy, children toy, puzzle",
    "Transport": "train, station train, cars, bus, taxi",
}

# MIDI instrument range for each sound category: (first program, last program).
# The names in the trailing comments are the author's labels for the two
# bounds. NOTE(review): whether the upper bound is inclusive depends on how the
# range is sampled elsewhere in the notebook -- confirm against the caller.
categories_instruments = {
    "Aircraft":    (96, 103),   # SFX Sci-fi to SFX Atmosphere
    "Animals":     (113, 123),  # Tinkle Bell to Bird Tweet
    "Applause":    (126, 127),  # Applause
    "Atmosphere":  (88, 94),    # New Age Syn Pad to Halo Syn Pad
    "Bells":       (13, 14),    # Xylophone, Tubular Bells
    "Birds":       (72, 78),    # Piccolo to Ocarina
    "Clocks":      (0, 7),      # Acoustic Grand Piano to Clavinet
    "Crowds":      (48, 62),    # String Ensemble 1 to Syn Brass 2
    "Daily Life":  (24, 31),    # Guitar
    "Destruction": (116, 118),  # Melodic Tom to Syn Drum
    "Electronics": (80, 127),   # Syn Square Wave to Gun Shot
    "Events":      (48, 62),    # String Ensemble 1 to Syn Brass 2
    "Fire":        (97, 99),    # SFX Soundtrack to SFX Brightness
    "Footsteps":   (115, 115),  # Woodblock
    "Machines":    (0, 118),    # Acoustic Grand Piano to Syn Drum
    "Medical":     (88, 94),    # New Age Syn Pad to Halo Syn Pad
    "Military":    (56, 61),    # Trumpet to Brass Section
    "Nature":      (72, 76),    # Piccolo to Bottle Blow
    "Sports":      (56, 62),    # Trumpet to Syn Brass 2
    "Toys":        (112, 118),  # Tinkle Bell to Syn Drum
    "Transport":   (97, 127),   # SFX Soundtrack to Gun Shot
}


In [19]:
# Path (relative to the working directory) of the image that the next cell
# loads with cv2.imread and classifies.
image_path = './pierre_boulez.jpg'

In [20]:
# Load a pre-trained classifier (VGG16).
# https://github.com/tkeldenich/VGG16_SimplyUse
model = VGG16()

# Load the image. cv2.imread does not raise when the file is missing or cannot
# be decoded -- it returns None -- so fail here with an explicit message rather
# than with an opaque error from cv2.resize further down.
image = cv2.imread(image_path)
if image is None:
    raise FileNotFoundError(f"Could not read image file: {image_path!r}")
image = cv2.resize(image, (224, 224))  # resize to the input size the model expects
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # convert from BGR to RGB channel order

# Pre-processing for the model: float array, leading batch axis, VGG16 normalisation.
image = img_to_array(image)
image = np.expand_dims(image, axis=0)
image = preprocess_input(image)

# Prediction
prediction = model.predict(image)
labels = decode_predictions(prediction)

# Keep the names of the three best predictions. Each decoded entry is indexed
# with [1] to get its label name; pad with None if fewer than three entries
# came back so label1..label3 are always defined.
top_names = [entry[1] for entry in labels[0][:3]]
label1, label2, label3 = top_names + [None] * (3 - len(top_names))

# Show the labels
print("Label 1:", label1)
print("Label 2:", label2)
print("Label 3:", label3)




Label 1: tray
Label 2: envelope
Label 3: handkerchief


In [21]:
def trouver_categorie(labels, categories):
    """Return the category whose description is most similar to the labels.

    The category descriptions are vectorised with TF-IDF, the labels are
    joined into one space-separated string and projected into the same
    vocabulary, and the category with the highest cosine similarity wins.

    Parameters
    ----------
    labels : iterable of str or None
        Labels predicted for the image. ``None`` entries (a missing
        prediction) are skipped instead of raising ``TypeError`` in the join.
    categories : dict
        Maps category name to its keyword description string.

    Returns
    -------
    The key of ``categories`` with the highest similarity score. When no
    label token occurs in any description, every score is 0 and
    ``np.argmax`` returns index 0, so the first category of ``categories``
    is returned.
    """
    noms = list(categories.keys())
    descriptions = list(categories.values())

    # Drop missing labels so the join below cannot fail on None.
    texte_labels = " ".join(label for label in labels if label is not None)

    # Learn the vocabulary from the descriptions and vectorise them in one pass.
    vect = TfidfVectorizer()
    cat_vectors = vect.fit_transform(descriptions)

    label_vector = vect.transform([texte_labels])
    sim_scores = cosine_similarity(label_vector, cat_vectors)

    # sim_scores has a single row, so the flat argmax is the category index.
    categorie_index = int(np.argmax(sim_scores))
    return noms[categorie_index]

# Labels produced by the classification cell (replace with the real labels).
labels_extraits = [label1, label2, label3]

# Look up the best-matching category and report it, followed by the top label.
categorie_correspondante = trouver_categorie(labels_extraits, categories)
print("Catégorie correspondante :", categorie_correspondante)
print(label1)


Catégorie correspondante : Daily Life
tray


In [6]:
# Tableau associant tonalité à caractère

