In [1]:
import pandas as pd

# 1. Charger le fichier VGGSound (sans en-tête) et nommer les colonnes
df = pd.read_csv('dataset/vggsound.csv', header=None, names=['video_id', 'start_sec', 'label', 'split'])

counts_df = (
    df['label']
    .value_counts()              # décompte des labels
    .reset_index()               # transforme en DataFrame
    .rename(columns={            # renomme les colonnes
        'index': 'label',
        'label': 'count'
    })
)

# 3) Enregistre dans un CSV sans la colonne d’index
counts_df.to_csv('label_counts.csv', index=False)

In [1]:
import pandas as pd

# 1. Charger le fichier VGGSound (sans en-tête) et nommer les colonnes
df = pd.read_csv('dataset/labels_classified.csv', header=0)
df[df['assigned_topics'] == '[3]']

Unnamed: 0,label,count,assigned_topics
2,"motorboat, speedboat acceleration",1050,[3]
5,driving buses,1050,[3]
7,male singing,1050,[3]
8,playing bass guitar,1050,[3]
9,"playing violin, fiddle",1050,[3]
...,...,...,...
270,blowtorch igniting,250,[3]
274,ferret dooking,239,[3]
292,playing shofar,200,[3]
300,playing washboard,200,[3]


In [6]:
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.cluster import KMeans
# from ace_tools import display_dataframe_to_user

# 1. Charger les labels
df = pd.read_csv('dataset/label_counts.csv', header=None, names=['label', 'count'])

# 2. Calculer des embeddings sémantiques avec un modèle pré‑entraîné
model = SentenceTransformer('all-MiniLM-L6-v2')
embeddings = model.encode(df['label'].tolist(), show_progress_bar=True)

# 3. Regrouper en clusters via K-Means
n_clusters = 4  # à ajuster selon le nombre de thématiques souhaité
kmeans = KMeans(n_clusters=n_clusters, random_state=42)
df['cluster'] = kmeans.fit_predict(embeddings)

# 4. Optionnel : afficher les labels de chaque cluster pour interprétation
for c in range(n_clusters):
    print(f"\nCluster {c} (exemples) :")
    print(df[df['cluster']==c]['label'].tolist())

# 5. Sauvegarder le résultat
output_path = 'dataset/labels_semantic_clusters.csv'
df.to_csv(output_path, index=False)

df



Batches: 100%|██████████| 10/10 [00:00<00:00, 68.70it/s]


Cluster 0 (exemples) :
['motorboat, speedboat acceleration', 'engine accelerating, revving, vroom', 'chainsawing trees', 'vacuum cleaner cleaning floors', 'car engine knocking', 'tractor digging', 'rope skipping', 'arc welding', 'electric shaver, electric razor shaving', 'printer printing', 'lathe spinning', 'lawn mowing', 'planing timber', 'sharpen knife', 'typing on typewriter', 'mouse clicking', 'using sewing machines', 'hammering nails', 'sliding door', 'typing on computer keyboard', 'ripping paper', 'eating with cutlery', 'car engine starting', 'opening or closing car doors', 'opening or closing drawers', 'electric grinder grinding', 'car engine idling', 'hair dryer drying', 'cupboard opening or closing', 'running electric fan', 'extending ladders', 'cutting hair with electric trimmers', 'forging swords', 'plastic bottle crushing', 'chopping wood', 'hedge trimmer running', 'eletric blender running', 'opening or closing car electric windows', 'disc scratching', 'chopping food']

C




Unnamed: 0,label,count,cluster
0,fireworks banging,1050,1
1,tap dancing,1050,2
2,"motorboat, speedboat acceleration",1050,0
3,playing trombone,1050,2
4,playing drum kit,1050,2
...,...,...,...
305,snake rattling,199,1
306,zebra braying,199,1
307,playing castanets,199,3
308,cow lowing,199,1


In [7]:
import pandas as pd

df = pd.read_csv('dataset/labels_semantic_clusters.csv', header=0, names=['label','count','cluster'])
df = df.groupby('cluster')
print(df.sum())


                                                     label  count
cluster                                                          
0        motorboat, speedboat accelerationengine accele...  23603
1        fireworks bangingpeople booingpolice car (sire...  80911
2        tap dancingplaying tromboneplaying drum kitmal...  42934
3        driving busesrace car, auto racingtoilet flush...  52019


In [2]:
import pandas as pd
df = pd.read_csv('dataset/labels_semantic_clusters.csv', header=None, names=['label', 'count', 'cluster'])

used_data = df[df['cluster'] == '3']

data_set = pd.read_csv('dataset/vggsound.csv', header=None, names=['video_id', 'start_sec', 'label', 'split'])
sub_section = data_set[data_set['label'].isin(used_data['label'].values)]
sub_section.sample(10)


Unnamed: 0,video_id,start_sec,label,split
63262,Gxs2k4mLnLE,4,lighting firecrackers,train
35389,8a5M98cyYLU,232,skiing,train
56000,EfOwUB4VS4o,19,firing muskets,train
62178,Gc_naKiJWuY,51,firing cannon,train
157706,kxqNCdZHzMg,187,cap gun shooting,train
30802,7BndMcm0jdk,28,playing erhu,train
107730,VVbOdcGz5Fg,130,driving buses,train
11481,1uYD_SiusjE,0,playing glockenspiel,train
135119,dV5xF4wMQEM,133,playing squash,train
91980,QAJ3lXw6rdw,34,playing sitar,train


In [10]:
import os
import pandas as pd
from yt_dlp import YoutubeDL

# --- CONFIGURATION ---
OUTPUT_DIR  = "videos"                     # Dossier de sortie
# ---------------------

def download_video_360p(video_id: str, dest_folder: str):
    """
    Télécharge la vidéo YouTube en 360p (hauteur ≤ 360px),
    récupère l'audio séparément, puis les merge en MKV
    (vidéo + piste audio distincte).
    """
    os.makedirs(dest_folder, exist_ok=True)
    url = f"https://www.youtube.com/watch?v={video_id}"
    ydl_opts = {
        # bestvideo[height<=360] + bestaudio, fallback sur best[height<=360]
        'format': 'best[height<=360]',  # un seul flux contenant audio+vidéo
        'outtmpl': os.path.join(dest_folder, f"{video_id}.%(ext)s"),
        'noplaylist': True,
        'quiet': False,
        'no_warnings': True,
    }
    with YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])



df = pd.read_csv('dataset/labels_semantic_clusters.csv', header=None, names=['label', 'count', 'cluster'])

used_data = df[df['cluster'] == '3']

data_set = pd.read_csv('dataset/vggsound.csv', header=None, names=['video_id', 'start_sec', 'label', 'split'])
sub_section = data_set[data_set['label'].isin(used_data['label'].values)]

sub_df = sub_section.sample(10)
# 3) Télécharger
for vid in sub_df['video_id'].unique():
    print(f"▶️ Téléchargement de {vid} en 360p …")
    try:
        download_video_360p(vid, OUTPUT_DIR)
    except Exception as e:
        print(f"⚠️ Erreur pour {vid} : {e}")

print("✅ Terminé.")

▶️ Téléchargement de SF2boY3hmYg en 360p …
[youtube] Extracting URL: https://www.youtube.com/watch?v=SF2boY3hmYg
[youtube] SF2boY3hmYg: Downloading webpage
[youtube] SF2boY3hmYg: Downloading tv client config
[youtube] SF2boY3hmYg: Downloading player 22f02d3d-main
[youtube] SF2boY3hmYg: Downloading tv player API JSON
[youtube] SF2boY3hmYg: Downloading ios player API JSON
[youtube] SF2boY3hmYg: Downloading m3u8 information
[info] SF2boY3hmYg: Downloading 1 format(s): 18
[download] Destination: videos/SF2boY3hmYg.mp4
[download] 100% of    1.75MiB in 00:00:00 at 3.35MiB/s   
▶️ Téléchargement de joEc6bcndLc en 360p …
[youtube] Extracting URL: https://www.youtube.com/watch?v=joEc6bcndLc
[youtube] joEc6bcndLc: Downloading webpage
[youtube] joEc6bcndLc: Downloading tv client config
[youtube] joEc6bcndLc: Downloading player 22f02d3d-main
[youtube] joEc6bcndLc: Downloading tv player API JSON
[youtube] joEc6bcndLc: Downloading ios player API JSON
[youtube] joEc6bcndLc: Downloading m3u8 informati

ERROR: [youtube] _EO0l5IbwY0: Sign in to confirm your age. This video may be inappropriate for some users. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies


⚠️ Erreur pour _EO0l5IbwY0 : ERROR: [youtube] _EO0l5IbwY0: Sign in to confirm your age. This video may be inappropriate for some users. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies
▶️ Téléchargement de cRtPLWOdusM en 360p …
[youtube] Extracting URL: https://www.youtube.com/watch?v=cRtPLWOdusM
[youtube] cRtPLWOdusM: Downloading webpage
[youtube] cRtPLWOdusM: Downloading tv client config
[youtube] cRtPLWOdusM: Downloading player 8102da6c-main
[youtube] cRtPLWOdusM: Downloading tv player API JSON
[youtube] cRtPLWOdusM: Downloading ios player API JSON
[youtube] cRtPLWOdusM: Downloading m3u8 information
[info] cRtPLWOdusM: Downloading 1 format(s): 18
[download] Destination: videos/cRtPLWOdusM.mp4
[download] 100% of    7.31MiB 

In [18]:
import pandas as pd
df = pd.read_csv('dataset/test_labeling.csv', header=None, names=['label', 'count', 'cluster'])

used_data = df[df['cluster'] == ' X']
total_count = used_data.sum()['count']



data_set = pd.read_csv('dataset/vggsound.csv', header=None, names=['video_id', 'start_sec', 'label', 'split'])
sub_section = data_set[data_set['label'].isin(used_data['label'].values)]
sub_section.sample(10)


Unnamed: 0,video_id,start_sec,label,split
51884,DVF3rKX-opo,30,playing zither,test
189108,wXIlLOrPBWM,198,playing bongo,train
111620,WoFTCw3RlDI,230,playing electric guitar,train
32663,7kDIIIf4QQw,450,playing cello,train
34127,8CcpUJCVQNA,19,playing tabla,train
91499,Q-iR4FEjpwU,49,playing oboe,train
176418,rFgrOflwKPg,290,playing trombone,train
60578,G6__pct5bis,35,playing glockenspiel,train
108338,VjjH55ct-Lk,50,playing clarinet,test
119900,ZUITHumAwlo,7,playing tabla,train


In [None]:
cfg = second_step
pred_depth, x_tokens, x_positions, x_modalities = model.generate_one_modality_roar(
   x_tokens, x_positions, x_modalities,
   target_mod=cfg['target_mod'],
   num_steps=cfg['num_steps'],
   temp=cfg['temp'],
   top_p=cfg['top_p'],
   top_k=cfg['top_k']
)
show_modality(pred_depth, cfg['target_mod'])