In [None]:
!git clone https://github.com/MTG/freesound-python.git

Cloning into 'freesound-python'...
remote: Enumerating objects: 417, done.[K
remote: Counting objects: 100% (125/125), done.[K
remote: Compressing objects: 100% (62/62), done.[K
remote: Total 417 (delta 58), reused 100 (delta 53), pack-reused 292 (from 1)[K
Receiving objects: 100% (417/417), 206.37 KiB | 9.83 MiB/s, done.
Resolving deltas: 100% (200/200), done.


In [None]:
# Install the client from the local checkout created by the clone cell above.
!pip install ./freesound-python

Processing ./freesound-python
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: freesound-python
  Building wheel for freesound-python (pyproject.toml) ... [?25l[?25hdone
  Created wheel for freesound-python: filename=freesound_python-1.1-py3-none-any.whl size=6496 sha256=1df77d6f0f1320691dc8b455e32338596c320bc4abd95c61c0e5eade9b8726c1
  Stored in directory: /root/.cache/pip/wheels/80/e8/a3/3e192d339b115e258b361c28f8b0e0cd4e54ece6b15a459836
Successfully built freesound-python
Installing collected packages: freesound-python
  Attempting uninstall: freesound-python
    Found existing installation: freesound-python 0.1
    Uninstalling freesound-python-0.1:
      Successfully uninstalled freesound-python-0.1
Successfully installed freesound-python-1.1


In [None]:
# Smoke test: fails with ImportError right here if the install above did not work.
import freesound
print("Freesound client imported successfully!")

Freesound client imported successfully!


In [None]:
# VIOLENCE DETECTION: Violent Audio Dataset Scraper for Google Colab
# FINAL WORKING VERSION - Fixed preview URL access


# STEP 1: Install packages
print("üì¶ Installing packages...")
# NOTE(review): freesound-python is also installed from the local git checkout in an
# earlier cell; confirm which of the two installs is the intended one.
!pip install freesound-python requests tqdm soundfile librosa -q
# All apt-get output (including errors) is discarded, so a failed ffmpeg install is silent.
!apt-get install -y ffmpeg > /dev/null 2>&1

print("‚úÖ Packages installed\n")

# STEP 2: Import everything
import os
import time
import json
import requests
from pathlib import Path
from tqdm import tqdm
import random
from datetime import datetime
import pandas as pd

import freesound

print("‚úÖ All modules imported successfully!\n")

# ============================================================================
# Configuration
# ============================================================================

# Root folder for everything the scraper writes; kept as a plain string because it
# is also used as the default `base_path` of ViolentAudioScraper.
BASE_PATH = '/content/datasets/violent_audio'
# Create the folder (and any missing parents) up front; no-op when it already exists.
Path(BASE_PATH).mkdir(parents=True, exist_ok=True)

print(f"‚úì Dataset location: {BASE_PATH}\n")

# SECURITY: never store a real API key in the notebook. The key is read from the
# FREESOUND_API_KEY environment variable (set it in a private cell or in the runtime
# environment before running this cell). When the variable is missing or empty the
# placeholder below is used, which the API-key check before the run detects.
# Any key that was ever committed in this file should be revoked and regenerated.
FREESOUND_API_KEY = os.environ.get("FREESOUND_API_KEY") or "YOUR_API_KEY_HERE"

# ============================================================================
# Violent Audio Categories
# ============================================================================

# Scrape plan: one entry per output folder. For every entry
#   queries          - text search terms, each searched separately
#   target_per_query - passed to the search as the maximum number of results per term
#   duration_range   - (min, max) bounds used in the search's duration filter
#                      (presumably seconds; confirm against the Freesound API docs)
VIOLENT_AUDIO_CATEGORIES = {
    "screams_human_distress": dict(
        queries=["scream", "screaming", "yell", "distress"],
        target_per_query=50,
        duration_range=(1, 5),
    ),
    "gunshots_firearms": dict(
        queries=["gunshot", "gun", "pistol", "rifle"],
        target_per_query=50,
        duration_range=(0.5, 3),
    ),
    "glass_breaking": dict(
        queries=["glass", "shatter", "window", "bottle"],
        target_per_query=40,
        duration_range=(1, 3),
    ),
    "explosions_blasts": dict(
        queries=["explosion", "blast", "bomb", "detonation"],
        target_per_query=40,
        duration_range=(2, 5),
    ),
    "sirens_alarms": dict(
        queries=["siren", "alarm", "emergency", "warning"],
        target_per_query=40,
        duration_range=(3, 10),
    ),
    "fighting_impact": dict(
        queries=["punch", "impact", "fight", "hit"],
        target_per_query=40,
        duration_range=(0.5, 2),
    ),
    "aggressive_speech": dict(
        queries=["shouting", "yelling", "angry", "argument"],
        target_per_query=25,
        duration_range=(2, 10),
    ),
    "crash_collision": dict(
        queries=["crash", "collision", "accident", "car"],
        target_per_query=25,
        duration_range=(2, 5),
    ),
}

# ============================================================================
# Scraper Class - FINAL FIXED VERSION
# ============================================================================

class ViolentAudioScraper:
    """Download Freesound preview clips into one folder per category.

    For every category the scraper runs each configured text query against the
    Freesound search endpoint, downloads the ``preview-hq-mp3`` file of every
    result that has one, and records license/attribution data for each saved
    file. ``finalize`` writes the run statistics to ``collection_stats.json``
    and the attribution table to ``licensing_attribution.csv`` under
    ``base_path``.

    Attributes:
        api_key: Freesound API token used to authenticate search requests.
        base_path: Root output directory (string or path-like).
        client: ``freesound.FreesoundClient`` configured with the same token.
            The methods of this class talk to the REST API through
            ``requests`` and do not use it.
        stats: Dict of run counters, per-category counts and licensing rows.
        downloaded_ids: Sound ids already saved during this run; used to skip
            sounds returned by more than one query.
    """

    # Text-search endpoint queried by search_sounds.
    SEARCH_URL = "https://freesound.org/apiv2/search/text/"

    def __init__(self, api_key, base_path=BASE_PATH):
        """Store the credentials/output location and initialise empty statistics.

        Args:
            api_key: Freesound API token.
            base_path: Directory under which category folders and the summary
                files are written.
        """
        self.api_key = api_key
        self.base_path = base_path
        self.client = freesound.FreesoundClient()
        self.client.set_token(api_key, "token")

        self.stats = {
            'successful_downloads': 0,
            'failed_downloads': 0,
            'skipped_duplicates': 0,
            'collection_start': datetime.now().isoformat(),
            'collection_end': None,
            'categories': {},
            'licensing_info': []
        }
        self.downloaded_ids = set()

    def search_sounds(self, query, max_results=50, duration_range=(1, 10),
                      timeout=30, max_attempts=3):
        """Search Freesound for ``query`` using the REST API directly.

        Timeouts and connection errors are retried with a short exponential
        back-off, because a single slow response would otherwise drop the
        whole query. HTTP error statuses are not retried.

        Args:
            query: Text search term.
            max_results: Sent as ``page_size`` (results requested in one page).
            duration_range: ``(min, max)`` bounds for the ``duration`` filter.
            timeout: Per-request timeout in seconds passed to ``requests``.
            max_attempts: Total number of tries for transient network errors
                (values below 1 are treated as 1).

        Returns:
            A list of simple objects exposing ``id``, ``name``, ``preview_url``,
            ``duration``, ``license`` and ``username`` (results without an HQ
            mp3 preview are left out), or ``None`` when the request or the
            response parsing failed.
        """
        try:
            duration_filter = f"duration:[{duration_range[0]} TO {duration_range[1]}]"

            params = {
                "query": query,
                "filter": duration_filter,
                "sort": "rating_desc",
                "fields": "id,name,previews,duration,license,username",
                "page_size": max_results,
            }
            # Send the token as a header instead of a query parameter so it does
            # not end up in URLs echoed by HTTP error messages.
            headers = {"Authorization": f"Token {self.api_key}"}

            attempts = max(1, max_attempts)
            for attempt in range(1, attempts + 1):
                try:
                    response = requests.get(
                        self.SEARCH_URL, params=params, headers=headers, timeout=timeout
                    )
                    break
                except (requests.exceptions.Timeout,
                        requests.exceptions.ConnectionError):
                    if attempt == attempts:
                        raise  # out of tries: reported by the handler below
                    time.sleep(2 ** attempt)  # back off 2 s, 4 s, ...

            response.raise_for_status()
            data = response.json()

            # Convert the JSON results into light attribute objects.
            sounds = []
            for sound_data in data.get('results', []):
                try:
                    preview_url = sound_data['previews'].get('preview-hq-mp3')

                    if not preview_url:
                        continue

                    sound = type('Sound', (), {
                        'id': sound_data['id'],
                        'name': sound_data['name'],
                        'preview_url': preview_url,
                        'duration': sound_data['duration'],
                        'license': sound_data['license'],
                        'username': sound_data['username']
                    })()
                    sounds.append(sound)
                except (KeyError, TypeError, AttributeError):
                    # Malformed entry (missing field, or `previews` is null):
                    # skip this result only, keep the rest of the page.
                    continue

            return sounds

        except Exception as e:
            print(f"‚ùå Search error for '{query}': {e}")
            return None

    def download_sound(self, sound, category_path, index):
        """Download one preview file and record its attribution data.

        Args:
            sound: Object produced by ``search_sounds``.
            category_path: ``pathlib.Path`` of the category folder.
            index: Running number used in the file name (zero-padded to 4 digits).

        Returns:
            ``True`` when the file was saved. ``False`` when the sound was
            already downloaded in this run (counted as a skipped duplicate) or
            when the download/write failed (counted as a failed download).
        """
        try:
            if sound.id in self.downloaded_ids:
                self.stats['skipped_duplicates'] += 1
                return False

            preview_url = sound.preview_url
            response = requests.get(preview_url, timeout=30)
            response.raise_for_status()

            filename = f"{category_path.name}_{index:04d}_id{sound.id}.mp3"
            filepath = category_path / filename

            with open(filepath, 'wb') as f:
                f.write(response.content)

            self.stats['licensing_info'].append({
                'file': filename,
                'sound_id': sound.id,
                'license': sound.license,
                'username': sound.username,
                'duration': sound.duration,
                'freesound_url': f"https://freesound.org/people/{sound.username}/sounds/{sound.id}/"
            })

            self.downloaded_ids.add(sound.id)
            self.stats['successful_downloads'] += 1
            return True

        except Exception as e:
            # Best effort: one bad file must not abort the run, but say why it
            # failed. tqdm.write keeps the active progress bar intact.
            self.stats['failed_downloads'] += 1
            tqdm.write(f"   Download failed for sound {getattr(sound, 'id', '?')}: {e}")
            return False

    def scrape_category(self, category_name, config):
        """Run every query of one category and download the results.

        Args:
            category_name: Name of the category; also the output sub-folder name.
            config: Dict with ``queries``, ``target_per_query`` and
                ``duration_range``.
        """
        category_path = Path(self.base_path) / category_name
        category_path.mkdir(parents=True, exist_ok=True)

        self.stats['categories'][category_name] = {'queries': config['queries'], 'downloaded': 0}

        print(f"\n{'='*70}")
        print(f"üìÇ CATEGORY: {category_name.upper()}")
        print(f"{'='*70}")

        # File-name counter; advances only when a file was actually saved.
        download_index = 0

        for query in config['queries']:
            print(f"üîç Searching: '{query}'")
            results = self.search_sounds(
                query=query,
                max_results=config['target_per_query'],
                duration_range=config['duration_range']
            )

            if results is None or len(results) == 0:
                print(f"   ‚ö†Ô∏è  No results found")
                continue

            print(f"   ‚úì Found {len(results)} results")
            pbar = tqdm(results, desc=f"Downloading {query[:20]}")

            for sound in pbar:
                # Checked before the call: a duplicate triggers no network
                # request, so there is nothing to throttle afterwards.
                already_downloaded = sound.id in self.downloaded_ids

                success = self.download_sound(sound, category_path, download_index)
                if success:
                    download_index += 1
                    self.stats['categories'][category_name]['downloaded'] += 1

                if not already_downloaded:
                    # Randomised pause between requests to avoid hammering the server.
                    time.sleep(random.uniform(0.5, 1.5))

        print(f"‚úì Downloaded: {self.stats['categories'][category_name]['downloaded']} sounds")

    def scrape_all(self, categories_config):
        """Scrape every category in ``categories_config`` and write the summary files.

        ``finalize`` runs even when a category raises or the run is
        interrupted, so the attribution data of files already on disk is not
        lost; the original exception still propagates to the caller.

        Args:
            categories_config: Mapping of category name to its config dict.
        """
        print("="*70)
        print("üé¨ VIOLENT AUDIO DATASET COLLECTOR")
        print("="*70)
        print(f"üìÅ Saving to: {self.base_path}")
        print(f"üìä Categories: {len(categories_config)}")
        print("="*70)

        try:
            for category_name, config in categories_config.items():
                self.scrape_category(category_name, config)
        finally:
            self.finalize()

    def finalize(self):
        """Write ``collection_stats.json`` and ``licensing_attribution.csv``, then print a summary."""
        self.stats['collection_end'] = datetime.now().isoformat()

        output_dir = Path(self.base_path)
        # The folder may not exist yet when no category was scraped.
        output_dir.mkdir(parents=True, exist_ok=True)

        stats_file = output_dir / 'collection_stats.json'
        with open(stats_file, 'w') as f:
            json.dump(self.stats, f, indent=2)

        # The CSV is only written when at least one file was downloaded.
        if self.stats['licensing_info']:
            licensing_df = pd.DataFrame(self.stats['licensing_info'])
            licensing_file = output_dir / 'licensing_attribution.csv'
            licensing_df.to_csv(licensing_file, index=False)

        print("\n\n" + "="*70)
        print("üìä COLLECTION COMPLETE!")
        print("="*70)
        print(f"‚úÖ Downloaded: {self.stats['successful_downloads']}")
        print(f"‚ùå Failed: {self.stats['failed_downloads']}")
        print(f"‚äò Duplicates: {self.stats['skipped_duplicates']}")
        print("="*70)

        print("\nüìã Per-Category:")
        total_downloaded = 0
        for cat, cat_stats in self.stats['categories'].items():
            print(f"  {cat}: {cat_stats['downloaded']} sounds")
            total_downloaded += cat_stats['downloaded']

        print(f"\nüìä TOTAL AUDIO FILES: {total_downloaded}")
        print(f"üìÅ Files saved to: {self.base_path}")

# ============================================================================
# RUN THE SCRAPER
# ============================================================================

# Entry point of the cell: verify that an API key was configured, then run the
# full collection and print usage instructions for the resulting dataset.
print("\n" + "="*70)
print("‚öôÔ∏è  CHECKING API KEY...")
print("="*70)

# Refuse to start while the key still equals the template placeholder.
if FREESOUND_API_KEY == "YOUR_API_KEY_HERE":
    print("\n‚ùå ERROR: Please set your Freesound API key!")
    print("\nüìù Steps:")
    print("   1. Go to: https://freesound.org/apiv2/apply/")
    print("   2. Create/use your API credentials")
    print("   3. Copy your API Key (NOT Client ID)")
    print("   4. Replace 'YOUR_API_KEY_HERE' with your actual key")
    print("   5. Run this cell again")
else:
    print("‚úÖ API key found!")
    print("\n‚è≥ Starting collection (20-40 minutes)...")
    print("   Keep this notebook open\n")

    try:
        # Build the scraper and process every configured category; scrape_all
        # also writes the stats/licensing files at the end.
        scraper = ViolentAudioScraper(api_key=FREESOUND_API_KEY, base_path=BASE_PATH)
        scraper.scrape_all(VIOLENT_AUDIO_CATEGORIES)

        # NOTE(review): the "Expected Performance" figures printed below do not
        # appear to be computed anywhere in this notebook - confirm their source.
        print("""

‚úÖ SUCCESS! Your dataset is ready.

üìÇ Dataset Structure:
   violent_audio/
   ‚îú‚îÄ‚îÄ screams_human_distress/
   ‚îú‚îÄ‚îÄ gunshots_firearms/
   ‚îú‚îÄ‚îÄ glass_breaking/
   ‚îú‚îÄ‚îÄ explosions_blasts/
   ‚îú‚îÄ‚îÄ sirens_alarms/
   ‚îú‚îÄ‚îÄ fighting_impact/
   ‚îú‚îÄ‚îÄ aggressive_speech/
   ‚îú‚îÄ‚îÄ crash_collision/
   ‚îú‚îÄ‚îÄ collection_stats.json
   ‚îî‚îÄ‚îÄ licensing_attribution.csv

üì• Download to Your Computer:
   from google.colab import files
   !zip -r violent_audio_dataset.zip /content/datasets/violent_audio
   files.download('violent_audio_dataset.zip')

‚öñÔ∏è  IMPORTANT: Check licensing_attribution.csv for attribution!

üéØ NEXT STEPS:
1. Download your dataset
2. Preprocess audio (16kHz, mono)
3. Extract features (MFCCs, spectrograms)
4. Combine with your image dataset
5. Train multimodal violence detection model!

üìä Expected Performance:
   - Single modality: 70-75% accuracy
   - Audio + Image fusion: 78-85% accuracy
        """)

    except Exception as e:
        # Any failure during the run is reported with a full traceback instead
        # of being re-raised, so the cell itself finishes without an error.
        print(f"\n‚ùå Error: {e}")
        import traceback
        traceback.print_exc()


üì¶ Installing packages...
‚úÖ Packages installed

‚úÖ All modules imported successfully!

‚úì Dataset location: /content/datasets/violent_audio


‚öôÔ∏è  CHECKING API KEY...
‚úÖ API key found!

‚è≥ Starting collection (20-40 minutes)...
   Keep this notebook open

üé¨ VIOLENT AUDIO DATASET COLLECTOR
üìÅ Saving to: /content/datasets/violent_audio
üìä Categories: 8

üìÇ CATEGORY: SCREAMS_HUMAN_DISTRESS
üîç Searching: 'scream'
   ‚úì Found 50 results


Downloading scream: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [01:37<00:00,  1.95s/it]


üîç Searching: 'screaming'
   ‚úì Found 50 results


Downloading screaming: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [01:07<00:00,  1.36s/it]


üîç Searching: 'yell'
   ‚úì Found 50 results


Downloading yell: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [01:23<00:00,  1.67s/it]


üîç Searching: 'distress'
   ‚úì Found 50 results


Downloading distress: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [01:35<00:00,  1.92s/it]


‚úì Downloaded: 151 sounds

üìÇ CATEGORY: GUNSHOTS_FIREARMS
üîç Searching: 'gunshot'
‚ùå Search error for 'gunshot': HTTPSConnectionPool(host='freesound.org', port=443): Read timed out. (read timeout=10)
   ‚ö†Ô∏è  No results found
üîç Searching: 'gun'
‚ùå Search error for 'gun': HTTPSConnectionPool(host='freesound.org', port=443): Read timed out. (read timeout=10)
   ‚ö†Ô∏è  No results found
üîç Searching: 'pistol'
   ‚úì Found 50 results


Downloading pistol: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [01:30<00:00,  1.80s/it]


üîç Searching: 'rifle'
   ‚úì Found 50 results


Downloading rifle: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [01:25<00:00,  1.70s/it]


‚úì Downloaded: 91 sounds

üìÇ CATEGORY: GLASS_BREAKING
üîç Searching: 'glass'
   ‚úì Found 40 results


Downloading glass: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 40/40 [01:14<00:00,  1.86s/it]


üîç Searching: 'shatter'
   ‚úì Found 40 results


Downloading shatter: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 40/40 [01:06<00:00,  1.66s/it]


üîç Searching: 'window'
   ‚úì Found 40 results


Downloading window: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 40/40 [01:09<00:00,  1.73s/it]


üîç Searching: 'bottle'
   ‚úì Found 40 results


Downloading bottle: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 40/40 [01:06<00:00,  1.66s/it]


‚úì Downloaded: 129 sounds

üìÇ CATEGORY: EXPLOSIONS_BLASTS
üîç Searching: 'explosion'
   ‚úì Found 40 results


Downloading explosion: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 40/40 [01:21<00:00,  2.03s/it]


üîç Searching: 'blast'
   ‚úì Found 40 results


Downloading blast: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 40/40 [01:13<00:00,  1.84s/it]


üîç Searching: 'bomb'
   ‚úì Found 40 results


Downloading bomb: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 40/40 [01:06<00:00,  1.67s/it]


üîç Searching: 'detonation'
   ‚úì Found 40 results


Downloading detonation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 40/40 [01:08<00:00,  1.72s/it]


‚úì Downloaded: 130 sounds

üìÇ CATEGORY: SIRENS_ALARMS
üîç Searching: 'siren'
   ‚úì Found 40 results


Downloading siren: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 40/40 [01:30<00:00,  2.25s/it]


üîç Searching: 'alarm'
   ‚úì Found 40 results


Downloading alarm: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 40/40 [01:18<00:00,  1.96s/it]


üîç Searching: 'emergency'
   ‚úì Found 40 results


Downloading emergency: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 40/40 [01:06<00:00,  1.66s/it]


   ‚úì Found 40 results




‚úì Downloaded: 126 sounds

üìÇ CATEGORY: FIGHTING_IMPACT
üîç Searching: 'punch'
   ‚úì Found 40 results


Downloading punch: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 40/40 [01:11<00:00,  1.79s/it]


üîç Searching: 'impact'
‚ùå Search error for 'impact': HTTPSConnectionPool(host='freesound.org', port=443): Read timed out. (read timeout=10)
   ‚ö†Ô∏è  No results found
üîç Searching: 'fight'
   ‚úì Found 40 results


Downloading fight: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 40/40 [01:07<00:00,  1.68s/it]


üîç Searching: 'hit'
   ‚úì Found 40 results


Downloading hit: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 40/40 [01:10<00:00,  1.77s/it]


‚úì Downloaded: 108 sounds

üìÇ CATEGORY: AGGRESSIVE_SPEECH
üîç Searching: 'shouting'
   ‚úì Found 25 results


Downloading shouting: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 25/25 [00:47<00:00,  1.90s/it]


üîç Searching: 'yelling'
   ‚úì Found 25 results


Downloading yelling: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 25/25 [00:40<00:00,  1.62s/it]


üîç Searching: 'angry'
   ‚úì Found 25 results


Downloading angry: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 25/25 [00:43<00:00,  1.75s/it]


üîç Searching: 'argument'
   ‚úì Found 25 results


Downloading argument: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 25/25 [00:51<00:00,  2.06s/it]


‚úì Downloaded: 76 sounds

üìÇ CATEGORY: CRASH_COLLISION
üîç Searching: 'crash'
   ‚úì Found 25 results


Downloading crash: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 25/25 [00:48<00:00,  1.96s/it]


üîç Searching: 'collision'
   ‚úì Found 25 results


Downloading collision: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 25/25 [00:50<00:00,  2.04s/it]


üîç Searching: 'accident'
   ‚úì Found 25 results


Downloading accident: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 25/25 [00:51<00:00,  2.05s/it]


üîç Searching: 'car'
   ‚úì Found 25 results


Downloading car: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 25/25 [00:47<00:00,  1.90s/it]

‚úì Downloaded: 93 sounds


üìä COLLECTION COMPLETE!
‚úÖ Downloaded: 904
‚ùå Failed: 0
‚äò Duplicates: 196

üìã Per-Category:
  screams_human_distress: 151 sounds
  gunshots_firearms: 91 sounds
  glass_breaking: 129 sounds
  explosions_blasts: 130 sounds
  sirens_alarms: 126 sounds
  fighting_impact: 108 sounds
  aggressive_speech: 76 sounds
  crash_collision: 93 sounds

üìä TOTAL AUDIO FILES: 904
üìÅ Files saved to: /content/datasets/violent_audio


‚úÖ SUCCESS! Your dataset is ready.

üìÇ Dataset Structure:
   violent_audio/
   ‚îú‚îÄ‚îÄ screams_human_distress/
   ‚îú‚îÄ‚îÄ gunshots_firearms/
   ‚îú‚îÄ‚îÄ glass_breaking/
   ‚îú‚îÄ‚îÄ explosions_blasts/
   ‚îú‚îÄ‚îÄ sirens_alarms/
   ‚îú‚îÄ‚îÄ fighting_impact/
   ‚îú‚îÄ‚îÄ aggressive_speech/
   ‚îú‚îÄ‚îÄ crash_collision/
   ‚îú‚îÄ‚îÄ collection_stats.json
   ‚îî‚îÄ‚îÄ licensing_attribution.csv

üì• Download to Your Computer:
   from google.colab import files
   !zip -r violent_audio_dataset.zip /content/datasets/violent_audio
  




In [None]:
from google.colab import files
!zip -r violent_audio_dataset.zip /content/datasets/violent_audio
files.download('violent_audio_dataset.zip')


  adding: content/datasets/violent_audio/ (stored 0%)
  adding: content/datasets/violent_audio/screams_human_distress/ (stored 0%)
  adding: content/datasets/violent_audio/screams_human_distress/screams_human_distress_0130_id83283.mp3 (deflated 3%)
  adding: content/datasets/violent_audio/screams_human_distress/screams_human_distress_0132_id16090.mp3 (deflated 1%)
  adding: content/datasets/violent_audio/screams_human_distress/screams_human_distress_0022_id520271.mp3 (deflated 2%)
  adding: content/datasets/violent_audio/screams_human_distress/screams_human_distress_0078_id720764.mp3 (deflated 2%)
  adding: content/datasets/violent_audio/screams_human_distress/screams_human_distress_0065_id164024.mp3 (deflated 1%)
  adding: content/datasets/violent_audio/screams_human_distress/screams_human_distress_0072_id179328.mp3 (deflated 1%)
  adding: content/datasets/violent_audio/screams_human_distress/screams_human_distress_0149_id693597.mp3 (deflated 2%)
  adding: content/datasets/violent_aud

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>