In [None]:
from google.colab import drive
import csv
import json
import subprocess
import os
import requests


In [None]:
# Install ffmpeg (the Ubuntu package also ships the ffprobe CLI used further down).
# apt-get with -y is the script-safe form: it never waits for a confirmation prompt,
# so "Restart & Run all" cannot stall on this cell.
!apt-get install -y ffmpeg

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 19 not upgraded.


In [None]:
# Mount Google Drive at /content/drive so the files stored under MyDrive can be read and written
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
# Sanity check: list the project folder on Drive to confirm the input files are present
!cd drive/MyDrive/TSP/HTI/PFE/; ls

create-original-json.ipynb  dataset.csv  original.json


In [None]:
# Path to the input CSV file and path to the output JSON file.
# NOTE(review): despite its name, json_output_folder holds a *file* path (original.json), not a folder.
# Both paths are relative, so they only resolve when the working directory is the one that
# contains the `drive` mount point — presumably /content on Colab; confirm.
csv_file_path = 'drive/MyDrive/TSP/HTI/PFE/dataset.csv'
json_output_folder = 'drive/MyDrive/TSP/HTI/PFE/original.json'

In [None]:
def get_video_encoding_parameters(video_path):
    """Probe a media file with ffprobe and return its metadata as a dict.

    ffprobe is asked for a fixed set of container-level (``format``) entries
    plus the description of every stream, serialized as JSON.

    Args:
        video_path: Path of the media file handed to ffprobe.

    Returns:
        dict: The parsed ffprobe JSON document on success. On any failure
        (ffprobe not installed, non-zero exit status, empty or invalid
        output) a dict of the form ``{"error": "<message>"}`` is returned;
        this function never raises.
    """
    command = [
        "ffprobe",
        "-v", "error",
        "-show_entries", "format=filename,nb_streams,nb_programs,format_name,format_long_name,start_time,duration,size,bit_rate,probe_score,tags",
        "-show_streams",
        "-of", "json",
        video_path,
    ]
    try:
        completed = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,
        )
        if completed.returncode != 0:
            # The exit status is the reliable failure signal: ffprobe may still
            # write a syntactically valid (empty) JSON object to stdout when it
            # cannot open the input, and the actual reason is only on stderr.
            reason = completed.stderr.strip() or "no error output"
            return {"error": f"ffprobe exited with status {completed.returncode}: {reason}"}
        return json.loads(completed.stdout)
    except Exception as e:
        # Best effort by design: callers expect a dict, never an exception.
        return {"error": str(e)}


In [None]:
def download_video(stream_link, video_name, timeout=30):
    """Stream a remote file to a local path.

    Args:
        stream_link: URL fetched with an HTTP GET.
        video_name: Local path the response body is written to.
        timeout: Seconds passed to ``requests.get`` as its timeout, so a
            stalled server cannot hang the notebook indefinitely.

    Returns:
        bool: True when the server answered 200 and the whole body was
        written; False on any other status code or on any exception.
        This function never raises.
    """
    file_created = False
    try:
        # Using the response as a context manager releases the connection on
        # every path, including the early return for a non-200 status.
        with requests.get(stream_link, stream=True, timeout=timeout) as r:
            if r.status_code != 200:
                print(f"Unexpected HTTP status {r.status_code} for {stream_link}")
                return False
            with open(video_name, 'wb') as f:
                file_created = True
                for chunk in r.iter_content(chunk_size=1024 * 1024):
                    if chunk:  # skip empty keep-alive chunks
                        f.write(chunk)
        return True
    except Exception as e:
        print(f"Error downloading video {video_name}: {e}")
        # Only remove what this call created: a truncated download must not be
        # mistaken for a complete video later on.
        if file_created:
            try:
                os.remove(video_name)
            except OSError:
                pass
        return False


In [None]:
def process_video_row(row, all_video_info):
    """Download, probe and record one video described by a CSV row.

    Args:
        row: Mapping that must provide the keys ``'uuid'``, ``'name'`` and
            ``'stream_link'``. ``'name'`` doubles as the local file path the
            video is temporarily downloaded to.
        all_video_info: Dict keyed by uuid; updated in place with the probe
            result (plus ``'name'`` and ``'stream_link'``) of a newly
            processed video.

    Returns:
        bool: True when a new entry was added to ``all_video_info``; False
        when a key is missing from the row, the uuid is already present, or
        the download failed.
    """
    try:
        video_uuid = row['uuid']
        video_name = row['name']
        stream_link = row['stream_link']
    except KeyError as e:
        print(f"A key is missing in the row: {e}")
        return False

    if video_uuid in all_video_info:
        return False  # UUID already processed, no need to download

    if not download_video(stream_link, video_name):
        print(f"Failed to download video {video_name}")
        return False

    try:
        video_info = get_video_encoding_parameters(video_name)
        video_info['name'] = video_name
        video_info['stream_link'] = stream_link
        all_video_info[video_uuid] = video_info
    finally:
        # Delete the video file to save space, even if probing or annotating
        # raised, so a failing row never leaves a download behind on disk.
        if os.path.exists(video_name):
            os.remove(video_name)
    return True

In [None]:
def _write_json_atomically(data, json_output_path):
    """Dump data to json_output_path through a temp file so an interruption cannot truncate it."""
    tmp_path = f"{json_output_path}.tmp"
    with open(tmp_path, 'w') as json_file:
        json.dump(data, json_file, indent=4)
    os.replace(tmp_path, json_output_path)


def process_videos(csv_file_path, json_output_path):
    """Process every video listed in a ';'-delimited CSV and checkpoint results to JSON.

    Existing results are loaded from ``json_output_path`` (when the file
    exists and is non-empty) so an interrupted run can be resumed; the file
    is rewritten after each row that ``process_video_row`` reports as newly
    processed.

    Args:
        csv_file_path: Path of the input CSV (read as UTF-8, optional BOM),
            with a header line naming the columns.
        json_output_path: Path of the JSON file holding the results dict.

    Returns:
        None. Progress information (headers, first row) is printed.
    """
    # Try loading existing JSON data if it exists
    if os.path.exists(json_output_path) and os.path.getsize(json_output_path) > 0:
        with open(json_output_path, 'r') as json_file:
            all_video_info = json.load(json_file)
    else:
        all_video_info = {}

    # newline='' is what the csv module expects for files it parses itself.
    with open(csv_file_path, mode='r', encoding='utf-8-sig', newline='') as file:
        csv_reader = csv.DictReader(file, delimiter=';')
        print("Headers:", csv_reader.fieldnames)  # Check the actual headers

        # DictReader consumes the header line on its own, so every row yielded
        # here is data. Iterating once (no rewind, no extra next()) guarantees
        # the first data row is processed and a header-only file is harmless.
        for index, row in enumerate(csv_reader):
            if index == 0:
                print("First row:", row)  # Check the first row
            if process_video_row(row, all_video_info):
                # Save after each successful process to allow resuming
                _write_json_atomically(all_video_info, json_output_path)


In [None]:
# Run this call to start processing: reads the CSV and writes the results to the JSON file
process_videos(csv_file_path, json_output_folder)

Headers: ['uuid', 'name', 'duration', 'size', 'video_quality', 'stream_link']
First row: {'uuid': '1ec2a834-2a67-4162-bbe8-66cb57ca6cb3', 'name': 'Bouygues_20s_reseau_Plongeon_WEB_noPetal_ML', 'duration': '20', 'size': '23723292', 'video_quality': '1920x1080', 'stream_link': 'https://storage.gra.cloud.ovh.net/v1/AUTH_cf288dd96a8a4bc5bbc0ba258e1b93b8/media-storage-prod/1ec2a834-2a67-4162-bbe8-66cb57ca6cb3_bouygues-20s-reseau-plongeon-web-nopetal-ml.mp4'}
Failed to download video Green210719-GRANDS-LACS-DE-CHAMPAGNE-v3-1920x1080
Failed to download video Friends & Fellows-woman-swimming-in-ocean-and-on-boat-in-the-bahamas-283909-filmsupply
Failed to download video php9LcObL
Failed to download video phprKiono
Failed to download video Baby-Foot De Table - 28376
Failed to download video phpjenHVl
Failed to download video 15s_34949_MAAF_Credit_15s
Failed to download video 1280x720_MAX_1-creme_choco_30s-169_MSB-SonWEB_NRJ Orange Perf Garantie Dailymotion
Failed to download video 1280x720_MAX_1