In [22]:
# Install or upgrade the OpenAI SDK in the kernel's environment; a later cell
# imports the `OpenAI` client class from it for the transcription step.
%pip install --upgrade openai



In [9]:
# Slug of the course being processed; every path below is derived from it.
COURSE_NAME = "percepcion-computacional"

# One working directory per pipeline stage: downloaded videos, extracted
# audio and generated transcripts.
videos_folder, audio_folder, transcripts_folder = (
    f'{stage}/{COURSE_NAME}' for stage in ('videos', 'audio', 'transcripts')
)

# CSV index listing the lecture pages of this course.
csv_file = f'videos_csv/{COURSE_NAME}.csv'

### First we want to capture the VideoUrl from the HTML response

In [24]:
import requests
import re

In [25]:
def get_video_url(page_url, timeout=30):
    """Fetch a lecture page and extract the direct video URL embedded in it.

    The page body is searched for the first ``"VideoUrl":"..."`` field; the
    captured value has its backslash-escaped slashes restored to plain ``/``.

    Args:
        page_url: URL of the page to download.
        timeout: Seconds passed to ``requests.get``. Without a timeout a
            stalled server would block the whole batch indefinitely.

    Returns:
        The video URL as a string, or ``None`` when the request fails, the
        server answers with an HTTP error status, or the page contains no
        ``"VideoUrl"`` field.
    """
    try:
        response = requests.get(page_url, timeout=timeout)
        # An error page (404, 5xx, ...) must not be scanned as if it were the
        # lecture page, so HTTP error statuses are treated as failures.
        response.raise_for_status()
    except requests.RequestException as e:
        print(f'Error fetching video URL from {page_url}:', e)
        return None

    match = re.search(r'"VideoUrl":"(.*?)"', response.text)
    if match is None:
        return None

    # The URL is embedded with escaped slashes ("\/"); undo that escaping.
    return match.group(1).replace('\\/', '/')

### Then write the video to a file

In [26]:
def write_video(video_url, filename, timeout=30, chunk_size=1024 * 1024):
    """Stream a video from ``video_url`` into the file ``filename``.

    Failures are reported on stdout instead of being raised, so that one bad
    download does not abort a whole batch.

    Args:
        video_url: Direct URL of the video file.
        filename: Destination path; overwritten if it already exists.
        timeout: Seconds passed to ``requests.get`` so a stalled server
            cannot hang the download forever.
        chunk_size: Number of bytes requested per chunk while streaming.

    Returns:
        None.
    """
    try:
        # The context manager releases the streamed connection even when the
        # download is interrupted half-way.
        with requests.get(video_url, stream=True, timeout=timeout) as video_response:
            # Check the status before opening the file: otherwise an HTML
            # error body would be saved under an .mp4 name and reported as a
            # successful download.
            video_response.raise_for_status()
            with open(filename, 'wb') as f:
                for chunk in video_response.iter_content(chunk_size=chunk_size):
                    if chunk:  # skip empty chunks
                        f.write(chunk)
        print(f'File {filename} has been written')
    except (requests.RequestException, OSError) as e:
        print(f'Error writing video to {filename}:', e)

### Finally we want to perform the same operation for all the videos under the videos folder

In [3]:
import os
import pandas as pd

In [30]:
def download_videos(csv_file_path, output_folder):
    """Download the video behind every page listed in a CSV file.

    The CSV must have a ``link`` column holding one page URL per row. The
    video for the row at position ``i`` (0-based) is saved as
    ``video_{i+1}.mp4`` inside ``output_folder``, which is created if needed.

    Args:
        csv_file_path: Path of the CSV file to read.
        output_folder: Directory that receives the downloaded files.

    Returns:
        None.
    """
    df = pd.read_csv(csv_file_path)

    os.makedirs(output_folder, exist_ok=True)

    for i, page_url in enumerate(df['link']):
        filename = os.path.join(output_folder, f'video_{i+1}.mp4')
        video_url = get_video_url(page_url)
        if not video_url:
            # Report the gap: the numbering keeps following the CSV row, so a
            # skipped page leaves a missing video_N file that would otherwise
            # go unexplained.
            print(f'No video URL found for {page_url}; skipping {filename}')
            continue
        write_video(video_url, filename)

## Percepción Computacional -> MP4

In [31]:
# Download every lecture listed in the course CSV into the course's videos folder.
output_folder = videos_folder
download_videos(csv_file, output_folder)

File videos/inteligencia-artificial-y-computacion-cognitiva/video_1.mp4 has been written
File videos/inteligencia-artificial-y-computacion-cognitiva/video_2.mp4 has been written
File videos/inteligencia-artificial-y-computacion-cognitiva/video_3.mp4 has been written
File videos/inteligencia-artificial-y-computacion-cognitiva/video_4.mp4 has been written
File videos/inteligencia-artificial-y-computacion-cognitiva/video_5.mp4 has been written
File videos/inteligencia-artificial-y-computacion-cognitiva/video_6.mp4 has been written
File videos/inteligencia-artificial-y-computacion-cognitiva/video_7.mp4 has been written
File videos/inteligencia-artificial-y-computacion-cognitiva/video_8.mp4 has been written
File videos/inteligencia-artificial-y-computacion-cognitiva/video_9.mp4 has been written
File videos/inteligencia-artificial-y-computacion-cognitiva/video_10.mp4 has been written
File videos/inteligencia-artificial-y-computacion-cognitiva/video_11.mp4 has been written
File videos/intelig

### Pricing Calculation

In [32]:
from moviepy.editor import VideoFileClip

In [33]:
def calculate_video_length(video_path):
    """Return the duration of the video at ``video_path`` in minutes.

    The clip is opened only long enough to read its ``duration`` attribute
    (seconds) and is released when the ``with`` block exits.
    """
    with VideoFileClip(video_path) as clip:
        duration_seconds = clip.duration
    return duration_seconds / 60

In [34]:
def calculate_whisper_pricing(folder_path, cost_per_minute=0.006):
    """Estimate the transcription cost for all ``.mp4`` files in a folder.

    Args:
        folder_path: Directory whose ``.mp4`` files are measured. Only the
            directory itself is scanned, not its sub-directories.
        cost_per_minute: Price charged per minute of media. Defaults to
            0.006, the rate this notebook was written with; pass the current
            rate if pricing has changed.

    Returns:
        A ``(total_cost, total_minutes)`` tuple, where ``total_minutes`` is
        the summed duration of the videos and ``total_cost`` is
        ``total_minutes * cost_per_minute``.
    """
    total_minutes = sum(
        calculate_video_length(os.path.join(folder_path, filename))
        for filename in os.listdir(folder_path)
        if filename.endswith('.mp4')
    )
    return total_minutes * cost_per_minute, total_minutes


In [35]:
# Estimate the transcription cost of all downloaded videos before calling the API.
folder_path = videos_folder
total_cost, total_minutes = calculate_whisper_pricing(folder_path)
print(f"Total minutes of video: {total_minutes:.2f}")
print(f"Total cost for using Whisper API: ${total_cost:.2f}")

Total minutes of video: 390.72
Total cost for using Whisper API: $2.34


In [36]:
def convert_mp4_to_mp3(mp4_file_path, mp3_file_path):
    """Extract the audio track of an MP4 video into an MP3 file.

    Args:
        mp4_file_path: Path of the source video.
        mp3_file_path: Path of the MP3 file to write.

    Raises:
        ValueError: If the video has no audio track.
    """
    # Open the clip as a context manager so it is released after every
    # conversion; a batch run otherwise keeps one open clip per video.
    with VideoFileClip(mp4_file_path) as clip:
        if clip.audio is None:
            raise ValueError(f'{mp4_file_path} has no audio track to extract')
        clip.audio.write_audiofile(mp3_file_path, codec='mp3')

In [37]:
def convert_videos_to_audio(videos_folder, audio_folder):
    """Produce an MP3 in ``audio_folder`` for each MP4 found in ``videos_folder``.

    The output folder is created when missing. Each audio file keeps the
    name of its video with ``.mp4`` replaced by ``.mp3``; files are handled
    in the order ``os.listdir`` reports them.
    """
    os.makedirs(audio_folder, exist_ok=True)

    mp4_names = [name for name in os.listdir(videos_folder) if name.endswith('.mp4')]
    for mp4_name in mp4_names:
        source_path = os.path.join(videos_folder, mp4_name)
        target_path = os.path.join(audio_folder, mp4_name.replace('.mp4', '.mp3'))

        convert_mp4_to_mp3(source_path, target_path)
        print(f"Converted {source_path} to {target_path}")

In [38]:
# Extract an MP3 audio track from every downloaded MP4 of the course.
convert_videos_to_audio(videos_folder, audio_folder)

MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_14.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_14.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_14.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_23.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_23.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_23.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_16.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_16.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_16.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_49.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_49.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_49.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_56.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_56.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_56.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_51.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_51.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_51.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_54.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_54.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_54.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_31.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_31.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_31.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_52.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_52.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_52.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_3.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_3.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_3.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_15.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_15.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_15.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_30.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_30.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_30.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_13.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_13.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_13.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_41.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_41.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_41.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_9.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_9.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_9.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_48.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_48.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_48.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_17.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_17.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_17.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_46.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_46.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_46.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_24.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_24.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_24.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_32.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_32.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_32.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_26.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_26.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_26.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_34.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_34.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_34.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_50.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_50.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_50.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_38.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_38.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_38.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_36.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_36.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_36.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_6.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_6.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_6.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_21.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_21.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_21.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_28.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_28.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_28.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_37.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_37.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_37.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_39.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_39.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_39.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_58.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_58.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_58.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_19.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_19.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_19.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_47.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_47.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_47.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_12.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_12.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_12.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_45.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_45.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_45.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_35.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_35.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_35.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_57.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_57.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_57.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_4.mp3


index 99627 is out of bounds for axis 0 with size 99627



MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_4.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_4.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_29.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_29.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_29.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_22.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_22.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_22.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_10.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_10.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_10.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_8.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_8.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_8.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_33.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_33.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_33.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_18.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_18.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_18.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_44.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_44.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_44.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_43.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_43.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_43.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_1.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_1.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_1.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_53.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_53.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_53.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_2.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_2.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_2.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_40.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_40.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_40.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_20.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_20.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_20.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_55.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_55.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_55.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_42.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_42.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_42.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_25.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_25.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_25.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_7.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_7.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_7.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_11.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_11.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_11.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_5.mp3




MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_5.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_5.mp3
MoviePy - Writing audio in audio/inteligencia-artificial-y-computacion-cognitiva/video_27.mp3


                                                                       

MoviePy - Done.
Converted videos/inteligencia-artificial-y-computacion-cognitiva/video_27.mp4 to audio/inteligencia-artificial-y-computacion-cognitiva/video_27.mp3




### Performing Transcriptions with OpenAI API

In [39]:
import os

from openai import OpenAI

# Never store an API key in the notebook: it ends up in version control and
# in shared copies. The key is read from the OPENAI_API_KEY environment
# variable; a missing variable raises KeyError here, before any request is made.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

In [40]:
def transcribe_audio_file(client, audio_file_path):
    """Send one audio file to the ``whisper-1`` model and return the text.

    The request is made in Spanish (``language="es"``) with a short prompt
    describing the recording; the ``text`` attribute of the response is
    returned.
    """
    with open(audio_file_path, "rb") as audio_stream:
        request = dict(
            model="whisper-1",
            file=audio_stream,
            language="es",
            prompt="Hola, esta es una clase de maestría en inteligencia artificial.",
        )
        result = client.audio.transcriptions.create(**request)
    return result.text

In [41]:
def transcribe_and_save_all_audio(audio_folder, transcript_folder):
    """Transcribe each MP3 in ``audio_folder`` into ``transcript_folder``.

    Uses the module-level ``client``. The transcript of ``name.mp3`` is
    written as UTF-8 text to ``name_default.txt``; the output folder is
    created when missing.
    """
    os.makedirs(transcript_folder, exist_ok=True)

    for mp3_name in os.listdir(audio_folder):
        if not mp3_name.endswith('.mp3'):
            continue

        source_path = os.path.join(audio_folder, mp3_name)
        target_path = os.path.join(
            transcript_folder, mp3_name.replace('.mp3', '_default.txt')
        )

        text = transcribe_audio_file(client, source_path)
        with open(target_path, "w", encoding="utf-8") as out:
            out.write(text)
        print(f"Transcript saved to {target_path}")

In [42]:
# Transcribe every extracted MP3 and store one text file per lecture.
transcribe_and_save_all_audio(audio_folder, transcripts_folder)

Transcript saved to transcripts/inteligencia-artificial-y-computacion-cognitiva/video_38_default.txt
Transcript saved to transcripts/inteligencia-artificial-y-computacion-cognitiva/video_21_default.txt
Transcript saved to transcripts/inteligencia-artificial-y-computacion-cognitiva/video_15_default.txt
Transcript saved to transcripts/inteligencia-artificial-y-computacion-cognitiva/video_49_default.txt
Transcript saved to transcripts/inteligencia-artificial-y-computacion-cognitiva/video_24_default.txt
Transcript saved to transcripts/inteligencia-artificial-y-computacion-cognitiva/video_11_default.txt
Transcript saved to transcripts/inteligencia-artificial-y-computacion-cognitiva/video_54_default.txt
Transcript saved to transcripts/inteligencia-artificial-y-computacion-cognitiva/video_25_default.txt
Transcript saved to transcripts/inteligencia-artificial-y-computacion-cognitiva/video_58_default.txt
Transcript saved to transcripts/inteligencia-artificial-y-computacion-cognitiva/video_47_de

### Formatting folders & files

In [3]:
import os
import shutil
import pandas as pd

In [4]:
def create_folder_structure(df, transcript_dir, base_dir):
    """Move flat ``video_N_default.txt`` transcripts into per-theme folders.

    For each row of ``df`` a folder named after the row's ``tema`` value is
    created under ``base_dir``, and the transcript belonging to that row is
    moved into it under a file name built from the row's ``nombre`` value.
    Rows whose transcript file is missing are reported on stdout.

    Args:
        df: DataFrame with string columns ``tema`` and ``nombre``, one row
            per video, in the same order the videos were numbered.
        transcript_dir: Directory holding the ``video_N_default.txt`` files.
        base_dir: Directory under which the theme folders are created.
    """
    # Number rows by position (1-based), not by index label: the transcript
    # files are numbered by row position in the CSV, which matches the label
    # only while the DataFrame keeps its default 0..n-1 index.
    for position, (_, row) in enumerate(df.iterrows(), start=1):
        # Build a filesystem-friendly folder name for the theme
        tema_folder_name = row['tema'].replace(' ', '_').replace('.', '').replace('∧', '')
        tema_folder_path = os.path.join(base_dir, tema_folder_name)

        # Create the theme folder if it does not exist yet
        os.makedirs(tema_folder_path, exist_ok=True)

        # Keep only the text before the first '.', so no dots remain to strip
        class_name = row['nombre'].split('.')[0]
        class_file_name = f"{class_name.replace(' ', '_')}.txt"
        new_file_path = os.path.join(tema_folder_path, class_file_name)

        # Move the transcript of this row into the theme folder
        old_file_path = os.path.join(transcript_dir, f"video_{position}_default.txt")
        if os.path.exists(old_file_path):
            shutil.move(old_file_path, new_file_path)
        else:
            print(f"No se encontró el archivo de transcripción: {old_file_path}")

In [10]:
# Reorganize the flat transcripts into per-theme folders. The transcripts
# folder is passed as both source and destination, so the theme folders are
# created inside the transcripts folder itself.
df = pd.read_csv(csv_file)
create_folder_structure(df, transcripts_folder, transcripts_folder)

No se encontró el archivo de transcripción: transcripts/percepcion-computacional\video_1_default.txt
No se encontró el archivo de transcripción: transcripts/percepcion-computacional\video_2_default.txt
No se encontró el archivo de transcripción: transcripts/percepcion-computacional\video_3_default.txt
No se encontró el archivo de transcripción: transcripts/percepcion-computacional\video_4_default.txt
No se encontró el archivo de transcripción: transcripts/percepcion-computacional\video_5_default.txt
No se encontró el archivo de transcripción: transcripts/percepcion-computacional\video_6_default.txt
No se encontró el archivo de transcripción: transcripts/percepcion-computacional\video_7_default.txt
No se encontró el archivo de transcripción: transcripts/percepcion-computacional\video_8_default.txt
No se encontró el archivo de transcripción: transcripts/percepcion-computacional\video_9_default.txt
No se encontró el archivo de transcripción: transcripts/percepcion-computacional\video_10_d

### Merging Transcriptions

In [64]:
import pandas as pd
import os

In [7]:
def _natural_sort_key(name):
    """Sort key ordering embedded numbers by value ('Tema_2' before 'Tema_10')."""
    import re

    # re.split with a capturing group alternates text, digits, text, ...,
    # so every odd position holds a run of digits.
    return [
        int(token) if i % 2 else token
        for i, token in enumerate(re.split(r'(\d+)', name))
    ]


def merge_transcripts_with_structure(transcripts_base_folder, merged_file_path):
    """Merge the per-theme transcript files into one Markdown document.

    Each directory directly under ``transcripts_base_folder`` is a theme and
    becomes a ``# `` heading; each file inside it becomes a ``## `` heading
    followed by the file's text. Underscores in names are shown as spaces.
    Themes and files are written in natural order, so "Tema_2" comes before
    "Tema_10".

    Args:
        transcripts_base_folder: Directory containing one folder per theme.
        merged_file_path: Path of the merged file to write (overwritten).
    """
    # os.path.dirname returns '' for a bare file name, and os.makedirs('')
    # raises, so only create the directory when there is one to create.
    merged_dir = os.path.dirname(merged_file_path)
    if merged_dir:
        os.makedirs(merged_dir, exist_ok=True)

    # Only the first level holds theme folders. Taking just the first
    # os.walk entry avoids emitting headings for nested folders and yields
    # no themes when the base folder does not exist.
    _, theme_dirs, _ = next(
        os.walk(transcripts_base_folder), (transcripts_base_folder, [], [])
    )

    with open(merged_file_path, 'w', encoding='utf-8') as merged_file:
        for dir_name in sorted(theme_dirs, key=_natural_sort_key):
            # Theme heading
            merged_file.write(f"# {dir_name.replace('_', ' ')}\n\n")

            theme_dir_path = os.path.join(transcripts_base_folder, dir_name)
            for idea_file in sorted(os.listdir(theme_dir_path), key=_natural_sort_key):
                idea_file_path = os.path.join(theme_dir_path, idea_file)
                # A sub-directory cannot be read as a transcript; skip it.
                if not os.path.isfile(idea_file_path):
                    continue

                # Class heading: the file name without its extension
                idea_name = os.path.splitext(idea_file)[0].replace('_', ' ')
                merged_file.write(f"## {idea_name}\n\n")

                with open(idea_file_path, 'r', encoding='utf-8') as f:
                    merged_file.write(f"{f.read()}\n\n")

    print(f"All transcripts merged into {merged_file_path}")

In [11]:
# Destination of the merged Markdown document for this course.
merged_file_path = f'merged/{COURSE_NAME}.md'
# NOTE(review): videos_df is loaded here but is not passed to the merge call below.
videos_df = pd.read_csv(csv_file)

merge_transcripts_with_structure(transcripts_folder, merged_file_path)

All transcripts merged into merged/percepcion-computacional.txt


In [12]:
# Inspect the 'provider' column of a different course's video index
# (this path is hard-coded and does not follow COURSE_NAME); rebinds videos_df.
videos_df = pd.read_csv("videos_csv/inteligencia-artificial-y-computacion-cognitiva.csv")
videos_df['provider']

0     unir.cloud.panopto.eu
1     unir.cloud.panopto.eu
2     unir.cloud.panopto.eu
3     unir.cloud.panopto.eu
4     unir.cloud.panopto.eu
5     unir.cloud.panopto.eu
6     unir.cloud.panopto.eu
7     unir.cloud.panopto.eu
8     unir.cloud.panopto.eu
9     unir.cloud.panopto.eu
10    unir.cloud.panopto.eu
11    unir.cloud.panopto.eu
12    unir.cloud.panopto.eu
13    unir.cloud.panopto.eu
14    unir.cloud.panopto.eu
15    unir.cloud.panopto.eu
16    unir.cloud.panopto.eu
17    unir.cloud.panopto.eu
18    unir.cloud.panopto.eu
19    unir.cloud.panopto.eu
20    unir.cloud.panopto.eu
21    unir.cloud.panopto.eu
22    unir.cloud.panopto.eu
23    unir.cloud.panopto.eu
24    unir.cloud.panopto.eu
25    unir.cloud.panopto.eu
26    unir.cloud.panopto.eu
27    unir.cloud.panopto.eu
28    unir.cloud.panopto.eu
29    unir.cloud.panopto.eu
30    unir.cloud.panopto.eu
31    unir.cloud.panopto.eu
32    unir.cloud.panopto.eu
33    unir.cloud.panopto.eu
34    unir.cloud.panopto.eu
35    unir.cloud.pan

In [47]:
# Shell command: recursively archive the listed transcripts folder into merged.zip.
!zip -r merged.zip transcripts/inteligencia-artificial-y-computacion-cognitiva/

  adding: transcripts/inteligencia-artificial-y-computacion-cognitiva/ (stored 0%)
  adding: transcripts/inteligencia-artificial-y-computacion-cognitiva/video_5_default.txt (deflated 60%)
  adding: transcripts/inteligencia-artificial-y-computacion-cognitiva/video_34_default.txt (deflated 59%)
  adding: transcripts/inteligencia-artificial-y-computacion-cognitiva/video_16_default.txt (deflated 61%)
  adding: transcripts/inteligencia-artificial-y-computacion-cognitiva/video_43_default.txt (deflated 63%)
  adding: transcripts/inteligencia-artificial-y-computacion-cognitiva/video_37_default.txt (deflated 62%)
  adding: transcripts/inteligencia-artificial-y-computacion-cognitiva/video_30_default.txt (deflated 59%)
  adding: transcripts/inteligencia-artificial-y-computacion-cognitiva/video_7_default.txt (deflated 61%)
  adding: transcripts/inteligencia-artificial-y-computacion-cognitiva/video_33_default.txt (deflated 61%)
  adding: transcripts/inteligencia-artificial-y-computacion-cognitiva/v