In [3]:
import os
from dotenv import load_dotenv

# Pull the variables defined in the local .env file into the environment, then read the OpenAI key
load_dotenv()
openai_api_key = os.environ.get('OPENAI_API_KEY')


In [None]:
from openai import OpenAI
# Create the API client used by the rest of the notebook
client = OpenAI()

# Smoke test: request a single chat completion and show the returned message object
completion = client.chat.completions.create(
    messages=[
        dict(
            role="system",
            content="You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.",
        ),
        dict(
            role="user",
            content="Compose a poem that explains the concept of recursion in programming.",
        ),
    ],
    model="gpt-4o",
)

print(completion.choices[0].message)

In [None]:
! pip install cmake


In [None]:
! pip install dlib


In [1]:
import base64
from openai import OpenAI
import requests
import cv2
import os


In [2]:
# Function to encode the image
def encode_image(image):
    """Encode an image as a base64 JPEG string.

    Args:
        image: Image accepted by ``cv2.imencode`` (callers in this notebook
            pass video frames and face crops).

    Returns:
        str: Base64 text of the JPEG-encoded image.

    Raises:
        ValueError: If OpenCV reports that the JPEG encoding failed.
    """
    success, buffer = cv2.imencode('.jpg', image)
    # imencode signals failure through its first return value; without this
    # check the failure would surface later as an unrelated error.
    if not success:
        raise ValueError("Could not encode image as JPEG")
    return base64.b64encode(buffer).decode('utf-8')


In [3]:
# Read the key from the environment (shell variable or a .env file loaded with
# load_dotenv) instead of pasting a secret into the notebook.
api_key = os.getenv("OPENAI_API_KEY", "")


In [4]:
def predict_age_from_image(image, timeout=30):
    """Ask the OpenAI chat completions endpoint to estimate a person's age from an image.

    The image is base64-encoded with ``encode_image`` and sent together with a
    text prompt that asks for the age as a bare number.

    Args:
        image: Image to analyse; passed unchanged to ``encode_image``.
        timeout (float): Seconds to wait for the HTTP request before giving up.

    Returns:
        int | None: The first integer found in the model's reply, or None when
        the request fails, the status is not 200, the response has an
        unexpected shape, or the reply contains no digits.
    """
    import re  # local import: only needed to pull the number out of the reply

    # Encode the image
    base64_image = encode_image(image)

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    payload = {
        "model": "gpt-4o",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Predict the closest age of the person in this image and don't say you can't predict the age. Don't say if the age is in early or later age ranges, just give the exact age you think the person is in, don't even say that the person in the image is xyz years old, just give me the number only, I DONT WANT ANY OTHER OUTPUT , ONLY THE AGE IN NUMBER FORMAT!"
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}"
                        }
                    }
                ]
            }
        ],
        "max_tokens": 300
    }

    # Without a timeout a stalled connection would block the frame loop forever.
    try:
        response = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=timeout,
        )
    except requests.RequestException:
        return None

    if response.status_code != 200:
        return None

    try:
        age_prediction = response.json()['choices'][0]['message']['content']
    except (KeyError, IndexError, TypeError, ValueError):
        # Unexpected or non-JSON body: treat it like any other failed prediction.
        return None

    if not isinstance(age_prediction, str):
        return None

    # The model sometimes decorates the number ("27.", "27 years"); keep the first integer.
    match = re.search(r"\d+", age_prediction)
    return int(match.group()) if match else None

In [5]:
# Function to process the video and predict age for each frame
def process_video(video_path, output_path=None):
    """Detect faces in a video, label each with a predicted age, and show/save the frames.

    Every frame is searched with OpenCV's frontal-face Haar cascade. Each face
    crop is sent to ``predict_age_from_image`` (one request per face per frame).
    Annotated frames are shown in a window titled 'Age Prediction' until the
    video ends or 'q' is pressed.

    Args:
        video_path (str): Path of the video file to read.
        output_path (str, optional): Where to save the annotated video (mp4v
            codec). Nothing is written when this is None or empty.

    Returns:
        None
    """
    # Load Haar Cascade for face detection
    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

    cap = cv2.VideoCapture(video_path)

    if not cap.isOpened():
        print(f"Error: Could not open video file {video_path}")
        return

    writer = None
    try:
        if output_path:
            # Reuse the source frame rate and size; fall back to 30 fps when the
            # container does not report one.
            fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
            frame_size = (
                int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
            )
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            writer = cv2.VideoWriter(output_path, fourcc, fps, frame_size)

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Convert frame to grayscale for face detection
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            faces = face_cascade.detectMultiScale(gray, 1.1, 4)

            # Predict age for each detected face
            for (x, y, w, h) in faces:
                face_img = frame[y:y+h, x:x+w]
                age_prediction = predict_age_from_image(face_img)
                print(f"Predicted Age: {age_prediction}")

                # predict_age_from_image returns None on failure; skip the
                # overlay then instead of drawing the text "None".
                if age_prediction is not None:
                    cv2.putText(frame, f"{age_prediction}", (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (144, 238, 144), 2)
                    cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 0, 0), 2)

            if writer is not None:
                writer.write(frame)

            cv2.imshow('Age Prediction', frame)

            # Break the loop on 'q' key press
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the capture, writer and window, even if a frame fails.
        cap.release()
        if writer is not None:
            writer.release()
        cv2.destroyAllWindows()


In [None]:
# Run the face/age annotation over a sample clip
video_path, output_path = 'walk1.mp4', 'walk1output.mp4'
process_video(video_path=video_path, output_path=output_path)

In [None]:
# Capture frames from the webcam
cap = cv2.VideoCapture(0)

# try/finally so the camera and window are released even when the loop is
# interrupted (e.g. kernel interrupt) or a prediction raises.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Predict age from the current frame
        age_prediction = predict_age_from_image(frame)
        print(f"Predicted Age: {age_prediction}")

        # Display the resulting frame
        cv2.putText(frame, f"Age: {age_prediction}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)
        cv2.imshow('Age Prediction', frame)

        # Break the loop on 'q' key press
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the capture and destroy all windows
    cap.release()
    cv2.destroyAllWindows()

In [None]:
# Example usage: predict_age_from_image expects image data, not a path, so the
# file is loaded with OpenCV first.
image_path = '/Users/satyajeetkadu/Downloads/IMG_2013.JPG'
image = cv2.imread(image_path)  # None when the file is missing or unreadable
if image is None:
    print(f"Error: Could not read image file {image_path}")
    age_prediction = None
else:
    age_prediction = predict_age_from_image(image)
print(age_prediction)

In [None]:
from pydub import AudioSegment

def convert_aac_to_mp3(input_file_path, output_file_path):
    """Read an AAC file with pydub and write it back out as MP3.

    Args:
        input_file_path (str): Path of the AAC file to read.
        output_file_path (str): Path the MP3 is written to.
    """
    AudioSegment.from_file(input_file_path, format="aac").export(output_file_path, format="mp3")

# Define input and output paths (plural names so the loop variables below do
# not overwrite the lists, which made the cell fail when run a second time)
input_files = ["noice_file1.aac","noise1.aac","noise2.aac"]
output_files = ["noice_file1.mp3","noise1.mp3","noise2.mp3"]

# Convert AAC to MP3
for input_file, output_file in zip(input_files, output_files):
    convert_aac_to_mp3(input_file, output_file)


In [None]:
from dotenv import load_dotenv


In [None]:
def process_and_transcribe_audio(aac_file_path):
    """
    Convert an AAC audio file to MP3 with pydub and transcribe it using OpenAI's Whisper model.

    The API key is read from the OPENAI_API_KEY environment variable (after
    loading .env) and assigned to the notebook-level ``client``. The MP3 is
    written next to the input file and removed once transcription finishes
    or fails.

    Args:
    aac_file_path (str): The path to the AAC file.

    Returns:
    str: The transcription of the audio.

    Raises:
    ValueError: If the API key is missing, or the input path already ends in
        .mp3 (the temporary MP3 would overwrite the input).
    """

    # Load the API key from .env file
    load_dotenv()
    openai_api_key = os.getenv('OPENAI_API_KEY')

    if not openai_api_key:
        raise ValueError("OpenAI API key not found in the environment variables.")

    # Swap only the extension. str.replace would also rewrite ".aac" inside the
    # name, and would return the input path unchanged for other extensions,
    # so the input itself was overwritten and then deleted.
    mp3_file_path = os.path.splitext(aac_file_path)[0] + ".mp3"
    if mp3_file_path == aac_file_path:
        raise ValueError(f"Input file {aac_file_path} already has an .mp3 extension.")

    # Initialize OpenAI API
    client.api_key = openai_api_key

    # Convert AAC to MP3
    audio = AudioSegment.from_file(aac_file_path, format="aac")
    try:
        audio.export(mp3_file_path, format="mp3")

        # Open the MP3 file and transcribe
        with open(mp3_file_path, "rb") as audio_file:
            transcription = client.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file
            )
    finally:
        # Clean up the MP3 file even when export or transcription fails
        if os.path.exists(mp3_file_path):
            os.remove(mp3_file_path)

    # Return the transcribed text
    return transcription.text

In [None]:
# AAC recordings to transcribe
input_files = [
    "noice_file1.aac",
    "noise1.aac",
    "noise2.aac",
]


In [None]:
# Transcribe every file in input_files and print each result under its file name
for input_file in input_files:
    text = process_and_transcribe_audio(input_file)
    print(f"Transcription for {input_file}:", text, sep="\n")

In [None]:
# ffmpeg-python provides the `ffmpeg` module imported in the next cell
%pip install ffmpeg-python

In [5]:
import ffmpeg
import openai
import os
from dotenv import load_dotenv
import subprocess



    This function takes an AAC audio file, converts it to MP3 using FFmpeg, and then transcribes the audio using OpenAI's Whisper model.
    
    Args:
    aac_file_path (str): The path to the AAC file.
    
    Returns:
    str: The transcription of the audio.

In [3]:
def process_and_transcribe_audio(aac_file_path):
    """
    Convert an AAC audio file to MP3 with the ffmpeg command-line tool and transcribe it using OpenAI's Whisper model.

    The API key is read from the OPENAI_API_KEY environment variable (after
    loading .env) and assigned to the notebook-level ``client``. The MP3 is
    written next to the input file and removed after transcription, whether
    it succeeds or fails.

    Args:
    aac_file_path (str): The path to the AAC file.

    Returns:
    str: The transcription of the audio, or None when the conversion fails.

    Raises:
    ValueError: If the API key is missing, or the input path already ends in
        .mp3 (the temporary MP3 would overwrite the input).
    """
    # Load the API key from .env file
    load_dotenv()
    openai_api_key = os.getenv('OPENAI_API_KEY')

    if not openai_api_key:
        raise ValueError("OpenAI API key not found in the environment variables.")

    # Swap only the extension. str.replace would also rewrite ".aac" inside the
    # name, and would return the input path unchanged for other extensions,
    # so the input itself was deleted during cleanup.
    mp3_file_path = os.path.splitext(aac_file_path)[0] + ".mp3"
    if mp3_file_path == aac_file_path:
        raise ValueError(f"Input file {aac_file_path} already has an .mp3 extension.")

    # Convert AAC to MP3 using FFmpeg and suppress output.
    # '-y' overwrites a leftover MP3 instead of waiting on an interactive prompt.
    try:
        subprocess.run(
            ['ffmpeg', '-y', '-i', aac_file_path, mp3_file_path],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=True,
        )
    except (subprocess.CalledProcessError, FileNotFoundError) as e:
        # FileNotFoundError: the ffmpeg executable is not on PATH.
        print(f"Error occurred while converting {aac_file_path} to MP3: {e}")
        return None

    # Initialize OpenAI API
    client.api_key = openai_api_key

    try:
        # Open the MP3 file and transcribe
        with open(mp3_file_path, "rb") as audio_file:
            transcription = client.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file
            )
    finally:
        # Clean up the MP3 file even when transcription fails
        if os.path.exists(mp3_file_path):
            os.remove(mp3_file_path)

    # Return the transcribed text
    return transcription.text

    This function takes an MP3 audio file and transcribes the audio using OpenAI's Whisper model.
    
    Args:
    mp3_file_path (str): The path to the MP3 file.
    
    Returns:
    str: The transcription of the audio.

In [6]:
import os
import openai
from dotenv import load_dotenv

def process_and_transcribe_audio(mp3_file_path):
    """
    Transcribe an MP3 file with OpenAI's Whisper model.

    The key is taken from the OPENAI_API_KEY environment variable after the
    .env file has been loaded, and is assigned to the notebook-level ``client``.

    Args:
    mp3_file_path (str): The path to the MP3 file.

    Returns:
    str: The transcription of the audio.

    Raises:
    ValueError: If no API key is available.
    """
    load_dotenv()
    key = os.environ.get('OPENAI_API_KEY')
    if not key:
        raise ValueError("OpenAI API key not found in the environment variables.")

    client.api_key = key

    # Send the file to the transcription endpoint, closing it afterwards
    mp3_handle = open(mp3_file_path, "rb")
    try:
        result = client.audio.transcriptions.create(file=mp3_handle, model="whisper-1")
    finally:
        mp3_handle.close()

    return result.text

In [7]:
# Directory containing the MP3 files (same folder name the download cell
# assigns to output_directory)
mp3_directory = "downloaded_mp3s"

In [8]:
# Built from mp3_directory so the notebook does not depend on one user's home directory
input_file = os.path.join(mp3_directory, 'trimmed_test2.mp3')

In [4]:
# input_file='noice_file1.aac'

In [None]:
# List of input MP3 files from the specified directory
# input_files = [os.path.join(mp3_directory, f) for f in os.listdir(mp3_directory) if f.endswith('.mp3')]

In [None]:
# Transcribe the selected file and print the text under a header line
text = process_and_transcribe_audio(input_file)
print(f"Transcription for {input_file}:", text, sep="\n")

Testing Different Segments from https://www.youtube.com/watch?v=zDJgG6hcs7c


In [10]:
# Built from mp3_directory so the notebook does not depend on one user's home directory
segment_boston_1 = os.path.join(mp3_directory, 'boston_trim1.mp3')

In [None]:
# Transcribe Boston segment 1 and print the text under a header line
text = process_and_transcribe_audio(segment_boston_1)
print(f"Transcription for {segment_boston_1}:", text, sep="\n")

In [12]:
# Built from mp3_directory so the notebook does not depend on one user's home directory
segment_boston_2 = os.path.join(mp3_directory, 'boston_trim2.mp3')

In [None]:
# Transcribe Boston segment 2 and print the text under a header line
text = process_and_transcribe_audio(segment_boston_2)
print(f"Transcription for {segment_boston_2}:", text, sep="\n")

In [14]:
# Built from mp3_directory so the notebook does not depend on one user's home directory
segment_boston_3 = os.path.join(mp3_directory, 'boston_trim3.mp3')

In [None]:
# Transcribe Boston segment 3 and print the text under a header line
text = process_and_transcribe_audio(segment_boston_3)
print(f"Transcription for {segment_boston_3}:", text, sep="\n")

Testing Different Segments from https://www.youtube.com/watch?v=b6yqknRixWM&t


In [16]:
# Built from mp3_directory so the notebook does not depend on one user's home directory
segment_woman_1 = os.path.join(mp3_directory, 'woman_trim1.mp3')

In [None]:
# Transcribe woman segment 1 and print the text under a header line
text = process_and_transcribe_audio(segment_woman_1)
print(f"Transcription for {segment_woman_1}:", text, sep="\n")

In [18]:
# Built from mp3_directory so the notebook does not depend on one user's home directory
segment_woman_2 = os.path.join(mp3_directory, 'woman_trim2.mp3')

In [None]:
# Transcribe woman segment 2 and print the text under a header line
text = process_and_transcribe_audio(segment_woman_2)
print(f"Transcription for {segment_woman_2}:", text, sep="\n")

In [20]:
# Built from mp3_directory so the notebook does not depend on one user's home directory
segment_woman_3 = os.path.join(mp3_directory, 'woman_trim3.mp3')

In [None]:
# Transcribe woman segment 3 and print the text under a header line
text = process_and_transcribe_audio(segment_woman_3)
print(f"Transcription for {segment_woman_3}:", text, sep="\n")

In [23]:
# Built from mp3_directory so the notebook does not depend on one user's home directory
segment_woman_4 = os.path.join(mp3_directory, 'woman_trim4.mp3')

In [None]:
# Transcribe woman segment 4 and print the text under a header line
text = process_and_transcribe_audio(segment_woman_4)
print(f"Transcription for {segment_woman_4}:", text, sep="\n")

Testing Different Segments from https://youtu.be/Uo4ZcCEtBlk?si=P-kXBbm--VrFaJ5K


In [25]:
# Built from mp3_directory so the notebook does not depend on one user's home directory
segment_cocaine_1 = os.path.join(mp3_directory, 'cocain_trim1.mp3')

In [None]:
# Transcribe segment 1 of the third video and print the text under a header line
text = process_and_transcribe_audio(segment_cocaine_1)
print(f"Transcription for {segment_cocaine_1}:", text, sep="\n")

In [28]:
# Built from mp3_directory so the notebook does not depend on one user's home directory
segment_cocaine_2 = os.path.join(mp3_directory, 'cocain_trim2.mp3')

In [None]:
# Transcribe segment 2 of the third video and print the text under a header line
text = process_and_transcribe_audio(segment_cocaine_2)
print(f"Transcription for {segment_cocaine_2}:", text, sep="\n")

In [27]:
# Built from mp3_directory so the notebook does not depend on one user's home directory
segment_cocaine_3 = os.path.join(mp3_directory, 'cocain_trim3.mp3')

In [None]:
# Transcribe segment 3 of the third video and print the text under a header line
text = process_and_transcribe_audio(segment_cocaine_3)
print(f"Transcription for {segment_cocaine_3}:", text, sep="\n")

Testing API BELOW


In [1]:
from openai import OpenAI
import os
# OpenAI() defaults to reading the key from os.environ.get("OPENAI_API_KEY").
# The key is passed explicitly here to show where to change the variable name
# if it is saved under a different one; the client is built only once.
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),
)

In [None]:
from openai import OpenAI
# Create the API client
client = OpenAI()

# Smoke test: request a single chat completion and show the returned message object
completion = client.chat.completions.create(
    messages=[
        dict(
            role="system",
            content="You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.",
        ),
        dict(
            role="user",
            content="Compose a poem that explains the concept of recursion in programming.",
        ),
    ],
    model="gpt-3.5-turbo",
)

print(completion.choices[0].message)

In [None]:
# pytube and ffmpeg-python provide the `pytube` and `ffmpeg` modules imported in the next cell
%pip install pytube ffmpeg-python

In [None]:
import pytube
import ffmpeg
import os
import ssl

    Download a YouTube video and save it to the specified output path.
    
    Args:
    youtube_url (str): The URL of the YouTube video.
    output_path (str): The path to save the downloaded video file.
    
    Returns:
    str: The path to the downloaded video file and the title of the video.

In [None]:
def download_youtube_video(youtube_url, output_path):
    """
    Download the first audio-only stream of a YouTube video into output_path.

    The file is named after the video title, with every character other than
    letters, digits, spaces, '-' and '_' replaced by '_', plus an '.mp4' suffix.

    Args:
    youtube_url (str): The URL of the YouTube video.
    output_path (str): The directory to save the downloaded file in.

    Returns:
    tuple[str, str]: The path to the downloaded file and the sanitized title of the video.

    Raises:
    ValueError: If the video exposes no audio-only stream.
    """
    # NOTE(security): certificate verification is switched off for the download,
    # as in the original cell. It is now restored afterwards so the override no
    # longer leaks into every later HTTPS request made by this kernel.
    original_https_context = ssl._create_default_https_context
    ssl._create_default_https_context = ssl._create_unverified_context
    try:
        yt = pytube.YouTube(youtube_url)
        stream = yt.streams.filter(only_audio=True).first()
        if stream is None:
            raise ValueError(f"No audio-only stream available for {youtube_url}")

        video_title = yt.title
        sanitized_title = "".join(c if c.isalnum() or c in (' ', '-', '_') else '_' for c in video_title)
        file_name = f"{sanitized_title}.mp4"
        stream.download(output_path=output_path, filename=file_name)
    finally:
        ssl._create_default_https_context = original_https_context

    video_file_path = os.path.join(output_path, file_name)
    return video_file_path, sanitized_title

    Convert a video file to MP3 using FFmpeg.
    
    Args:
    video_file_path (str): The path to the video file.
    mp3_file_path (str): The path to save the converted MP3 file.

In [None]:

def convert_to_mp3(video_file_path, mp3_file_path):
    """
    Transcode a media file to MP3 through the ffmpeg-python bindings.

    An existing file at mp3_file_path is overwritten. A failed conversion is
    reported with print() and not re-raised.

    Args:
    video_file_path (str): The path to the video file.
    mp3_file_path (str): The path to save the converted MP3 file.
    """
    try:
        source = ffmpeg.input(video_file_path)
        target = source.output(mp3_file_path)
        target.run(overwrite_output=True)
    except ffmpeg.Error as conversion_error:
        print(f"Error occurred while converting {video_file_path} to MP3: {conversion_error}")


    Download a YouTube video, convert it to MP3, and save it to the specified output path.
    
    Args:
    youtube_url (str): The URL of the YouTube video.
    output_path (str): The directory to save the MP3 file.
    
    Returns:
    str: The path to the converted MP3 file.

In [None]:
def download_and_convert(youtube_url, output_path):
    """
    Download a YouTube video, convert it to MP3, and save it to the specified output path.

    The downloaded source file is deleted only after an MP3 has actually been
    produced. convert_to_mp3 reports failures by printing rather than raising,
    so the result is checked on disk before anything is removed.

    Args:
    youtube_url (str): The URL of the YouTube video.
    output_path (str): The directory to save the MP3 file.

    Returns:
    str: The path to the converted MP3 file, or None when the conversion
    produced no usable file (the downloaded source is kept in that case).
    """
    # Download the video
    video_file_path, video_title = download_youtube_video(youtube_url, output_path)

    # Define the MP3 file path
    mp3_file_path = os.path.join(output_path, f"{video_title}.mp3")

    # Convert the video to MP3
    convert_to_mp3(video_file_path, mp3_file_path)

    # A missing or empty MP3 means the conversion failed; keep the download so
    # the conversion can be retried without fetching the video again.
    if not os.path.isfile(mp3_file_path) or os.path.getsize(mp3_file_path) == 0:
        print(f"Conversion failed for {video_file_path}; keeping the downloaded file.")
        return None

    # Remove the original video file
    os.remove(video_file_path)

    return mp3_file_path

In [None]:
# List of YouTube video URLs; the download loop further down passes each one
# to download_and_convert.
youtube_urls = [
    "https://www.youtube.com/watch?v=qTGPCuY1S64",
    "https://www.youtube.com/watch?v=Vlxy-NUes14",
    "https://www.youtube.com/watch?v=U0JvPJiNUEo",
    "https://www.youtube.com/watch?v=uFDx_jlV1mY",
    "https://www.youtube.com/watch?v=CT5GcN1HCn4",
    "https://www.youtube.com/watch?v=OqfHTxmrJeo",
    "https://www.youtube.com/watch?v=b6yqknRixWM",
    "https://www.youtube.com/watch?v=nM3W2p0gx7A",
    "https://www.youtube.com/watch?v=D3Qn9jO5ku0",
    "https://www.youtube.com/watch?v=3hd1OupgUBo",
    "https://www.youtube.com/watch?v=zDJgG6hcs7c",
    "https://www.youtube.com/watch?v=H6j_ZOlk_vo",
    "https://www.youtube.com/watch?v=-_hc1QfVMOI",
    "https://www.youtube.com/watch?v=4UE_aRtv1ic",
    "https://www.youtube.com/watch?v=QFTxtZXLJs0",
    "https://www.youtube.com/watch?v=7EgfGq2unjY",
    "https://www.youtube.com/watch?v=bvYQHBW7hNs",
    "https://www.youtube.com/watch?v=EZb6kiCif9g",
    "https://www.youtube.com/watch?v=e3g2o0eTm_E",
    "https://www.youtube.com/watch?v=2aYy9cJoRv4",
    "https://www.youtube.com/watch?v=gYWhwure9X8",
    "https://www.youtube.com/watch?v=0OvufDpsz3o",
    "https://www.youtube.com/watch?v=Uo4ZcCEtBlk",
    "https://www.youtube.com/watch?v=nnOtyRa9uyA",
    "https://www.youtube.com/watch?v=VBtseYkRwdU"
]

In [None]:
# Folder that receives the converted MP3 files; created if missing, left alone if already present
output_directory = "downloaded_mp3s"
os.makedirs(name=output_directory, exist_ok=True)

In [None]:
# Fetch each listed video, convert it to MP3, and report the resulting path
for url in youtube_urls:
    mp3_file_path = download_and_convert(youtube_url=url, output_path=output_directory)
    print(f"Downloaded and converted to MP3: {mp3_file_path}")

This notebook contains code for two scenarios:
1) Converting an AAC file to MP3 using FFmpeg and then using the OpenAI Whisper model to transcribe the speech to text
2) Downloading YouTube videos, converting them to MP3, and then using the OpenAI Whisper model to transcribe the speech to text

Conclusions:-

1) I faced issues converting YouTube videos that were very long (most were around 30-40 minutes), so I trimmed some segments and transcribed those instead, which gave highly accurate results.
2) To handle the full-length videos, we can run the code on cloud servers such as AWS, Google Cloud Platform, Azure, etc.
