In [3]:
import requests
import json
import ast
import os
from datetime import datetime
from openai import AzureOpenAI

# Airtable configuration
AIRTABLE_BASE_ID = 'app7mcJMf9O8GQRal'
# Secrets come from the environment so they never need to be pasted into (and
# saved with) the notebook; the fallback keeps the original empty-string value.
AIRTABLE_API_KEY = os.environ.get('AIRTABLE_API_KEY', '')
TABLE_NAME = 'tiktok_post'
VIEW_NAME = '1_download'
AIRTABLE_URL = f"https://api.airtable.com/v0/{AIRTABLE_BASE_ID}/{TABLE_NAME}"

# Azure OpenAI configuration
AZURE_ENDPOINT = "https://stepup.openai.azure.com"
API_VERSION = "2024-08-01-preview"
API_KEY = os.environ.get("AZURE_OPENAI_API_KEY", "")
DEPLOYMENT_NAME = "whisper"

# Initialise the Azure OpenAI client used by transcribe_video()
client = AzureOpenAI(
    azure_endpoint=AZURE_ENDPOINT,
    api_key=API_KEY,
    api_version=API_VERSION,
)

def download_video(url, local_path, timeout=60, chunk_size=1024 * 1024):
    """Download ``url`` and save it to ``local_path``.

    The body is streamed to disk in chunks instead of being held in memory,
    and the request carries a timeout so a stalled server cannot block the
    caller forever.

    Args:
        url: HTTP(S) address of the file to fetch.
        local_path: Destination path; overwritten if it already exists.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.
        chunk_size: Number of bytes requested per streamed chunk.

    Returns:
        True if the server answered 200 and the file was written,
        False for any other status code (no file is written in that case).

    Raises:
        requests.RequestException: on connection errors or timeouts.
        OSError: if ``local_path`` cannot be written.
    """
    with requests.get(url, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            return False
        try:
            with open(local_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=chunk_size):
                    if chunk:  # skip keep-alive chunks
                        f.write(chunk)
        except BaseException:
            # Never leave a truncated file behind when the transfer is
            # interrupted; the error itself is re-raised unchanged.
            if os.path.exists(local_path):
                os.remove(local_path)
            raise
    return True

def transcribe_video(file_path):
    """Transcribe a local media file through the module-level ``client``.

    The file is opened in binary mode and submitted to the deployment named
    by ``DEPLOYMENT_NAME`` with ``language="vi"`` and
    ``response_format="verbose_json"``.

    Args:
        file_path: Path of the file to transcribe.

    Returns:
        The object returned by ``client.audio.transcriptions.create``, or
        None if any ``Exception`` occurred (the error is printed).
    """
    try:
        with open(file_path, "rb") as media:
            create = client.audio.transcriptions.create
            result = create(
                model=DEPLOYMENT_NAME,
                file=media,
                language="vi",
                response_format="verbose_json",
            )
        return result
    except Exception as err:
        # Best effort: report the failure and let the caller skip this file.
        print(f"Lỗi khi transcribe: {str(err)}")
        return None

def convert_seconds_to_timestamp(seconds):
    """Render a duration in seconds as ``H:MM:SS``.

    Fractional seconds are truncated. Hours are not zero-padded and are not
    wrapped at 24.

    Args:
        seconds: Duration as an int or float.

    Returns:
        The formatted timestamp string, e.g. ``"1:01:01"`` for ``3661.5``.
    """
    parts = (
        int(seconds // 3600),         # whole hours
        int((seconds % 3600) // 60),  # whole minutes within the hour
        int(seconds % 60),            # whole seconds within the minute
    )
    return "{}:{:02d}:{:02d}".format(*parts)

def format_transcript(transcript):
    """Turn a transcription result into timestamped text lines.

    Each item of ``transcript.segments`` becomes one line of the form
    ``[H:MM:SS] text``, built from the segment's ``start`` and ``text``
    attributes.

    Args:
        transcript: Object exposing an iterable ``segments`` attribute.

    Returns:
        The lines joined with newlines; an empty string when there are no
        segments.
    """
    return "\n".join(
        f"[{convert_seconds_to_timestamp(seg.start)}] {seg.text}"
        for seg in transcript.segments
    )

def update_airtable_record(record_id, transcript, timeout=30):
    """Write ``transcript`` into the ``transcript`` field of one record.

    Sends a PATCH to ``AIRTABLE_URL`` with a single-element ``records`` list.

    Args:
        record_id: Airtable id of the record to update.
        transcript: Text to store in the record's ``transcript`` field.
        timeout: Seconds passed to ``requests.patch`` as its ``timeout`` so a
            stalled connection cannot block the caller indefinitely.

    Returns:
        True if Airtable answered with HTTP 200, False otherwise.

    Raises:
        requests.RequestException: on connection errors or timeouts.
    """
    headers = {
        "Authorization": f"Bearer {AIRTABLE_API_KEY}",
        "Content-Type": "application/json"
    }

    update_data = {
        "records": [{
            "id": record_id,
            "fields": {
                "transcript": transcript
            }
        }]
    }

    response = requests.patch(
        AIRTABLE_URL,
        headers=headers,
        json=update_data,
        timeout=timeout,
    )

    return response.status_code == 200

def process_videos():
    """Transcribe every video listed in the Airtable view and save the text.

    Steps:
      1. List all records of ``VIEW_NAME``, following Airtable's ``offset``
         pagination so records beyond the first page are not skipped.
      2. For each record with a ``video_download`` field: download the video
         to a temporary file, transcribe it, format the transcript and PATCH
         it back to the record.
      3. Always delete the temporary file, whether the record succeeded or
         failed.

    Errors for a single record are printed and do not stop the remaining
    records. A non-200 answer while listing is printed and stops the listing;
    records fetched before it are still processed.

    Raises:
        requests.RequestException: if the listing request itself fails at the
            network level (per-record failures are caught and printed).
    """
    headers = {
        "Authorization": f"Bearer {AIRTABLE_API_KEY}",
        "Content-Type": "application/json"
    }

    # Fetch every page before doing any slow work, so the pagination cursor
    # is not left idle while videos are being transcribed.
    params = {"view": VIEW_NAME}
    records = []
    while True:
        response = requests.get(
            AIRTABLE_URL, headers=headers, params=params, timeout=30
        )
        if response.status_code != 200:
            print(f"Lỗi khi lấy records từ Airtable: HTTP {response.status_code}")
            break
        payload = response.json()
        records.extend(payload.get('records', []))
        next_offset = payload.get('offset')
        if not next_offset:
            break
        params['offset'] = next_offset

    for record in records:
        # Reset per record so cleanup never touches a path from a previous
        # iteration and never hits an unbound name.
        local_path = None
        try:
            video_url = record['fields'].get('video_download')
            if not video_url:
                continue

            # Download video
            local_path = f"temp_video_{record['id']}.mp4"
            if not download_video(video_url, local_path):
                print(f"Lỗi khi tải video {record['id']}")
                continue

            # Transcribe video
            transcription = transcribe_video(local_path)
            if not transcription:
                continue

            # Format transcript
            formatted_transcript = format_transcript(transcription)

            # Update Airtable
            if update_airtable_record(record['id'], formatted_transcript):
                print(f"Đã xử lý thành công video {record['id']}")
            else:
                print(f"Lỗi khi cập nhật transcript cho {record['id']}")

        except Exception as e:
            print(f"Lỗi khi xử lý record {record.get('id')}: {str(e)}")
        finally:
            # Remove the temporary file on every exit path
            if local_path and os.path.exists(local_path):
                os.remove(local_path)

# Entry point: run the full download -> transcribe -> update pipeline when
# this code is executed as the main module.
if __name__ == "__main__":
    process_videos() 

KeyboardInterrupt: 

In [None]:
import requests
import json
import ast
import os
from datetime import datetime
from openai import AzureOpenAI

# Airtable configuration
AIRTABLE_BASE_ID = 'app7mcJMf9O8GQRal'
# Secrets come from the environment so they never need to be pasted into (and
# saved with) the notebook; the fallback keeps the original empty-string value.
AIRTABLE_API_KEY = os.environ.get('AIRTABLE_API_KEY', '')
TABLE_NAME = 'tiktok_post'
VIEW_NAME = '1_download'
AIRTABLE_URL = f"https://api.airtable.com/v0/{AIRTABLE_BASE_ID}/{TABLE_NAME}"

# Azure OpenAI configuration
AZURE_ENDPOINT = "https://stepup.openai.azure.com"
API_VERSION = "2024-08-01-preview"
API_KEY = os.environ.get("AZURE_OPENAI_API_KEY", "")
DEPLOYMENT_NAME = "whisper"

# Initialise the Azure OpenAI client used by transcribe_video()
client = AzureOpenAI(
    azure_endpoint=AZURE_ENDPOINT,
    api_key=API_KEY,
    api_version=API_VERSION,
)

def download_video(url, local_path, timeout=60, chunk_size=1024 * 1024):
    """Download ``url`` and save it to ``local_path``.

    The body is streamed to disk in chunks instead of being held in memory,
    and the request carries a timeout so a stalled server cannot block the
    caller forever.

    Args:
        url: HTTP(S) address of the file to fetch.
        local_path: Destination path; overwritten if it already exists.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.
        chunk_size: Number of bytes requested per streamed chunk.

    Returns:
        True if the server answered 200 and the file was written,
        False for any other status code (no file is written in that case).

    Raises:
        requests.RequestException: on connection errors or timeouts.
        OSError: if ``local_path`` cannot be written.
    """
    with requests.get(url, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            return False
        try:
            with open(local_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=chunk_size):
                    if chunk:  # skip keep-alive chunks
                        f.write(chunk)
        except BaseException:
            # Never leave a truncated file behind when the transfer is
            # interrupted; the error itself is re-raised unchanged.
            if os.path.exists(local_path):
                os.remove(local_path)
            raise
    return True

def transcribe_video(file_path):
    """Transcribe a local media file through the module-level ``client``.

    The file is opened in binary mode and submitted to the deployment named
    by ``DEPLOYMENT_NAME`` with ``language="vi"`` and
    ``response_format="verbose_json"``.

    Args:
        file_path: Path of the file to transcribe.

    Returns:
        The object returned by ``client.audio.transcriptions.create``, or
        None if any ``Exception`` occurred (the error is printed).
    """
    try:
        with open(file_path, "rb") as media:
            create = client.audio.transcriptions.create
            result = create(
                model=DEPLOYMENT_NAME,
                file=media,
                language="vi",
                response_format="verbose_json",
            )
        return result
    except Exception as err:
        # Best effort: report the failure and let the caller skip this file.
        print(f"Lỗi khi transcribe: {str(err)}")
        return None

def convert_seconds_to_timestamp(seconds):
    """Render a duration in seconds as ``H:MM:SS``.

    Fractional seconds are truncated. Hours are not zero-padded and are not
    wrapped at 24.

    Args:
        seconds: Duration as an int or float.

    Returns:
        The formatted timestamp string, e.g. ``"1:01:01"`` for ``3661.5``.
    """
    parts = (
        int(seconds // 3600),         # whole hours
        int((seconds % 3600) // 60),  # whole minutes within the hour
        int(seconds % 60),            # whole seconds within the minute
    )
    return "{}:{:02d}:{:02d}".format(*parts)

def format_transcript(transcript):
    """Turn a transcription result into timestamped text lines.

    Each item of ``transcript.segments`` becomes one line of the form
    ``[H:MM:SS] text``, built from the segment's ``start`` and ``text``
    attributes.

    Args:
        transcript: Object exposing an iterable ``segments`` attribute.

    Returns:
        The lines joined with newlines; an empty string when there are no
        segments.
    """
    return "\n".join(
        f"[{convert_seconds_to_timestamp(seg.start)}] {seg.text}"
        for seg in transcript.segments
    )

def update_airtable_record(record_id, transcript, timeout=30):
    """Write ``transcript`` into the ``transcript`` field of one record.

    Sends a PATCH to ``AIRTABLE_URL`` with a single-element ``records`` list.

    Args:
        record_id: Airtable id of the record to update.
        transcript: Text to store in the record's ``transcript`` field.
        timeout: Seconds passed to ``requests.patch`` as its ``timeout`` so a
            stalled connection cannot block the caller indefinitely.

    Returns:
        True if Airtable answered with HTTP 200, False otherwise.

    Raises:
        requests.RequestException: on connection errors or timeouts.
    """
    headers = {
        "Authorization": f"Bearer {AIRTABLE_API_KEY}",
        "Content-Type": "application/json"
    }

    update_data = {
        "records": [{
            "id": record_id,
            "fields": {
                "transcript": transcript
            }
        }]
    }

    response = requests.patch(
        AIRTABLE_URL,
        headers=headers,
        json=update_data,
        timeout=timeout,
    )

    return response.status_code == 200

def process_videos():
    """Transcribe every video listed in the Airtable view and save the text.

    Steps:
      1. List all records of ``VIEW_NAME``, following Airtable's ``offset``
         pagination so records beyond the first page are not skipped.
      2. For each record with a ``video_download`` field: download the video
         to a temporary file, transcribe it, format the transcript and PATCH
         it back to the record.
      3. Always delete the temporary file, whether the record succeeded or
         failed.

    Errors for a single record are printed and do not stop the remaining
    records. A non-200 answer while listing is printed and stops the listing;
    records fetched before it are still processed.

    Raises:
        requests.RequestException: if the listing request itself fails at the
            network level (per-record failures are caught and printed).
    """
    headers = {
        "Authorization": f"Bearer {AIRTABLE_API_KEY}",
        "Content-Type": "application/json"
    }

    # Fetch every page before doing any slow work, so the pagination cursor
    # is not left idle while videos are being transcribed.
    params = {"view": VIEW_NAME}
    records = []
    while True:
        response = requests.get(
            AIRTABLE_URL, headers=headers, params=params, timeout=30
        )
        if response.status_code != 200:
            print(f"Lỗi khi lấy records từ Airtable: HTTP {response.status_code}")
            break
        payload = response.json()
        records.extend(payload.get('records', []))
        next_offset = payload.get('offset')
        if not next_offset:
            break
        params['offset'] = next_offset

    for record in records:
        # Reset per record so cleanup never touches a path from a previous
        # iteration and never hits an unbound name.
        local_path = None
        try:
            video_url = record['fields'].get('video_download')
            if not video_url:
                continue

            # Download video
            local_path = f"temp_video_{record['id']}.mp4"
            if not download_video(video_url, local_path):
                print(f"Lỗi khi tải video {record['id']}")
                continue

            # Transcribe video
            transcription = transcribe_video(local_path)
            if not transcription:
                continue

            # Format transcript
            formatted_transcript = format_transcript(transcription)

            # Update Airtable
            if update_airtable_record(record['id'], formatted_transcript):
                print(f"Đã xử lý thành công video {record['id']}")
            else:
                print(f"Lỗi khi cập nhật transcript cho {record['id']}")

        except Exception as e:
            print(f"Lỗi khi xử lý record {record.get('id')}: {str(e)}")
        finally:
            # Remove the temporary file on every exit path
            if local_path and os.path.exists(local_path):
                os.remove(local_path)

# Entry point: run the full download -> transcribe -> update pipeline when
# this code is executed as the main module.
if __name__ == "__main__":
    process_videos() 

KeyboardInterrupt: 