In [None]:
# Crop box handed to PIL's `Image.crop`, i.e. (left, upper, right, lower) in pixels.
# The same region is cut out of the key frame and out of every scanned frame; the
# two crops are compared to decide whether a frame belongs to a detected period.
key_frame_region = (70, 42, 150, 65)

import json
import cv2
from PIL import Image

# Load the persisted pipeline state: the known records and the episodes cut so far.
with open("state.json", "r") as file:
    state = json.load(file)

# The record to process is the one following the record of the last stored episode.
record_id = state['episodes'][-1]['source']['record'] + 1

# Resolve the season name of that record, failing with a readable message instead
# of a bare IndexError when state.json does not describe the record yet.
matching_seasons = [item['name'] for item in state['records'] if item['id'] == record_id]
if not matching_seasons:
    raise LookupError(f"state.json has no entry in 'records' with id {record_id}")
season = matching_seasons[0]
print(record_id)

record_file = f"./records/{str(record_id).zfill(5)}.mp4"
record = cv2.VideoCapture(record_file)

# cv2.VideoCapture does not raise for a missing or unreadable file; without this
# check the failure only shows up later as an error on a None frame.
if not record.isOpened():
    raise FileNotFoundError(f"Cannot open video file {record_file}")

# Integer frame rate, used for frame arithmetic and whole-second durations.
fps = int(record.get(cv2.CAP_PROP_FPS))

def get_frame(frame_number = -1):
    """Read one frame from the global `record` capture and return it as a PIL image.

    Args:
        frame_number: Index to seek to before reading. With the default of -1 no
            seek is performed and the capture simply yields its next frame.

    Returns:
        A `PIL.Image` with the channel order reversed from what the capture
        delivers (channels 2, 1, 0), or None when the capture reports a failed read.
    """
    wants_seek = frame_number != -1
    if wants_seek:
        record.set(cv2.CAP_PROP_POS_FRAMES, frame_number)

    success, pixels = record.read()
    if success:
        # Swap the first and third channel before handing the array to PIL.
        reordered = pixels[:, :, [2, 1, 0]]
        return Image.fromarray(reordered)

    return None

# Reference ("key") frame: frame 100 of the record. Its crop is the template that
# every scanned frame is compared against when periods are detected.
# NOTE(review): frame 100 is presumably known to lie inside an episode — confirm per record.
key_frame_number = 100
key_frame = get_frame(key_frame_number)
key_frame_crop = key_frame.crop(key_frame_region)
# Show the template crop so it can be checked visually.
display(key_frame_crop)

In [None]:
from PIL import ImageChops
import numpy as np

# Scan the whole record and collect the frame spans ("periods") during which the
# key-frame region looks like the reference crop.
periods = []
record.set(cv2.CAP_PROP_POS_FRAMES, 0)

start = 0
end = 0
last_is_payload = False

def _close_period(start_frame, end_frame):
    """Report and store one detected period."""
    period = {'start': start_frame, 'end': end_frame}

    print(period, end="")
    print(end_frame - start_frame, end="")
    periods.append(period)

# Capture position after the most recent successful read; needed to close a
# period that is still open when the video runs out.
position = 0

while True:
    frame = get_frame()
    if frame is None:
        break

    position = int(record.get(cv2.CAP_PROP_POS_FRAMES))

    crop = frame.crop(key_frame_region)
    diff = np.array(ImageChops.difference(key_frame_crop, crop))
    # A frame counts as payload while fewer than 70% of the compared values
    # differ from the reference crop by more than 20.
    is_payload = bool(np.sum(diff > 20) / diff.size < 0.7)

    if is_payload and not last_is_payload:
        # Rising edge: a period begins.
        start = position
    elif last_is_payload and not is_payload:
        # Falling edge: the running period ends.
        end = position
        _close_period(start, end)

    last_is_payload = is_payload

# The record may end while a period is still running; previously such a trailing
# period was silently dropped because no falling edge was ever seen.
if last_is_payload:
    end = position
    _close_period(start, end)
    last_is_payload = False

In [None]:
import requests
import base64
import re
import os
import io

# Credentials are kept outside the notebook in secret.json. Keys read elsewhere in
# this notebook: 'yandex_folder', 'yandex_key', 'openai_key', 'telegram_key'.
with open("secret.json", "r") as file:
    secret = json.load(file)

def _collect_ocr_text(node):
    """Return every non-empty string stored under a "text" key, in document order."""
    found = []
    if isinstance(node, dict):
        for key, value in node.items():
            if key == "text" and isinstance(value, str):
                if value:
                    found.append(value)
            else:
                found.extend(_collect_ocr_text(value))
    elif isinstance(node, list):
        for item in node:
            found.extend(_collect_ocr_text(item))
    return found

def get_title(frame_number, timeout=60):
    """Recognise and clean up the episode title shown on a frame of the record.

    The title region of the frame is sent to the Yandex Vision text-detection
    endpoint; the recognised fragments are then passed to the OpenAI chat
    completions endpoint with a prompt asking to fix OCR typos.

    Args:
        frame_number: Index of the frame (in the global `record`) to read.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        The corrected title with any "тема"/"тема:" prefix and a trailing period
        removed, and 'Ежик' normalised to 'Ёжик'.

    Raises:
        ValueError: If the frame cannot be read, or the OpenAI response carries
            an "error" entry.
        requests.HTTPError: If the Vision endpoint answers with an error status.
    """
    frame = get_frame(frame_number)
    if frame is None:
        raise ValueError(f"Cannot read frame {frame_number} from the record")
    title_frame = frame.crop(title_region)

    # Encode the crop as base64 JPEG, the form in which it is embedded in the request.
    image_buffer = io.BytesIO()
    title_frame.save(image_buffer, format="JPEG")
    image_base64 = base64.b64encode(image_buffer.getvalue()).decode("utf-8")

    payload = { "folderId": secret['yandex_folder'], "analyze_specs": [{ "content": image_base64,
        "features": [{"type": "TEXT_DETECTION", "text_detection_config": {"language_codes": ["ru", "en"]}}]}]}

    url = "https://vision.api.cloud.yandex.net/vision/v1/batchAnalyze"
    headers = { "Content-Type": "application/json", "Authorization": f"Api-Key {secret['yandex_key']}" }
    response = requests.post(url, json=payload, headers=headers, timeout=timeout)
    # Without this an error response simply contains no text and an empty prompt
    # would be sent on to the language model.
    response.raise_for_status()

    # Walk the decoded JSON rather than regex-matching its serialized form, so
    # quotes, backslashes and newlines inside recognised text survive intact.
    recognised = _collect_ocr_text(response.json())

    text = 'Пожалуйста исправь опечатки в названии темы серии смешариков. Его пишут с большой буквы, как и персонажей: Крош, Ёжик, Лосяш, Копатыч, Нюша. При OCR могли перепутаться буквы "п" и "л" или вместо букв могли распознаться цифры. Сделай все слова по возможности корректными. Не бойся переборщить, выложись на максимум. \n\n' + ' '.join(recognised)

    result = requests.post(
        "https://api.openai.com/v1/chat/completions",
        json = {"model": "gpt-3.5-turbo","messages": [{"role": "user", "content": text}]},
        headers = {"Content-Type": "application/json", "Authorization": f"Bearer {secret['openai_key']}"},
        timeout = timeout
    ).json()

    if "error" in result:
        raise ValueError(result["error"])

    title = result["choices"][-1]["message"]["content"]
    # Drop a "тема"/"тема:" prefix the model tends to echo, and a trailing period.
    title = re.sub(r'тема:? ', '', title, flags=re.IGNORECASE)
    title = re.sub(r'\.$', '', title)

    return title.replace('Ежик', 'Ёжик')

# Crop box (left, upper, right, lower) of the area that get_title() sends to OCR.
title_region = (60, 30, 840, 200)

# Preview the region on the key frame.
display(key_frame.crop(title_region))

In [None]:
import ipywidgets as widgets

def format_duration(duration):
    """Format an integer number of seconds as zero-padded ``MM:SS``.

    Minutes are not wrapped at 60, so an hour and five seconds reads ``60:05``.
    """
    minutes, seconds = divmod(duration, 60)
    return f"{minutes:02d}:{seconds:02d}"

def print_episode(episode):
    """Show an episode's title frame together with an editable title field.

    Edits made in the text widget are written straight back into
    ``episode['title']``, so the dict stored in the state is updated in place.

    Args:
        episode: Episode dict with at least 'title' and
            'source' -> 'start_frame'.
    """
    title_input = widgets.Text(value = episode['title'])
    title_input.layout.width = '1000px'

    def on_text_change(change):
        episode['title'] = change['new']

    title_input.observe(on_text_change, names='value')

    # Derive the preview frame from the episode itself (3 seconds after its
    # start) instead of reading a loop variable from the calling cell.
    preview_frame_id = episode['source']['start_frame'] + 3 * fps

    display(get_frame(preview_frame_id).crop(title_region))
    display(title_input)

# Turn every detected period into an episode entry of the state and show it for
# manual title correction.
last_episode_id = 1

# Episodes of this record that are already in the state, keyed by frame span.
# Re-running the cell reuses them instead of appending duplicates and paying for
# the OCR / LLM calls again; titles edited by hand are kept as well.
known_episodes = {
    (item['source']['start_frame'], item['source']['end_frame']): item
    for item in state['episodes']
    if item['source']['record'] == record_id
}

for period in periods:
    # The title is read 3 seconds into the period.
    stop_frame_id = period['start'] + 3 * fps
    episode = known_episodes.get((period['start'], period['end']))

    if episode is None:
        title = get_title(stop_frame_id)

        episode = {
            'id': state['episodes'][-1]['id'] + 1,
            'season': season,
            'episode': last_episode_id,
            'title': title,
            'duration': (period['end'] - period['start']) // fps,
            'source': {'record': record_id, 'start_frame': period['start'], 'end_frame': period['end']},
            'uploaded': False
        }
        state['episodes'].append(episode)

    last_episode_id += 1
    print_episode(episode)

In [None]:
# Serialize first: opening the file with "w" truncates it immediately, so a
# failure while encoding would otherwise leave state.json empty or half-written.
state_json = json.dumps(state, indent=4, ensure_ascii=False)

with open("state.json", "w") as file:
    file.write(state_json)

import subprocess

def _numeric_file_ids(directory, extension):
    """Return the integer ids encoded in the names of `directory` files ending with `extension`."""
    ids = set()
    for name in os.listdir(directory):
        if not name.endswith(extension):
            continue
        try:
            ids.add(int(name.split('.')[0]))
        except ValueError:
            # Files not named after an episode id are none of our business.
            continue
    return ids

# --- Thumbnails: one JPEG per episode of the current record that has none yet ---
thumbnail_file_ids = _numeric_file_ids("./thumbnails", ".jpg")
generate_thumbnails = [item for item in state['episodes'] if item['id'] not in thumbnail_file_ids]

for ep in generate_thumbnails:
    if ep['source']['record'] != record_id:
        continue

    id_filled = str(ep['id']).zfill(5)
    thumbnail = get_frame(ep['source']['start_frame'])
    if thumbnail is None:
        raise ValueError(f"Cannot read frame {ep['source']['start_frame']} for episode {ep['id']}")
    thumbnail.save(f"./thumbnails/{id_filled}.jpg")

# --- Episode clips: cut each missing episode out of the record with ffmpeg ---
episode_file_ids = _numeric_file_ids("./episodes", ".mp4")
render_episodes = [item for item in state['episodes'] if item['id'] not in episode_file_ids]

# Frame -> seconds conversion must use the exact frame rate: truncating e.g.
# 29.97 to 29 shifts the cut points by several seconds on a long record.
exact_fps = record.get(cv2.CAP_PROP_FPS)

for ep in render_episodes:
    source = ep['source']

    if source['record'] != record_id:
        continue

    if exact_fps <= 0:
        raise ValueError("The record reports no valid frame rate; cannot compute cut timestamps")

    record_filled = str(source['record']).zfill(5)
    id_filled = str(ep['id']).zfill(5)

    # Argument list instead of a shell string: no quoting pitfalls, no shell needed.
    command = [
        "ffmpeg",
        "-i", f"./records/{record_filled}.mp4",
        "-ss", f"{(source['start_frame'] / exact_fps):.2f}",
        "-to", f"{(source['end_frame'] / exact_fps):.2f}",
        "-c:v", "copy",
        "-c:a", "copy",
        f"./episodes/{id_filled}.mp4",
    ]
    subprocess.check_output(command, text=True)

In [None]:
import time
from telegram import Bot
import asyncio
import nest_asyncio

# Patch asyncio via nest_asyncio — presumably so coroutines can be awaited while the
# notebook's own event loop is already running (confirm against nest_asyncio docs).
nest_asyncio.apply()
# Telegram bot client authenticated with the token from secret.json.
bot = Bot(token=secret['telegram_key'])

async def publish_episode(episode):
    """Upload one rendered episode to the @GPTriki channel and record its link.

    The video and thumbnail are looked up by the zero-padded episode id under
    ./episodes and ./thumbnails. On success ``episode['uploaded']`` is set to
    the URL of the posted message.

    Args:
        episode: Episode dict with 'id', 'season', 'episode', 'title' and
            'duration'; it is mutated in place.
    """
    episode_full_id = str(episode['id']).zfill(5)
    thumb = f"./thumbnails/{episode_full_id}.jpg"
    video = f"./episodes/{episode_full_id}.mp4"

    # The thumbnail's dimensions are reported as the video's width and height.
    with Image.open(thumb) as thumb_image:
        thumb_width, thumb_height = thumb_image.size

    caption = (
        f"{episode['season']}\n" +
        f"Эпизод: {episode['episode']}\n" +
        f"Тема: {episode['title']}\n" +
        f"Длительность: [{format_duration(episode['duration'])}]\n" +
        f"Все серии: t.me/GPTriki"
    )

    # Context managers make sure both file handles are closed even when the
    # upload fails; previously they were left open for every episode.
    with open(video, 'rb') as video_file, open(thumb, 'rb') as thumb_file:
        message = await bot.send_video(
            chat_id='@GPTriki',
            video=video_file,
            width=thumb_width,
            height=thumb_height,
            caption=caption,
            supports_streaming = True,
            thumbnail=thumb_file
        )

    episode['uploaded'] = f"https://t.me/GPTriki/{message.message_id}"

async def publish_episodes():
    """Upload every episode of the state that is not marked as uploaded yet.

    When an upload fails with a flood-control error, the coroutine waits for the
    number of seconds named in the error message (plus one) and retries that
    episode once. Any other error, or a failed retry, propagates to the caller.
    """
    episodes_to_upload = [item for item in state['episodes'] if item['uploaded'] is False]

    for episode in episodes_to_upload:
        try:
            await publish_episode(episode)
        except Exception as e:
            error_text = str(e)
            match = re.search(r'\d+', error_text)

            if not (match and 'Flood control exceeded. Retry in ' in error_text):
                # Bare raise keeps the original traceback intact.
                raise

            # asyncio.sleep yields to the event loop; time.sleep would freeze
            # it (and the notebook kernel) for the whole waiting time.
            await asyncio.sleep(int(match.group()) + 1)
            await publish_episode(episode)

await publish_episodes()

with open("state.json", "w") as file:
    json.dump(state, file, indent=4, ensure_ascii=False)