In [3]:
import cv2
from pydub import AudioSegment
import torch
import torchaudio
import pytesseract
from PIL import Image
import torchvision.transforms as transforms
from torchvision import models

In [4]:
import cv2

def load_video(video_path):
    """Open a video file for frame-by-frame reading.

    Args:
        video_path: Path of the video, passed unchanged to ``cv2.VideoCapture``.

    Returns:
        The opened ``cv2.VideoCapture``. The caller owns it and must release
        it (``extract_frames`` does so once it has read every frame).

    Raises:
        OSError: If OpenCV cannot open ``video_path``. ``OSError`` derives
            from ``Exception``, so ``except Exception`` handlers keep working.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        # Free the capture object before reporting the failure.
        cap.release()
        raise OSError(f"Cannot open video file {video_path}")
    return cap

In [5]:
def extract_frames(cap, size=(224, 224)):
    """Read every remaining frame from an open capture, resizing each one.

    Args:
        cap: An opened ``cv2.VideoCapture`` (see ``load_video``). It is always
            released before this function returns, including when reading or
            resizing raises, so it cannot be reused afterwards.
        size: Target size handed to ``cv2.resize`` as ``(width, height)``.
            Defaults to ``(224, 224)``. Pass ``None`` to keep frames unresized.

    Returns:
        A list with one frame per successful ``cap.read()``, in read order.
        All frames are held in memory at the same time.
    """
    frames = []
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            # Resize the frame
            if size is not None:
                frame = cv2.resize(frame, size)
            frames.append(frame)
    finally:
        # Release the capture even if a frame could not be processed.
        cap.release()
    return frames

In [6]:
from pydub import AudioSegment

def extract_audio(video_path, audio_path):
    """Write the audio of ``video_path`` to ``audio_path`` in WAV format.

    The media file is loaded with ``AudioSegment.from_file`` and exported
    with ``format="wav"``. Nothing is returned.
    """
    AudioSegment.from_file(video_path).export(audio_path, format="wav")

In [8]:
import speech_recognition as sr

def speech_to_text(audio_path, language='ru-RU'):
    """Transcribe an audio file with ``Recognizer.recognize_google``.

    Args:
        audio_path: Path of the audio file opened through ``sr.AudioFile``.
        language: Language tag forwarded to ``recognize_google``. Defaults to
            ``'ru-RU'``, the value that used to be hard-coded.

    Returns:
        The recognized text, or ``""`` when the recognizer raises
        ``sr.UnknownValueError``.

    Raises:
        Any other exception from the recognizer propagates unchanged.
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_path) as source:
        # Read the whole file into memory before recognition.
        audio = recognizer.record(source)
    try:
        return recognizer.recognize_google(audio, language=language)
    except sr.UnknownValueError:
        return ""


In [9]:
import torch
import torchaudio

class SoundClassifier(torch.nn.Module):
    """Small 1-D convolutional classifier for fixed-length, single-channel input.

    Layers: ``Conv1d(1, 16, kernel_size=3)`` -> ReLU -> ``MaxPool1d(2)`` ->
    ``Linear`` -> ReLU.

    Args:
        input_length: Number of samples in each input sequence. Defaults to
            100, the size the original layer dimensions assumed.
        num_classes: Number of output scores. Defaults to 10.

    Raises:
        ValueError: If ``input_length`` is too short to survive the
            convolution and pooling (fewer than 4 samples).
    """

    def __init__(self, input_length=100, num_classes=10):
        super().__init__()
        if input_length < 4:
            raise ValueError("input_length must be at least 4")
        self.input_length = input_length
        self.conv1 = torch.nn.Conv1d(1, 16, kernel_size=3)
        self.pool = torch.nn.MaxPool1d(2)
        # The unpadded kernel-3 convolution drops 2 samples and the pooling
        # halves the rest: 100 -> 98 -> 49 for the default input length.
        pooled_length = (input_length - 2) // 2
        self.fc1 = torch.nn.Linear(16 * pooled_length, num_classes)

    def forward(self, x):
        """Score a batch of sequences.

        Args:
            x: Tensor of shape ``(batch, 1, input_length)``. A 2-D
                ``(1, input_length)`` tensor is treated as a batch of one.

        Returns:
            Tensor of shape ``(batch, num_classes)`` with non-negative scores.

        Raises:
            RuntimeError: If the sequence length does not match the length
                the linear layer was built for.
        """
        if x.dim() == 2:
            x = x.unsqueeze(0)
        x = self.pool(torch.nn.functional.relu(self.conv1(x)))
        # Flatten per sample. The previous view(-1, 16 * 49) silently moved
        # surplus samples into the batch dimension for longer inputs.
        x = x.flatten(start_dim=1)
        # NOTE(review): the ReLU on the output layer is kept for backward
        # compatibility; it clamps negative scores to zero, which is unusual
        # for logits fed to a cross-entropy loss. Confirm it is intended.
        x = torch.nn.functional.relu(self.fc1(x))
        return x

def classify_sound(audio_path, model, input_length=100):
    """Predict a sound class index for an audio file.

    The waveform returned by ``torchaudio.load`` is ``(channels, samples)``,
    while ``SoundClassifier`` expects ``(batch, 1, input_length)``. The
    channels are therefore averaged to mono and the signal is reduced to
    ``input_length`` values before the model is called.

    Args:
        audio_path: Path of the audio file to load.
        model: Callable taking a ``(1, 1, input_length)`` tensor and returning
            scores of shape ``(1, num_classes)``.
        input_length: Sequence length the model expects. Defaults to 100, the
            default of ``SoundClassifier``.

    Returns:
        The index of the highest score, as a Python ``int``.

    Raises:
        ValueError: If the file contains no samples.
    """
    waveform, _sample_rate = torchaudio.load(audio_path)
    if waveform.numel() == 0:
        raise ValueError(f"No audio samples found in {audio_path}")

    # (channels, samples) -> (1, 1, samples)
    mono = waveform.mean(dim=0, keepdim=True).unsqueeze(0)
    # NOTE(review): averaging the whole clip down to `input_length` values is
    # a crude placeholder that only makes the shapes agree. Replace it with
    # the feature extraction used when the model was trained.
    features = torch.nn.functional.adaptive_avg_pool1d(mono, input_length)

    with torch.no_grad():
        output = model(features)
    return int(output.argmax(dim=1).item())

In [10]:
import torch

def detect_objects(frames, model=None, bgr=True):
    """Run an object detector on every frame.

    Args:
        frames: Iterable of frames, e.g. the list from ``extract_frames``.
        model: Detector called once per frame. When omitted, YOLOv5s is loaded
            through ``torch.hub``. Pass a preloaded model to avoid reloading
            it on every call.
        bgr: Whether 3-dimensional array frames are in OpenCV's BGR channel
            order and should be flipped to RGB before detection. Defaults to
            ``True`` because ``extract_frames`` produces OpenCV frames. Pass
            ``False`` for frames that are already RGB.

    Returns:
        A list with one model result per frame, in input order. An empty
        input returns ``[]`` without loading a model.
    """
    frames = list(frames)
    if not frames:
        return []

    if model is None:
        # NOTE(security): torch.hub.load downloads and executes code from the
        # named GitHub repository; pin a revision or vendor it if that matters.
        model = torch.hub.load('ultralytics/yolov5', 'yolov5s')

    results = []
    for frame in frames:
        if bgr and getattr(frame, "ndim", None) == 3:
            # Reverse the channel axis: BGR -> RGB.
            frame = frame[..., ::-1]
        results.append(model(frame))
    return results

In [11]:
import pytesseract
from PIL import Image

def ocr_on_frame(frame, lang='rus'):
    """Extract text from a single frame with Tesseract.

    Args:
        frame: Image array accepted by ``PIL.Image.fromarray``.
        lang: Language argument forwarded to
            ``pytesseract.image_to_string``. Defaults to ``'rus'``, the value
            that used to be hard-coded.

    Returns:
        The string returned by ``pytesseract.image_to_string``.
    """
    return pytesseract.image_to_string(Image.fromarray(frame), lang=lang)

In [12]:
import torchvision.transforms as transforms
from torchvision import models

def classify_scene(frames, model=None, bgr=True):
    """Run an image classifier on every frame and return its raw outputs.

    Args:
        frames: Iterable of ``H x W x 3`` uint8 frames, e.g. the list from
            ``extract_frames``.
        model: Classifier to use. When omitted, a pretrained ResNet-18 is
            created. Pass a preloaded model to avoid rebuilding it on every
            call. The model is switched to eval mode.
        bgr: Whether frames are in OpenCV's BGR channel order and must be
            converted to RGB first. Defaults to ``True`` because
            ``extract_frames`` produces OpenCV frames.

    Returns:
        A list with one output tensor per frame, in input order. Each is the
        model output for a batch of one. An empty input returns ``[]``
        without creating a model.
    """
    frames = list(frames)
    if not frames:
        return []

    if model is None:
        model = models.resnet18(pretrained=True)
    model.eval()

    preprocess = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        # ImageNet channel statistics expected by the pretrained weights.
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    results = []
    with torch.no_grad():
        for frame in frames:
            if bgr:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            input_batch = preprocess(frame).unsqueeze(0)  # add the batch axis
            results.append(model(input_batch))
    return results

In [13]:
from sqlalchemy import create_engine, Column, Integer, String
from sqlalchemy.orm import declarative_base, sessionmaker

# SQLite database stored in `config.db`; it backs the ORM models below.
engine = create_engine('sqlite:///config.db')
# Declarative base class that the ORM models (Keyword, AnalysisResult) inherit from.
Base = declarative_base()

class Keyword(Base):
    """ORM model for one row of the ``keywords`` table."""
    __tablename__ = 'keywords'
    id = Column(Integer, primary_key=True)  # integer primary key
    word = Column(String)  # keyword text; no uniqueness or NOT NULL constraint is declared

# Create the tables of every model registered on Base so far.
# NOTE(review): models declared after this line (e.g. AnalysisResult) are not
# covered by this call and need a create_all() of their own.
Base.metadata.create_all(engine)
# Session factory bound to the engine, plus the single module-level session
# shared by the helper functions in this notebook.
Session = sessionmaker(bind=engine)
session = Session()

def add_keyword(word):
    """Insert ``word`` into the ``keywords`` table and commit.

    Args:
        word: Text stored in the ``word`` column of the new ``Keyword`` row.

    Raises:
        Any exception raised while adding or committing. The transaction is
        rolled back first so the shared module-level ``session`` stays usable.
    """
    keyword = Keyword(word=word)
    try:
        session.add(keyword)
        session.commit()
    except Exception:
        # Without a rollback the shared session would reject later work.
        session.rollback()
        raise


In [14]:
from fastapi import FastAPI

# FastAPI application on which the endpoint decorators below register routes.
# NOTE(review): a later cell rebinds the name `app` to a Flask application.
app = FastAPI()

@app.post("/analyze_video/")
async def analyze_video_endpoint(video_path: str):
    """Placeholder handler for ``POST /analyze_video/``.

    No processing is wired in yet: ``video_path`` is accepted but unused and
    a fixed status payload is returned.
    """
    # TODO: video processing logic goes here
    return dict(status="Video analysis started")

# To start the server:
# uvicorn main:app --reload
@app.get("/search/")
async def search_endpoint(query: str):
    """Placeholder handler for ``GET /search/``.

    No search is implemented yet: ``query`` is accepted but unused and an
    empty result list is returned.
    """
    # TODO: metadata search logic goes here
    return dict(results=[])


In [16]:
class AnalysisResult(Base):
    """ORM model for one row of the ``analysis_results`` table."""
    __tablename__ = 'analysis_results'
    id = Column(Integer, primary_key=True)
    video_path = Column(String)  # path of the analysed video
    # Serialized analysis output. Presumably named `datameta` because
    # `metadata` is reserved on declarative classes; confirm.
    datameta = Column(String)


# The earlier Base.metadata.create_all(engine) ran before this model was
# declared, so its table has to be created here. create_all() skips tables
# that already exist.
Base.metadata.create_all(engine)

def save_result(video_path, datameta):
    """Store one analysis result and commit.

    Args:
        video_path: Value for the ``video_path`` column.
        datameta: Value for the ``datameta`` column (the serialized analysis
            output).

    Raises:
        Any exception raised while adding or committing. The transaction is
        rolled back first so the shared module-level ``session`` stays usable.
    """
    # The model's column is `datameta`; the previous `metadata=metadata`
    # referenced an undefined name and a column that does not exist.
    result = AnalysisResult(video_path=video_path, datameta=datameta)
    try:
        session.add(result)
        session.commit()
    except Exception:
        session.rollback()
        raise


In [18]:
from flask import Flask, render_template

# NOTE(review): this rebinds the module-level name `app`, which an earlier
# cell bound to the FastAPI application. From here on `app` is the Flask
# application only. Consider giving the two applications distinct names.
app = Flask(__name__)

@app.route('/')
def index():
    """Render ``index.html`` with every stored ``AnalysisResult`` row."""
    return render_template('index.html', results=session.query(AnalysisResult).all())

In [19]:
def main(video_path, sound_model=None):
    """Run the full analysis pipeline on one video and store the result.

    Steps: read and resize the frames, extract the audio to a temporary WAV
    file, transcribe and classify the audio, run object detection, OCR and
    scene classification on the frames, then save everything with
    ``save_result``.

    Args:
        video_path: Path of the video to analyse.
        sound_model: Model passed to ``classify_sound``. When omitted, a
            module-level ``sound_model`` is used if one exists (the previous
            behavior); otherwise a fresh ``SoundClassifier`` is created.

    Returns:
        The metadata dict with keys ``speech_text``, ``sound_class``,
        ``objects``, ``ocr_texts`` and ``scenes``. Its ``str()`` form is what
        gets stored.
    """
    import os
    import tempfile

    if sound_model is None:
        # Backward compatibility: this function used to read a global.
        sound_model = globals().get("sound_model")
    if sound_model is None:
        # NOTE(review): an untrained classifier yields arbitrary classes;
        # load trained weights before relying on `sound_class`.
        sound_model = SoundClassifier()

    cap = load_video(video_path)
    frames = extract_frames(cap)

    # A per-call temporary directory avoids clashes in the working directory
    # and guarantees the WAV file is removed afterwards.
    with tempfile.TemporaryDirectory() as tmp_dir:
        audio_path = os.path.join(tmp_dir, 'temp_audio.wav')
        extract_audio(video_path, audio_path)

        # Audio analysis
        speech_text = speech_to_text(audio_path)
        sound_class = classify_sound(audio_path, sound_model)

    # Video analysis
    objects = detect_objects(frames)
    ocr_texts = [ocr_on_frame(frame) for frame in frames]
    scenes = classify_scene(frames)

    # Save the results
    metadata = {
        "speech_text": speech_text,
        "sound_class": sound_class,
        "objects": objects,
        "ocr_texts": ocr_texts,
        "scenes": scenes,
    }
    # NOTE(review): str() stores the repr of each value, which is not
    # machine-parseable; consider converting to plain types and using JSON.
    save_result(video_path, str(metadata))
    return metadata

In [20]:
def train_sound_classifier(data_loader, model, criterion, optimizer, num_epochs=None):
    """Train ``model`` with a standard supervised loop.

    Args:
        data_loader: Iterable yielding ``(inputs, labels)`` batches. It is
            iterated once per epoch.
        model: ``torch.nn.Module`` to train; it is switched to train mode.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer over the model's parameters.
        num_epochs: Number of passes over ``data_loader``. When omitted, a
            module-level ``num_epochs`` is used if one exists (the previous
            behavior); otherwise 1.

    Returns:
        A list with the mean batch loss of every epoch (``nan`` for an epoch
        that saw no batches).
    """
    if num_epochs is None:
        # Backward compatibility: this function used to read a global.
        num_epochs = globals().get("num_epochs", 1)

    model.train()
    epoch_losses = []
    for _epoch in range(num_epochs):
        total_loss = 0.0
        batch_count = 0
        for inputs, labels in data_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            batch_count += 1
        epoch_losses.append(total_loss / batch_count if batch_count else float("nan"))
    return epoch_losses
        

In [21]:
def fine_tune_object_detection_model(model, data_loader, optimizer=None):
    """Run one pass of fine-tuning over ``data_loader``.

    Args:
        model: Module called as ``model(images, targets)``. It must return a
            mapping whose values are scalar loss tensors (assumed to follow
            the torchvision detection-model convention; confirm for the model
            actually used). It is switched to train mode.
        data_loader: Iterable yielding ``(images, targets)`` batches.
        optimizer: Optimizer over the model's parameters. When omitted, a
            module-level ``optimizer`` is used if one exists (the previous
            behavior).

    Returns:
        A list with the summed loss of every batch, as Python floats.

    Raises:
        ValueError: If no optimizer is passed and none is defined at module
            level.
    """
    if optimizer is None:
        # Backward compatibility: this function used to read a global.
        optimizer = globals().get("optimizer")
    if optimizer is None:
        raise ValueError(
            "fine_tune_object_detection_model needs an optimizer; pass optimizer=..."
        )

    model.train()
    batch_losses = []
    for images, targets in data_loader:
        loss_dict = model(images, targets)
        # Combine the individual loss terms into a single scalar.
        losses = sum(loss_dict.values())
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()
        batch_losses.append(losses.item())
    return batch_losses

In [None]:
# Run the pipeline on a sample clip (path relative to the working directory)
# and print whatever main() returns.
print(main("example.mp4"))