In [1]:
from flask import Flask
from flask import request
from pytube import YouTube
import os
import re
import cv2
import easyocr
import sqlite3
from transformers import RobertaTokenizerFast, RobertaForSequenceClassification, TextClassificationPipeline

In [2]:
HUGGINGFACE_MODEL_PATH = "bespin-global/klue-roberta-small-3i4k-intent-classification"
loaded_tokenizer = RobertaTokenizerFast.from_pretrained(HUGGINGFACE_MODEL_PATH)
loaded_model = RobertaForSequenceClassification.from_pretrained(HUGGINGFACE_MODEL_PATH)

# using Pipeline
text_classifier = TextClassificationPipeline(
    tokenizer=loaded_tokenizer,
    model=loaded_model,
    return_all_scores=True
)

reader = easyocr.Reader(['ko', 'en'])

Using C:\Users\kimjm\AppData\Local\torch_extensions\torch_extensions\Cache\py38_cu117 as PyTorch extensions root...
Detected CUDA files, patching ldflags
Emitting ninja build file C:\Users\kimjm\AppData\Local\torch_extensions\torch_extensions\Cache\py38_cu117\cuda_kernel\build.ninja...
Failed to load CUDA kernels. Mra requires custom CUDA kernels. Please verify that compatible versions of PyTorch and CUDA Toolkit are installed: Command '['where', 'cl']' returned non-zero exit status 1.
Xformers is not installed correctly. If you want to use memory_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.


In [3]:
api = Flask(import_name='__name__')



@api.route('/makeTL', methods = ['GET'])
def TL():
    videoID = request.args.get('v', None)
    
    con = sqlite3.connect('timeline.db')
    con.row_factory = lambda cursor, row: row[0]
    cur = con.cursor()
    
    cur.execute('SELECT pk FROM video WHERE id = ?',[videoID])
    pk = cur.fetchall()
    
    if pk:
        cur.execute('SELECT cmt FROM comments WHERE fk = (SELECT pk FROM video WHERE fk = ?)',[pk[0]])
        cmts = cur.fetchall()
        con.close()
        return '<br>'.join(cmts)
    
    else:    
        url = "https://www.youtube.com/watch?v=" + videoID
        yt = YouTube(url)
        stream = yt.streams.get_highest_resolution()
        stream.download('./video')

        filepath = os.listdir('./video')[0]
        filepath = './video/' + filepath
        os.rename(filepath, ''.join(re.findall(r'[가-힣A-Za-z\\/.0-9]+',filepath)))

        filepath = os.listdir('./video')[0]
        filepath = './video/' + filepath

        video = cv2.VideoCapture(filepath)
        length = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) # 전체 프레임 갯수
        width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH)) # 가로 해상도
        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)) # 세로 해상도
        fps = round(video.get(cv2.CAP_PROP_FPS)) # 초당 프레임 갯수

        count = 0
        imgpath = './images'
        while(video.isOpened()):
          if video.get(1) >= length:
            break
          ret, image = video.read()
          if(int(video.get(1)) % fps == 0): #앞서 불러온 fps 값을 사용하여 1초마다 추출
              cv2.imwrite(imgpath + "/frame%d.jpg" % count, image)
              count += 1
        video.release()

        imgs = sorted(os.listdir(imgpath), key = lambda x: int(re.search(r'(\d{1,})',x).group()))
        result = []
        for i in range(len(imgs)):
          sbres = reader.readtext(imgpath + '/' + imgs[i])
          result.append(sbres)

        cmt = ''
        cmts = []
        th = 0.7
        cur.execute('INSERT INTO video(id) VALUES(?)',[videoID])
        for k in range(len(result)):
            cm = []
            for i in range(len(result[k])):
                cm.append(result[k][i][1])

            preds_list = text_classifier(' '.join(re.findall(r'[가-힣?]+', ' '.join(cm))))
            pred = sorted(preds_list[0], key = lambda x: x['score'], reverse = True)[0]

            if pred['label'] != 'question':
                continue

            elif pred['score'] > th:
                cmt_sub = cmt
                cmt = ' '.join(cm)

                if len(set(cmt.split(' ')) & set(cmt_sub.split(' '))) > 3:
                    continue


                if k > 3600:
                    if k % 60 < 10:
                        cur.execute('INSERT INTO comments(fk, cmt) VALUES((SELECT pk FROM video WHERE id = ?),?)',\
                                    (videoID, f'{k // 3600}:{(k // 3600 // 60)}:0{k % 60} {" ".join(re.findall(r"[가-힣?0-9]+", cmt))}'))
                        cmts.append(f'{k // 3600}:{(k // 3600 // 60)}:0{k % 60} {" ".join(re.findall(r"[가-힣?0-9]+", cmt))}')
                    else:
                        cur.execute('INSERT INTO comments(fk, cmt) VALUES((SELECT pk FROM video WHERE id = ?),?)',\
                                    (videoID, f'{k // 3600}:{(k // 3600 // 60)}:{k % 60} {" ".join(re.findall(r"[가-힣?0-9]+", cmt))}'))
                        cmts.append(f'{k // 3600}:{(k // 3600 // 60)}:{k % 60} {" ".join(re.findall(r"[가-힣?0-9]+", cmt))}')

                else:
                    if k % 60 < 10:
                        cur.execute('INSERT INTO comments(fk, cmt) VALUES((SELECT pk FROM video WHERE id = ?),?)',\
                                    (videoID, f'{k // 60}:0{k % 60} {" ".join(re.findall(r"[가-힣?0-9]+", cmt))}'))
                        cmts.append(f'{k // 60}:0{k % 60} {" ".join(re.findall(r"[가-힣?0-9]+", cmt))}')
                    else:
                        cur.execute('INSERT INTO comments(fk, cmt) VALUES((SELECT pk FROM video WHERE id = ?),?)',\
                                    (videoID, f'{k // 60}:{k % 60} {" ".join(re.findall(r"[가-힣?0-9]+", cmt))}'))
                        cmts.append(f'{k // 60}:{k % 60} {" ".join(re.findall(r"[가-힣?0-9]+", cmt))}')
        
        
        
        for i in range(len(imgs)):
            os.remove(imgpath + '/' + imgs[i])
        os.remove(filepath)
        
        con.commit()
        con.close()
        
        return '<br>'.join(cmts)
    


if __name__ == '__main__':
    api.run(debug = True)

 * Serving Flask app '__name__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
 * Restarting with stat


SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
