In [1]:
#pip install --upgrade watchdog
#!pip install keras

#최초 실행시에만 위 주석 제거

In [2]:
import flask
from flask_cors import CORS
from google.cloud import vision_v1p3beta1 as vision
import os
from werkzeug.utils import secure_filename
from datetime import datetime  # datetime 모듈 추가
import numpy as np
import requests
from PIL import Image
import re
from konlpy.tag import Okt
from keras.preprocessing.text import Tokenizer
from sklearn.preprocessing import StandardScaler
from keras.preprocessing.sequence import pad_sequences
from keras.models import load_model
import joblib
import pickle

In [3]:
app = flask.Flask(__name__)
CORS(app)


model = None
app.config['UPLOAD_FOLDER'] = './uploads'
app.config['SECRET_KEY'] = 'AIDEAR'


os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'keys/august-bond-397606-9fed2249633b.json'
image_path = ''  # 전역 변수로 선언

client = vision.ImageAnnotatorClient()

# 업로드 폴더 생성
upload_folder = app.config['UPLOAD_FOLDER']
os.makedirs(upload_folder, exist_ok=True)


def filter_korean(tokens):
    """한글만 남기는 함수"""
    return [token for token in tokens if re.match("^[가-힣]+$", token)]

def get_subscriber_count(api_key, channel_id):
    url = f'https://www.googleapis.com/youtube/v3/channels'
    params = {
        'part': 'statistics',
        'id': channel_id,
        'key': api_key
    }
    response = requests.get(url, params=params)
    if response.status_code == 200:
        data = response.json()
        if 'items' in data and data['items']:
            subscriber_count = int(data['items'][0]['statistics'].get('subscriberCount', 0))
            return subscriber_count
        else:
            print(f'ErrorL No items in response data')
            return 0
    else:
        print(f'Error: {response.status_code}')
        return 0
        
def extract_text_info(image_path, text_annotations):
    with Image.open(image_path) as img:
        width, height = img.size
        print(f"이미지의 가로 크기: {width} 픽셀")
        print(f"이미지의 세로 크기: {height} 픽셀")
    
    text_data = {"text": "", "area": 0, "area_percentage": 0, "char_count": 0}
    if len(text_annotations) <= 1:
        return text_data
    
    for text in text_annotations[1:]:
        char_count = len(text.description)
        vertices = text.bounding_poly.vertices
        text_width = vertices[1].x - vertices[0].x                                                                                                             
        text_height = vertices[3].y - vertices[0].y
        area = text_width * text_height
        area_percentage = round((area / (width * height)) * 100, 2)
        char_area = int(area / char_count)
        char_area_percentage = round((char_area / (width * height)) * 100, 2)

        if char_area_percentage >= 0.35:
            text_data["text"] += text.description + ' '
            text_data["area"] += area
            text_data["area_percentage"] += area_percentage
            text_data["char_count"] += char_count
            
    return text_data

In [None]:
@app.route("/predict", methods=["POST"])

def predict():
    print("")
    print("========================")
    print("        앱 시작")
    print("========================")
    print("")
    global model
    global image_path  # 전역 변수로 사용
    if model is None:
        
        model = load_model(f'./models/model_category_0.h5')
    if flask.request.method == "POST":
        if 'file' not in flask.request.files:
            return 'No file part'
        file = flask.request.files['file']
        if file.filename == '':
            return 'No selected file'
        if file:
            current_time = datetime.now()
            timestamp = current_time.strftime("%Y%m%d%H%M%S")
            filename = secure_filename(f"{timestamp}_{file.filename}")  # 타임스탬프를 파일 이름에 추가
            # 이미지 파일의 안전한 파일 이름을 생성합니다.

            # 이미지를 업로드 폴더에 저장합니다.
            file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))

            # 이미지의 URL을 생성합니다.
            image_url = f"/{app.config['UPLOAD_FOLDER']}/{filename}"
            image_path = os.path.join(upload_folder, filename)  # 이미지 경로 업데이트
                
            # title은 form 데이터로 받아옵니다.
            channel_id = flask.request.form.get('channelID')
            
            tokenizer = Tokenizer()
            okt = Okt()
            
            title = flask.request.form.get('title')
            thumbnail = flask.request.files.get('file')
            username = flask.request.form.get('username')
            useremail = flask.request.form.get('useremail')
            category = flask.request.form.get('category')
            
            print('-----------------------------------------')
            # 저장된 토큰화 객체를 불러옴
            with open(f'./token/{category}_tokenizer_title.pickle', 'rb') as handle:
                loaded_title_tokenizer = pickle.load(handle)

            # loaded_tokenizer.word_index에서 따옴표 제거
            word_index_without_quotes = {re.sub(r"['\"]", '', key): value for key, value in loaded_title_tokenizer.word_index.items()}
            
            # 예측에 사용할 텍스트를 토큰화
            filtered_title_tokens = filter_korean(okt.nouns(title))
            print("filtered_title_tokens : ", filtered_title_tokens[:10] )

            filtered_title_tokens = [token for token in filtered_title_tokens if token in word_index_without_quotes]
            print("filtered_title_tokens : ", filtered_title_tokens[:10] )
            # 각 단어를 해당 인덱스로 변환
            title_indices = [word_index_without_quotes.get(token, 0) for token in filtered_title_tokens ]
            print("title_indices: ", title_indices)

            # 리스트 형태로 모델에 전달
            title_input = [title_indices]

            # 아래에서 패딩을 적용하여 시퀀스 길이를 맞춥니다.
            title_tokens = pad_sequences(title_input, maxlen=15)
            print("최종 title: ", title_tokens)

            
            #  --------------------모델 불러오기--------------------
            model = load_model(f'./models/model_category_{category}.h5')
      
            # API 키
            
            api_key = 'api key'
            

            # 유튜브 API요청을 통한 타이틀 및 구독자 수 입력
            title_length = len(title)
            
            try:
                subscriber_count = get_subscriber_count(api_key, channel_id)
            except Exception as e:
                print(f"API 호출 중 에러 발생: {e}")
                subscriber_count = 0

                  
            with open(image_path, 'rb') as image_file:
                content = image_file.read()
            image = vision.Image(content=content)
            response = client.text_detection(image=image)
            if response.text_annotations:
                extract_text = response.text_annotations[0].description
            else:
                # Handle the case when text_annotations is empty or None
                extract_text = ""
            
            
#             print('<<<<<<<<<< 썸네일로부터 추출된 텍스트 >>>>>>>>>>')
#             print("extract_text: ",extract_text)
            
            # 저장된 토큰화 객체를 불러옴
        
            print('-----------------------------------------')
            with open(f'./token/{category}_tokenizer_text.pickle', 'rb') as handle:
                loaded_text_tokenizer = pickle.load(handle)
    
            # loaded_tokenizer.word_index에서 따옴표 제거
            word_index_without_quotes = {re.sub(r"['\"]", '', key): value for key, value in loaded_text_tokenizer.word_index.items()}
            
           
            filtered_thumbnail_text = filter_korean(okt.nouns(extract_text))
            print("filtered_thumbnail_text: ", filtered_thumbnail_text[:10])

            filtered_thumbnail_text = [token for token in filtered_thumbnail_text if token in word_index_without_quotes]
            print("filtered_thumbnail_text: ", filtered_thumbnail_text[:10])

            # 각 단어를 해당 인덱스로 변환
            thumbnail_indices = [word_index_without_quotes.get(token, 0) for token in filtered_thumbnail_text]
            print("thumbnail_indices: ", thumbnail_indices)

            # 리스트 형태로 모델에 전달
            thumbnail_input = [thumbnail_indices]

            # 아래에서 패딩을 적용하여 시퀀스 길이를 맞춥니다.
            thumbnail_tokens = pad_sequences(thumbnail_input, maxlen=15)
            print("최종 썸네일 토큰: ", thumbnail_tokens)
            print('-----------------------------------------')
            

            
            #텍스트 감지
            
            text_data = extract_text_info(image_path, response.text_annotations)
            char_count = text_data["char_count"]
            total_area_percentage = text_data["area_percentage"]
            char_area_percentage = round(text_data["area_percentage"] / text_data["char_count"], 2) if text_data["char_count"] > 0 else 0
            
            # 얼굴 감지
            has_face = 1 if response.face_annotations else 0

            # 유해한 콘텐츠 감지
            violence = response.safe_search_annotation.violence
            racy = response.safe_search_annotation.racy
            
      
            print(image_url)
        
            if thumbnail:
                print("채널아이디: ",channel_id)
                print("썸네일:", filename)
                print("제목:", title)
                print("카테고리: ", category)
                print("Received file: ", thumbnail.filename)
                print("title_length: ",title_length)
                print("char_count: ",char_count)
                print("구독자 수: ", subscriber_count)
                print("폭력성: ", violence)
                print("선정성: ",racy)
                print("썸네일 속 글씨: ",extract_text[:10])
                print("제목 길이: ",title_length)
                print("전체 비율: ",total_area_percentage)
                print("글자 비율: ",char_area_percentage)
                print("얼굴 유무: ",has_face)
                print("제목 토큰: ",title_tokens)
                print("썸네일 토큰: ",thumbnail_tokens)

                
               ####모델 작성하기#####
            
            
            backup_violence = violence
            backup_racy = racy
            backup_subscriber_count = subscriber_count
            
            title_length = np.array(title_length).reshape(-1, 1)
            char_count = np.array(char_count).reshape(-1, 1)
            total_area_percentage = np.array(total_area_percentage).reshape(-1, 1)
            char_area_percentage = np.array(char_area_percentage).reshape(-1, 1)
            has_face = np.array(has_face).reshape(-1, 1)
            violence = np.array(violence).reshape(-1, 1)
            racy = np.array(racy).reshape(-1, 1)
            subscriber_count = np.array(subscriber_count).reshape(-1, 1)
            
            
            # scaler
            # StandardScaler 객체 생성
            scaler1 = joblib.load(f"./scaler/{category}_scaler1.pkl")
            scaler2 = joblib.load(f"./scaler/{category}_scaler2.pkl")
            scaler3 = joblib.load(f"./scaler/{category}_scaler3.pkl")
            scaler4 = joblib.load(f"./scaler/{category}_scaler4.pkl")
            scaler5 = joblib.load(f"./scaler/{category}_scaler5.pkl")
            scaler6 = joblib.load(f"./scaler/{category}_scaler6.pkl")
            scaler7 = joblib.load(f"./scaler/{category}_scaler7.pkl")
            scaler8 = joblib.load(f"./scaler/{category}_scaler8.pkl")
    
            scaled_title_length = scaler1.transform(title_length)
            scaled_char_count = scaler2.transform(char_count)
            scaled_total_area_percentage = scaler3.transform(total_area_percentage)
            scaled_char_area_percentage = scaler4.transform(char_area_percentage)
            scaled_has_face = scaler5.transform(has_face)
            scaled_violence = scaler6.transform(violence)
            scaled_racy = scaler7.transform(racy)
            scaled_subscriber_count = scaler8.transform(subscriber_count)
    

            # 모델 입력 구성
            inputs = [
                scaled_title_length, scaled_char_count, scaled_total_area_percentage, scaled_char_area_percentage,
                scaled_has_face, scaled_violence, scaled_racy, scaled_subscriber_count,
                title_tokens, thumbnail_tokens
            ]
            
            
            title_tokens_sum = np.sum(title_tokens)
            thumbnail_tokens_sum = np.sum(thumbnail_tokens)
            check_text = 0 # 정상
            if title_tokens_sum == 0  and thumbnail_tokens_sum == 0:
                check_text = 1 #비정상
            elif title_tokens_sum == 0:
                check_text == 2 # 제목 오류
            else:
                check_text == 3 # 썸네일 오류
                
                
                
            # 모델 예측
            predicted_value = model.predict(inputs)
            
            print('scaled data')
            print("scaled_title_length: ",scaled_title_length)
            print("scaled_char_count: ",scaled_char_count)
            print("scaled_total_area_percentage: ",scaled_total_area_percentage)
            print("scaled_char_area_percentage: ",scaled_char_area_percentage)
            print("scaled_has_face: ",scaled_has_face)
            print("scaled_violence: ",scaled_violence)
            print("scaled_racy: ",scaled_racy)
            print("scaled_subscriber_count: ",scaled_subscriber_count)
            print("prevalue_0: ",predicted_value[0][0])
            print("prevalue_1: ",predicted_value[0][1])
            print("prevalue_2: ",predicted_value[0][2])
            print("prevalue_3: ",predicted_value[0][3])
            print("check_text: ",check_text)
            return f'{predicted_value},{backup_subscriber_count},{check_text},{backup_violence},{backup_racy}'
#             return "1000,1234"     
            
if __name__ == "__main__":
    #app.debug = True
    app.run()


 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit



       앱 시작

-----------------------------------------
filtered_title_tokens :  ['톰캣', '버전', '서버', '앹', '로컬호스트', '아파치', '톰캣', '씨드', '라이브', '프로그램']
filtered_title_tokens :  ['버전', '라이브', '프로그램', '파일']
title_indices:  [1123, 644, 38, 377]
최종 title:  [[   0    0    0    0    0    0    0    0    0    0    0 1123  644   38
   377]]
-----------------------------------------
filtered_thumbnail_text:  ['차', '몇렙', '하루', '재획해', '진짜', '주말', '하루', '재획', '쩐다', '저']
filtered_thumbnail_text:  ['차', '하루', '진짜', '주말', '하루', '저', '주말', '사냥', '소재', '비']
thumbnail_indices:  [166, 81, 62, 1523, 81, 230, 1523, 544, 902, 19, 62, 81, 544, 141, 606, 141, 171, 544, 62, 1623, 37, 544, 169, 230, 544, 544, 103, 559, 1210, 544, 384, 166, 495, 544, 394, 726]
최종 썸네일 토큰:  [[ 544  169  230  544  544  103  559 1210  544  384  166  495  544  394
   726]]
-----------------------------------------
이미지의 가로 크기: 706 픽셀
이미지의 세로 크기: 1200 픽셀
/./uploads/20230926161145_i013918706503.gif
채널아이디:  UCWmkrCwxD6PK5moIQI1fNDw
썸네일: 20230

127.0.0.1 - - [26/Sep/2023 16:11:53] "POST /predict HTTP/1.1" 200 -


scaled data
scaled_title_length:  [[-0.31776386]]
scaled_char_count:  [[-2.0319181]]
scaled_total_area_percentage:  [[-2.08680798]]
scaled_char_area_percentage:  [[-0.31220423]]
scaled_has_face:  [[-0.83420443]]
scaled_violence:  [[-2.89130591]]
scaled_racy:  [[-1.99029909]]
scaled_subscriber_count:  [[-0.584622]]
prevalue_0:  0.1830179
prevalue_1:  0.04470728
prevalue_2:  0.6892226
prevalue_3:  0.08305227
check_text:  0

       앱 시작

-----------------------------------------
filtered_title_tokens :  ['톰캣', '버전', '서버', '앹', '로컬호스트', '아파치', '톰캣', '씨드', '라이브', '프로그램']
filtered_title_tokens :  ['버전', '서버', '라이브', '프로그램', '파일']
title_indices:  [249, 147, 208, 369, 4274]
최종 title:  [[   0    0    0    0    0    0    0    0    0    0  249  147  208  369
  4274]]
-----------------------------------------
filtered_thumbnail_text:  ['차', '몇렙', '하루', '재획해', '진짜', '주말', '하루', '재획', '쩐다', '저']
filtered_thumbnail_text:  ['차', '하루', '진짜', '하루', '저', '사냥', '이번', '비', '부담', '진짜']
thumbnail_indices:  [

127.0.0.1 - - [26/Sep/2023 16:12:18] "POST /predict HTTP/1.1" 200 -


scaled data
scaled_title_length:  [[0.04375886]]
scaled_char_count:  [[-1.51679138]]
scaled_total_area_percentage:  [[-1.69533921]]
scaled_char_area_percentage:  [[-0.43353182]]
scaled_has_face:  [[-0.93099739]]
scaled_violence:  [[-3.0649567]]
scaled_racy:  [[-1.85255814]]
scaled_subscriber_count:  [[-0.19827987]]
prevalue_0:  0.100976884
prevalue_1:  0.3145111
prevalue_2:  0.42341563
prevalue_3:  0.16109641
check_text:  0

       앱 시작

-----------------------------------------
filtered_title_tokens :  ['톰캣', '버전', '서버', '앹', '로컬호스트', '아파치', '톰캣', '씨드', '라이브', '프로그램']
filtered_title_tokens :  ['버전', '라이브', '프로그램', '파일']
title_indices:  [1123, 644, 38, 377]
최종 title:  [[   0    0    0    0    0    0    0    0    0    0    0 1123  644   38
   377]]
-----------------------------------------
filtered_thumbnail_text:  ['프로그래머', '스', '준', '제목', '한글', '텍스트', '썸네일', '업로드', '카테고리', '독자']
filtered_thumbnail_text:  ['스', '수', '속', '전체', '얼굴', '스', '이동', '코딩', '테스트', '뼈']
thumbnail_indices:  [269

127.0.0.1 - - [26/Sep/2023 16:13:03] "POST /predict HTTP/1.1" 200 -


scaled data
scaled_title_length:  [[-0.31776386]]
scaled_char_count:  [[-2.1484003]]
scaled_total_area_percentage:  [[-2.19694306]]
scaled_char_area_percentage:  [[-1.8812346]]
scaled_has_face:  [[-0.83420443]]
scaled_violence:  [[-2.89130591]]
scaled_racy:  [[-1.99029909]]
scaled_subscriber_count:  [[-0.584622]]
prevalue_0:  0.17889766
prevalue_1:  0.04347523
prevalue_2:  0.69272166
prevalue_3:  0.084905386
check_text:  0

       앱 시작

-----------------------------------------
filtered_title_tokens :  ['톰캣', '버전', '서버', '앹', '로컬호스트', '아파치', '톰캣', '씨드', '라이브', '프로그램']
filtered_title_tokens :  ['버전', '라이브', '프로그램', '파일']
title_indices:  [1123, 644, 38, 377]
최종 title:  [[   0    0    0    0    0    0    0    0    0    0    0 1123  644   38
   377]]
-----------------------------------------
filtered_thumbnail_text:  []
filtered_thumbnail_text:  []
thumbnail_indices:  []
최종 썸네일 토큰:  [[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]
-----------------------------------------
이미지의 가로 크기: 1920 픽셀
이미지의 세로 크기: 

127.0.0.1 - - [26/Sep/2023 16:13:20] "POST /predict HTTP/1.1" 200 -


scaled data
scaled_title_length:  [[-0.31776386]]
scaled_char_count:  [[-2.1484003]]
scaled_total_area_percentage:  [[-2.19694306]]
scaled_char_area_percentage:  [[-1.8812346]]
scaled_has_face:  [[-0.83420443]]
scaled_violence:  [[-2.89130591]]
scaled_racy:  [[-1.99029909]]
scaled_subscriber_count:  [[-0.584622]]
prevalue_0:  0.1787323
prevalue_1:  0.043722034
prevalue_2:  0.69220966
prevalue_3:  0.08533602
check_text:  0

       앱 시작

-----------------------------------------
filtered_title_tokens :  ['톰캣', '버전', '서버', '앹', '로컬호스트', '아파치', '톰캣', '씨드', '라이브', '프로그램']
filtered_title_tokens :  ['버전', '라이브', '프로그램', '파일']
title_indices:  [1123, 644, 38, 377]
최종 title:  [[   0    0    0    0    0    0    0    0    0    0    0 1123  644   38
   377]]
-----------------------------------------
filtered_thumbnail_text:  ['며']
filtered_thumbnail_text:  []
thumbnail_indices:  []
최종 썸네일 토큰:  [[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]
-----------------------------------------
이미지의 가로 크기: 1920 픽셀
이미지의 세로 크기

127.0.0.1 - - [26/Sep/2023 16:13:29] "POST /predict HTTP/1.1" 200 -


scaled data
scaled_title_length:  [[-0.31776386]]
scaled_char_count:  [[-2.1484003]]
scaled_total_area_percentage:  [[-2.19694306]]
scaled_char_area_percentage:  [[-1.8812346]]
scaled_has_face:  [[-0.83420443]]
scaled_violence:  [[-2.89130591]]
scaled_racy:  [[-1.99029909]]
scaled_subscriber_count:  [[-0.584622]]
prevalue_0:  0.1787323
prevalue_1:  0.043722034
prevalue_2:  0.69220966
prevalue_3:  0.08533602
check_text:  0

       앱 시작

-----------------------------------------
filtered_title_tokens :  ['제목', '뭘', '용']
filtered_title_tokens :  ['뭘', '용']
title_indices:  [292, 57]
최종 title:  [[  0   0   0   0   0   0   0   0   0   0   0   0   0 292  57]]
-----------------------------------------
filtered_thumbnail_text:  ['며']
filtered_thumbnail_text:  []
thumbnail_indices:  []
최종 썸네일 토큰:  [[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]
-----------------------------------------
이미지의 가로 크기: 1920 픽셀
이미지의 세로 크기: 962 픽셀
/./uploads/20230926161351_cta-bg.jpg
채널아이디:  UCWmkrCwxD6PK5moIQI1fNDw
썸네일: 20230926161

127.0.0.1 - - [26/Sep/2023 16:13:53] "POST /predict HTTP/1.1" 200 -


scaled data
scaled_title_length:  [[-2.02490337]]
scaled_char_count:  [[-2.1484003]]
scaled_total_area_percentage:  [[-2.19694306]]
scaled_char_area_percentage:  [[-1.8812346]]
scaled_has_face:  [[-0.83420443]]
scaled_violence:  [[-2.89130591]]
scaled_racy:  [[-1.99029909]]
scaled_subscriber_count:  [[-0.584622]]
prevalue_0:  0.031111877
prevalue_1:  0.09866282
prevalue_2:  0.09925246
prevalue_3:  0.7709728
check_text:  0
