In [12]:
!pip install gradio transformers sentence_transformers openai PyPDF2 pdfplumber bs4 semchunk faiss-gpu tiktoken

Collecting tiktoken
  Downloading tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
Downloading tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m21.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tiktoken
Successfully installed tiktoken-0.8.0


In [21]:
import gradio as gr
import time
from fastapi import FastAPI
from datetime import datetime
import base64
from openai import OpenAI
from PIL import Image
from transformers import CLIPProcessor, CLIPModel
from sentence_transformers import SentenceTransformer
from PyPDF2 import PdfReader  # PDF 분석을 위한 라이브러리
import numpy as np
import torch
import faiss
from ProcFile import process_file


client = OpenAI(
    api_key="EMPTY",
    base_url="https://bf60-35-198-247-133.ngrok-free.app/v1"  # ngrok URL로 교체
)

###
# 모델 로드
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
embedding_model = SentenceTransformer('all-mpnet-base-v2')

# 글로벌 변수
res = faiss.StandardGpuResources()  # GPU 리소스 생성
text_faiss_index = None
image_faiss_index = None
chunk_data = []
chunk_metadata = []
image_embeddings = []
image_metadata = []
GPU_FLAG =False


# 장치 선택 처리
if torch.cuda.is_available():
  cuda_version = torch.version.cuda
  gpu_name = torch.cuda.get_device_name(0)
  GPU_FLAG = True
###


# Initialize global variables
chat_rooms = [{"id": "Chat1", "timestamp": datetime.now()}]
chat_histories = {"Chat1": []}
ai_history = {"Chat1": []}
current_chat = "Chat1"

def create_new_chat():
    global chat_rooms, chat_histories, current_chat, ai_history
    new_chat_id = f"Chat{len(chat_rooms) + 1}"
    chat_rooms.append({"id": new_chat_id, "timestamp": datetime.now()})
    chat_histories[new_chat_id] = []
    ai_history[new_chat_id] = []
    current_chat = new_chat_id
    return (
        gr.update(choices=[room["id"] for room in chat_rooms], value=new_chat_id),
        [],
    )
#이 부분에서 faiss 저장 & search까지
def extract_pdf_text(file_path):
    """PDF 파일에서 텍스트를 추출하는 함수."""
    try:
        reader = PdfReader(file_path)
        text = ""
        for page in reader.pages:
            text += page.extract_text()
        return text
    except Exception as e:
        return f"Error reading PDF file: {str(e)}"

def search_file(query):
    global text_faiss_index, chunk_metadata

    if text_faiss_index is None:
        return "No indexed data. Please upload and process a file first."

    query_embedding = embedding_model.encode([query])
    query_embedding = query_embedding / np.linalg.norm(query_embedding, axis=1, keepdims=True)

    distances, indices = text_faiss_index.search(query_embedding, 5)

    results = []
    for i, (distance, idx) in enumerate(zip(distances[0], indices[0])):
        if distance > 100:
            continue
        if idx < len(chunk_metadata):
            meta = chunk_metadata[idx]
            results.append(
                f"Result {i + 1}: File: {meta['file']} | Type: {meta['type']} | "
                f"{'Page' if meta['type'] == 'PDF' else 'Row'}: {meta.get('page', meta.get('row', 'N/A'))} | "
                f"Distance: {distance:.4f}\n"
                f"Content: {meta['chunk_text'][:200]}..."
            )
        top_chunks = [item.split('Content: ')[1] for item in results[:3]]
        print("\n".join(results) if results else "No relevant results found within the distance threshold.")
    return top_chunks


def add_message(history, message):
    global current_chat, chat_histories, ai_history, text_faiss_index

    if current_chat is None:
        _, history = create_new_chat()
    else:
        history = chat_histories[current_chat]

    ai_content = []

    if message["files"]:
        for file_path in message["files"]:
            print(file_path)
            if file_path.lower().endswith(('.png', '.jpg', '.jpeg')):
                #llava는 사진업로드 1장만 가능; AI 쿼리용 history에서 이미지 삭제
                # for message in ai_history[current_chat]:
                #     if 'content' in message and isinstance(message['content'], list):
                #         message['content'] = [item for item in message['content'] if item.get('type') != 'image_url']

                try:
                    history.append({"role": "user", "content": {"path": {file_path}}})

                    # 이미지 Base64 데이터를 AI쿼리용 history의 content에 기록
                    with open(file_path, "rb") as image_file:
                        base64_image = base64.b64encode(image_file.read()).decode('utf-8')
                        image_base64 = f"data:image/jpeg;base64,{base64_image}"
                        ai_content.append({"type" : "image_url", "image_url": {"url" : image_base64}})

                except Exception as e:
                    history.append({"role": "assistant", "content": f"Error processing file {file_path}: {str(e)}"})

            elif file_path.lower().endswith('.pdf'):
                # PDF 파일 처리
                pdf_text = extract_pdf_text(file_path)      #이 부분!!
                if "Error" in pdf_text:
                    history.append({"role": "assistant", "content": pdf_text})
                else:
                    #파일 프로세스
                    with open(file_path, 'rb') as file:
                        file_name, text_chunks, metadata_chunks = process_file(file)
                    # file_name, text_chunks, metadata_chunks = process_file(file_path)
                    chunk_data.extend(text_chunks)
                    chunk_metadata.extend(metadata_chunks)

                    embeddings = embedding_model.encode(text_chunks, batch_size=32, show_progress_bar=True)
                    embeddings = embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)
                    d = embeddings.shape[1]

                    if text_faiss_index is None:
                        if(GPU_FLAG):
                            # GPU용 인덱스 생성
                            flat_config = faiss.GpuIndexFlatConfig()
                            flat_config.device = 0  # 사용할 GPU 장치 ID 설정 (보통 0번부터 시작)
                            text_faiss_index = faiss.GpuIndexFlatL2(res, d, flat_config)
                        else:

                            text_faiss_index = faiss.IndexFlatL2(d)
                    text_faiss_index.add(embeddings)

                    print(f"{file_name} processed successfully. {len(text_chunks)} chunks indexed.")

                    ##쿼리로부터 서치, 결과 + 쿼리 hisotry에 추가
                    file_search = []
                    if (message["text"] != None):
                        topk = search_file(message["text"])
                    for index, result in enumerate(topk):
                        #history.append({"role": "user", "content": result})
                        file_search.append(result)
                    ai_content.append({"type" : "text", "text" : f"Based on the following three sentences: 1. {file_search[0]}, 2. {file_search[1]}, 3. {file_search[2]}"})
                    print("search done!")
                    #history.append({"role": "user", "content": f"PDF uploaded: {file_path}\nExtracted text: {pdf_text[:500]}..."}) #이 부분!!!
            else:
                # 이미지 파일이 아닌 경우
                history.append({"role": "user", "content": f"File {file_path} is not supported."})

    if (message["text"]) == None:
        history.append({"role": "user", "content": "Please input any message"})
        return
    elif (message["text"]):
        history.append({"role": "user", "content": message["text"]})
        ai_content.append({"type" : "text", "text" : message["text"]})

    print(ai_content)
    # 업데이트된 히스토리를 저장
    chat_histories[current_chat] = history
    ai_history[current_chat] = [{"role": "user", "content" : ai_content}]
    return history, gr.MultimodalTextbox(value=None, interactive=False)

def bot(history: list):
    global current_chat, chat_histories, ai_history

    try:
        messages = [{"role": "system", "content": "You are a helpful assistant."}]
        # for msg in history:
        #     if msg.get("image"):
        #         messages.append({"role": "user", "content": f"Image data: {msg['image']}"})
        #     else:
        #         messages.append({"role": "user", "content": msg["content"]})

        response = client.chat.completions.create(
            model="llava-hf/llava-v1.6-mistral-7b-hf",
            messages=ai_history[current_chat],
            stream=True
        )

        history.append({"role": "assistant", "content": ""})
        for chunk in response:
            if chunk.choices[0].delta.content:
                history[-1]["content"] += chunk.choices[0].delta.content
                time.sleep(0.05)
                yield history

    except Exception as e:
        history.append({"role": "assistant", "content": f"Error: {str(e)}"})
        yield history

    chat_histories[current_chat] = history

def switch_chat(chat_id):
    global current_chat, chat_histories
    current_chat = chat_id
    return chat_histories.get(chat_id, [])

with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column(scale=1):
            new_chat_button = gr.Button("New Chat")
            chat_list = gr.Dropdown(
                label="Chat Rooms",
                choices=[room["id"] for room in chat_rooms],
                value="Chat1",
            )
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(type="messages")
            chat_input = gr.MultimodalTextbox(
                interactive=True,
                file_count="multiple",
                placeholder="Enter message or upload file...",
                show_label=False,
            )

    chat_msg = chat_input.submit(
        add_message, [chatbot, chat_input], [chatbot, chat_input]
    )
    bot_msg = chat_msg.then(bot, chatbot, chatbot, api_name="bot_response")
    bot_msg.then(lambda: gr.MultimodalTextbox(interactive=True), None, [chat_input])

    new_chat_button.click(create_new_chat, [], [chat_list, chatbot])
    chat_list.change(switch_chat, inputs=[chat_list], outputs=[chatbot])

#gr.mount_gradio_app(app, demo, path="/gradio")
demo.launch(debug=True).share=True


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://f0b8994821588c8a18.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


/tmp/gradio/587909888c9923e3fa0dc8f6818d9ca5566f2bf63568dd6f94b54b6dd2d9704a/Lecture-01-LLM-Input-Output.pdf


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

Lecture-01-LLM-Input-Output.pdf processed successfully. 68 chunks indexed.
Result 1: File: Lecture-01-LLM-Input-Output.pdf | Type: PDF | Page: 3 | Distance: 1.1657
Content: LLM Process embedding deembedding each tokens transformer each vectors texts token converted into decoder token texts converted into a learnable blocks a token vector tokenizing, embedding, transforme...
Result 1: File: Lecture-01-LLM-Input-Output.pdf | Type: PDF | Page: 3 | Distance: 1.1657
Content: LLM Process embedding deembedding each tokens transformer each vectors texts token converted into decoder token texts converted into a learnable blocks a token vector tokenizing, embedding, transforme...
Result 2: File: Lecture-01-LLM-Input-Output.pdf | Type: PDF | Page: 14 | Distance: 1.3843
Content: Embedding in LLM 1. Token Embedding From tokens to vectors tokenizer vocab size로 부터 encode된 token id를 vector로 1대1 mapping vector set는 vocab size 만큼의 vector table로 구성 embedded vector는 learnable paramet...
Result 1: File: Le