<a href="https://colab.research.google.com/github/CrazyTiger8903/Study_Pytorch/blob/main/ViolenceDetectionWithImageCaptioning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install streamlit -q

In [None]:
!pip install pyngrok==5.1.0

In [None]:
import os
from getpass import getpass

from pyngrok import ngrok

# Never store the token in the notebook itself: take it from the
# NGROK_AUTH_TOKEN environment variable when set, otherwise prompt for it
# (getpass keeps the typed value out of the saved cell output).
ngrok_auth_token = os.environ.get("NGROK_AUTH_TOKEN") or getpass("ngrok auth token: ")
ngrok.set_auth_token(ngrok_auth_token)

In [None]:
# 필요한 라이브러리 및 패키지 설치
!pip install git+https://github.com/huggingface/transformers.git@main
!pip install -q datasets

In [None]:
!pip install transformers

In [None]:
%%writefile app.py
import streamlit as st
import torch
from PIL import Image, ImageOps
import matplotlib.pyplot as plt
from transformers import BlipForConditionalGeneration, AutoProcessor
from torch.utils.data import DataLoader
from transformers import AutoProcessor, BlipForConditionalGeneration
import nltk
from nltk.translate.bleu_score import sentence_bleu
from transformers import AutoTokenizer  # Assuming you are using a pretrained model from Hugging Face
import torch.nn.functional as F
from transformers import BertForSequenceClassification, BertTokenizer
nltk.download("punkt")

# Streamlit page config. This must be the first Streamlit command executed,
# so it runs before the cached loader below (which may render a spinner).
st.set_page_config(layout="wide", page_title="Image Captioning and Classification")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Collapses the classifier's class ids 0-7 onto three coarse risk levels.
label_map = {0: 'normal', 1: 'normal', 2: 'caution', 3: 'caution', 4: 'caution', 5: 'danger',
             6: 'danger', 7: 'danger'}

# Location of the fine-tuned BLIP captioning checkpoint
model_save_path = "/content/drive/MyDrive/blip_finetuning_weight/5_blip_model_weights.pth"
# Location of the fine-tuned BERT classifier and its tokenizer
bert_model_path = '/content/drive/MyDrive/blip_mc'


@st.cache_resource
def load_models():
    """Load the captioning and classification models once per server process.

    Streamlit re-executes this script on every user interaction; without the
    cache every rerun would reload all weights from disk.

    Returns:
        A tuple ``(blip_model, processor, bert_model, tokenizer)``. Both models
        are in eval mode. Only the BERT classifier is moved to ``device``; BLIP
        is left on the device ``from_pretrained`` puts it on.
    """
    blip = BlipForConditionalGeneration.from_pretrained(model_save_path)
    image_processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-base")

    classifier = BertForSequenceClassification.from_pretrained(bert_model_path)
    bert_tokenizer = BertTokenizer.from_pretrained(bert_model_path)
    classifier.to(device)

    # Inference only: switch off dropout and other train-time behaviour.
    blip.eval()
    classifier.eval()
    return blip, image_processor, classifier, bert_tokenizer


blip_model, processor, bert_model, tokenizer = load_models()

# Stylesheet injected into the page: team-member cards, footer,
# colour-coded risk labels and the spacer used between sections.
custom_css = """
    <style>
    .team-member {
        text-align: center;
        margin-bottom: 20px;
    }
    .team-member img {
        border-radius: 50%;
        width: 100px;
        height: 100px;
        object-fit: cover;
        margin-bottom: 10px;
    }
    .team-member h3 {
        margin-bottom: 5px;
    }
    .team-member p {
        font-size: 14px;
        color: #666;
    }
    .footer {
        text-align: center;
        padding-top: 20px;
    }
    .footer img {
        max-width: 150px;
        margin-bottom: 10px;
    }
    .risk-label {
        font-size: 20px; /* Increased font size */
        font-weight: bold;
    }
    .normal { color: blue; }
    .caution { color: green; }
    .danger { color: red; }
    .section-space {
        margin-top: 50px; /* Space between sections */
    }
    </style>
    """
# unsafe_allow_html is required for the <style> tag to be rendered as HTML.
st.markdown(custom_css, unsafe_allow_html=True)

# Page heading and a one-line description of the tool
st.title("Image Captioning and Classification")
st.markdown("This tool uses BLIP for image captioning and a BERT model for classification. Upload an image to get started.")

# The upload widget lives in the sidebar; `uploaded_image` is None until a file is chosen
st.sidebar.write("## Upload Image")
uploaded_image = st.sidebar.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

# Main page layout
if uploaded_image is not None:
    # Normalise to RGB so palette, grayscale and RGBA uploads are all handled
    # the same way downstream.
    image = Image.open(uploaded_image).convert("RGB")
    # Crop/resize to a fixed 500x500 for a consistent display size.
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    image = ImageOps.fit(image, (500, 500), Image.LANCZOS)

    col1, col2 = st.columns([1, 2])

    with col1:
        st.image(image, caption='Uploaded Image')

    with col2:
        st.subheader("Results")

        # Caption generation. The inputs are moved to whichever device the
        # BLIP model lives on so the two can never end up mismatched.
        blip_inputs = processor(images=image, padding="max_length", return_tensors="pt").to(blip_model.device)
        with torch.no_grad():
            generated_ids = blip_model.generate(**blip_inputs)
        generated_captions = processor.batch_decode(generated_ids, skip_special_tokens=True)

        st.markdown("### Generated Captions")
        for caption in generated_captions:
            st.markdown(f"* {caption}")

        # Classification: feed the generated captions to the BERT classifier.
        encoded = tokenizer(generated_captions, padding=True, truncation=True, return_tensors="pt")
        with torch.no_grad():
            logits = bert_model(
                input_ids=encoded["input_ids"].to(device),
                attention_mask=encoded["attention_mask"].to(device),
            ).logits
        probabilities = F.softmax(logits, dim=1)
        # One predicted class id per caption.
        predicted_ids = torch.argmax(probabilities, dim=1).tolist()

        st.markdown("### Classification Results")
        for label_id in predicted_ids:
            # The risk level names in label_map double as the CSS class names
            # (.normal / .caution / .danger), so a single lookup serves both.
            risk_level = label_map[label_id]
            st.markdown(f"<span class='risk-label {risk_level}'>Predicted degree of danger: {risk_level}</span>", unsafe_allow_html=True)

else:
    st.write("Please upload an image to start.")

# Vertical gap between the main content and the footer
st.markdown("<div class='section-space'></div>", unsafe_allow_html=True)

# Footer: team name, affiliation and contact address
footer_html = """
    <div class='footer'>
        <p><strong>Follow Follow Me</strong><br>
        Chungbuk National University<br>
        For inquiries, contact us at MDG@cbnu.ac.kr</p>
    </div>
    """
st.markdown(footer_html, unsafe_allow_html=True)

In [None]:
# List the working directory (e.g. to confirm that app.py was written).
!ls

In [None]:
# Start the Streamlit app on port 80 as a background job (`&`) under nohup,
# so the cell returns immediately while the server keeps running.
!nohup streamlit run app.py --server.port 80 &

In [None]:
# Open an ngrok tunnel to the Streamlit server on local port 80 (the port
# given to `streamlit run`). pyngrok 5.x names the first parameter `addr`
# (it was `port` in 4.x), so the address is passed positionally rather than
# via an unrecognised `port=` keyword.
url = ngrok.connect(80)
url

In [None]:
# Stop the ngrok process started by pyngrok once the demo is finished.
ngrok.kill()