In [1]:
!pip install watchdog
!pip install --no-dependencies --quiet streamlit
!wget https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
!unzip -o ngrok-stable-linux-amd64.zip
!pip install --quiet pyngrok
!pip install --no-dependencies --quiet protobuf==3.20.*   #==4.21.12
!pip install --no-dependencies --quiet validators

Collecting watchdog
  Downloading watchdog-3.0.0-py3-none-manylinux2014_x86_64.whl (82 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m82.1/82.1 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: watchdog
Successfully installed watchdog-3.0.0
--2023-12-21 15:35:46--  https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
Resolving bin.equinox.io (bin.equinox.io)... 54.161.241.46, 18.205.222.128, 52.202.168.65, ...
Connecting to bin.equinox.io (bin.equinox.io)|54.161.241.46|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 13921656 (13M) [application/octet-stream]
Saving to: 'ngrok-stable-linux-amd64.zip'


2023-12-21 15:35:47 (56.8 MB/s) - 'ngrok-stable-linux-amd64.zip' saved [13921656/13921656]

Archive:  ngrok-stable-linux-amd64.zip
  inflating: ngrok                   


In [2]:
!ngrok authtoken "" 

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml                                


In [3]:
%%writefile my_app.py
import numpy as np 
import pandas as pd 
import os

from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import LabelEncoder
from catboost import CatBoostRegressor

import warnings
warnings.filterwarnings("ignore")

from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
from transformers import DebertaTokenizer, DebertaModel
from transformers import BlipProcessor, BlipForConditionalGeneration

from torch.utils.data import DataLoader, Dataset
import torch
import torch.nn as nn

import requests
from PIL import Image 
from io import BytesIO
from tqdm import tqdm
from IPython.display import Image as IPImage, display

import streamlit as st

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)


tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
Distil_bert = DistilBertForSequenceClassification.from_pretrained("distilbert-base-uncased")
Distil_bert.classifier = nn.Sequential(
    nn.Linear(768, 5),
    nn.Softmax(dim=1)
)
Distil_bert.load_state_dict(torch.load('/kaggle/input/nityam-model-1/log_model_state_dict.pth'))
Distil_bert.eval()

# Initialize BLIP processor and model
blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

# Load DeBERTa model and tokenizer
deberta_tokenizer = DebertaTokenizer.from_pretrained('microsoft/deberta-base')
deberta_model = DebertaModel.from_pretrained('microsoft/deberta-base')


class Test_Dataset(Dataset):
    def __init__(self, Comments_):
        self.comments = Comments_.copy()
        self.comments["text"] = self.comments["text"].map(lambda x: tokenizer(x, padding="max_length", truncation=True, return_tensors="pt"))
    
    def __len__(self):
        return len(self.comments)
    
    def __getitem__(self, idx):
        comment = self.comments.loc[idx, "text"]
        return comment

    
def infer(model, Test_DL):
    # Ensure model is in evaluation mode
    model.eval()
    pred = []
    logs = []

    with torch.no_grad():
        for comments in Test_DL:
            masks = comments["attention_mask"].squeeze(1).to(device)
            input_ids = comments["input_ids"].squeeze(1).to(device)

            # Move model to the same device as input tensors
            model.to(device)

            # Perform inference
            output = model(input_ids, attention_mask=masks)

            # Move logits and model components to CPU
            logits = output.logits.cpu().numpy()  # Move logits to CPU and convert to NumPy
            model.to('cpu')  # Move the model back to CPU for consistency

            logs.append(logits)
            pred_class = torch.argmax(output.logits.cpu(), dim=1).item()  # Move prediction to CPU
            pred.append(pred_class)

    return pred, logs


def fetch_image(image_url):

    response = requests.get(image_url)
    image = None
    if(response.status_code == 200):
        image = Image.open(BytesIO(response.content)).convert("RGB")
    else:
        black_image_size = (224, 224)  
        image = Image.new("RGB", black_image_size, "black")
    return image
    
    
def predict(final_text):
    def clean(text):
        text = text.replace("<hyperlink>","").replace("<mention>","")
        return text
    x = final_text
    x = clean(x)

    text_inputs = [x]
    columns = ['text']
    df = pd.DataFrame(text_inputs, columns=columns)

    X_test = df
    Test_data = Test_Dataset(X_test)
    Test_Loader = DataLoader(Test_data, shuffle=False)
    pred,logs = infer(Distil_bert,Test_Loader)

    input_ids = deberta_tokenizer.encode(x, return_tensors='pt')
    with torch.no_grad():
        embeddings = deberta_model(input_ids).last_hidden_state

    flattened_embeddings = embeddings.view(-1, embeddings.size(-1))
    mean_embeddings = torch.mean(flattened_embeddings, dim=0)
    mean_embeddings_np = mean_embeddings.numpy()

    selected_bucket = pred[0]
#     model_path = f"/kaggle/input/regression-models/outputs/model_bucket_{selected_bucket}.joblib"
    model_path = f"/kaggle/input/regression-models/outputs/outputs/model_bucket_{selected_bucket}.joblib"
    selected_model = XGBRegressor()
    selected_model.load_model(model_path)
    combined_features = mean_embeddings

    features_reshaped = combined_features.view(1, -1).numpy()

    prediction = selected_model.predict(features_reshaped)
#     k = 15*np.exp(prediction/25)
#     if(k > 5000):
#         k = 150*prediction
    return prediction

def main():
    
    st.title("Task1 : Behaviour Simulation")


    with st.form("user_input_form"):

        img_url = st.text_input("Enter the Image URL:")
        if img_url:
            img = fetch_image(img_url)
        else:
            img = None
            
        if img is not None:
            img = img.resize((224, 224))
            st.image(img, caption="Processed Image", use_column_width=True)
        else:
            black_image_size = (224, 224)  
            img = Image.new("RGB", black_image_size, "black")

        tweet_content = st.text_input("Enter the Tweet Content: ")
        inferred_company = st.text_input("Enter the Inferred Company: ")
        date_time = st.text_input("Enter the Date and Time (e.g., 2018-01-29 10:51:17): ")

        inputs = blip_processor(img, return_tensors="pt")
        out = blip_model.generate(**inputs)
        image_caption = blip_processor.decode(out[0], skip_special_tokens=True)

        final_text = (
            f"Following is the information about Twitter post."
            f"Caption for Image of post: {image_caption}, "
            f"Text content: {tweet_content}, "
            f"Inferred company: {inferred_company}, "
            f"Date and time: {date_time} "
        )
        
        st.write(final_text)

        submitted = st.form_submit_button("Submit")
        if submitted:
            prediction = predict(final_text)
            st.success(f"Prediction : {prediction}")

if __name__ == "__main__":
    main()

Writing my_app.py


In [4]:
from pyngrok import ngrok
import threading

def run_ngrok():
    ngrok_tunnel = ngrok.connect(8501)
    print(f'Public URL: {ngrok_tunnel.public_url}')
    ngrok_tunnel.block_until_close()

# Start ngrok in a separate thread
ngrok_thread = threading.Thread(target=run_ngrok)
ngrok_thread.start()

In [5]:
!streamlit run --server.port 8501 my_app.py > /dev/null

Exception in thread Thread-4 (run_ngrok):
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/opt/conda/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "/tmp/ipykernel_42/567140438.py", line 7, in run_ngrok
AttributeError: 'NgrokTunnel' object has no attribute 'block_until_close'


Public URL: https://6e53-35-237-118-44.ngrok-free.app
tokenizer_config.json: 100%|██████████████████| 28.0/28.0 [00:00<00:00, 210kB/s]
vocab.txt: 100%|█████████████████████████████| 232k/232k [00:00<00:00, 4.98MB/s]
tokenizer.json: 100%|████████████████████████| 466k/466k [00:00<00:00, 8.93MB/s]
config.json: 100%|█████████████████████████████| 483/483 [00:00<00:00, 4.08MB/s]
model.safetensors: 100%|██████████████████████| 268M/268M [00:01<00:00, 255MB/s]
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.bias', 'pre_classifier.weight', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
preprocessor_config.json: 100%|████████████████| 287/287 [00:00<00:00, 2.05MB/s]
tokenizer_config.json: 100%|███████████████████| 506/506 [00:00<00:00, 4.12MB/s]
vocab.txt: 100%|██████████