# Installing all the dependencies 

In [None]:
!pip install -q -U bitsandbytes
!pip install -q -U git+https://github.com/huggingface/peft.git
!pip install -q -U git+https://github.com/huggingface/accelerate.git
!pip install av
!pip install pandas
!pip install transformers
!pip install re
!pip install ast
!pip install requests
!pip install numpy
!pip install torc

# Importing all the necessary libraries and functions

In [None]:
import pandas as pd
import transformers
import av
import re
import ast
import requests
import numpy as np
import torch
from transformers import AutoImageProcessor, AutoTokenizer, VisionEncoderDecoderModel,AutoModel ,BitsAndBytesConfig, AutoModelForCausalLM
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
import requests

In [None]:
# Run the captioning models on the first GPU when CUDA is available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# 4-bit NF4 quantization settings (double quantization, bfloat16 compute)
# passed to the LLM when it is loaded.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    load_in_4bit=True,
)

# Import the test dataset and company-to-sector mapping
Enter the paths of the test dataset and the company-to-sector mapping Excel files.

In [None]:
# Lookup table with 'Company' and 'Sector' columns (read by get_sector).
company2sector = pd.read_excel(io='/kaggle/input/company2sector/company2sector (1).xlsx')
# Test set; later cells read its 'media', 'inferred company' and 'likes' columns.
ds = pd.read_excel(io='/kaggle/input/test-ds/test_data.xlsx')

# Load all the models, tokenizers and image processors

In [None]:
# --- Video captioning (used by video_caption) ---
# processor_1 turns the sampled frames into pixel values, model_vid generates
# caption tokens from them, and tokenizer_vid decodes those tokens to text.
processor_1 = AutoImageProcessor.from_pretrained("MCG-NJU/videomae-base")
tokenizer_vid = AutoTokenizer.from_pretrained("gpt2")
model_vid = VisionEncoderDecoderModel.from_pretrained("Neleac/timesformer-gpt2-video-captioning").to(device)
# --- Image captioning (used by img_caption) ---
# The BLIP processor both prepares the image and decodes the generated tokens.
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
model_img = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large").to(device)
# --- Tweet generation (used by gen_tweet) ---
# The LLM is loaded with the quantization settings from bnb_config.
model_final = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1", quantization_config=bnb_config, device_map="auto")
tokenizer_final = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")

# Functions to generate captions, prompts and the final tweet

In [None]:
# Progress counters: `count` is incremented by captions() and `count1` by
# gen_tweet(); each function prints its counter periodically.
count = count1 = 0

def video_caption(video_url):
    """Download the video referenced by a media description and caption it.

    Args:
        video_url: Text of the media entry. It is scanned for
            ``url='...', bitrate=...`` pairs, and the first URL whose bitrate
            is numeric (not ``None``) is downloaded.

    Returns:
        The caption text produced by ``model_vid``, or the string ``"blank"``
        when any step fails (the error is printed, not raised).

    Side effects:
        Writes ``downloaded_video.mp4`` in the working directory; the file is
        overwritten on every call.
    """
    try:
        urls_with_bitrate = re.findall(r"url='(.*?)', bitrate=(\d+|None)", video_url)
        filtered_urls = [url for url, bitrate in urls_with_bitrate if bitrate.lower() != 'none']
        if not filtered_urls:
            # Fail with a readable message instead of an opaque IndexError.
            raise ValueError("no video URL with a numeric bitrate found")
        final_url = filtered_urls[0]

        # The timeout keeps one unresponsive server from stalling the whole run.
        with requests.get(final_url, stream=True, timeout=30) as response:
            response.raise_for_status()
            with open("downloaded_video.mp4", "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

        # Opening the container as a context manager releases the file handle
        # even when decoding raises.
        with av.open("downloaded_video.mp4") as container:
            seg_len = container.streams.video[0].frames
            clip_len = model_vid.config.encoder.num_frames
            # Evenly spaced frame indices over the stream. Duplicates collapse
            # in the set, so a stream with fewer than clip_len frames yields
            # fewer sampled frames.
            indices = set(np.linspace(0, seg_len, num=clip_len, endpoint=False).astype(np.int64))
            container.seek(0)
            frames = [
                frame.to_ndarray(format="rgb24")
                for i, frame in enumerate(container.decode(video=0))
                if i in indices
            ]

        gen_kwargs = {
            "min_length": 10,
            "max_length": 30,
            "num_beams": 4,
        }
        pixel_values = processor_1(frames, return_tensors="pt").pixel_values.to(device)
        tokens = model_vid.generate(pixel_values, **gen_kwargs)
        return tokenizer_vid.batch_decode(tokens, skip_special_tokens=True)[0]
    except Exception as e:
        # Best effort: a row whose video cannot be captioned must not abort the
        # whole dataframe pass, so report the error and return the sentinel.
        print(e)
        return "blank"
    
def img_caption(img_url):
    """Download the image referenced by a media description and caption it.

    Args:
        img_url: Text of the media entry. It is parsed as Python source, and
            the last string literal containing ``https://`` encountered while
            walking the syntax tree is used as the image URL.

    Returns:
        The caption text produced by ``model_img``, or the string ``"blank"``
        when any step fails (the error is printed, not raised).
    """
    try:
        image_data_ast = ast.parse(img_url)
        last_url = None

        # String literals are ast.Constant nodes; ast.Str and the `.s`
        # attribute are deprecated aliases, so they are not used here.
        for node in ast.walk(image_data_ast):
            if (
                isinstance(node, ast.Constant)
                and isinstance(node.value, str)
                and "https://" in node.value
            ):
                last_url = node.value
        if last_url is None:
            raise ValueError("no https:// URL found in the media description")

        # The timeout keeps one unresponsive server from stalling the whole run;
        # raise_for_status reports HTTP errors before decoding is attempted.
        with requests.get(last_url, stream=True, timeout=30) as response:
            response.raise_for_status()
            # convert() loads the pixel data, so the response can be closed after.
            raw_image = Image.open(response.raw).convert('RGB')

        inputs = processor(raw_image, return_tensors="pt").to(device)
        return processor.decode(model_img.generate(**inputs)[0], skip_special_tokens=True)
    except Exception as e:
        # Best effort: a row whose image cannot be captioned must not abort the
        # whole dataframe pass, so report the error and return the sentinel.
        print(e)
        return "blank"


def captions(url):
    """Caption one media entry, choosing the video or the image captioner.

    Args:
        url: Text of the media entry. Entries whose second character is
            ``'V'`` or ``'G'`` go to ``video_caption``; everything else goes to
            ``img_caption``. (Presumably entries look like ``[Video(...)]`` /
            ``[Gif(...)]`` / ``[Photo(...)]`` -- confirm against the dataset.)

    Returns:
        The caption text, or ``"blank"`` when no caption could be produced.
    """
    global count
    count += 1
    # Progress report every 500 rows (at 499, 999, ...).
    if count % 500 == 499:
        print(count)
    if not isinstance(url, str):
        # A non-text cell (e.g. NaN for an empty spreadsheet cell) has no media
        # to caption; return the same sentinel the captioners use instead of
        # crashing the whole apply() on `url[1]`.
        return "blank"
    # Slicing, unlike url[1], does not raise on strings shorter than two characters.
    if url[1:2] in ("V", "G"):
        return video_caption(url)
    return img_caption(url)
        
def get_sector(company):
    """Return the 'Sector' of the first `company2sector` row matching `company`.

    Raises:
        IndexError: if no row has that value in its 'Company' column.
    """
    is_match = company2sector['Company'] == company
    sectors = company2sector.loc[is_match, 'Sector']
    return sectors.values[0]

def get_prompt(row):
    """Assemble the LLM prompt text for one row.

    Args:
        row: Mapping with 'caption', 'inferred company' and 'sector' entries,
            plus 'likes' when a caption is available.

    Returns:
        The prompt string. When the caption is the sentinel 'blank', the
        likes and media-description lines are left out.
    """
    if row['caption'] != 'blank':
        pieces = [
            "Company : ", row['inferred company'], "\n",
            "Sector : ", row['sector'], "\n",
            "Likes : ", str(row['likes']), "\n",
            "Image/Video Description : ", row['caption'], "\n",
            "Goal: Generate a tweet text to increase brand awareness and to maximize the likes.The output only the tweet text.",
        ]
    else:
        pieces = [
            "Company : ", row['inferred company'], "\n",
            "Sector : ", row['sector'], "\n",
            "Goal: Generate a tweet text to increase brand awareness and to maximize the likes.The output should consist only of tweet text ",
        ]
    return "".join(pieces)

def gen_tweet(prompt):
    """Generate tweet text for one prompt with the instruction-tuned LLM.

    Args:
        prompt: Text sent to the model as a single user message.

    Returns:
        list[str]: The text found after ``[/INST]`` in the decoded output, up
        to the end-of-sequence marker ``</s>`` or, when generation stopped
        before emitting it, up to the end of the output. The list is empty
        only if ``[/INST]`` is absent. Sampling is enabled, so repeated calls
        can return different text.
    """
    global count1
    count1 += 1
    messages = [{"role": "user", "content": prompt}]

    encodeds = tokenizer_final.apply_chat_template(messages, return_tensors="pt")
    model_inputs = encodeds.to(device)

    generated_ids = model_final.generate(
        model_inputs,
        max_new_tokens=200,
        do_sample=True,
        pad_token_id=tokenizer_final.eos_token_id,
    )
    decoded = tokenizer_final.batch_decode(generated_ids)
    # Progress report every 500 prompts (at 499, 999, ...).
    if count1 % 500 == 499:
        print(count1)
    # DOTALL lets '.' cross newlines, so multi-line tweets are kept instead of
    # producing no match; the \Z alternative keeps output that reached the
    # token limit before the model emitted </s>.
    return re.findall(r'\[/INST\](.*?)(?:</s>|\Z)', decoded[0], flags=re.DOTALL)



## Generating captions for the Video/Image/GIF

In [None]:
# Caption each row's media one at a time (download plus model inference per
# row); rows that cannot be captioned get the string "blank".
ds.loc[:, 'caption'] = ds['media'].apply(captions)

## Writing each company's sector into the dataframe

In [None]:
# Look up every company in the company2sector table; get_sector raises
# IndexError for a company that is missing from that table.
ds.loc[:, 'sector'] = ds['inferred company'].apply(get_sector)

## Generating prompts for the LLM to generate tweets

In [None]:
# axis=1 passes each row to get_prompt, which reads the 'caption' and 'sector'
# columns created by the previous cells.
ds['prompt'] = ds.apply(get_prompt, axis=1)

## Generating outputs from the prompts

In [None]:
# One LLM generation per row; each 'output' cell holds the list returned by
# gen_tweet (the result of re.findall), not a bare string.
ds.loc[:, 'output'] = ds['prompt'].apply(gen_tweet)

## Saving the final dataframe as an Excel file

In [None]:
# Written relative to the current working directory. `index` is left at its
# default, so the dataframe index is saved as the first column.
ds.to_excel('submission2.xlsx')