In [None]:
import torch
# Release cached, currently unused blocks held by PyTorch's CUDA caching allocator,
# then report memory so the effect of the release is visible.
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    # memory_reserved() is the amount PyTorch's allocator still holds, not the amount
    # that is free; mem_get_info() asks the driver for the device's free/total bytes.
    free_bytes, total_bytes = torch.cuda.mem_get_info()
    print(f"Reserved by PyTorch: {torch.cuda.memory_reserved() / 1024 ** 2:.2f} MB")
    print(f"Free Memory: {free_bytes / 1024 ** 2:.2f} MB of {total_bytes / 1024 ** 2:.2f} MB")
else:
    print("CUDA is not available; no GPU memory to report.")


In [1]:
import importlib.util
import os
import re
import sys
import textwrap
import time  # Import time module to introduce delay

import torch
from PIL import Image
from transformers import AutoTokenizer

# Location of the local model checkout. Defaults to the original path; set the
# SHAKTI_MODEL_DIR environment variable to run this notebook on another machine
# without editing the cell.
model_dir = os.environ.get(
    "SHAKTI_MODEL_DIR",
    '/home/raw/Desktop/Coding/military_int_icc/shakti-2B-041224',
)

# Put the checkout on the import path (presumably so that modules imported by the
# model code itself resolve -- confirm against modeling_shakti.py). Guarded so that
# re-running this cell does not keep appending duplicate entries.
if model_dir not in sys.path:
    sys.path.append(model_dir)

# Source files that define the custom configuration and model classes.
config_module_path = f"{model_dir}/configuration_shakti.py"
model_module_path = f"{model_dir}/modeling_shakti.py"


def load_module_from_file(module_name, file_path):
    """Import the Python source file at ``file_path`` under the name ``module_name``.

    Args:
        module_name: Name to register the module under in ``sys.modules``.
        file_path: Path of the ``.py`` file to execute.

    Returns:
        The executed module object.

    Raises:
        ImportError: If no import spec/loader can be created for ``file_path``.
        Exception: Whatever the module's own top-level code raises; in that case
            the partially initialised module is removed from ``sys.modules`` first.
    """
    spec = importlib.util.spec_from_file_location(module_name, file_path)
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot create an import spec for {module_name!r} from {file_path!r}")

    module = importlib.util.module_from_spec(spec)
    # Register before executing so the module can be looked up by name while its
    # own top-level code runs.
    sys.modules[module_name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a half-initialised module behind for later lookups.
        sys.modules.pop(module_name, None)
        raise
    return module


# Load the configuration and model modules dynamically from the checkout.
config_module = load_module_from_file("shaktiConfig", config_module_path)
model_module = load_module_from_file("shaktiModel", model_module_path)

# Classes provided by the dynamically loaded modules.
shaktiConfig = config_module.shaktiConfig
shaktiModel = model_module.shaktiModel

# Load the custom model configuration.
config = shaktiConfig.from_pretrained(model_dir)

# Load the custom model in half precision with SDPA attention, switch it to
# evaluation mode and move it to the GPU.
model = shaktiModel.from_pretrained(model_dir, config=config, attn_implementation='sdpa', torch_dtype=torch.half)
model.eval().cuda()

# Initialize the tokenizer from the same checkout.
tokenizer = AutoTokenizer.from_pretrained(model_dir)

# Build the input processor through the model's own init_processor hook.
processor = model.init_processor(tokenizer)




  from .autonotebook import tqdm as notebook_tqdm
HyperChakravyuhaForCausalLM has generative capabilities, as `prepare_inputs_for_generation` is explicitly overwritten. However, it doesn't directly inherit from `GenerationMixin`. From 👉v4.50👈 onwards, `PreTrainedModel` will NOT inherit from `GenerationMixin`, and this model will lose the ability to call `generate` and other related functions.
  - If you are the owner of the model architecture code, please modify your model class such that it inherits from `GenerationMixin` (after `PreTrainedModel`, otherwise you'll get an exception).
  - If you are not the owner of the model architecture class, please contact the model code owner to update it.


In [None]:

# Text-only queries: no image is loaded in this cell. The image-preview and
# cache-clearing snippets that used to be commented out here are still kept in the
# disabled cells further down this notebook.

# List of queries to send to the model; each entry is one complete user prompt.
queries = [
    """
Give me complete corrected lines using below context:
Detected Text: State the universal law ak gravilalion. - Confidence: 0.9311787486076355
Detected Text: a force called he gravilalional korce. the korce acling - Confidence: 0.8632688522338867
Detected Text: Whai do you mean by free fall? - Confidence: 0.9386471509933472
Detected Text: tonce. Hhe maion at dhe object is said lo have free fall. - Confidence: 0.8472165465354919
Detected Text: What do youmean by accelenalion due lo giavidu? - Confidence: 0.8162661790847778
Detected Text: When an object kalls'Howands the graund Knom a height, - Confidence: 0.8196004033088684
Detected Text: Velocity praduces accelenalion in ihe obiect yhis - Confidence: 0.859413206577301
Detected Text: acceleralian in Known ar acceleralion due to gravly.Ai - Confidence: 0.8293883800506592
Detected Text: value is gwven by g.8 ml - Confidence: 0.8043153285980225
Detected Text: Ik the moon allnaci the eailh;why does the failh - Confidence: 0.8403022289276123
Detected Text: nat mave dowatds Jhe moon ? - Confidence: 0.8502532839775085
Detected Text: yhe Eaih and the moon expencences equlal gravitalionad - Confidence: 0.8557673096656799
Detected Text: tonce txom each othen.However, the may of The Eailh - Confidence: 0.8026825189590454
Detected Text: W much langen than The may ok the moon. Hence, it - Confidence: 0.8428501486778259
Detected Text: accelerale at a saile much mare Than the acceleralion - Confidence: 0.8634287714958191
Detected Text: nate af the moon iowandr the eanth Fon this rearon - Confidence: 0.8300524950027466
Detected Text: the Eailh doesn't move lowands the moon - Confidence: 0.8661238551139832
"""
]

# Generation / display settings for this cell.
# NOTE(review): the prompt above asks for 17 corrected lines; 100 new tokens may
# truncate the answer -- consider raising this.
MAX_NEW_TOKENS = 100
WRAP_WIDTH = 100  # maximum printed line width, in characters

# A sentence boundary is whitespace that directly follows '.', '!' or '?'.
# Requiring the whitespace keeps decimals such as "9.8" in one piece.
SENTENCE_BOUNDARY = re.compile(r"(?<=[.!?])\s+")


def format_answer(text, width=WRAP_WIDTH):
    """Format model output for printing, one wrapped sentence per line.

    The model's own line breaks are kept, each line is further split at sentence
    boundaries, and every sentence is wrapped to ``width`` columns. Unlike a plain
    ``text.split('.')``, this keeps the terminating punctuation and does not cut
    numbers like "9.8" in half.

    Args:
        text: Decoded model output.
        width: Maximum line width passed to ``textwrap.fill``.

    Returns:
        The formatted text; an empty string if ``text`` holds only whitespace.
    """
    wrapped = []
    for line in text.splitlines():
        for sentence in SENTENCE_BOUNDARY.split(line.strip()):
            if sentence:  # skip blanks produced by empty lines
                wrapped.append(textwrap.fill(sentence, width=width))
    return "\n".join(wrapped)


# Process each query
for idx, query in enumerate(queries, start=1):
    print(f"Query {idx}: {query}")

    # Chat-style input: the user turn carries the prompt, followed by an empty
    # assistant turn (presumably the slot the model fills in -- confirm against the
    # processor's chat template).
    messages = [
        {"role": "user", "content": query},
        {"role": "assistant", "content": ""}
    ]

    print("Final text message jo model me jaayega: ", messages)

    # NOTE(review): the saved output of this cell shows
    # "UnboundLocalError: local variable 'image_tensor_list' referenced before
    # assignment" for this text-only call (images=None). The failing code is not in
    # this notebook; presumably the processor/model only assigns that variable when
    # media are supplied -- confirm in the model's processing code.
    inputs = processor(messages, images=None, videos=None)
    # The return value is not used, so this relies on .to() moving the tensors in
    # place -- NOTE(review): confirm for this processor's output type.
    inputs.to('cuda')

    # Extra generation arguments. output[0] is treated as a string below, so
    # 'decode_text' together with 'tokenizer' presumably makes generate() return
    # decoded text.
    inputs.update({
        'tokenizer': tokenizer,
        'max_new_tokens': MAX_NEW_TOKENS,
        'decode_text': True,
    })

    # Generate the answer
    output = model.generate(**inputs)

    # Print the answer
    print("Answer:")
    answer = format_answer(output[0])
    if answer:
        print(answer)
    print("\n" + "-" * 80 + "\n")  # Separator between queries



Query 1: 
Give me complete corrected lines using below context:
Detected Text: State the universal law ak gravilalion. - Confidence: 0.9311787486076355
Detected Text: a force called he gravilalional korce. the korce acling - Confidence: 0.8632688522338867
Detected Text: Whai do you mean by free fall? - Confidence: 0.9386471509933472
Detected Text: tonce. Hhe maion at dhe object is said lo have free fall. - Confidence: 0.8472165465354919
Detected Text: What do youmean by accelenalion due lo giavidu? - Confidence: 0.8162661790847778
Detected Text: When an object kalls'Howands the graund Knom a height, - Confidence: 0.8196004033088684
Detected Text: Velocity praduces accelenalion in ihe obiect yhis - Confidence: 0.859413206577301
Detected Text: acceleralian in Known ar acceleralion due to gravly.Ai - Confidence: 0.8293883800506592
Detected Text: value is gwven by g.8 ml - Confidence: 0.8043153285980225
Detected Text: Ik the moon allnaci the eailh;why does the failh - Confidence: 0.8403022

UnboundLocalError: local variable 'image_tensor_list' referenced before assignment

: 

  from .autonotebook import tqdm as notebook_tqdm
HyperChakravyuhaForCausalLM has generative capabilities, as `prepare_inputs_for_generation` is explicitly overwritten. However, it doesn't directly inherit from `GenerationMixin`. From 👉v4.50👈 onwards, `PreTrainedModel` will NOT inherit from `GenerationMixin`, and this model will lose the ability to call `generate` and other related functions.
  - If you are the owner of the model architecture code, please modify your model class such that it inherits from `GenerationMixin` (after `PreTrainedModel`, otherwise you'll get an exception).
  - If you are not the owner of the model architecture class, please contact the model code owner to update it.


Query 1:
An error occurred: local variable 'image_tensor_list' referenced before assignment


: 

In [None]:
# import textwrap
# import torch
# from transformers import AutoTokenizer
# from PIL import Image
# import importlib.util
# import sys
# import time
# from typing import List, Optional, Union

# def setup_model(model_dir: str):
#     """
#     Set up and initialize the model, tokenizer, and processor
#     """
#     sys.path.append(model_dir)

#     # Dynamically import configuration and model
#     config_module_path = f"{model_dir}/configuration_shakti.py"
#     model_module_path = f"{model_dir}/modeling_shakti.py"

#     # Load configuration module
#     spec_config = importlib.util.spec_from_file_location("shaktiConfig", config_module_path)
#     config_module = importlib.util.module_from_spec(spec_config)
#     sys.modules["shaktiConfig"] = config_module
#     spec_config.loader.exec_module(config_module)

#     # Load model module
#     spec_model = importlib.util.spec_from_file_location("shaktiModel", model_module_path)
#     model_module = importlib.util.module_from_spec(spec_model)
#     sys.modules["shaktiModel"] = model_module
#     spec_model.loader.exec_module(model_module)

#     # Import required classes
#     from shaktiConfig import shaktiConfig
#     from shaktiModel import shaktiModel

#     # Initialize model components
#     config = shaktiConfig.from_pretrained(model_dir)
#     model = shaktiModel.from_pretrained(
#         model_dir, 
#         config=config, 
#         attn_implementation='sdpa', 
#         torch_dtype=torch.half
#     )
#     model.eval().cuda()

#     # Initialize tokenizer
#     tokenizer = AutoTokenizer.from_pretrained(model_dir)

#     # Initialize processor
#     processor = model.init_processor(tokenizer)

#     return model, tokenizer, processor

# def process_inputs(
#     model,
#     tokenizer,
#     processor,
#     images: Optional[Union[str, List[str]]] = None,
#     query: Optional[str] = None,
#     max_new_tokens: int = 100,
#     clear_gpu_memory: bool = False,
#     gpu_clear_delay: int = 5
# ) -> List[str]:
#     """
#     Process inputs based on different scenarios:
#     1. Multiple images with one query
#     2. Single image with one query
#     3. Image(s) only (generates general description)
#     4. Query only (generates text response)
#     """
#     # Convert single image path to list
#     if isinstance(images, str):
#         images = [images]
    
#     # Process images if provided
#     processed_images = []
#     if images:
#         for img_path in images:
#             try:
#                 img = Image.open(img_path).convert("RGB")
#                 processed_images.append(img)
#             except Exception as e:
#                 print(f"Error loading image {img_path}: {str(e)}")
#                 continue
    
#     # Prepare default query if none provided
#     if not query and processed_images:
#         query = "What can you see in the image(s) in detail?"
    
#     # Prepare messages based on scenario
#     messages = []
#     if processed_images and query:
#         messages = [
#             {"role": "user", "content": f"<|image|> {query}"},
#             {"role": "assistant", "content": ""}
#         ]
#     elif processed_images:
#         messages = [
#             {"role": "user", "content": "<|image|> Please describe what you see."},
#             {"role": "assistant", "content": ""}
#         ]
#     elif query:
#         messages = [
#             {"role": "user", "content": query},
#             {"role": "assistant", "content": ""}
#         ]
#     else:
#         raise ValueError("Either images or query must be provided")

#     # Process inputs
#     inputs = processor(
#         messages,
#         images=processed_images if processed_images else None,
#         videos=None
#     )
#     inputs.to('cuda')

#     # Update input parameters
#     inputs.update({
#         'tokenizer': tokenizer,
#         'max_new_tokens': max_new_tokens,
#         'decode_text': True,
#     })

#     # Generate response
#     outputs = []
#     try:
#         output = model.generate(**inputs)
        
#         # Format and store the response
#         if isinstance(output, list):
#             for response in output:
#                 formatted_response = []
#                 for sentence in response.split('.'):
#                     if sentence.strip():
#                         wrapped_text = textwrap.fill(sentence.strip(), width=100)
#                         formatted_response.append(wrapped_text)
#                 outputs.append('\n'.join(formatted_response))
#         else:
#             formatted_response = []
#             for sentence in output.split('.'):
#                 if sentence.strip():
#                     wrapped_text = textwrap.fill(sentence.strip(), width=100)
#                     formatted_response.append(wrapped_text)
#             outputs.append('\n'.join(formatted_response))

#     except Exception as e:
#         print(f"Error generating response: {str(e)}")
#         outputs.append(f"Error: {str(e)}")

#     # Clear GPU memory if requested
#     if clear_gpu_memory:
#         time.sleep(gpu_clear_delay)
#         torch.cuda.empty_cache()
#         print("GPU Memory Cleared")
#         print(f"Free Memory: {torch.cuda.memory_reserved() / 1024 ** 2:.2f} MB")

#     return outputs

# def display_image(image_path: str, max_width: int = 300):
#     """
#     Display image with specified maximum width while maintaining aspect ratio
#     """
#     image = Image.open(image_path).convert("RGB")
#     aspect_ratio = image.height / image.width
#     new_width = min(image.width, max_width)
#     new_height = int(new_width * aspect_ratio)
#     resized_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)
#     display(resized_image)

# def main():
#     # Initialize model and components
#     model_dir = '/home/raw/Desktop/Coding/military_int_icc/shakti-2B-041224'
#     model, tokenizer, processor = setup_model(model_dir)

#     # Example usage for different scenarios
#     try:
#         # Scenario 1: Multiple images with one query
#         images = ['/home/raw/Downloads/11.jpg', '/home/raw/Downloads/12.jpg']
#         query = "What objects can you identify in these images?"
#         print("\nProcessing multiple images with query...")
#         for img in images:
#             display_image(img)
#         responses = process_inputs(model, tokenizer, processor, images=images, query=query)
#         for i, response in enumerate(responses, 1):
#             print(f"\nResponse {i}:")
#             print(response)
#             print("-" * 80)

#         # Scenario 2: Single image with query
#         print("\nProcessing single image with query...")
#         display_image(images[0])
#         response = process_inputs(
#             model, tokenizer, processor,
#             images=images[0],
#             query="Describe this image in detail."
#         )
#         print("\nResponse:")
#         print(response[0])
#         print("-" * 80)

#         # Scenario 3: Image only
#         print("\nProcessing image only...")
#         display_image(images[0])
#         response = process_inputs(model, tokenizer, processor, images=images[0])
#         print("\nResponse:")
#         print(response[0])
#         print("-" * 80)

#         # Scenario 4: Query only
#         print("\nProcessing query only...")
#         response = process_inputs(
#             model, tokenizer, processor,
#             query="What are the potential implications of this situation?"
#         )
#         print("\nResponse:")
#         print(response[0])
#         print("-" * 80)

#     except Exception as e:
#         print(f"Error in main execution: {str(e)}")

# if __name__ == "__main__":
#     main()

In [None]:
# import textwrap
# import torch
# from transformers import AutoTokenizer
# from PIL import Image
# import importlib.util
# import sys
# import os  # Import os module to manage processes
# import time  # Import time module to introduce delay

# model_dir = '/home/raw/Desktop/Coding/military_int_icc/shakti-2B-041224'
# sys.path.append(model_dir)

# # Dynamically import the configuration and model
# config_module_path = f"{model_dir}/configuration_shakti.py"
# model_module_path = f"{model_dir}/modeling_shakti.py"

# # Load the configuration module dynamically
# spec_config = importlib.util.spec_from_file_location("shaktiConfig", config_module_path)
# config_module = importlib.util.module_from_spec(spec_config)
# sys.modules["shaktiConfig"] = config_module
# spec_config.loader.exec_module(config_module)

# # Load the model module dynamically
# spec_model = importlib.util.spec_from_file_location("shaktiModel", model_module_path)
# model_module = importlib.util.module_from_spec(spec_model)
# sys.modules["shaktiModel"] = model_module
# spec_model.loader.exec_module(model_module)

# # Now you can use the classes from the dynamically loaded modules
# from shaktiConfig import shaktiConfig
# from shaktiModel import shaktiModel

# # Load the custom model configuration
# config = shaktiConfig.from_pretrained(model_dir)

# # Load the custom model using the configuration
# model = shaktiModel.from_pretrained(model_dir, config=config, attn_implementation='sdpa', torch_dtype=torch.half)
# model.eval().cuda()

# # Initialize the tokenizer
# tokenizer = AutoTokenizer.from_pretrained(model_dir)

# # Initialize the processor (if custom processor function is provided in the model)
# processor = model.init_processor(tokenizer)

# # Load and process image
# image = Image.open('/home/raw/Downloads/11.jpg').convert("RGB")

# # List of queries which user can enter 
# queries = [
#     "What can you see in the image in detail?",
#     "How can we prevent it?",
#     "Give me a breif",
# ]

# # Set the maximum width for the image
# max_width = 300  # Desired maximum width

# # Calculate the new height while maintaining the aspect ratio
# aspect_ratio = image.height / image.width
# new_width = min(image.width, max_width)  # Ensure the width doesn't exceed max_width
# new_height = int(new_width * aspect_ratio)

# # Resize the image while maintaining its aspect ratio
# resized_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)

# # Display the resized image in the Jupyter Notebook
# display(resized_image)


In [None]:

# # Process each query
# for idx, query in enumerate(queries, start=1):
#     print(f"Query {idx}: {query}")
#     # Prepare messages
#     messages = [
#         {"role": "user", "content": f"""<|image|>
# {query}"""},
#         {"role": "assistant", "content": ""}
#     ]

#     # Process inputs
#     inputs = processor(messages, images=[image], videos=None)
#     inputs.to('cuda')

#     # Update input parameters
#     inputs.update({
#         'tokenizer': tokenizer,
#         'max_new_tokens': 500,
#         'decode_text': True,
#     })

#     # Generate description
#     output = model.generate(**inputs)

#     # Clear the GPU memory after 5 seconds
#     time.sleep(5)  # Wait for 5 seconds
#     torch.cuda.empty_cache()  # Clear the CUDA cache

#     # # Optionally, kill the process to free GPU memory
#     # current_pid = os.getpid()  # Get the process ID of the current script
#     # print(f"Killing process with PID: {current_pid}")
#     # os.system(f"kill -9 {current_pid}")  # Force kill the current process
#     # break  # Exit after killing the process to avoid unnecessary iterations



In [None]:
# from model_loader import model, tokenizer, processor  # Importing from model_loader.py
# import asyncio
# from fastapi import FastAPI, UploadFile, Form, HTTPException
# from fastapi.responses import JSONResponse
# from fastapi.middleware.cors import CORSMiddleware
# from pydantic import BaseModel
# from PIL import Image
# import torch
# from transformers import AutoTokenizer
# import importlib.util
# import sys
# import os

# # Initialize FastAPI app
# app = FastAPI()

# # Add CORS middleware
# # app.add_middleware(
# #     CORSMiddleware,
# #     allow_origins=["*"],  # Change "*" to specific domains for production
# #     allow_credentials=True,
# #     allow_methods=["*"],
# #     allow_headers=["*"],
# # )


# @app.post("/analyze/")
# async def analyze(query: str = Form(...), image: UploadFile = None):
#     try:
#         # Process the image if provided
#         img = None
#         if image:
#             img = Image.open(image.file).convert("RGB")
#             print("Image loaded successfully:", img.size)

#         # Prepare messages
#         messages = [
#             {"role": "user", "content": f"""<|image|>
# {query}"""},
#             {"role": "assistant", "content": ""}
#         ]

#         print("Query:", query)
#         print("Image:", img)

#         # Process inputs
#         inputs = processor(messages, images=[img] if img else None, videos=None)
#         inputs.to('cuda')
#         inputs.update({
#             'tokenizer': tokenizer,
#             'max_new_tokens': 500,
#             'decode_text': True,
#         })

#         # Generate the response
#         print("Output generation is started. ")
#         output = model.generate(**inputs)

#         print("Output generation is ended. ")
#         print(output[0])

#         # Send the output
#         return JSONResponse(content={"response": output[0]})
#     except Exception as e:
#         print("Error:", str(e))
#         raise HTTPException(status_code=500, detail=str(e))






























