<style>
    .white {
        background-color: #FFFFFF;
    }

</style>
<div class="white">
<img src = "fai_gradient_logo.png">
</div>

# Falcons.ai Personal Recall using Microsoft Phi-3-Vision

## 1. Before running the rest of this notebook, make sure the Phi-3-Vision model has been downloaded to a subdirectory named 'phi3' (uncomment and run the next cell once to do this).
## 2. Once the model is saved locally, you can comment out or delete that cell.

In [None]:
# # Define the path where you want to save the model and processor
# model_id = "microsoft/Phi-3-vision-128k-instruct" 
# model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cuda", trust_remote_code=True, torch_dtype="auto", _attn_implementation='flash_attention_2')
# model_save_path = "./phi3"
# processor_save_path = "./phi3"

# # Save the model (safe_serialization=False writes PyTorch .bin weights instead of safetensors)
# model.save_pretrained(model_save_path, safe_serialization=False)

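# # Save the processor (NOTE: `processor` must be created first, e.g. with AutoProcessor.from_pretrained(model_id, trust_remote_code=True))
# processor.save_pretrained(processor_save_path)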

In [None]:
import os
import glob
import time
import piexif
from PIL import ImageGrab
screenshot = ImageGrab.grab()
import datetime
import textwrap
import warnings
from datetime import datetime
from PIL import Image 
# Suppress all warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from transformers import AutoProcessor 
from transformers import AutoModelForCausalLM 
from IPython.display import display, Markdown, Latex, JSON

In [None]:
def Image_desc(image_path,prompt='Describe this image in excruciating detail'):
    """Describe a local image with the locally saved Phi-3-Vision model.

    The model and processor are loaded from the ``./phi3`` sub-directory on
    every call and released again before returning, so GPU memory is not held
    between calls.

    Args:
        image_path: Path of the image file to describe.
        prompt: Instruction sent to the model together with the image.

    Returns:
        The generated description, decoded with special tokens skipped.
    """
    import gc
    import torch

    # The model was downloaded beforehand to a sub-directory titled "phi3"
    model_id = "./phi3"
    # Pre-bind every name that may hold GPU memory so the cleanup below is
    # safe even when loading or generation fails half-way.
    model = processor = inputs = generate_ids = None
    try:
        model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cuda", trust_remote_code=True, torch_dtype="auto")
        processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
        messages = [
            {"role": "user", "content": "<|image_1|>\n"+prompt},
        ]
        # Kept in its own variable so the caller-supplied `prompt` is not shadowed
        chat_prompt = processor.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        # Local image; the context manager closes the file once it has been processed
        with Image.open(image_path) as image:
            inputs = processor(chat_prompt, [image], return_tensors="pt").to("cuda:0")
        generation_args = {
            "max_new_tokens": 500,
            "temperature": 0.0,
            "do_sample": False,  # sampling is off, so decoding is deterministic
        }
        generate_ids = model.generate(**inputs, eos_token_id=processor.tokenizer.eos_token_id, **generation_args)

        # Remove the input tokens so only the newly generated text is decoded
        generate_ids = generate_ids[:, inputs['input_ids'].shape[1]:]
        response = processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
        return response
    finally:
        # Dropping the references alone leaves the memory in PyTorch's caching
        # allocator; collect garbage and empty the cache to hand it back.
        del model, processor, inputs, generate_ids
        gc.collect()
        torch.cuda.empty_cache()

## File Versioning check

In [None]:
def get_next_versioned_filename(filepath):
    """Return a path that does not collide with an existing file.

    If ``filepath`` is free it is returned unchanged. Otherwise ``_v1``,
    ``_v2``, ... is inserted before the extension until an unused name is
    found in the same directory.
    """
    if os.path.exists(filepath):
        folder, leaf = os.path.split(filepath)
        stem, suffix = os.path.splitext(leaf)

        # Probe _v1, _v2, ... and stop at the first name that is not taken
        counter = 1
        candidate = os.path.join(folder, f"{stem}_v{counter}{suffix}")
        while os.path.exists(candidate):
            counter += 1
            candidate = os.path.join(folder, f"{stem}_v{counter}{suffix}")
        return candidate

    return filepath

## Save image description to log file

In [None]:
def save_image_description(image_path, description):
    """Append an image description to the daily log next to the image.

    The log file is named ``log_<YYYY-MM-DD>.txt`` (today's date) and lives in
    the same directory as ``image_path``. Entries are appended, so the file
    accumulates one entry per described image.

    Args:
        image_path: Path of the image the description belongs to.
        description: Text to record for that image.
    """
    # Get today's date
    today = datetime.now().strftime("%Y-%m-%d")

    # Create a log file name based on the current date
    log_file_name = f"log_{today}.txt"
    log_file_path = os.path.join(os.path.dirname(image_path), log_file_name)

    # Get the base name of the image file without extension
    base_name = os.path.splitext(os.path.basename(image_path))[0]

    # Prepare the log entry
    log_entry = f"Image: {base_name}\nDescription: {description}\n\n"

    # Write with an explicit UTF-8 encoding: model output may contain
    # non-ASCII characters that the platform default encoding cannot represent.
    with open(log_file_path, 'a', encoding='utf-8') as log_file:
        log_file.write(log_entry)


## Get last image written

In [None]:
def get_last_png_written(directory='./recall/'):
    """Return the most recently modified ``.png`` file in ``directory``.

    Args:
        directory: Folder to search. Defaults to ``'./recall/'``.

    Returns:
        The path of the newest ``.png`` file (by modification time), or
        ``None`` when the folder contains no ``.png`` files.
    """
    # Use glob to find all .png files in the directory
    png_files = glob.glob(os.path.join(directory, '*.png'))

    # `default=None` covers the empty case without a separate branch
    return max(png_files, key=os.path.getmtime, default=None)

## Check image difference

In [None]:
from PIL import Image, ImageChops

def are_images_different(image_path1, image_path2):
    """Tell whether two image files differ.

    Args:
        image_path1: Path of the first image, or ``None`` if there is none.
        image_path2: Path of the second image, or ``None`` if there is none.

    Returns:
        ``True`` when either path is ``None``, when the images differ in size
        or mode, or when any pixel differs; ``False`` otherwise.
    """
    # Nothing to compare against (e.g. the very first capture): treat as different
    if image_path1 is None or image_path2 is None:
        return True

    # Context managers make sure both files are closed again
    with Image.open(image_path1) as img1, Image.open(image_path2) as img2:
        # Images that differ in size or mode cannot be pixel-identical
        if img1.size != img2.size or img1.mode != img2.mode:
            return True

        # Compare the images using ImageChops.difference
        diff = ImageChops.difference(img1, img2)

        # Check each band separately: for images with an alpha channel,
        # getbbox() on the whole image may look at alpha only and would then
        # miss differences in the colour channels.
        return any(band.getbbox() is not None for band in diff.split())


## Recall functionality combined

In [None]:
def run_recall():
    """Capture the screen and, if it changed, describe and log the capture.

    Steps:
      1. Look up the newest PNG already in the ``recall`` directory.
      2. Grab the whole screen and save it as
         ``recall/screenshot_<YYYY-MM-DD_HH-MM-SS>.png`` (versioned if that
         name is already taken).
      3. If there is no earlier capture, or the new one differs from it, get
         a description from the vision model, append it to the daily log and
         store it in the image's EXIF ``UserComment`` tag.
      4. Otherwise delete the new file to avoid duplicates.

    Returns:
        The path the screenshot was written to. When the capture was
        discarded as a duplicate, the file at that path no longer exists.
    """
    # Look up the previous capture BEFORE saving the new one; afterwards the
    # new file itself would be the most recently written PNG.
    prev_img = get_last_png_written()

    # The first run may start without a recall directory
    os.makedirs("recall", exist_ok=True)

    # Capture the entire screen
    screenshot = ImageGrab.grab()
    try:
        datetime_stamp = time.strftime("%Y-%m-%d_%H-%M-%S")
        filename = f"recall/screenshot_{datetime_stamp}.png"
        new_path = get_next_versioned_filename(filename)
        screenshot.save(new_path)

        # Check if the last screenshot and the latest screenshot are different.
        # With no previous screenshot there is nothing to compare against, so
        # the new capture is always kept.
        if prev_img is None or are_images_different(prev_img, new_path):
            # Get image description from PHI3 Vision model
            img_desc = Image_desc(new_path)

            # Define EXIF metadata carrying the description
            exif_dict = {
                "0th": {
                },
                "Exif": {
                    piexif.ExifIFD.UserComment: img_desc.encode('utf-8')
                }
            }
            # Convert the dictionary to the binary form expected by save()
            exif_bytes = piexif.dump(exif_dict)

            # Write image path and description to a log file
            save_image_description(new_path, img_desc)

            # Re-save the capture with the description embedded
            screenshot.save(new_path, exif=exif_bytes)
        else:
            # Nothing has changed: delete the new screenshot to avoid duplicates
            os.remove(new_path)
    finally:
        # Release the screenshot on every path, including errors
        screenshot.close()
    return new_path

In [None]:
#!pip install pillow piexif


## Start Falcon Recall

In [None]:
# 1 week = 10080 minutes
minutes = 10080
# Pause between captures. 60 seconds gives one capture per minute, so
# `minutes` iterations span roughly one week (plus the time each capture takes).
CAPTURE_INTERVAL_SECONDS = 60
for _ in range(minutes):
    run_recall()
    # Take a screenshot every 1 minute
    time.sleep(CAPTURE_INTERVAL_SECONDS)

## Check the progress by looking in the 'recall' directory.
- It will contain a log file for each day.
- The images are saved with a date-time stamp in the file name.
- The log files can then be searched however you like:
  - Basic Ctrl+F
  - Loaded into a database
  - Embedded into a local Retrieval-Augmented Generation solution


## Check Image metadata

In [None]:

# Open the image (replace the placeholder with a real file name from ./recall)
image_path = "./recall/<image name>"
with Image.open(image_path) as image:
    # Images saved without a description carry no 'exif' entry at all
    raw_exif = image.info.get('exif')

# Extract EXIF data; fall back to an empty Exif section when none is stored
exif_data = piexif.load(raw_exif) if raw_exif else {'Exif': {}}

# Extract and print the UserComment tag
user_comment_tag = piexif.ExifIFD.UserComment

if user_comment_tag in exif_data['Exif']:
    user_comment = exif_data['Exif'][user_comment_tag]
    # Decode the UserComment from bytes to string
    user_comment_str = user_comment.decode('utf-8')
    print(f"UserComment: {user_comment_str}")
else:
    print("UserComment tag not found.")
