In [1212]:
#!pip install ollama
#!pip install opencv-python
#!pip install -U matplotlib
#!pip install timm
#!pip install gTTS
#!pip install playsound
!pip install pyttsx3

Collecting pyttsx3
  Downloading pyttsx3-2.98-py3-none-any.whl.metadata (3.8 kB)
Collecting comtypes (from pyttsx3)
  Downloading comtypes-1.4.10-py3-none-any.whl.metadata (7.2 kB)
Collecting pypiwin32 (from pyttsx3)
  Downloading pypiwin32-223-py3-none-any.whl.metadata (236 bytes)
Downloading pyttsx3-2.98-py3-none-any.whl (34 kB)
Downloading comtypes-1.4.10-py3-none-any.whl (241 kB)
Downloading pypiwin32-223-py3-none-any.whl (1.7 kB)
Installing collected packages: pypiwin32, comtypes, pyttsx3
Successfully installed comtypes-1.4.10 pypiwin32-223 pyttsx3-2.98


In [1214]:
#Ollama
import ollama

#MiDas Model PyTorch
import cv2
import torch
import urllib.request

#Matplotlib
import matplotlib.pyplot as plt
import statistics

#Numpy
import numpy as np
from scipy import ndimage

#TTS
from gtts import gTTS
import os
import playsound
import datetime 
import pyttsx3

In [4]:
#Select Model
model_type = "DPT_Large"     # MiDaS v3 - Large     (highest accuracy, slowest inference speed)
#model_type = "DPT_Hybrid"   # MiDaS v3 - Hybrid    (medium accuracy, medium inference speed)
#model_type = "MiDaS_small"  # MiDaS v2.1 - Small   (lowest accuracy, highest inference speed)

#Load Model
midas = torch.hub.load("intel-isl/MiDaS", model_type)

#Send to GPU when one is available, otherwise fall back to CPU.
#Inputs are moved with `.to(device)` before inference, so they follow this choice.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
midas.to(device)
#Inference only: switch the model to evaluation mode.
midas.eval()

Using cache found in C:\Users\canad/.cache\torch\hub\intel-isl_MiDaS_master


DPTDepthModel(
  (pretrained): Module(
    (model): VisionTransformer(
      (patch_embed): PatchEmbed(
        (proj): Conv2d(3, 1024, kernel_size=(16, 16), stride=(16, 16))
        (norm): Identity()
      )
      (pos_drop): Dropout(p=0.0, inplace=False)
      (patch_drop): Identity()
      (norm_pre): Identity()
      (blocks): Sequential(
        (0): Block(
          (norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
          (attn): Attention(
            (qkv): Linear(in_features=1024, out_features=3072, bias=True)
            (q_norm): Identity()
            (k_norm): Identity()
            (attn_drop): Dropout(p=0.0, inplace=False)
            (proj): Linear(in_features=1024, out_features=1024, bias=True)
            (proj_drop): Dropout(p=0.0, inplace=False)
          )
          (ls1): Identity()
          (drop_path1): Identity()
          (norm2): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear(in_featur

In [5]:
#Load Transformation/Resize
midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")

# Both DPT variants use the DPT preprocessing pipeline; any other model type
# falls back to the small-model pipeline.
transform = (
    midas_transforms.dpt_transform
    if model_type in ("DPT_Large", "DPT_Hybrid")
    else midas_transforms.small_transform
)

Using cache found in C:\Users\canad/.cache\torch\hub\intel-isl_MiDaS_master


In [6]:
#Create Custom Guide Model
# The system prompt is passed inline; the earlier approach of opening
# "Modelfile.md" is no longer used.
personality = "Be concise. Limit replies to 1-2 sentences."
model = "makeUofT_guide_model"

# Kept as the last expression so the cell displays the creation status.
ollama.create(
    model=model,
    from_='llama3.2-vision',
    system=personality,
)

ProgressResponse(status='success', completed=None, total=None, digest=None)

In [1056]:
#Load Image and Apply Transformers
def process_image(img):
    """Run the MiDaS depth model on one stored frame and save the depth map.

    Args:
        img (str): Base file name (without the ``.jpg`` extension) of an image
            inside the ``ESP32-CAM-Images`` folder, e.g. ``"TestImage3"``.

    Raises:
        FileNotFoundError: If the image cannot be read from disk.

    Side effects:
        Renders the prediction with matplotlib and saves it as ``Depth_Map``
        in the working directory (matplotlib appends its default extension;
        ``create_cropped_img`` reads the result back as ``Depth_Map.png``).
    """
    # os.path.join yields the same path as before on Windows and a valid one elsewhere.
    image_path = os.path.join("ESP32-CAM-Images", f"{img}.jpg")
    frame_bgr = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising.
    if frame_bgr is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)

    input_batch = transform(frame_rgb).to(device)

    #Predict and Resize to Original Resolution
    with torch.no_grad():
        prediction = midas(input_batch)

        prediction = torch.nn.functional.interpolate(
            prediction.unsqueeze(1),
            size=frame_rgb.shape[:2],
            mode="bicubic",
            align_corners=True,
        ).squeeze()
    output = prediction.cpu().numpy()

    # Draw on a dedicated figure so a stale pyplot figure can never leak into the saved map.
    fig, ax = plt.subplots()
    ax.set_xticks([])
    ax.set_yticks([])
    ax.imshow(output)
    fig.savefig("Depth_Map", bbox_inches='tight', pad_inches=0)
    plt.close(fig)

In [1005]:
def crop_img(img, width_kept=50):
    """Crop the bottom-centre region of an image.

    Keeps the lower half of the rows and a vertical strip reaching
    ``width_kept`` pixels to either side of the horizontal centre. The strip
    is clamped to the image bounds, so an image narrower than
    ``2 * width_kept`` is returned at full width instead of wrapping around
    through negative slice indices.

    Args:
        img (numpy.ndarray): Image array with rows on axis 0 and columns on axis 1.
        width_kept (int | float): Half-width, in pixels, of the strip to keep.

    Returns:
        numpy.ndarray: The cropped region (a slice of ``img``).
    """
    height, width = img.shape[:2]
    center_x, center_y = width / 2, height / 2

    # Clamp horizontally: a negative start index would otherwise count from the right edge.
    left_x = max(0, int(center_x - width_kept))
    right_x = min(width, int(center_x + width_kept))

    # Vertically keep everything from the middle row down to the bottom.
    top_y = int(center_y)
    return img[top_y:height, left_x:right_x]

def create_cropped_img():
    """Crop the saved depth map to its bottom-centre region and save the result.

    Reads ``Depth_Map.png`` from the working directory, crops it with
    ``crop_img`` (default width) and saves the crop as ``Depth_Map_2``
    (matplotlib appends its default extension).

    Raises:
        FileNotFoundError: If ``Depth_Map.png`` cannot be read.
    """
    depth_map = cv2.imread("Depth_Map.png")
    # cv2.imread signals failure by returning None instead of raising.
    if depth_map is None:
        raise FileNotFoundError("Could not read image: Depth_Map.png")
    cropped = crop_img(depth_map)

    # NOTE(review): cv2.imread returns BGR while imshow interprets the array as RGB,
    # so the saved colours are channel-swapped. Left as-is because changing it
    # would alter the image that is_obstacle analyses.
    fig, ax = plt.subplots()
    ax.set_xticks([])
    ax.set_yticks([])
    ax.imshow(cropped)
    fig.savefig("Depth_Map_2", bbox_inches='tight', pad_inches=0)
    plt.close(fig)

In [1011]:
def is_path_clear(depth_img, gradient_threshold=10, edge_ratio_threshold=0.05):
    """
    Report whether a depth map varies smoothly, which is taken to mean nothing is in the way.

    The image is reduced to one channel, its Sobel gradient magnitude is computed,
    and the share of pixels whose magnitude exceeds ``gradient_threshold`` is
    compared against ``edge_ratio_threshold``.

    Args:
        depth_img (numpy.ndarray): Depth map as loaded by cv2.imread; either a
                                   single-channel or a 3-channel BGR image.
        gradient_threshold (float): Gradient magnitude above which a pixel counts as an edge.
        edge_ratio_threshold (float): Largest fraction of edge pixels still regarded as a clear path.

    Returns:
        bool: True when the edge fraction does not exceed the threshold (path clear),
              False when it does (obstacle).
    """
    # Reduce 3-channel input to grayscale; otherwise work on a copy of the input.
    is_colour = len(depth_img.shape) == 3
    gray = cv2.cvtColor(depth_img, cv2.COLOR_BGR2GRAY) if is_colour else depth_img.copy()

    # First-order Sobel derivatives along x and y (3x3 kernel, float64 output).
    sobel_dx = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
    sobel_dy = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)

    # Per-pixel Euclidean norm of the two derivatives.
    magnitude = np.sqrt(np.square(sobel_dx) + np.square(sobel_dy))

    # Fraction of pixels that sit on an abrupt change.
    strong_edges = magnitude > gradient_threshold
    edge_ratio = np.sum(strong_edges) / gray.size

    # Clear unless the edge fraction is strictly above the threshold.
    return not (edge_ratio > edge_ratio_threshold)

In [1077]:
def is_obstacle(img):
    """Return True when the named image shows an obstacle ahead, False otherwise.

    Runs the whole pipeline for one image: ``process_image`` writes the depth
    map, ``create_cropped_img`` writes the cropped version, and that crop is
    read back and checked with ``is_path_clear``.

    Args:
        img (str): Image base name, passed straight through to ``process_image``.
    """
    process_image(img)
    create_cropped_img()
    cropped_depth = cv2.imread("Depth_Map_2.png")
    path_clear = is_path_clear(cropped_depth, gradient_threshold=8.5, edge_ratio_threshold=0.045)
    # An obstacle is simply the absence of a clear path.
    return not path_clear

#Test Images
# Accuracy check for is_obstacle, currently disabled by wrapping it in a string literal.
# When enabled, it runs is_obstacle on TestImage0..TestImage12, compares each result
# with the expected value in state_list, and prints the number of matches.
# Because the string is the last expression of the cell, Jupyter echoes it as the cell output.
"""
state_list = [True,False,False,True,True,True,True,True,True,False,False,False,False]
image_list = [[f"TestImage{i}",state_list[i]] for i in range(len(state_list))]
counter = 0
for img in image_list:
    detection = is_obstacle(img[0])
    print(f"Comparing {img[0]}")
    if detection == img[1]:
        counter += 1
        print(f"Expected: {img[1]} - Recieved:{detection}")
    else:
        print(f"Expected: {img[1]} - Recieved:{detection}")
print(counter)
"""

'\nstate_list = [True,False,False,True,True,True,True,True,True,False,False,False,False]\nimage_list = [[f"TestImage{i}",state_list[i]] for i in range(len(state_list))]\ncounter = 0\nfor img in image_list:\n    detection = is_obstacle(img[0])\n    print(f"Comparing {img[0]}")\n    if detection == img[1]:\n        counter += 1\n        print(f"Expected: {img[1]} - Recieved:{detection}")\n    else:\n        print(f"Expected: {img[1]} - Recieved:{detection}")\nprint(counter)\n'

In [1242]:
def speak_text(mytext):
    """Speak ``mytext`` aloud using a newly initialised pyttsx3 engine.

    Args:
        mytext (str): The text to queue and play via ``runAndWait``.
    """
    tts = pyttsx3.init()
    tts.say(mytext)
    tts.runAndWait()

In [1244]:
# Classify one test frame and announce the verdict out loud.
conclusion_text = "OBSTACLE WARNING" if is_obstacle("TestImage3") else "PATHWAY CLEAR"

speak_text(conclusion_text)

In [521]:
#Create Response
# Single user turn: the question plus the saved depth map as an image attachment.
guide_question = {
    'role': 'user',
    'content': 'I have provided a depth map of how near objects are from my position. If I walk directly forward one meter, will I run into an object? Reply yes or no.',
    'images': ["Depth_Map.png"],
}

response = ollama.chat(model='makeUofT_guide_model', messages=[guide_question])

print(response)

model='makeUofT_guide_model' created_at='2025-02-16T10:21:31.0679604Z' done=True done_reason='stop' total_duration=265452295600 load_duration=65779368400 prompt_eval_count=64 prompt_eval_duration=184064000000 eval_count=3 eval_duration=12889000000 message=Message(role='assistant', content='No.', images=None, tool_calls=None)
