In [None]:
import requests
from bs4 import BeautifulSoup
import os
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
import time

In [None]:
import tensorflow as tf
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

In [None]:
# Initialize the Chrome webdriver
options = Options()
options.add_argument("--headless")  # Run Chrome in headless mode
driver = webdriver.Chrome(options=options)

In [None]:
def download_images(base_url, folder_path, total_pages=1):
    # Create the folder if it doesn't exist
    if not os.path.exists(folder_path):
        os.makedirs(folder_path)
    
    img_count = 1

    # Initialize the Chrome webdriver
    options = Options()
    options.add_argument("--headless")  # Run Chrome in headless mode
    driver = webdriver.Chrome(options=options)

    try:
        for page in range(1, total_pages + 1):
            # Construct the URL for the current page
            url = f"{base_url}&page={page}"
            print(f"Scraping page: {page}")

            # Load the page
            driver.get(url)
            time.sleep(2)  # Add a delay to allow time for the page to load dynamically

            # Scroll down the page to load all images
            last_height = driver.execute_script("return document.body.scrollHeight")
            while True:
                # Scroll down to bottom
                driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

                # Wait to load page
                time.sleep(2)

                # Calculate new scroll height and compare with last scroll height
                new_height = driver.execute_script("return document.body.scrollHeight")
                if new_height == last_height:
                    break
                last_height = new_height

            # Extract the HTML content after all images have been loaded
            html_content = driver.page_source

            # Parse the HTML content
            soup = BeautifulSoup(html_content, 'html.parser')

            # Find the div with class 'mui-1kkefsa-gridContainer-root'
            grid_container = soup.find('div', class_='mui-1kkefsa-gridContainer-root')
            if not grid_container:
                print(f"No images found on page {page}")
                continue

            # Find all divs with the specified data-automation attribute
            divs = grid_container.find_all('div', {'data-automation': 'AssetGrids_GridItemContainer_div'})
            if not divs:
                print(f"No images found on page {page}")
                continue

            # Download and save each image
            for div in divs:
                picture = div.find('picture')
                if picture:
                    img_tag = picture.find('img')
                    if img_tag and img_tag['src']:
                        img_url = img_tag['src']
                        try:
                            img_data = requests.get(img_url).content
                            img_name = f"img{img_count}.jpg"
                            img_path = os.path.join(folder_path, img_name)
                            with open(img_path, 'wb') as f:
                                f.write(img_data)
                            print(f"Downloaded {img_name} from {img_url}")
                            img_count += 1
                        except Exception as e:
                            print(f"Failed to download image from {img_url}: {e}")
    finally:
        # Close the webdriver
        driver.quit()

In [None]:
# # Base URL of the website with the images (excluding the page parameter)
# base_url = "https://www.shutterstock.com/search/icu-doctor-patient?image_type=photo&mreleased=true&orientation=horizontal"
# # Folder path to save the images
# folder_path = "DoctorImages"

# # Total number of pages to scrape
# total_pages = 11

# # Call the function to download images
# download_images(base_url, folder_path, total_pages)

In [None]:
# Base URL of the website with the images (excluding the page parameter)
base_url = "https://www.shutterstock.com/search/doctor-and-patient-in-icu?mreleased=true"
# Folder path to save the images
folder_path = "DoctorImages"

# Total number of pages to scrape
total_pages = 13

# Call the function to download images
download_images(base_url, folder_path, total_pages)

In [None]:
# downloading labelImg

# !pip3 install labelImg
import labelImg
!labelImg

In [None]:
import json
import os

def convert_to_yolo_format(json_dir, images_dir, output_dir):
    os.makedirs(output_dir, exist_ok=True)
    
    for json_file in os.listdir(json_dir):
        # if json_file.endswith('.json'):
        with open(os.path.join(json_dir, json_file)) as f:
            data = json.load(f)
        
        image_filename = data['task']['data']['image'].split('=')[1].replace('%5C', '/')
        print(image_filename)
        image_path = os.path.join(image_filename)
        if not os.path.exists(image_path):
            print(f"Image {image_path} not found, skipping.")
            continue

        if 'result' not in data or not data['result']:
            print(f"image {image_path} has no annotations, skipping")
            continue

        width = data['result'][0]['original_width']
        height = data['result'][0]['original_height']

        yolo_annotations = []
        for annotation in data['result']:
            label = annotation['value']['rectanglelabels'][0]
            class_id = 0 if label == 'doctor' else 1  # Assuming 'doctor' is 0 and 'patient' is 1
            x_center = (annotation['value']['x'] + annotation['value']['width'] / 2) / 100
            y_center = (annotation['value']['y'] + annotation['value']['height'] / 2) / 100
            w = annotation['value']['width'] / 100
            h = annotation['value']['height'] / 100

            yolo_annotations.append(f"{class_id} {x_center} {y_center} {w} {h}")

        image_filename = data['task']['data']['image'].split('=')[1].replace('/', '%5C')
        yolo_file_path = output_dir + "/" + f"{os.path.splitext(image_filename)[0]}.txt"
        # yolo_file_path = os.path.join(output_dir, f"{os.path.splitext(image_filename)[0]}.txt")
        print(yolo_file_path)
        with open(yolo_file_path, 'w') as f:
            f.write("\n".join(yolo_annotations))
        
        # Copy image to output directory
        os.system(f"cp {image_path} {output_dir}")

json_dir = 'LabeledDoctor'
images_dir = 'DoctorImages'
output_dir = 'AnnotationsYolo'

convert_to_yolo_format(json_dir, images_dir, output_dir)


In [None]:
import os
import shutil
import random
import json

def split_dataset(json_dir, image_dir, output_dir, split_ratio=0.8):
    os.makedirs(os.path.join(output_dir, 'train', 'images'), exist_ok=True)
    os.makedirs(os.path.join(output_dir, 'train', 'labels'), exist_ok=True)
    os.makedirs(os.path.join(output_dir, 'test', 'images'), exist_ok=True)
    os.makedirs(os.path.join(output_dir, 'test', 'labels'), exist_ok=True)
    
    json_files = [f for f in os.listdir(json_dir)]
    # json_files = json_files.sort()
    print(json_files)
    random.shuffle(json_files)
    # print(len(json_files))
    
    split_point = int(len(json_files) * split_ratio)
    train_files = json_files[:split_point]
    test_files = json_files[split_point:]
    
    return train_files, test_files

json_dir = 'LabeledDoctor'
image_dir = 'ICU season 1'
output_dir = 'ICU_show_images'

train_files, test_files = split_dataset(json_dir, image_dir, output_dir)
print(train_files)
print(test_files)

In [None]:
import os
import json

def convert_to_yolo_format(json_files, json_dir, image_dir, output_dir):
    for json_file in json_files:
        with open(os.path.join(json_dir, json_file)) as f:
            data = json.load(f)
        
        image_filename = data['task']['data']['image'].split('=')[1]
        image_path = os.path.join(image_filename.replace('%5C', '/'))
        if not os.path.exists(image_path):
            print(f"Image {image_path} not found, skipping.")
            continue

        if 'result' not in data or not data['result']:
            print(f"Image {image_path} has no annotations, skipping.")
            continue

        width = data['result'][0]['original_width']
        height = data['result'][0]['original_height']

        yolo_annotations = []
        for annotation in data['result']:
            label = annotation['value']['rectanglelabels'][0]
            class_id = 0 if label == 'doctor' else 1  # Assuming 'doctor' is 0 and 'patient' is 1
            x_center = (annotation['value']['x'] + annotation['value']['width'] / 2) / 100
            y_center = (annotation['value']['y'] + annotation['value']['height'] / 2) / 100
            w = annotation['value']['width'] / 100
            h = annotation['value']['height'] / 100

            yolo_annotations.append(f"{class_id} {x_center} {y_center} {w} {h}")

        image_filename = data['task']['data']['image'].split('=')[1].replace('/', '%5C')
        yolo_file_path = os.path.join(output_dir, 'labels', f"{os.path.splitext(image_filename)[0]}.txt")
        with open(yolo_file_path, 'w') as f:
            f.write("\n".join(yolo_annotations))
        
        # Copy image to output directory
        shutil.copy(image_path, os.path.join(output_dir, 'images', image_filename))


json_dir = 'LabeledDoctor'
image_dir = 'DoctorImages'
output_dir = 'New_doctor_images'

# Process training data
train_output_dir = os.path.join(output_dir, 'train')
convert_to_yolo_format(train_files, json_dir, image_dir, train_output_dir)

# Process testing data
test_output_dir = os.path.join(output_dir, 'test')
convert_to_yolo_format(test_files, json_dir, image_dir, test_output_dir)

In [None]:
import os

data_path = 'New_doctor_images/data.yaml'

# Check if the data file exists
if os.path.exists(data_path):
    print(f"{data_path} exists.")
else:
    print(f"{data_path} does not exist.")

# Check if train and val directories exist
train_dir = 'New_doctor_images/train/images'
val_dir = 'New_doctor_images/test/images'

if os.path.exists(train_dir) and os.path.exists(val_dir):
    print("Train and validation directories exist.")
else:
    print("One or both directories do not exist.")

In [None]:
from ultralytics import YOLO
import os

# Define paths
data_path = 'C:/Mini Project/Intel Project/datasets/New_doctor_images/data.yaml'
print(f"Data path: {os.path.abspath(data_path)}")  # Print absolute path to data.yaml

# Create a YOLOv8 model instance
model = YOLO('yolov8n.pt')  # You can change 'n' to 's', 'm', 'l', or 'x' for larger models

# Train the model
model.train(data=data_path, epochs=50, imgsz=640, batch=8, name='yolov8_doctor_patient14')

# Save the model
model.save('yolov8_doctor_patient.pt')

In [None]:
import os
print("Current Working Directory:", os.getcwd())

In [None]:
from ultralytics import YOLO
import os

# Load the trained model
model = YOLO('C:/Mini Project/Intel Project/runs/detect/yolov8_doctor_patient142/weights/best.pt')

# Directory containing test images
test_images_dir = 'C:/Mini Project/Intel Project/datasets/New_doctor_images/test/images'

# List all test images
test_images = [os.path.join(test_images_dir, img) for img in os.listdir(test_images_dir) if img.endswith(('.jpg', '.jpeg', '.png'))]

# Run predictions
results = model.predict(source=test_images, conf=0.25, save=True, save_dir='predictions')

# Display the results
for result in results:
    print(result)  # This will print the details of each prediction
    result.show()  # This will display the image with predictions (requires GUI environment)

# Evaluate the model on the validation dataset
val_results = model.val()

# Print evaluation results
print(val_results)


In [1]:
from ultralytics import YOLO
import os

# Load the trained model
model = YOLO('C:/Mini Project/Intel Project/runs/detect/yolov8_doctor_patient142/weights/best.pt')

# Directory containing test images
test_images_dir = 'C:/Mini Project/Intel Project/Internet Images'

# List all test images
test_images = [os.path.join(test_images_dir, img) for img in os.listdir(test_images_dir) if img.endswith(('.jpg', '.jpeg', '.png'))]

# Run predictions
results = model.predict(source=test_images, conf=0.25, save=True, save_dir='ICU_predict1')

# Display the results
for result in results:
    print(result)  # This will print the details of each prediction
    result.show()  # This will display the image with predictions (requires GUI environment)

# Evaluate the model on the validation dataset
val_results = model.val()

# Print evaluation results
print(val_results)



0: 640x640 3 doctors, 529.9ms
1: 640x640 1 patient, 529.9ms
2: 640x640 3 doctors, 1 patient, 529.9ms
3: 640x640 1 doctor, 1 patient, 529.9ms
4: 640x640 2 doctors, 529.9ms
5: 640x640 1 doctor, 529.9ms
6: 640x640 (no detections), 529.9ms
7: 640x640 1 doctor, 1 patient, 529.9ms
8: 640x640 3 doctors, 529.9ms
9: 640x640 1 doctor, 1 patient, 529.9ms
10: 640x640 2 doctors, 1 patient, 529.9ms
11: 640x640 2 doctors, 529.9ms
12: 640x640 1 patient, 529.9ms
13: 640x640 1 patient, 529.9ms
14: 640x640 2 doctors, 1 patient, 529.9ms
15: 640x640 1 doctor, 1 patient, 529.9ms
16: 640x640 1 doctor, 529.9ms
17: 640x640 2 patients, 529.9ms
18: 640x640 1 doctor, 1 patient, 529.9ms
19: 640x640 4 doctors, 529.9ms
20: 640x640 (no detections), 529.9ms
Speed: 12.6ms preprocess, 529.9ms inference, 3.4ms postprocess per image at shape (1, 3, 640, 640)
Results saved to [1mruns\detect\predict5[0m
ultralytics.engine.results.Results object with attributes:

boxes: ultralytics.engine.results.Boxes object
keypoints: N

KeyboardInterrupt: 

In [3]:
# renaming all the files of icu folder
# Function to rename multiple files
import os

def renaming():   
    folder = "ICU season 1"
    for count, filename in enumerate(os.listdir(folder)):
        dst = f"icu {str(count)}.jpg"
        src =f"{folder}/{filename}"  # foldername/filename, if .py file is outside folder
        dst =f"{folder}/{dst}"
         
        # rename() function will
        # rename all the files
        os.rename(src, dst)

renaming()

In [17]:
import json
import os

def convert_to_yolo_format(json_dir, images_dir, output_dir):
    os.makedirs(output_dir, exist_ok=True)
    
    for json_file in os.listdir(json_dir):
        # if json_file.endswith('.json'):
        with open(os.path.join(json_dir, json_file)) as f:
            data = json.load(f)
        
        image_filename = data['task']['data']['image'].split('=')[1].replace('%5C', '/').replace('%20', ' ')
        find = image_filename.find('ICU season 1')
        # print(find)
        image_filename = image_filename[find:]
        print(image_filename)
        image_path = os.path.join(image_filename)
        if not os.path.exists(image_path):
            print(f"Image {image_path} not found, skipping.")
            continue

        if 'result' not in data or not data['result']:
            print(f"image {image_path} has no annotations, skipping")
            continue

        width = data['result'][0]['original_width']
        height = data['result'][0]['original_height']

        yolo_annotations = []
        for annotation in data['result']:
            label = annotation['value']['rectanglelabels'][0]
            if label == 'doctor':
                class_id = 0
            elif label == 'patient':
                class_id = 1
            else:
                class_id = 2
            # class_id = 0 if label == 'doctor' else 1  # Assuming 'doctor' is 0 and 'patient' is 1
            x_center = (annotation['value']['x'] + annotation['value']['width'] / 2) / 100
            y_center = (annotation['value']['y'] + annotation['value']['height'] / 2) / 100
            w = annotation['value']['width'] / 100
            h = annotation['value']['height'] / 100

            yolo_annotations.append(f"{class_id} {x_center} {y_center} {w} {h}")

        #image_filename = data['task']['data']['image'].split('=')[1].replace('/', '%5C').replace(' ', '%20')
        print(image_filename)
        find1 = image_filename.find("icu")
        find2 = image_filename.find('.')
        print(find1)
        print(find2)
        yolo_file_path = output_dir + "/" + image_filename[find1:find2] + ".txt"
        # yolo_file_path = os.path.join(output_dir, f"{os.path.splitext(image_filename)[0]}.txt")
        print(yolo_file_path)
        with open(yolo_file_path, 'w') as f:
            f.write("\n".join(yolo_annotations))
        
        # Copy image to output directory
        os.system(f"cp {image_path} {output_dir}")

json_dir = 'ICU show annotations'
images_dir = 'ICU season 1'
output_dir = 'AnnotationsYolo2'

convert_to_yolo_format(json_dir, images_dir, output_dir)


ICU season 1/icu 0.jpg
ICU season 1/icu 0.jpg
13
18
AnnotationsYolo2/icu 0.txt
ICU season 1/icu 1.jpg
ICU season 1/icu 1.jpg
13
18
AnnotationsYolo2/icu 1.txt
ICU season 1/icu 10.jpg
ICU season 1/icu 10.jpg
13
19
AnnotationsYolo2/icu 10.txt
ICU season 1/icu 100.jpg
ICU season 1/icu 100.jpg
13
20
AnnotationsYolo2/icu 100.txt
ICU season 1/icu 101.jpg
ICU season 1/icu 101.jpg
13
20
AnnotationsYolo2/icu 101.txt
ICU season 1/icu 102.jpg
ICU season 1/icu 102.jpg
13
20
AnnotationsYolo2/icu 102.txt
ICU season 1/icu 103.jpg
ICU season 1/icu 103.jpg
13
20
AnnotationsYolo2/icu 103.txt
ICU season 1/icu 104.jpg
ICU season 1/icu 104.jpg
13
20
AnnotationsYolo2/icu 104.txt
ICU season 1/icu 105.jpg
ICU season 1/icu 105.jpg
13
20
AnnotationsYolo2/icu 105.txt
ICU season 1/icu 106.jpg
ICU season 1/icu 106.jpg
13
20
AnnotationsYolo2/icu 106.txt
ICU season 1/icu 107.jpg
ICU season 1/icu 107.jpg
13
20
AnnotationsYolo2/icu 107.txt
ICU season 1/icu 108.jpg
ICU season 1/icu 108.jpg
13
20
AnnotationsYolo2/icu 108

In [39]:
import os
import shutil
import random
import json

def split_dataset(json_dir, image_dir, output_dir, split_ratio=0.8):
    os.makedirs(os.path.join(output_dir, 'train', 'images'), exist_ok=True)
    os.makedirs(os.path.join(output_dir, 'train', 'labels'), exist_ok=True)
    os.makedirs(os.path.join(output_dir, 'test', 'images'), exist_ok=True)
    os.makedirs(os.path.join(output_dir, 'test', 'labels'), exist_ok=True)
    
    json_files = [f for f in os.listdir(json_dir)]
    # json_files = json_files.sort()
    print(json_files)
    random.shuffle(json_files)
    # print(len(json_files))
    
    split_point = int(len(json_files) * split_ratio)
    train_files = json_files[:split_point]
    test_files = json_files[split_point:]
    
    return train_files, test_files

json_dir = 'ICU show annotations'
image_dir = 'ICU season 1'
output_dir = 'ICU_show_images'

train_files, test_files = split_dataset(json_dir, image_dir, output_dir)
print(train_files)
print(test_files)

['303', '304', '305', '306', '307', '308', '309', '310', '311', '312', '313', '314', '315', '316', '317', '318', '319', '320', '321', '322', '323', '324', '325', '326', '327', '328', '329', '330', '331', '332', '333', '334', '335', '336', '337', '338', '339', '340', '341', '342', '343', '344', '345', '346', '347', '348', '349', '350', '351', '352', '353', '354', '355', '356', '357', '358', '359', '360', '361', '362', '363', '364', '365', '366', '367', '368', '369', '370', '371', '372', '373', '374', '375', '376', '377', '378', '379', '380', '381', '382', '383', '384', '385', '386', '387', '388', '389', '390', '391', '392', '393', '394', '395', '396', '397', '398', '399', '400', '401', '402', '403', '404', '405', '406', '407', '408', '409', '410', '411', '412', '413', '414', '415', '416', '417', '418', '419', '420', '421', '422', '423', '424', '425', '426', '427', '428', '429', '430', '431', '432', '433', '434', '435', '436', '437', '438', '439', '440', '441', '442', '443', '444', '445'

In [50]:
import os
import json

def convert_to_yolo_format(json_files, json_dir, image_dir, output_dir):
    os.makedirs(output_dir, exist_ok=True)
    
    for json_file in json_files:
        # if json_file.endswith('.json'):
        print(json_file)
        with open(json_dir + "/" + json_file) as f:
            data = json.load(f)
        #print(data)
        
        image_filename = data['task']['data']['image'].split('=')[1].replace('%5C', '/').replace('%20', ' ')
        find = image_filename.find('ICU season 1')
        # print(find)
        image_filename = image_filename[find:]
        # print(image_filename)
        image_path = os.path.join(image_filename)
        if not os.path.exists(image_path):
            print(f"Image {image_path} not found, skipping.")
            continue

        if 'result' not in data or not data['result']:
            print(f"image {image_path} has no annotations, skipping")
            continue

        width = data['result'][0]['original_width']
        height = data['result'][0]['original_height']

        yolo_annotations = []
        for annotation in data['result']:
            label = annotation['value']['rectanglelabels'][0]
            if label == 'doctor':
                class_id = 0
            elif label == 'patient':
                class_id = 1
            else:
                class_id = 2
            # class_id = 0 if label == 'doctor' else 1  # Assuming 'doctor' is 0 and 'patient' is 1
            x_center = (annotation['value']['x'] + annotation['value']['width'] / 2) / 100
            y_center = (annotation['value']['y'] + annotation['value']['height'] / 2) / 100
            w = annotation['value']['width'] / 100
            h = annotation['value']['height'] / 100

            yolo_annotations.append(f"{class_id} {x_center} {y_center} {w} {h}")

        #image_filename = data['task']['data']['image'].split('=')[1].replace('/', '%5C').replace(' ', '%20')
        print(image_filename)
        find1 = image_filename.find("icu")
        find2 = image_filename.find('.')
        # print(find1)
        # print(find2)
        yolo_file_path = output_dir + "/labels/" + image_filename[find1:find2] + ".txt"
        #yolo_file_path = os.path.join(output_dir, f"{os.path.splitext(image_filename)[0]}.txt")
        print(yolo_file_path)
        with open(yolo_file_path, 'w') as f:
            f.write("\n".join(yolo_annotations))
        
        # Copy image to output directory
        print(image_path)
        shutil.copy(image_path, output_dir +  '/images/' +  image_filename[find1:find2] + ".jpg")


json_dir = 'ICU show annotations'
image_dir = 'ICU season 1'
output_dir = 'ICU_show_images'

# Process training data
train_output_dir = output_dir + "/train"
#print(train_files)
convert_to_yolo_format(train_files, json_dir, image_dir, train_output_dir)

# Process testing data
test_output_dir = output_dir + "/test"
#print(test_files)
convert_to_yolo_format(test_files, json_dir, image_dir, test_output_dir)

607
ICU season 1/icu 47.jpg
ICU_show_images/train/labels/icu 47.txt
ICU season 1/icu 47.jpg
311
ICU season 1/icu 105.jpg
ICU_show_images/train/labels/icu 105.txt
ICU season 1/icu 105.jpg
648
ICU season 1/icu 84.jpg
ICU_show_images/train/labels/icu 84.txt
ICU season 1/icu 84.jpg
652
ICU season 1/icu 88.jpg
ICU_show_images/train/labels/icu 88.txt
ICU season 1/icu 88.jpg
542
ICU season 1/icu 313.jpg
ICU_show_images/train/labels/icu 313.txt
ICU season 1/icu 313.jpg
394
ICU season 1/icu 180.jpg
ICU_show_images/train/labels/icu 180.txt
ICU season 1/icu 180.jpg
461
ICU season 1/icu 240.jpg
ICU_show_images/train/labels/icu 240.txt
ICU season 1/icu 240.jpg
370
ICU season 1/icu 159.jpg
ICU_show_images/train/labels/icu 159.txt
ICU season 1/icu 159.jpg
499
ICU season 1/icu 275.jpg
ICU_show_images/train/labels/icu 275.txt
ICU season 1/icu 275.jpg
605
ICU season 1/icu 45.jpg
ICU_show_images/train/labels/icu 45.txt
ICU season 1/icu 45.jpg
610
ICU season 1/icu 5.jpg
ICU_show_images/train/labels/icu 5.