In [1]:
!pip install opencv-python pandas openpyxl tensorflow


Collecting tensorflow
  Downloading tensorflow-2.15.0-cp311-cp311-win_amd64.whl.metadata (3.6 kB)
Collecting tensorflow-intel==2.15.0 (from tensorflow)
  Downloading tensorflow_intel-2.15.0-cp311-cp311-win_amd64.whl.metadata (5.1 kB)
Collecting absl-py>=1.0.0 (from tensorflow-intel==2.15.0->tensorflow)
  Downloading absl_py-2.1.0-py3-none-any.whl.metadata (2.3 kB)
Collecting astunparse>=1.6.0 (from tensorflow-intel==2.15.0->tensorflow)
  Downloading astunparse-1.6.3-py2.py3-none-any.whl (12 kB)
Collecting flatbuffers>=23.5.26 (from tensorflow-intel==2.15.0->tensorflow)
  Downloading flatbuffers-23.5.26-py2.py3-none-any.whl.metadata (850 bytes)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow-intel==2.15.0->tensorflow)
  Downloading gast-0.5.4-py3-none-any.whl (19 kB)
Collecting google-pasta>=0.1.1 (from tensorflow-intel==2.15.0->tensorflow)
  Downloading google_pasta-0.2.0-py3-none-any.whl (57 kB)
     ---------------------------------------- 0.0/57.5 kB ? eta -:--:--
  

In [2]:
import cv2
import pandas as pd
import os
from openpyxl import load_workbook
from shutil import copyfile
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model
from sklearn.metrics.pairwise import cosine_similarity

def preprocess_image(img_path):
    # Load and preprocess the image for ResNet50
    img = image.load_img(img_path, target_size=(224, 224))
    img_array = image.img_to_array(img)
    img_array = preprocess_input(img_array)
    img_array = img_array.reshape((1, *img_array.shape))
    return img_array

def compute_cosine_similarity(img_array1, img_array2):
    # Compute cosine similarity between two image arrays
    return cosine_similarity(img_array1, img_array2)[0][0]

def group_sheets(workbook_path):
    # Load ResNet50 model
    base_model = ResNet50(weights='imagenet')
    model = Model(inputs=base_model.input, outputs=base_model.get_layer('avg_pool').output)
    
    # Load Excel workbook
    workbook = load_workbook(workbook_path)
    
    # Create groups for sheets
    data_sheets = []
    image_sheets = {}
    
    # Iterate through sheets and classify them
    for sheet_name in workbook.sheetnames:
        sheet = workbook[sheet_name]
        
        # Check if the sheet contains images
        has_images = any(isinstance(cell.value, str) and "image" in cell.value.lower() for row in sheet.iter_rows() for cell in row)
        
        if has_images:
            # Compare the first image in each image sheet
            if image_sheets:
                img1_path = list(image_sheets.values())[0]
                img2_path = os.path.join("temp_images", sheet_name + "_temp_image.png")
                cv2.imwrite(img2_path, sheet["A1"].value)
                
                img_array1 = preprocess_image(img1_path)
                img_array2 = preprocess_image(img2_path)
                
                similarity = compute_cosine_similarity(img_array1, img_array2)
                
                if similarity > 0.8:  # Adjust the threshold based on your needs
                    image_sheets[sheet_name] = img2_path
                else:
                    data_sheets.append(sheet_name)
                    
                os.remove(img2_path)
            else:
                image_sheets[sheet_name] = os.path.join("temp_images", sheet_name + "_temp_image.png")
                cv2.imwrite(image_sheets[sheet_name], sheet["A1"].value)
        else:
            data_sheets.append(sheet_name)
    
    # Create separate workbooks for each group
    for sheets, group_name in zip([data_sheets, list(image_sheets.keys())], ["Data", "Image"]):
        if sheets:
            new_workbook_path = f"{workbook_path.split('.')[0]}_{group_name}.xlsx"
            new_workbook = pd.ExcelWriter(new_workbook_path, engine='openpyxl')
            
            for sheet_name in sheets:
                sheet = workbook[sheet_name]
                sheet.to_excel(new_workbook, sheet_name=sheet_name, index=False)
            
            new_workbook.save()
    
    workbook.close()

# Example usage
input_excel_path = "C:/Users/Shreshtha/Downloads/Project UHC/Image segregation.xlsx"
group_sheets(input_excel_path)





Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels.h5


AttributeError: 'Worksheet' object has no attribute 'to_excel'

In [3]:
import cv2
import pandas as pd
import os
from openpyxl import load_workbook
from shutil import copyfile
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model
from sklearn.metrics.pairwise import cosine_similarity
from PIL import Image

def preprocess_image(img_path):
    # Load and preprocess the image for ResNet50
    img = image.load_img(img_path, target_size=(224, 224))
    img_array = image.img_to_array(img)
    img_array = preprocess_input(img_array)
    img_array = img_array.reshape((1, *img_array.shape))
    return img_array

def compute_cosine_similarity(img_array1, img_array2):
    # Compute cosine similarity between two image arrays
    return cosine_similarity(img_array1, img_array2)[0][0]

def hash_image(image_path):
    # Use imagehash library to compute hash for an image
    return str(imagehash.average_hash(Image.open(image_path)))

def compare_hashes(hash1, hash2):
    # Compare two image hashes
    return hash1 - hash2

def group_sheets(workbook_path):
    # Load ResNet50 model
    base_model = ResNet50(weights='imagenet')
    model = Model(inputs=base_model.input, outputs=base_model.get_layer('avg_pool').output)
    
    # Load Excel workbook
    workbook = load_workbook(workbook_path)
    
    # Create groups for sheets
    data_sheets = []
    image_sheets = {}
    
    # Iterate through sheets and classify them
    for sheet_name in workbook.sheetnames:
        sheet = workbook[sheet_name]
        
        # Check if the sheet contains images
        has_images = any(isinstance(cell.value, str) and "image" in cell.value.lower() for row in sheet.iter_rows() for cell in row)
        
        if has_images:
            # Compare the first image in each image sheet
            if image_sheets:
                img1_path = list(image_sheets.values())[0]
                img2_path = os.path.join("temp_images", sheet_name + "_temp_image.png")
                cv2.imwrite(img2_path, sheet["A1"].value)
                
                img_array1 = preprocess_image(img1_path)
                img_array2 = preprocess_image(img2_path)
                
                similarity = compute_cosine_similarity(img_array1, img_array2)
                
                if similarity > 0.8:  # Adjust the threshold based on your needs
                    image_sheets[sheet_name] = img2_path
                else:
                    data_sheets.append(sheet_name)
                    
                os.remove(img2_path)
            else:
                image_sheets[sheet_name] = os.path.join("temp_images", sheet_name + "_temp_image.png")
                cv2.imwrite(image_sheets[sheet_name], sheet["A1"].value)
        else:
            data_sheets.append(sheet_name)
    
    # Create separate workbooks for each group
    for sheets, group_name in zip([data_sheets, list(image_sheets.keys())], ["Data", "Image"]):
        if sheets:
            new_workbook_path = f"{workbook_path.split('.')[0]}_{group_name}.xlsx"
            new_workbook = pd.ExcelWriter(new_workbook_path, engine='openpyxl')
            
            for sheet_name in sheets:
                # Copy the entire sheet to a new workbook
                new_sheet = new_workbook.book.create_sheet(sheet_name)
                for row in workbook[sheet_name].iter_rows():
                    new_sheet.append([cell.value for cell in row])
            
            # Remove the default empty sheet created by pd.ExcelWriter
            new_workbook.book.remove(new_workbook.book.worksheets[0])
            
            new_workbook.save()

    workbook.close()

# Example usage
input_excel_path = "C:/Users/Shreshtha/Downloads/Project UHC/Image segregation.xlsx"
group_sheets(input_excel_path)


AttributeError: 'OpenpyxlWriter' object has no attribute 'save'

In [4]:
import cv2
import pandas as pd
import os
from openpyxl import load_workbook
from shutil import copyfile
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model
from sklearn.metrics.pairwise import cosine_similarity
from PIL import Image

def preprocess_image(img_path):
    # Load and preprocess the image for ResNet50
    img = image.load_img(img_path, target_size=(224, 224))
    img_array = image.img_to_array(img)
    img_array = preprocess_input(img_array)
    img_array = img_array.reshape((1, *img_array.shape))
    return img_array

def compute_cosine_similarity(img_array1, img_array2):
    # Compute cosine similarity between two image arrays
    return cosine_similarity(img_array1, img_array2)[0][0]

def hash_image(image_path):
    # Use imagehash library to compute hash for an image
    return str(imagehash.average_hash(Image.open(image_path)))

def compare_hashes(hash1, hash2):
    # Compare two image hashes
    return hash1 - hash2

def group_sheets(workbook_path):
    # Load ResNet50 model
    base_model = ResNet50(weights='imagenet')
    model = Model(inputs=base_model.input, outputs=base_model.get_layer('avg_pool').output)
    
    # Load Excel workbook
    workbook = load_workbook(workbook_path)
    
    # Create groups for sheets
    data_sheets = []
    image_sheets = {}
    
    # Iterate through sheets and classify them
    for sheet_name in workbook.sheetnames:
        sheet = workbook[sheet_name]
        
        # Check if the sheet contains images
        has_images = any(isinstance(cell.value, str) and "image" in cell.value.lower() for row in sheet.iter_rows() for cell in row)
        
        if has_images:
            # Compare the first image in each image sheet
            if image_sheets:
                img1_path = list(image_sheets.values())[0]
                img2_path = os.path.join("temp_images", sheet_name + "_temp_image.png")
                cv2.imwrite(img2_path, sheet["A1"].value)
                
                img_array1 = preprocess_image(img1_path)
                img_array2 = preprocess_image(img2_path)
                
                similarity = compute_cosine_similarity(img_array1, img_array2)
                
                if similarity > 0.8:  # Adjust the threshold based on your needs
                    image_sheets[sheet_name] = img2_path
                else:
                    data_sheets.append(sheet_name)
                    
                os.remove(img2_path)
            else:
                image_sheets[sheet_name] = os.path.join("temp_images", sheet_name + "_temp_image.png")
                cv2.imwrite(image_sheets[sheet_name], sheet["A1"].value)
        else:
            data_sheets.append(sheet_name)
    
    # Create separate workbooks for each group
    for sheets, group_name in zip([data_sheets, list(image_sheets.keys())], ["Data", "Image"]):
        if sheets:
            new_workbook_path = f"{workbook_path.split('.')[0]}_{group_name}.xlsx"
            new_workbook = pd.ExcelWriter(new_workbook_path, engine='openpyxl')
            
            for sheet_name in sheets:
                # Copy the entire sheet to a new workbook
                new_sheet = new_workbook.book.create_sheet(sheet_name)
                for row in workbook[sheet_name].iter_rows():
                    new_sheet.append([cell.value for cell in row])
            
            # Remove the default empty sheet created by pd.ExcelWriter
            new_workbook.book.remove(new_workbook.book.worksheets[0])
            
            new_workbook.save()  # Corrected line to save the Excel file

    workbook.close()

# Example usage
input_excel_path = "C:/Users/Shreshtha/Downloads/Project UHC/Image segregation.xlsx"
group_sheets(input_excel_path)


AttributeError: 'OpenpyxlWriter' object has no attribute 'save'

In [5]:
import cv2
import pandas as pd
import os
from openpyxl import load_workbook
from shutil import copyfile
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model
from sklearn.metrics.pairwise import cosine_similarity
from PIL import Image

def preprocess_image(img_path):
    # Load and preprocess the image for ResNet50
    img = image.load_img(img_path, target_size=(224, 224))
    img_array = image.img_to_array(img)
    img_array = preprocess_input(img_array)
    img_array = img_array.reshape((1, *img_array.shape))
    return img_array

def compute_cosine_similarity(img_array1, img_array2):
    # Compute cosine similarity between two image arrays
    return cosine_similarity(img_array1, img_array2)[0][0]

def hash_image(image_path):
    # Use imagehash library to compute hash for an image
    return str(imagehash.average_hash(Image.open(image_path)))

def compare_hashes(hash1, hash2):
    # Compare two image hashes
    return hash1 - hash2

def group_sheets(workbook_path):
    # Load ResNet50 model
    base_model = ResNet50(weights='imagenet')
    model = Model(inputs=base_model.input, outputs=base_model.get_layer('avg_pool').output)
    
    # Load Excel workbook
    workbook = load_workbook(workbook_path)
    
    # Create groups for sheets
    data_sheets = []
    image_sheets = {}
    
    # Iterate through sheets and classify them
    for sheet_name in workbook.sheetnames:
        sheet = workbook[sheet_name]
        
        # Check if the sheet contains images
        has_images = any(isinstance(cell.value, str) and "image" in cell.value.lower() for row in sheet.iter_rows() for cell in row)
        
        if has_images:
            # Compare the first image in each image sheet
            if image_sheets:
                img1_path = list(image_sheets.values())[0]
                img2_path = os.path.join("temp_images", sheet_name + "_temp_image.png")
                cv2.imwrite(img2_path, sheet["A1"].value)
                
                img_array1 = preprocess_image(img1_path)
                img_array2 = preprocess_image(img2_path)
                
                similarity = compute_cosine_similarity(img_array1, img_array2)
                
                if similarity > 0.8:  # Adjust the threshold based on your needs
                    image_sheets[sheet_name] = img2_path
                else:
                    data_sheets.append(sheet_name)
                    
                os.remove(img2_path)
            else:
                image_sheets[sheet_name] = os.path.join("temp_images", sheet_name + "_temp_image.png")
                cv2.imwrite(image_sheets[sheet_name], sheet["A1"].value)
        else:
            data_sheets.append(sheet_name)
    
    # Create separate workbooks for each group
    for sheets, group_name in zip([data_sheets, list(image_sheets.keys())], ["Data", "Image"]):
        if sheets:
            new_workbook_path = f"{workbook_path.split('.')[0]}_{group_name}.xlsx"
            new_workbook = load_workbook(new_workbook_path)

            for sheet_name in sheets:
                # Copy the entire sheet to a new workbook
                new_sheet = new_workbook.create_sheet(sheet_name)
                for row in workbook[sheet_name].iter_rows():
                    new_sheet.append([cell.value for cell in row])

            # Remove the default empty sheet created by load_workbook
            if 'Sheet' in new_workbook.sheetnames:
                new_workbook.remove(new_workbook['Sheet'])

            new_workbook.save(new_workbook_path)

    workbook.close()

# Example usage
input_excel_path = "C:/Users/Shreshtha/Downloads/Project UHC/Image segregation.xlsx"
group_sheets(input_excel_path)


BadZipFile: File is not a zip file

In [6]:
import cv2
import pandas as pd
import os
from shutil import copyfile
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model
from sklearn.metrics.pairwise import cosine_similarity
from PIL import Image

def preprocess_image(img_path):
    # Load and preprocess the image for ResNet50
    img = image.load_img(img_path, target_size=(224, 224))
    img_array = image.img_to_array(img)
    img_array = preprocess_input(img_array)
    img_array = img_array.reshape((1, *img_array.shape))
    return img_array

def compute_cosine_similarity(img_array1, img_array2):
    # Compute cosine similarity between two image arrays
    return cosine_similarity(img_array1, img_array2)[0][0]

def hash_image(image_path):
    # Use imagehash library to compute hash for an image
    return str(imagehash.average_hash(Image.open(image_path)))

def compare_hashes(hash1, hash2):
    # Compare two image hashes
    return hash1 - hash2

def group_sheets(workbook_path):
    # Load Excel workbook using pd.read_excel
    workbook = pd.read_excel(workbook_path, sheet_name=None)

    # Create groups for sheets
    data_sheets = []
    image_sheets = {}

    # Iterate through sheets and classify them
    for sheet_name, sheet_df in workbook.items():
        # Check if the sheet contains images
        has_images = any(isinstance(cell, str) and "image" in cell.lower() for col in sheet_df.columns for cell in sheet_df[col])

        if has_images:
            # Your existing logic for handling images
            pass
        else:
            data_sheets.append(sheet_name)

    # Create separate Data and Image workbooks (pandas ExcelWriter)
    with pd.ExcelWriter('output_data.xlsx') as writer_data, pd.ExcelWriter('output_image.xlsx') as writer_image:
        for sheet_name, sheet_df in workbook.items():
            if sheet_name in data_sheets:
                sheet_df.to_excel(writer_data, sheet_name=sheet_name, index=False)
            else:
                sheet_df.to_excel(writer_image, sheet_name=sheet_name, index=False)

# Example usage
input_excel_path = 'C:/Users/Shreshtha/Downloads/Project UHC/Image segregation.xlsx'
group_sheets(input_excel_path)
