# Rotate Text in image

The images from the slide scanner are rarely in an orientation where the text runs horizontally across the page. This is a script that aims to rotate the images to correct for this. 

--- 
### Import dependancies

In [1]:
import pytesseract as pytess
import numpy as np 
import skimage 
import scipy 
# import aicsimageio
import os 
import tifffile as tf
from PIL import Image
import tkinter as tk 
import pandas as pd
import matplotlib.pyplot as plt
from tkinter import filedialog
import napari

----
### Find four couners of the image

In [63]:
def corners_of_text(ordered):
    ''' '''
    # Initalise
    left_col = ordered[ordered['centroid-1'] < ordered['centroid-1'].min() + 25] 
    top_row = ordered[ordered['centroid-0'] < ordered['centroid-0'].min() + 25]
    bottom_row = ordered[ordered['centroid-1'] > ordered['centroid-1'].max() - 25]
    # Top left
    tl = left_col[left_col['centroid-0'] == left_col['centroid-0'].min()]
    # Bottom left
    bl = left_col[left_col['centroid-0'] == left_col['centroid-0'].max()]
    # Top Right
    tr = top_row[top_row['centroid-1'] == top_row['centroid-1'].max()]
    # Bottom Right
    br = bottom_row[bottom_row['centroid-0'] == bottom_row['centroid-0'].max()]
    # Put all the for points in the array. 
    four_corners = [np.array([tl['centroid-0'].iloc[0], tl['centroid-1'].iloc[0]]),
                    np.array([tr['centroid-0'].iloc[0], tr['centroid-1'].iloc[0]]), 
                    np.array([bl['centroid-0'].iloc[0], bl['centroid-1'].iloc[0]]), 
                    np.array([br['centroid-0'].iloc[0], br['centroid-1'].iloc[0]]) ]
    
    return(np.array(four_corners))

----
### Calculate Angle

In [52]:
def angle_calc(four_corners): 
    ''' ''' 
    print()
        
    # delta x 
    delta_x = four_corners[2][0] - four_corners[0][0]
    delta_y = four_corners[2][1] - four_corners[0][1]

    # angle 
    angle = 180 * ( np.arctan(delta_x / delta_y) / np.pi)

    return(angle)

----
### Get text from image


In [4]:
def read_text_from_im(image): 
    ''' '''
    # Perform OCR. 
    text = pytess.image_to_string(image)
    # Clean up the text from the OCR. 
    text = text.replace( chr(10), '_' )
    text = text.replace( ' ' , '_' )
    text = text.replace( '/', '_' )
    text = text.replace('=', '')
    text = text.replace('?', '2')

    return(text)

-----
### Find image letters

In [5]:
def get_letters_and_locs(label_im): 
    ''' '''
    # Initalise 
    props = {'label', 'area', 'centroid'}
    # 
    label = skimage.measure.label(image_data)
    # Find the properties of the letters. 
    label_props = pd.DataFrame( skimage.measure.regionprops_table(label, properties = props) )
    # Ignore the labels with an area smaller than 250 pixels.
    label_props = label_props[ label_props['area'] > 250 ]

    return(label, label_props)

---
### Get images folder

In [106]:
# # # Creates dialogue to ask directory
# # # Get the folder containing the image stack. 
root = tk.Tk()
root.attributes('-topmost', True)
root.withdraw() # Stops a second window opening
folder = filedialog.askdirectory(title = 'Select Stack file')

all_files = os.listdir(folder) 

files = []
for i in range( len(all_files) ):
    if all_files[i][-4:] == '.png': 
        files.append(all_files[i])

print(len(files))
folder

28


'N:/RCORBYN/User_Data/Current_Projects/20250703_Danis/MCL1/Labels'

---------
### Get SVS file folder

In [107]:
# Get into the slides folder
slides_folder = os.path.dirname(
            os.path.dirname( folder ) ) + '/Slide_Ims/'
print(slides_folder)
# List folders 
experiment_folders = os.listdir(slides_folder)
# Get the current working folder
current_folder = os.path.basename(
                os.path.dirname(folder) )
# SVS folder 
svs_folder = slides_folder + current_folder + '/'
# SVS files
svs_files = os.listdir(svs_folder)
print(svs_folder)

N:/RCORBYN/User_Data/Current_Projects/20250703_Danis/Slide_Ims/
N:/RCORBYN/User_Data/Current_Projects/20250703_Danis/Slide_Ims/MCL1/


----
# Get image data

In [108]:
# Initalise

results = []
pytess.pytesseract.tesseract_cmd = r'C:/Users/rcorbyn/AppData/Local/Programs/Tesseract-OCR/tesseract.exe'
# pytess.pytesseract.tesseract_cmd = r'C:/Program Files/Tesseract-OCR/tesseract.exe'

# fig, ax = plt.subplots(nrows = 10, ncols = 3, figsize = (30, 50))

# Loop around all files
# for file in files:
for i in range( len(files) ):
    # 
    file = files[i]
    # print(file)
    svs_of_interest = 0
    # Open the image
    image_data = np.array( Image.open(folder + '/' + file) )
    # Sum all channels
    image_data = np.sum( image_data, axis = 2 )
    # binery 
    image_data[image_data < np.max(image_data)] = 0
    image_data[image_data > 1] = 1
    # Segment just the lettes in the image. 
    letters, letter_props = get_letters_and_locs(image_data)
    
    # Find the leters at the corners of the label. 
    corners = corners_of_text(letter_props)
    # Find the angle of rotation
    angle = angle_calc(corners)
    
    # Rotate the image by the angle found above.
    if angle < -10:
        angle = 90 + angle
    elif angle > 10:
        angle = angle - 90
    # Rotate the image
    rotated_letters = scipy.ndimage.rotate(image_data, angle ) 

    # Segment just the lettes in the image. 
    letters_2, letter_props_2 = get_letters_and_locs(rotated_letters)
    # Find the leters at the corners of the label. 
    corners_2 = corners_of_text(letter_props_2)

    # # # Crop image 
    crop = rotated_letters[int(corners_2[0][0]) - 50 : int(corners_2[3][0])+50, 
                          int(corners_2[0][1]) - 50 : int(corners_2[3][1])+50]
    
    # 
    skelton = skimage.morphology.skeletonize(crop)
    skelton = skelton + np.roll(skelton, (0,1)) + np.roll(skelton, (0,-1)
                    ) + np.roll(skelton, (1,0))+ np.roll(skelton, (-1,0))
    # 
    label_text = read_text_from_im(skelton)
    #
    #####
    ## Rename file
    # First_underscore
    first_under = file.find('_')
    # last underscore 
    last_under = file.rfind('_')
    print(file[first_under+1:last_under])

    for j in range( len(svs_files) ):
        # 
        if svs_files[j].find(file[first_under+1:last_under]) >- 1 and len(svs_files[j][0:-4]) == len(file[first_under+1:last_under]):
            svs_of_interest = svs_files[j]
            
    if len(svs_of_interest) > 1: 
        print(svs_folder + svs_of_interest)
        print(svs_folder +  label_text[0:-2] + svs_of_interest[-4:])
    
    os.rename(svs_folder +svs_of_interest, 
             svs_folder +  label_text[0:-2] + svs_of_interest[-4:]) 
    
    


R09_DT_060625_MCL-004
N:/RCORBYN/User_Data/Current_Projects/20250703_Danis/Slide_Ims/MCL1/R09_DT_060625_MCL-004.svs
N:/RCORBYN/User_Data/Current_Projects/20250703_Danis/Slide_Ims/MCL1/MCL_1_LIV__RBCMT_RMH137_2d_23I2045__SM_ROS_25x_522.svs

R09_DT_060625_MCL-002
N:/RCORBYN/User_Data/Current_Projects/20250703_Danis/Slide_Ims/MCL1/R09_DT_060625_MCL-002.svs
N:/RCORBYN/User_Data/Current_Projects/20250703_Danis/Slide_Ims/MCL1/MCL_1_LIV_RBCMT__RMH126_5a_2312644_SM_ROS_225x_522.svs

R09_DT_060625_MCL-009
N:/RCORBYN/User_Data/Current_Projects/20250703_Danis/Slide_Ims/MCL1/R09_DT_060625_MCL-009.svs
N:/RCORBYN/User_Data/Current_Projects/20250703_Danis/Slide_Ims/MCL1/MCL_1_LIV__RBCMT_RMH135_2f_23_2639.svs

R09_DT_060625_MCL-014
N:/RCORBYN/User_Data/Current_Projects/20250703_Danis/Slide_Ims/MCL1/R09_DT_060625_MCL-014.svs
N:/RCORBYN/User_Data/Current_Projects/20250703_Danis/Slide_Ims/MCL1/MCL_1_LIV_RBCMIT__RMH156_1d_24_18448_GS_ROS_25x_522.svs

R09_DT_060625_MCL-015
N:/RCORBYN/User_Data/Current_Pro