In [30]:
import os
import shutil
import pandas as pd
from sklearn.model_selection import train_test_split
import re
from pathlib import Path
import cv2 as cv

In [32]:
# Data locations. The defaults are the original cluster paths; set the
# THESIS_IMG_PATH / THESIS_BASE_PATH environment variables to run the
# notebook on another machine without editing this cell.
# img_path: root that holds the `images` tree and receives `data_split`.
img_path = os.environ.get("THESIS_IMG_PATH", '/work3/s220243/Thesis')
# base_path: project root that holds `data/imageAnalysis_information.xlsx`.
base_path = os.environ.get("THESIS_BASE_PATH", '/zhome/ac/d/174101/thesis')

In [33]:
# Read the experiment sheet. Its real column names are stored in the first
# data row, so that row is promoted to the header and removed from the body.
raw_sheet = pd.read_excel(f"{base_path}/data/imageAnalysis_information.xlsx")
header_row = raw_sheet.iloc[0]
body_rows = raw_sheet.values[1:]
df = pd.DataFrame(body_rows, columns=header_row)
df.head()

Unnamed: 0,event_id,IBT number,genus,species,start date,end date,date of picture,image no,video,temperature,picture interval,comments,Reshape Job ID,Reshape plate position,Ofba reference list,Comments
0,ima001,IBT 12085,Penicillium,smithii,2023-02-20,2023-02-27,day 4,192,,25,30 min,,,,,
1,ima001,IBT 13516,Penicilluim,bilaiae,2023-02-20,2023-02-27,day 4,192,,25,30 min,,,,,
2,ima001,IBT 21948,Penicilluim,malodoratum,2023-02-20,2023-02-27,day 4,192,,25,30 min,,,,,
3,ima001,IBT 21902,Aspergillus,astellatus,2023-02-20,2023-02-27,day 4,192,,25,30 min,,,,,
4,ima001,NID174,Aspergillus,nidulans,2023-02-20,2023-02-27,day 4,192,,25,30 min,,,,,


In [34]:
# Map every strain identifier to its class label "<genus>-<species>".
# The cleaned frame is assigned back: previously `.dropna()` was only
# displayed, so rows with a missing strain or label still reached the merge
# (a NaN label ends up as a "nan" class folder when images are copied).
# Exact duplicate (strain, label) rows are removed as well, because each
# duplicate would repeat every image path of that strain in the inner merge.
# NOTE(review): the sheet spells the genus both "Penicillium" and
# "Penicilluim"; the spelling is kept as-is here — confirm whether the labels
# should be normalised at the source.
species_genus_df = (
    pd.DataFrame({"IBT_number": df['IBT number'],
                  "Target": df["genus"] + "-" + df["species"]})
    .dropna()
    .drop_duplicates()
)
species_genus_df

Unnamed: 0,IBT_number,Target
0,IBT 12085,Penicillium-smithii
1,IBT 13516,Penicilluim-bilaiae
2,IBT 21948,Penicilluim-malodoratum
3,IBT 21902,Aspergillus-astellatus
4,NID174,Aspergillus-nidulans
...,...,...
163,IBT 32197,Purpureocillium-lilacinum
164,IBT 32286,Aspergillus-wentii
165,IBT 32384,Aspergillus-aculeatinus
166,IBT 32420,Aspergillus-ustus


In [35]:
def _ibt_label(image_file):
  """Return the 'IBT <digits>' tag found in the path, or "ACU1" if there is none."""
  found = re.search(r'IBT \d+', str(image_file))
  return found.group() if found else "ACU1"

# Every JPEG below the image root, searched recursively
img = Path(f"{img_path}/images")
paths = list(img.glob('**/*.jpeg'))

# Parallel lists: strain tag and path string for each image
img_paths = [str(image_file) for image_file in paths]
ibt_numbers = [_ibt_label(image_file) for image_file in paths]

# One row per strain holding the list of its image paths
paths_df = (
    pd.DataFrame({"IBT_number": ibt_numbers, "path": img_paths})
    .groupby('IBT_number')['path']
    .apply(list)
    .reset_index()
)

# Attach the class label to each strain, then expand back to one row per image
merged_df = species_genus_df.merge(paths_df, on='IBT_number', how='inner')
target_paths_df = merged_df.explode('path')

# Frame number = digits of the file name (text before the first '.')
file_stems = target_paths_df['path'].map(lambda p: p.split('/')[-1].split('.')[0])
target_paths_df['Image_number'] = file_stems.str.replace(r'\D', '', regex=True).astype(int)

# Frames 48..168 feed the train/test split (earlier frames are dropped);
# frames 169..192 are held out as the validation set.
frame_no = target_paths_df['Image_number']
target_df = target_paths_df[frame_no.ge(48) & frame_no.le(168)]
val_df = target_paths_df[frame_no.gt(168) & frame_no.le(192)]

In [None]:
def apply_lbp(image_path, output_directory, radius=3):
    """Compute a uniform Local Binary Pattern image and save it as 8-bit.

    Parameters
    ----------
    image_path : str or path-like
        Image to process; it is read as single-channel grayscale.
    output_directory : str or path-like
        Folder that receives the result (created if missing). The output
        file keeps the input's file name.
    radius : int, optional
        LBP radius; ``8 * radius`` sampling points are used. Defaults to 3,
        the value that was previously hard-coded.

    Returns
    -------
    str
        Path of the written file.

    Raises
    ------
    FileNotFoundError
        If the image cannot be read.
    OSError
        If the processed image cannot be written.
    """
    # NOTE(review): `local_binary_pattern` is not imported in the notebook's
    # import cell; it is presumably skimage.feature.local_binary_pattern —
    # add that import there before running this cell.
    image_path = str(image_path)
    output_directory = str(output_directory)

    # OpenCV is imported in this notebook as `cv`, not `cv2`.
    image = cv.imread(image_path, cv.IMREAD_GRAYSCALE)
    if image is None:
        # imread reports an unreadable/missing file by returning None
        raise FileNotFoundError(f"Could not read image: {image_path}")

    # Apply Local Binary Patterns (LBP)
    n_points = 8 * radius
    lbp_image = local_binary_pattern(image, n_points, radius, method='uniform')

    # Uniform LBP codes lie in [0, n_points + 1]. Stretch that range onto
    # 0..255; multiplying the codes by 255 overflowed uint8 and wrapped around.
    scale = 255.0 / (n_points + 1)
    lbp_image_uint8 = (lbp_image * scale).round().clip(0, 255).astype('uint8')

    # Save under the same file name inside the output directory
    os.makedirs(output_directory, exist_ok=True)
    output_path = os.path.join(output_directory, os.path.basename(image_path))
    if not cv.imwrite(output_path, lbp_image_uint8):
        # imwrite returns False instead of raising when it cannot write
        raise OSError(f"Could not write image: {output_path}")
    return output_path

# Run apply_lbp once per row of `df`, reading the source image path and the
# destination folder from the row.
# NOTE(review): `df` as loaded from the spreadsheet earlier in this notebook
# has no 'image_path' or 'output_directory' columns, so this loop would raise
# KeyError as written. Presumably it was meant to iterate a frame of image
# paths (e.g. the 'path' column of target_paths_df) with a chosen output
# folder — confirm the intended input before running.
for index, row in df.iterrows():
    image_path = row['image_path']
    output_directory = row['output_directory']
    
    # Apply LBP and save the processed image
    apply_lbp(image_path, output_directory)

In [37]:
# Root folder that receives the train / test / validation image trees
base = Path(f"{img_path}/data_split")
base.mkdir(parents=True, exist_ok=True)

# Create train-test split folders
train_dst = base / "train"
test_dst = base / "test"
val_dst = base / "validation"

# Start from empty folders so files of an earlier run cannot linger.
# Only remove folders that exist: on a first run there is nothing to delete
# and an unconditional rmtree would raise FileNotFoundError.
for split_dir in (train_dst, test_dst, val_dst):
  if split_dir.exists():
    shutil.rmtree(split_dir)
print('Directories removed')

for split_dir in (train_dst, test_dst, val_dst):
  split_dir.mkdir(exist_ok=True)

# Perform train-test split (seeded, 70 % train / 30 % test)
train_df, test_df = train_test_split(target_df, test_size=0.3, random_state=42)

def copy_images(df, dst_folder):
  """Copy every image listed in `df` into `dst_folder/<Target>/`.

  Parameters
  ----------
  df : pandas.DataFrame
      Needs a 'Target' column (class label, used as sub-folder name) and a
      'path' column (source image file).
  dst_folder : pathlib.Path
      Split directory that receives one sub-folder per class label.

  The source files are named only by frame number (e.g. ``98.jpeg``), so
  images of different strains or presets sharing a label would overwrite
  each other if copied under their bare name. The destination name is
  therefore built from the last three path components (strain folder,
  preset folder, file name) with spaces replaced by underscores.
  """
  for target, src in zip(df['Target'], df['path']):
    src = Path(str(src))
    target_dst = dst_folder / str(target)
    target_dst.mkdir(parents=True, exist_ok=True)
    # Skip the filesystem anchor ('/') so it can never end up in a file name
    tail = [part for part in src.parts if part != src.anchor][-3:]
    unique_name = "_".join(tail).replace(" ", "_")
    shutil.copy(str(src), str(target_dst / unique_name))

print('copying...')

# Copy images to train directory
copy_images(train_df, train_dst)
print('train images copy finished')

# Copy images to test directory
copy_images(test_df, test_dst)
print('test images copy finished')

# Copy images to validation directory
copy_images(val_df, val_dst)
print('validation images copy finished')

copying...
/work3/s220243/Thesis/images/IBT 42139/RIS1_0_TL_20_preset/98.jpeg
/work3/s220243/Thesis/images/IBT 26504/RIS1_0_TL_20_preset/96.jpeg
/work3/s220243/Thesis/images/IBT 42139/RIS1_0_TL_20_preset/140.jpeg
/work3/s220243/Thesis/images/IBT 40896/RIS1_0_TL_20_preset/136.jpeg
/work3/s220243/Thesis/images/IBT 35839/RIS1_0_TL_20_preset/144.jpeg
/work3/s220243/Thesis/images/IBT 8189/RIS1_0_TL_20_preset/137.jpeg
/work3/s220243/Thesis/images/IBT 21948/6w_top_medium_preset/119.jpeg
/work3/s220243/Thesis/images/IBT 36721/RIS1_0_TL_20_preset/70.jpeg
/work3/s220243/Thesis/images/IBT 42867/RIS1_0_TL_20_preset/61.jpeg
/work3/s220243/Thesis/images/IBT 41274/RIS1_0_TL_20_preset/135.jpeg
/work3/s220243/Thesis/images/IBT 31674/RIS1_0_TL_20_preset/72.jpeg
/work3/s220243/Thesis/images/IBT 36710/RIS1_0_TL_20_preset/55.jpeg
/work3/s220243/Thesis/images/IBT 42683/RIS1_0_TL_20_preset/69.jpeg
/work3/s220243/Thesis/images/IBT 42198/RIS1_0_TL_20_preset/77.jpeg
/work3/s220243/Thesis/images/IBT 32802/RIS1_0

KeyboardInterrupt: 