In [4]:
import os
from PIL import Image
import pandas as pd

# Define directories and label information
directories = [
    # "/Users/push/Desktop/COMP9517/project"
    "/Users/push/Desktop/COMP9517/project/61541v001/data/WildScenes/WildScenes2d/K-01/label",
    "/Users/push/Desktop/COMP9517/project/61541v001/data/WildScenes/WildScenes2d/K-03/label",
    "/Users/push/Desktop/COMP9517/project/61541v001/data/WildScenes/WildScenes2d/V-01/label",
    "/Users/push/Desktop/COMP9517/project/61541v001/data/WildScenes/WildScenes2d/V-02/label",
    "/Users/push/Desktop/COMP9517/project/61541v001/data/WildScenes/WildScenes2d/V-03/label"
]

classes = [
    "unlabelled",
    "asphalt",
    "dirt",
    "mud",
    "water",
    "gravel",
    "other-terrain",
    "tree-trunk",
    "tree-foliage",
    "bush",
    "fence",
    "structure",
    "pole",
    "vehicle",
    "rock",
    "log",
    "other-object",
    "sky",
    "grass",
]

palette = [
    (0, 0, 0),
    (230, 25, 75),
    (60, 180, 75),
    (255, 225, 25),
    (0, 130, 200),
    (145, 30, 180),
    (70, 240, 240),
    (240, 50, 230),
    (210, 245, 60),
    (230, 25, 75),
    (0, 128, 128),
    (170, 110, 40),
    (255, 250, 200),
    (128, 0, 0),
    (170, 255, 195),
    (128, 128, 0),
    (250, 190, 190),
    (0, 0, 128),
    (128, 128, 128),
]

# Create a dictionary to map colors to labels
color_to_label = {color: label for color, label in zip(palette, classes)}

# Initialize lists to store results
image_names = []
image_labels = []

# Function to analyze image
def analyze_image(image_path):
    img = Image.open(image_path)
    img = img.convert('RGB')
    colors = img.getcolors(maxcolors=256)
    labels = set()
    for count, color in colors:
        if color in color_to_label:
            labels.add(color_to_label[color])
    return labels

# Iterate through directories and images
for directory in directories:
    if os.path.exists(directory):
        for filename in os.listdir(directory):
            if filename.endswith(".png"):
                image_path = os.path.join(directory, filename)
                labels = analyze_image(image_path)
                image_names.append(filename)
                image_labels.append(list(labels))
    else:
        print(f"Directory {directory} not found.")

# Create DataFrame
df = pd.DataFrame({"image_name": image_names, "labels": image_labels})

# Create a summary of label occurrences
label_summary = df["labels"].explode().value_counts().reset_index()
label_summary.columns = ["label", "count"]

# Display the dataframe with label summary
# print(df)
# print(label_summary)
df.to_csv('image_label_analysis1.csv', index=False)
label_summary.to_csv('label_summary1.csv', index=False)
print("finish")

finish


In [6]:
df

Unnamed: 0,image_name,labels
0,1624326629-371279283.png,"[log, sky, tree-trunk, tree-foliage, grass, dirt]"
1,1624325691-564820246.png,"[log, sky, tree-trunk, tree-foliage, grass, dirt]"
2,1624328122-075194213.png,"[log, sky, tree-trunk, tree-foliage, grass, dirt]"
3,1624327694-947167618.png,"[sky, tree-foliage, dirt, tree-trunk]"
4,1624327915-712721967.png,"[sky, tree-trunk, tree-foliage, grass, dirt]"
...,...,...
9301,1639700738-624872257.png,"[log, tree-trunk, rock, tree-foliage, other-ob..."
9302,1639698132-450041239.png,"[sky, tree-trunk, tree-foliage, grass, dirt]"
9303,1639697854-660065083.png,"[log, sky, tree-trunk, rock, tree-foliage, oth..."
9304,1639698089-241732111.png,"[log, sky, tree-trunk, tree-foliage, other-obj..."


In [5]:
label_summary

Unnamed: 0,label,count
0,tree-foliage,9301
1,tree-trunk,9285
2,sky,8691
3,grass,8570
4,dirt,8084
5,log,4188
6,other-object,2081
7,structure,781
8,rock,474
9,water,424


In [7]:
import pandas as pd

# Load the image_label_analysis.csv file
df = pd.read_csv('/Users/push/Desktop/COMP9517/project/image_label_analysis.csv')

# Define the labels of interest
labels_of_interest = ["rock", "water", "gravel", "pole", "mud", "fence", "other-terrain", "bush", "vehicle"]

# Filter the DataFrame to include only rows with the labels of interest
filtered_df = df[df['labels'].apply(lambda x: any(label in x for label in labels_of_interest))]

# Explode the 'labels' column to count each label separately
exploded_labels = filtered_df['labels'].explode()

# Filter the exploded labels to include only the labels of interest
filtered_exploded_labels = exploded_labels[exploded_labels.isin(labels_of_interest)]

# Save the filtered DataFrame and the summary to CSV files
filtered_df.to_csv('filtered_image_label_analysis.csv', index=False)

# Print the number of selected images and the label summary
print(f"Total number of selected images: {filtered_df.shape[0]}")
print(filtered_df)

Total number of selected images: 1742
                    image_name  \
8     1624325509-188306684.png   
12    1624328914-422656580.png   
19    1624326606-522429513.png   
41    1624325539-673859954.png   
45    1624325983-350774940.png   
...                        ...   
9275  1639698453-949841165.png   
9288  1639700745-356607521.png   
9299  1639699726-569578861.png   
9301  1639700738-624872257.png   
9303  1639697854-660065083.png   

                                                 labels  
8     ['gravel', 'other-object', 'grass', 'structure...  
12    ['other-object', 'water', 'sky', 'tree-trunk',...  
19    ['grass', 'structure', 'rock', 'sky', 'dirt', ...  
41    ['gravel', 'grass', 'structure', 'sky', 'tree-...  
45    ['grass', 'rock', 'sky', 'dirt', 'tree-trunk',...  
...                                                 ...  
9275  ['log', 'grass', 'sky', 'dirt', 'pole', 'tree-...  
9288  ['grass', 'water', 'rock', 'dirt', 'tree-trunk...  
9299  ['other-object', 'grass',

In [8]:
filtered_df

Unnamed: 0,image_name,labels
8,1624325509-188306684.png,"['gravel', 'other-object', 'grass', 'structure..."
12,1624328914-422656580.png,"['other-object', 'water', 'sky', 'tree-trunk',..."
19,1624326606-522429513.png,"['grass', 'structure', 'rock', 'sky', 'dirt', ..."
41,1624325539-673859954.png,"['gravel', 'grass', 'structure', 'sky', 'tree-..."
45,1624325983-350774940.png,"['grass', 'rock', 'sky', 'dirt', 'tree-trunk',..."
...,...,...
9275,1639698453-949841165.png,"['log', 'grass', 'sky', 'dirt', 'pole', 'tree-..."
9288,1639700745-356607521.png,"['grass', 'water', 'rock', 'dirt', 'tree-trunk..."
9299,1639699726-569578861.png,"['other-object', 'grass', 'sky', 'dirt', 'pole..."
9301,1639700738-624872257.png,"['other-object', 'log', 'grass', 'rock', 'dirt..."


In [9]:
import pandas as pd
import ast

# Load the image_label_analysis.csv file
df = pd.read_csv('/Users/push/Desktop/COMP9517/project/filtered_image_label_analysis.csv')

# Convert the 'labels' column from string representation of list back to actual list
df['labels'] = df['labels'].apply(ast.literal_eval)

# Define the full set of labels
all_possible_labels = [
    "tree-foliage", "tree-trunk", "sky", "grass", "dirt",
    "log", "other-object", "structure", "rock", "water",
    "gravel", "pole", "mud", "fence", "other-terrain",
    "bush", "vehicle"
]

# Create a summary of all labels occurrences
all_labels = [label for sublist in df['labels'] for label in sublist if label in all_possible_labels]
label_summary = pd.Series(all_labels).value_counts().reset_index()
label_summary.columns = ["label", "count"]

# Save the summary to a CSV file
label_summary.to_csv('filtered_label_summary.csv', index=False)

# Print the label summary
print("Label Summary:")
print(label_summary)


Label Summary:
            label  count
0    tree-foliage   1742
1      tree-trunk   1733
2           grass   1672
3             sky   1613
4            dirt   1490
5             log    771
6    other-object    480
7            rock    474
8           water    424
9          gravel    418
10      structure    410
11           pole    354
12            mud    271
13          fence    194
14  other-terrain     84
15           bush     34
16        vehicle     25


copy data to dataset file
-

In [2]:
import os
import pandas as pd
import shutil

# Define paths
csv_path = "/Users/push/Desktop/COMP9517/project/filtered_image_label_analysis.csv"
target_path = "/Users/push/Desktop/COMP9517/project/dataset"
source_dirs = [
    "/Users/push/Desktop/COMP9517/project/61541v001/data/WildScenes/WildScenes2d/K-01/image",
    "/Users/push/Desktop/COMP9517/project/61541v001/data/WildScenes/WildScenes2d/K-03/image",
    "/Users/push/Desktop/COMP9517/project/61541v001/data/WildScenes/WildScenes2d/V-01/image",
    "/Users/push/Desktop/COMP9517/project/61541v001/data/WildScenes/WildScenes2d/V-02/image",
    "/Users/push/Desktop/COMP9517/project/61541v001/data/WildScenes/WildScenes2d/V-03/image"
]

# Load the CSV file
df = pd.read_csv(csv_path)

# Ensure the target directory exists
os.makedirs(target_path, exist_ok=True)

# Copy images
for image_name in df['image_name']:
    found = False
    for source_dir in source_dirs:
        source_path = os.path.join(source_dir, image_name)
        if os.path.exists(source_path):
            shutil.copy(source_path, target_path)
            found = True
            print(f"Copied {image_name} to {target_path}")
            break
    if not found:
        print(f"{image_name} not found in any source directories")


Copied 1624325509-188306684.png to /Users/push/Desktop/COMP9517/project/dataset
Copied 1624328914-422656580.png to /Users/push/Desktop/COMP9517/project/dataset
Copied 1624326606-522429513.png to /Users/push/Desktop/COMP9517/project/dataset
Copied 1624325539-673859954.png to /Users/push/Desktop/COMP9517/project/dataset
Copied 1624325983-350774940.png to /Users/push/Desktop/COMP9517/project/dataset
Copied 1624325298-075235113.png to /Users/push/Desktop/COMP9517/project/dataset
Copied 1624328441-063673271.png to /Users/push/Desktop/COMP9517/project/dataset
Copied 1624326342-808313628.png to /Users/push/Desktop/COMP9517/project/dataset
Copied 1624326572-352169988.png to /Users/push/Desktop/COMP9517/project/dataset
Copied 1624328451-583272593.png to /Users/push/Desktop/COMP9517/project/dataset
Copied 1624325450-763934629.png to /Users/push/Desktop/COMP9517/project/dataset
Copied 1624325408-084579247.png to /Users/push/Desktop/COMP9517/project/dataset
Copied 1624328831-141635038.png to /User

Copy label to label_set file
-

In [3]:
import os
import pandas as pd
import shutil

# Define paths
csv_path = "/Users/push/Desktop/COMP9517/project/filtered_image_label_analysis.csv"
target_path = "/Users/push/Desktop/COMP9517/project/label_set"
source_dirs = [
    "/Users/push/Desktop/COMP9517/project/61541v001/data/WildScenes/WildScenes2d/K-01/label",
    "/Users/push/Desktop/COMP9517/project/61541v001/data/WildScenes/WildScenes2d/K-03/label",
    "/Users/push/Desktop/COMP9517/project/61541v001/data/WildScenes/WildScenes2d/V-01/label",
    "/Users/push/Desktop/COMP9517/project/61541v001/data/WildScenes/WildScenes2d/V-02/label",
    "/Users/push/Desktop/COMP9517/project/61541v001/data/WildScenes/WildScenes2d/V-03/label"
]

# Load the CSV file
df = pd.read_csv(csv_path)

# Ensure the target directory exists
os.makedirs(target_path, exist_ok=True)

# Copy images
for image_name in df['image_name']:
    found = False
    for source_dir in source_dirs:
        source_path = os.path.join(source_dir, image_name)
        if os.path.exists(source_path):
            shutil.copy(source_path, target_path)
            found = True
            print(f"Copied {image_name} to {target_path}")
            break
    if not found:
        print(f"{image_name} not found in any source directories")


Copied 1624325509-188306684.png to /Users/push/Desktop/COMP9517/project/label_set
Copied 1624328914-422656580.png to /Users/push/Desktop/COMP9517/project/label_set
Copied 1624326606-522429513.png to /Users/push/Desktop/COMP9517/project/label_set
Copied 1624325539-673859954.png to /Users/push/Desktop/COMP9517/project/label_set
Copied 1624325983-350774940.png to /Users/push/Desktop/COMP9517/project/label_set
Copied 1624325298-075235113.png to /Users/push/Desktop/COMP9517/project/label_set
Copied 1624328441-063673271.png to /Users/push/Desktop/COMP9517/project/label_set
Copied 1624326342-808313628.png to /Users/push/Desktop/COMP9517/project/label_set
Copied 1624326572-352169988.png to /Users/push/Desktop/COMP9517/project/label_set
Copied 1624328451-583272593.png to /Users/push/Desktop/COMP9517/project/label_set
Copied 1624325450-763934629.png to /Users/push/Desktop/COMP9517/project/label_set
Copied 1624325408-084579247.png to /Users/push/Desktop/COMP9517/project/label_set
Copied 162432883