## Import libraries

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from wild_time_data import available_time_steps
from wild_time_data import load_dataset, num_outputs
import random
# Seed Python's `random` module so that values drawn from it (e.g. via
# random.randrange) are the same on every run of the notebook.
random.seed(21100)

In [None]:
# Class names of the dataset, in class-id order: the position of a name in this
# tuple is the integer key under which it is stored in `labels` below.
list_labels = (
    'airport', 'airport hangar', 'airport terminal', 'amusement park',
    'aquaculture', 'archaeological site', 'barn', 'border checkpoint',
    'burial site', 'car dealership', 'construction site', 'crop field',
    'dam', 'debris/rubble', 'educational institution', 'electric substation',
    'factory/powerplant', 'fire station', 'flooded road', 'fountain',
    'gas station', 'golf course', 'ground transportation station', 'helipad',
    'hospital', 'impoverished settlement', 'interchange', 'lake/pond',
    'lighthouse', 'military facility', 'multi-unit residential', 'nuclear powerplant',
    'office building', 'oil/gas facility', 'park', 'parking lot/garage',
    'place of worship', 'police station', 'port', 'prison',
    'race track', 'railway bridge', 'recreational facility', 'road bridge',
    'runway', 'shipyard', 'shopping mall', 'single-unit residential',
    'smokestack', 'solar farm', 'space facility', 'stadium',
    'storage tank', 'surface mine', 'swimming pool', 'toll booth',
    'tower', 'tunnel opening', 'waste disposal', 'water treatment facility',
    'wind farm', 'zoo',
)

# Lookup table {class id -> class name}.
labels = dict(enumerate(list_labels))

## Work with fmow

In [None]:
# List the time steps the library offers for fMoW.
print(available_time_steps("fmow"))

# Load the train split of time steps 6..15 one after another, printing the
# size of each and accumulating the total number of samples.
final_length = 0
for time_step in range(6, 16):
    data = load_dataset(
        dataset_name="fmow",
        split="train",
        time_step=time_step,
        data_dir=r"Dataset",
        transform=lambda x :x,
    )
    print(data.size)
    final_length = final_length + data.size
print(final_length)

# `data` is now the last dataset loaded (time step 15): show the labels of
# its first ten samples and the shape of its first image.
for sample_idx in range(10):
    print(data[sample_idx][1].item())
print(data[0][0].shape)
# images are of class numpy.ndarray with shape (224, 224, 3)

In [None]:
# Draw a random sample index from the currently loaded dataset, show the
# image and print the name of its class.
rand_img = random.randrange(data.size)
sample = data[rand_img]

img = sample[0]
plt.imshow(img.astype('uint8'))
plt.show()

class_id = sample[1].item()
print(labels[class_id])

In [None]:
# Group the training images of one chosen year by their class label
# (the same grouping could be done on the whole dataset).
chosen_year = 2013
# The time step passed to load_dataset is the year's offset from 2002.
data = load_dataset(dataset_name="fmow", split="train", time_step=chosen_year-2002, data_dir=r"Dataset", transform=lambda x :x)
num_output = num_outputs('fmow')

# vec[c][0] is the list of images whose label is c and lab[c][0] the list of
# the matching labels. The extra level of nesting is kept on purpose: the
# plotting cells index these structures as vec[c][0][k] / lab[c][0][k].
vec = [[[]] for _ in range(num_output)]
lab = [[[]] for _ in range(num_output)]

# Walk the dataset once and fetch every sample a single time, instead of
# re-scanning the whole dataset for each class and re-fetching each match.
class_ids = range(num_output)
for j in range(data.size):
    sample = data[j]
    target = sample[1].item()
    # Labels outside 0..num_output-1 are ignored, as they never matched any
    # class in the per-class scan this replaces.
    if target in class_ids:
        vec[int(target)][0].append(sample[0])
        lab[int(target)][0].append(target)

print(data.size)

In [None]:
# Plot one image per label: for every class that has at least one sample,
# show its first image and print the class name.
for class_images, class_targets in zip(vec, lab):
    if not class_targets[0]:
        # No sample with this label was collected.
        continue
    img = class_images[0][0]
    plt.imshow(img.astype('uint8'))
    plt.show()
    print(labels[class_targets[0][0]])

In [None]:
# Plot all images for a given label, printing the class name after each one.
label = 10
images_of_label = vec[label][0]
targets_of_label = lab[label][0]
for img, target in zip(images_of_label, targets_of_label):
    plt.imshow(img.astype('uint8'))
    plt.show()
    print(labels[target])

## Processing images

In [None]:
# Load the train split of every available fMoW time step.
# NOTE: only the train split is loaded here; the test split is not merged in.
data = []  # one dataset per time step, in the order returned by available_time_steps
size = []  # number of samples of each dataset, aligned with `data`
for time_step in available_time_steps("fmow"):
    dataset = load_dataset(dataset_name="fmow", split="train", time_step=time_step, data_dir=r"Dataset", transform=lambda x :x)
    data.append(dataset)
    # Take the size from the dataset that was just loaded. Indexing `data`
    # with the time-step value is only correct when the time steps happen to
    # be exactly 0, 1, 2, ... in order.
    size.append(dataset.size)
print(size)

In [None]:
from matplotlib.patches import Patch

# One bar per loaded time step; the first bar is labelled 2002.
num_bars = len(size)
years = range(2002, 2002+num_bars)

# The first six bars are drawn in dark orange, all remaining ones in light blue.
colors = 6 * ['darkorange'] + (num_bars - 6) * ['lightblue']

# Bar chart of the number of samples per year.
plt.figure(figsize=(13, 6))
plt.bar(x=years, height=size, color=colors)

# Title and axis captions.
plt.title('Labeled images per year', fontsize=18)
plt.xlabel('Year', fontsize=16)
plt.ylabel('Number of samples', fontsize=16)

# Legend explaining the two bar colours.
handles = [
    Patch(color=patch_color, label=patch_label)
    for patch_color, patch_label in (
        ('darkorange', 'Initialization Samples'),
        ('lightblue', 'Training Samples'),
    )
]
plt.legend(handles=handles, loc='upper right', framealpha=1, fontsize=13)

# Tick styling: one x tick per year.
plt.yticks(fontsize=14)
plt.xticks(years, fontsize=14)

# Write the figure to disk as EPS.
filepath = 'figures/Initial data/data_distribution.eps'
plt.savefig(filepath, format='eps', dpi=1200)

In [None]:
from collections import defaultdict

# Count, for every year, how many samples carry each label.
# Each dataset is iterated exactly once: the per-year counts and the set of
# labels seen across all years are collected in the same pass.
all_labels = set()
counts_by_year = []  # one {label: count} mapping per entry of `data`
for year_data in data:
    label_count = defaultdict(int)
    for sample in year_data:
        label_count[sample[1].item()] += 1
    all_labels.update(label_count)
    counts_by_year.append(label_count)

# Labels are the values returned by `.item()`, so this sorts them by value.
all_labels = sorted(all_labels)

# One row per year, one column per label of `all_labels`; a label that does
# not occur in a year gets a count of 0.
label_counts_per_year = [
    [year_counts.get(label_id, 0) for label_id in all_labels]
    for year_counts in counts_by_year
]

# Transpose so that rows are labels and columns are years.
# NOTE: this rebinds `data` from the list of datasets to the count matrix, so
# the datasets must be loaded again before this cell can be re-run.
data = np.array(label_counts_per_year).T

In [None]:
# Stacked bar chart: for every label, the share it has of each year's samples,
# with the yearly shares stacked on top of each other.
years = range(2002, 2018)  # List of years
labels = dict(enumerate(list_labels))

# Make sure both inputs are NumPy arrays so the division below broadcasts.
data = np.array(data)
size = np.array(size)

# Divide every column (year) of the count matrix by that year's sample count.
proportions = data / size

# Number of categories (labels) and their x positions
num_labels = len(labels)
x = np.arange(num_labels)

# One distinct colour per year
colors = plt.colormaps['tab20b'].colors[:len(years)]

fig, ax = plt.subplots(figsize=(15, 9))

# Stack the years: `bottom` is the running height reached by the bars drawn so
# far, updated incrementally instead of re-summing all previous columns.
bottom = np.zeros(proportions.shape[0])
for i, year in enumerate(years):
    ax.bar(x, proportions[:, i], bottom=bottom, color=colors[i], label=str(year))
    bottom = bottom + proportions[:, i]

# Set x-ticks to show the category labels
ax.set_xticks(x)
ax.set_xticklabels(labels.values(), rotation=90, fontsize=14)
# Only change the font size of the y tick labels. Re-assigning the current
# label texts with set_yticklabels would freeze them while the tick positions
# can still change, so the size is set through tick_params instead.
ax.tick_params(axis='y', labelsize=14)

# Add labels and title
ax.set_xlabel('Labels', fontsize=16)
ax.set_ylabel('Proportion of Samples', fontsize=16)
ax.set_title('Proportion of Samples per Label Divided by Year', fontsize=18)

# Place the legend outside the plot
ax.legend(title='Year', bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=13, title_fontsize=14)

plt.tight_layout()

filepath = 'figures/Initial data/proportion_labels.eps'
plt.savefig(filepath, format='eps', dpi=1200)

In [None]:
# Stacked bar chart: for every label, the number of samples per year, with the
# yearly counts stacked on top of each other.
years = range(2002, 2018)  # List of years
labels = dict(enumerate(list_labels))

# Make sure the count matrix is a NumPy array (rows: labels, columns: years).
data = np.array(data)

# Number of categories (labels) and their x positions
num_labels = len(labels)
x = np.arange(num_labels)

# One distinct colour per year
colors = plt.colormaps['tab20b'].colors[:len(years)]

fig, ax = plt.subplots(figsize=(15, 9))

# Stack the years: `bottom` is the running height reached by the bars drawn so
# far, updated incrementally instead of re-summing all previous columns.
bottom = np.zeros(data.shape[0])
for i, year in enumerate(years):
    ax.bar(x, data[:, i], bottom=bottom, color=colors[i], label=str(year))
    bottom = bottom + data[:, i]

# Set x-ticks to show the category labels
ax.set_xticks(x)
ax.set_xticklabels(labels.values(), rotation=90, fontsize=14)
# Only change the font size of the y tick labels. Re-assigning the current
# label texts with set_yticklabels would freeze them while the tick positions
# can still change, so the size is set through tick_params instead.
ax.tick_params(axis='y', labelsize=14)

# Add labels and title
ax.set_xlabel('Labels', fontsize=16)
ax.set_ylabel('Number of Samples', fontsize=16)
ax.set_title('Number of Samples per Label Divided by Year', fontsize=18)

# Place the legend outside the plot
ax.legend(title='Year', bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=13, title_fontsize=14)

plt.tight_layout()

filepath = 'figures/Initial data/absolute_value_labels.eps'
plt.savefig(filepath, format='eps', dpi=1200)

In [None]:
# Sum the matrix along axis 1, i.e. one total per row.
data.sum(axis=1)