In [None]:
# %pip install tqdm
# %pip install python-dotenv
# %pip install torch==2.4.0+cu118 --index-url https://download.pytorch.org/whl/cu118
# %pip install scikit_learn==1.2.2
# %pip install ipython
# %pip install pandas
# %pip install numpy
# %pip install matplotlib
# %pip install tabulate
# %pip install scipy
# %pip install git+https://github.com/Louis-Li-dev/ML_tool_kit

In [1]:
import os
import pickle
import sys
parent_dir = os.path.join(os.getcwd(), '..')
if parent_dir not in sys.path: sys.path.append(parent_dir)
from utility.data_utils import *
from utility.visuals import *
from dotenv import load_dotenv
from model.CNN import ConditionalSegmentationVAE
from mkit.torch_support.tensor_utils import xy_to_tensordataset
from torch import nn
from IPython.display import clear_output
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.mixture import GaussianMixture
from sklearn.metrics import silhouette_score
from sklearn.neighbors import NearestNeighbors
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
# Imported explicitly so the cells below do not depend on names leaking
# out of the `utility.*` star imports.
import matplotlib.pyplot as plt
# Load variables from a .env file into the process environment, then read the
# dataset location from it. DATA_DIR is None when the variable is not set.
load_dotenv()
DATA_DIR = os.environ.get("DATA_DIR")


- Dataset

In [None]:
# Fail early with a readable message when the dataset location is unusable.
# An unset DATA_DIR is None, and os.path.exists(None) would raise a TypeError
# instead of the intended error, so that case is checked first.
if not DATA_DIR:
    raise FileNotFoundError("DATA_DIR is not set; define it in the environment or in a .env file.")
if not os.path.exists(DATA_DIR):
    raise FileNotFoundError("Make sure the data directory is correctly placed.")

In [None]:
# Process the first data file found in DATA_DIR and load the cached result.
files = get_files(DATA_DIR)
if len(files) == 0:
    raise FileNotFoundError(f"No data files found in {DATA_DIR!r}.")

return_list = []  # not used by any cell visible in this notebook
file = files[0]
# Take the file's base name whether the path uses '\\' or '/' separators;
# splitting on '\\' alone only worked for Windows-style paths.
base_name = file.replace('\\', '/').rsplit('/', 1)[-1]
city_name = base_name.split('.csv')[0].split('_')[0]

path_name = process_and_transform_data(file, resolution=.5, overwrite=True)
# NOTE: pickle.load can execute arbitrary code. Only load files that were
# produced by this project's own preprocessing step.
with open(path_name, 'rb') as f:
    result_dict = pickle.load(f)
labels = result_dict['labels']
encoder = result_dict['encoder']
MAX_LEN = result_dict['max length']
file_name = result_dict['file name']
WIDTH = result_dict['width']
HEIGHT = result_dict['height']

- x y splitting

In [None]:



# Fixed seed so the train/test partition (and everything derived from it)
# is the same after every Restart & Run All.
RANDOM_STATE = 42

# Keep only the label grids that have more than one non-zero cell.
unique_labels = [u for u in labels if np.count_nonzero(u) > 1]

# Insert a singleton axis at position 1 before splitting 80/20.
train_labels, test_labels = train_test_split(
    np.expand_dims(np.array(unique_labels), axis=1),
    test_size=.2,
    random_state=RANDOM_STATE,
)


In [None]:


# Flatten every training grid to a vector, then project onto the principal
# components that together retain 96% of the variance.
train_flat = train_labels.reshape(len(train_labels), -1)
pca = PCA(n_components=0.96)
transformed = pca.fit(train_flat).transform(train_flat)



In [None]:

# Build the conditioning features: for every training sample, concatenate the
# PCA vectors of its 3 nearest training samples into one flat row.
knn = NearestNeighbors(n_jobs=-1)
knn.fit(transformed)

# One batched query instead of one kneighbors() call per row.
# NOTE(review): the index is queried with the very points it was fitted on,
# so each sample's nearest neighbour is itself and its own PCA vector ends up
# in its feature row. Confirm this is intended if the same vector is also the
# training target.
neighbor_indices = knn.kneighbors(transformed, n_neighbors=3, return_distance=False)

# (n_samples, 3, n_components) -> (n_samples, 3 * n_components)
features = transformed[neighbor_indices]
features = features.reshape(features.shape[0], -1)

In [None]:

# Hyperparameters

# Sizes derived from the arrays prepared above
condition_dim = features.shape[-1]  # width of one KNN feature row
output_dim = transformed.shape[1]   # number of PCA components kept

# Model / optimisation settings
noise_dim = 100
batch_size = 32
num_epochs = 7000
lr = 2e-4             # Learning rate
betas = (0.5, 0.999)  # Beta parameters for Adam optimizer


In [None]:
# Preview the first test sample next to its derived x / y parts.
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(10, 5))
axes = axes.ravel()

# Left panel: the first test label grid as stored.
axes[0].imshow(test_labels[0][0])

# `x` and `y` are reused by the cells below, which title them as the input and
# expected itinerary. NOTE(review): get_x_y lives in utility.data_utils;
# confirm there how the two parts are derived.
x, y, _, _ = get_x_y(test_labels, MAX_LEN=MAX_LEN, encoder=encoder)
x, y = np.array(x), np.array(y)

# Middle / right panels: first x and first y sample.
axes[1].imshow(x[0][0])
axes[2].imshow(y[0][0])
plt.show()

In [None]:

# Project the flattened test inputs with the PCA fitted on the training data,
# so they live in the same component space the KNN index was built in.
# A second PCA used to be fitted on `x` here, but it was never applied (the
# transform always went through `pca`), so that dead fit has been dropped.
test_transformed = pca.transform(x.reshape(x.shape[0], -1))
test_transformed.shape, x.shape


In [None]:

# Conditioning features for the test inputs: for every projected test sample,
# concatenate the PCA vectors of its 3 nearest *training* samples. The
# neighbour count must match the one used for the training features so the
# row width stays equal to condition_dim.
# One batched query instead of one kneighbors() call per row.
test_neighbor_indices = knn.kneighbors(test_transformed, n_neighbors=3, return_distance=False)

# (n_test, 3, n_components) -> (n_test, 3 * n_components)
test_features = transformed[test_neighbor_indices]
test_features = test_features.reshape(test_features.shape[0], -1)
test_features.shape

In [None]:
# Display the shape of the test inputs; its last two axes are used below to
# reshape the generator output back into grids.
x.shape

In [None]:
# Generate one itinerary per test sample and map it back to grid space.
# NOTE(review): `G` and `device` are not defined in any cell visible in this
# notebook; the cell that builds/trains the generator must be run first.
test_input = torch.tensor(test_features).float().to(device)
index = 18
noise = torch.randn(test_input.shape[0], noise_dim, device=device)

# Inference only: skip autograd bookkeeping for the forward pass.
with torch.no_grad():
    output = G(noise, test_input).detach().cpu().numpy()

# Undo the PCA projection and restore the (rows, cols) layout of `x`.
output_shaped = pca.inverse_transform(output).reshape(len(output), x.shape[-2], x.shape[-1])
output_shaped.shape


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.ndimage import gaussian_filter

# Draw one row per test sample: input | expected | generated, each smoothed
# with the same Gaussian filter. Expects `output_shaped`, `x` and `y` from the
# cells above.

# Zero out negative values. Rebinding (instead of masking in place) leaves the
# array produced by the generation cell untouched.
output_shaped = np.clip(output_shaped, 0, None)

attempt = 1
sigma = 1.2  # Standard deviation for the Gaussian filter
n_rows = len(output_shaped)
output_dir = '../fig/gan_output'

plt.rcParams['font.family'] = 'Times New Roman'
# squeeze=False keeps the axes grid 2-D even when there is a single sample,
# so row indexing below always yields three axes.
fig, axes_list = plt.subplots(n_rows, 3, figsize=(10, 100), squeeze=False)

for i in range(n_rows):
    axes = axes_list[i]

    # Apply Gaussian filter to spread out pixel values
    output_shaped_spread = gaussian_filter(output_shaped[i], sigma=sigma)

    axes[0].imshow(gaussian_filter(x[i][0], sigma=sigma))
    axes[1].imshow(gaussian_filter(y[i][0], sigma=sigma))
    axes[2].imshow(output_shaped_spread)

    # Column titles only on the first row
    if i == 0:
        axes[0].set_title('Input Itinerary', fontweight='bold')
        axes[1].set_title('Expected Itinerary', fontweight='bold')
        axes[2].set_title('Generated Itinerary (Spread)', fontweight='bold')

    for ax in axes:
        ax.set_xticks([])
        ax.set_yticks([])

# Save the figure with spread-out images. The file name keeps the original
# "<last row index>_<attempt>" pattern but no longer relies on the loop
# variable surviving the loop; the target directory is created if missing.
os.makedirs(output_dir, exist_ok=True)
fig.savefig(f'{output_dir}/{n_rows - 1}_{attempt}_spread.png')


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.ndimage import gaussian_filter

# Draw one row per test sample: input | expected | generated, each smoothed
# with the same Gaussian filter and annotated with a colorbar. Expects
# `output_shaped`, `x` and `y` from the cells above.

# Zero out negative values. Rebinding (instead of masking in place) leaves the
# array produced by the generation cell untouched.
output_shaped = np.clip(output_shaped, 0, None)

attempt = 1
sigma = 1.2  # Standard deviation for the Gaussian filter
n_rows = len(output_shaped)
output_dir = '../fig/gan_output'

plt.rcParams['font.family'] = 'Times New Roman'

# Figure height scales with the number of rows. squeeze=False keeps the axes
# grid 2-D even when there is a single sample.
fig, axes_list = plt.subplots(n_rows, 3, figsize=(15, n_rows * 5), squeeze=False)

for i in range(n_rows):
    axes = axes_list[i]

    # Apply Gaussian filter to spread out pixel values
    output_shaped_spread = gaussian_filter(output_shaped[i], sigma=sigma)

    # The expected panel shows the input and the target grids summed.
    x_filtered = gaussian_filter(x[i][0], sigma=sigma)
    y_filtered = gaussian_filter(y[i][0] + x[i][0], sigma=sigma)

    # Display images with an explicit colormap
    im0 = axes[0].imshow(x_filtered, cmap='viridis')
    im1 = axes[1].imshow(y_filtered, cmap='viridis')
    im2 = axes[2].imshow(output_shaped_spread, cmap='viridis')

    # Column titles only on the first row
    if i == 0:
        axes[0].set_title('Input Itinerary', fontsize=14, fontweight='bold')
        axes[1].set_title('Expected Itinerary', fontsize=14, fontweight='bold')
        axes[2].set_title('Generated Itinerary (Spread)', fontsize=14, fontweight='bold')

    # Remove ticks for cleaner look
    for ax in axes:
        ax.set_xticks([])
        ax.set_yticks([])

    # One colorbar per panel so intensities can be compared
    plt.colorbar(im0, ax=axes[0], shrink=0.6)
    plt.colorbar(im1, ax=axes[1], shrink=0.6)
    plt.colorbar(im2, ax=axes[2], shrink=0.6)

# Adjust the layout to avoid overlap
plt.tight_layout()

# Save at 300 dpi. The file name keeps the original "<last row index>_<attempt>"
# pattern but no longer relies on the loop variable surviving the loop; the
# target directory is created if missing.
# NOTE(review): this is the same path the lower-resolution "spread" cell
# writes to, so this file replaces that one.
os.makedirs(output_dir, exist_ok=True)
fig.savefig(f'{output_dir}/{n_rows - 1}_{attempt}_spread.png', dpi=300)

# Show plot (optional)
# plt.show()


In [None]:

# Draw one row per test sample: input | expected | generated, without any
# smoothing. Expects `output_shaped`, `x` and `y` from the cells above.

# Zero out negative values. Rebinding (instead of masking in place) leaves the
# array produced by the generation cell untouched.
output_shaped = np.clip(output_shaped, 0, None)

attempt = 1
n_rows = len(output_shaped)
output_dir = '../fig/gan_output'

plt.rcParams['font.family'] = 'Times New Roman'
# squeeze=False keeps the axes grid 2-D even when there is a single sample,
# so row indexing below always yields three axes.
fig, axes_list = plt.subplots(n_rows, 3, figsize=(10, 100), squeeze=False)

for i in range(n_rows):
    axes = axes_list[i]

    axes[0].imshow(x[i][0])
    axes[1].imshow(y[i][0])
    axes[2].imshow(output_shaped[i])

    # Column titles only on the first row
    if i == 0:
        axes[0].set_title('input itinerary', fontweight='bold')
        axes[1].set_title('expected itinerary', fontweight='bold')
        axes[2].set_title('generated itinerary', fontweight='bold')

    for ax in axes:
        ax.set_xticks([])
        ax.set_yticks([])

# The file name keeps the original "<last row index>_<attempt>" pattern but no
# longer relies on the loop variable surviving the loop; the target directory
# is created if missing.
os.makedirs(output_dir, exist_ok=True)
fig.savefig(f'{output_dir}/{n_rows - 1}_{attempt}.png')
