In [None]:
import os
import pickle
import sys
# The project packages live one directory up; this must run before the local
# star imports below, so the original import order is kept on purpose.
parent_dir = os.path.join(os.getcwd(), '..')
if parent_dir not in sys.path: sys.path.append(parent_dir)
from utility.data_utils import *
from utility.visuals import *
from model.dimensional_reduction import *
from model.gans import *
from model.rf import *
from model.dnn import *
from model.knn import *
from dotenv import load_dotenv
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from scipy.ndimage import gaussian_filter
from utility.eval import *
from utility.functional import *
def _require_env(name, cast):
    """Read environment variable ``name`` and convert it with ``cast``.

    Args:
        name: Key to look up via ``os.getenv``.
        cast: Callable applied to the raw string (e.g. ``int`` or ``float``).

    Returns:
        The converted value.

    Raises:
        RuntimeError: If the variable is not set. Without this check the
            failure would surface as an opaque ``TypeError`` from
            ``int(None)`` / ``float(None)`` that does not name the key.
    """
    raw = os.getenv(name)
    if raw is None:
        raise RuntimeError(
            f"Environment variable {name!r} is not set; define it in .env or the shell."
        )
    return cast(raw)


# Pull settings from a .env file (if any) into the process environment.
load_dotenv()
# NOTE(review): DATA_DIR is left unchecked because how get_files treats a
# missing directory is not visible here -- confirm and tighten if appropriate.
DATA_DIR = os.getenv("DATA_DIR")
PCA_N_COMPONENTS = _require_env("PCA_N_COMPONENTS", float)
NOISE_DIM = _require_env("NOISE_DIM", int)
BATCH_SIZE = _require_env("BATCH_SIZE", int)
NUM_EPOCHS = _require_env("NUM_EPOCHS", int)
# Hard override of the configured epoch count: every model below trains for a
# single epoch regardless of the environment. Remove this line to use the
# NUM_EPOCHS value from the environment.
NUM_EPOCHS = 1


# Seed for the train/test split so that a re-run reproduces the same partition.
SPLIT_SEED = 42

# Work on the first data file only.
files = get_files(DATA_DIR)
file = files[0]
# City name = file-name part before the first '_' (and before '.csv').
# Both '\\' and '/' are treated as separators so this also works for
# POSIX-style paths, not only Windows ones.
city_name = file.replace('\\', '/').split('/')[-1].split('.csv')[0].split('_')[0]

# process_and_transform_data returns the path of a pickle file holding the
# processed dataset; overwrite=True is passed on every run.
path_name = process_and_transform_data(file, resolution=.5, overwrite=True)
# SECURITY: pickle.load can execute arbitrary code embedded in the file. Only
# load files produced by this project (presumably written by the call above).
with open(path_name, 'rb') as f:
    result_dict = pickle.load(f)
labels = result_dict['labels']
encoder = result_dict['encoder']
MAX_LEN = result_dict['max length']
file_name = result_dict['file name']
WIDTH = result_dict['width']
HEIGHT = result_dict['height']

# Despite the name, this filter does not de-duplicate: it keeps the labels that
# have more than one non-zero entry.
unique_labels = [u for u in labels if np.count_nonzero(u != 0) > 1]
# A singleton axis is inserted at position 1 before splitting; test_labels keeps
# it (it is consumed in that form by get_x_y later), train_labels is flattened
# to one row per sample.
train_labels, test_labels = train_test_split(
    np.expand_dims(np.array(unique_labels), axis=1),
    test_size=.2,
    random_state=SPLIT_SEED,
)
train_labels = train_labels.reshape(len(train_labels), -1)

original dataset size: 238
dataset size with duplicates removed: 172


In [25]:
# Embed the training labels (one flat row per sample) with the 'tsne' method.
dr = DimensionalityReducer(method='tsne')
transformed_tsne = dr.fit_and_transform(
    train_labels.reshape(len(train_labels), -1)
)

In [26]:

# Fit the feature extractor on the training embedding and derive one flat
# feature row per training sample from that same embedding.
knn = KNNFeaturesExtractor()
knn.fit(transformed_tsne)
features = knn.predict(transformed_tsne)
features = features.reshape(features.shape[0], -1)

# Model dimensions: conditioning input width and flattened label width.
CONDITION_DIM, OUTPUT_DIM = features.shape[-1], train_labels.shape[1]

# Build the test inputs. Only the first two values returned by get_x_y are used.
x, y, _, _ = get_x_y(test_labels, MAX_LEN=MAX_LEN, encoder=encoder)
x, y = np.array(x), np.array(y)

# NOTE(review): despite the variable name this reducer uses method='tsne', and
# it is fit separately on `x` while the training reducer was fit on
# train_labels. Confirm the two embeddings are meant to be comparable before
# feeding this one to the extractor fitted on the training embedding.
test_pca = DimensionalityReducer(method='tsne')
test_transformed = test_pca.fit_and_transform(x.reshape(len(x), -1))
test_features = knn.predict(test_transformed)
test_features = test_features.reshape(test_features.shape[0], -1)

In [27]:

# Train every model on the same (features -> flattened label) pairs.
# CONDITION_DIM / OUTPUT_DIM are used for both neural models so that their
# dimensions cannot drift apart.

# Feed-forward network.
dnn_manager = DNNManager(
    input_dim=CONDITION_DIM,
    output_dim=OUTPUT_DIM,
    batch_size=BATCH_SIZE,
    num_epochs=NUM_EPOCHS,
    lr=3e-4,
    betas=(.5, 0.999)
)
dnn_manager.train(features, train_labels, verbose=1)

# Conditional GAN: noise of size NOISE_DIM plus the feature vector as condition.
gan_manager = GANManager(
    noise_dim=NOISE_DIM,
    condition_dim=CONDITION_DIM,
    output_dim=OUTPUT_DIM,
    batch_size=BATCH_SIZE,
    num_epochs=NUM_EPOCHS,
    lr=0.0002,
    betas=(.5, 0.999),
)
gan_manager.train(features, train_labels, verbose=1)

# Random-forest baseline.
rf_manager = RandomForestManager()
rf_manager.train(features, train_labels)

# Nearest-neighbour baseline. This used to be built and trained twice (once
# before the DNN and again here); only the later instance was ever used, so it
# is now trained once.
knn_manager = KNNManager(n_neighbors=1)
knn_manager.train(features, train_labels)


100%|██████████| 1/1 [00:00<00:00, 23.97it/s]
100%|██████████| 1/1 [00:00<00:00,  7.59it/s]


Train MSE: 0.9980


In [28]:

def _predict_grids(manager, clip_negative=False):
    """Predict on ``test_features`` and reshape to one grid per test sample.

    Args:
        manager: Trained model wrapper exposing ``predict``.
        clip_negative: When True, negative entries of the reshaped prediction
            are set to 0 in place.

    Returns:
        The prediction reshaped to ``(len(test_features), WIDTH, HEIGHT)``.
    """
    grids = manager.predict(test_features).reshape(
        len(test_features), WIDTH, HEIGHT
    )
    if clip_negative:
        grids[grids < 0] = 0
    return grids


# Only the two neural models have their negative values zeroed; the RF and KNN
# predictions are used as returned.
gan_output = _predict_grids(gan_manager, clip_negative=True)
dnn_output = _predict_grids(dnn_manager, clip_negative=True)
rf_output = _predict_grids(rf_manager)
knn_output = _predict_grids(knn_manager)


In [29]:

# Standard deviation of the Gaussian kernel used for all smoothing below.
SIGMA = 1


def apply_gaussian(images, sigma):
    """Smooth every image of a batch independently with a Gaussian filter.

    Args:
        images: Iterable of images; each element is passed on its own to
            ``scipy.ndimage.gaussian_filter`` (a NumPy array of shape
            (n_samples, height, width) is the intended input).
        sigma: Standard deviation forwarded to ``gaussian_filter``.

    Returns:
        NumPy array stacking the filtered images in input order.
    """
    smoothed = []
    for image in images:
        smoothed.append(gaussian_filter(image, sigma=sigma))
    return np.array(smoothed)

# Smooth each model's predictions with the same kernel width.
gan_filtered, dnn_filtered, rf_filtered, knn_filtered = (
    apply_gaussian(output, sigma=SIGMA)
    for output in (gan_output, dnn_output, rf_output, knn_output)
)

# Ground truth: element-wise sum of x and y, keeping index 0 along axis 1.
# NOTE(review): presumably x + y recombines the two parts produced by get_x_y
# into the full target -- confirm against get_x_y.
act = (x + y)[:, 0, :]
# The ground truth goes through the same per-image smoothing as the predictions.
act_filtered = apply_gaussian(act, sigma=SIGMA)


def _mean_metric(metric, predicted):
    """Average ``metric(truth, prediction)`` over all samples of ``act_filtered``.

    Args:
        metric: Callable taking (ground-truth sample, predicted sample).
        predicted: Filtered predictions, indexed in the same order as
            ``act_filtered``.

    Returns:
        The mean of the per-sample metric values.
    """
    return np.mean(
        [metric(truth, predicted[idx]) for idx, truth in enumerate(act_filtered)]
    )


# Mean Jensen-Shannon divergence per model.
jsd_gan = _mean_metric(compute_jsd, gan_filtered)
jsd_dnn = _mean_metric(compute_jsd, dnn_filtered)
jsd_rf = _mean_metric(compute_jsd, rf_filtered)
jsd_knn = _mean_metric(compute_jsd, knn_filtered)

# Mean RMSE per model.
rmse_gan = _mean_metric(compute_rmse, gan_filtered)
rmse_dnn = _mean_metric(compute_rmse, dnn_filtered)
rmse_rf = _mean_metric(compute_rmse, rf_filtered)
rmse_knn = _mean_metric(compute_rmse, knn_filtered)

# One entry per model: (name, mean JSD, mean RMSE, filtered predictions).
_model_rows = [
    ("GAN", jsd_gan, rmse_gan, gan_filtered),
    ("DNN", jsd_dnn, rmse_dnn, dnn_filtered),
    ("RF", jsd_rf, rmse_rf, rf_filtered),
    ("KNN", jsd_knn, rmse_knn, knn_filtered),
]
# Transpose the rows into the per-column lists the DataFrame is built from.
_names, _jsd_values, _rmse_values, _filtered_outputs = map(list, zip(*_model_rows))

# The "Filtered Output" column holds each model's full filtered array.
results_df = pd.DataFrame({
    "Model": _names,
    "Jensen-Shannon Divergence": _jsd_values,
    "RMSE": _rmse_values,
    "Filtered Output": _filtered_outputs,
})

print(results_df)


  Model  Jensen-Shannon Divergence      RMSE  \
0   GAN                   0.652091  0.160944   
1   DNN                   0.645012  0.125711   
2    RF                   0.406379  0.031906   
3   KNN                   0.420430  0.033988   

                                     Filtered Output  
0  [[[0.017950632, 0.042251308, 0.09515086, 0.121...  
1  [[[0.08780075, 0.13276954, 0.1299285, 0.123073...  
2  [[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0...  
3  [[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0...  


In [30]:
# Show only the scalar metrics; the array-valued column is left out of the view
# (results_df itself is not modified).
results_df.drop(columns=["Filtered Output"])

Unnamed: 0,Model,Jensen-Shannon Divergence,RMSE
0,GAN,0.652091,0.160944
1,DNN,0.645012,0.125711
2,RF,0.406379,0.031906
3,KNN,0.42043,0.033988
