In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from sklearnex import patch_sklearn
patch_sklearn()

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from pathlib import Path
import h5py
import pickle
from tqdm.notebook import tqdm
import pandas as pd

from sklearn.model_selection import train_test_split
from skimage.transform import resize
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error

from src.data.file_utils import GetTV
from src.models import model, predict

from IPython.display import clear_output

Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)


In [3]:
def flatten(x):
    """Reshape ``x`` to 2-D: the first axis is kept, all remaining axes are merged."""
    n_rows = len(x)
    return x.reshape(n_rows, -1)

In [4]:
# Print every entry returned by predict.list_models() together with its index.
# The loop variable is intentionally not named `model`: the import cell binds
# `model` to a module from `src.models`, and reusing the name here would
# replace that module in the notebook namespace.
models = predict.list_models()
for idx, model_file in enumerate(models):
    print(idx, model_file)

0 ../models/weighted_outer_allold_set.pkl
1 ../models/lr_tv_inv_outer.pkl
2 ../models/weighted_outer_all.pkl
3 ../models/weighted_outer_allnew_half_set.pkl
4 ../models/lr_inversion_manual.pkl
5 ../models/lr.pkl
6 ../models/lr_supervised_mdl.pkl


# Data Prep

In [14]:
# Run configuration. `run_type` selects the data sub-directory and is embedded
# in the names of the prepared dataset and of the saved model.
run_type = 'l-mode'
prep_filename = f'weighted_outer_dataset_{run_type}'
prediction_filename = f'weighted_outer_{run_type}'
algorithm = 'linear'
# Passed as `test_size` when the files are split into train and test sets.
split_ratio = 0.2

In [15]:
# Input locations depend on the run type; output locations are shared.
tv_path = Path('../data/raw/tv_images', run_type)
label_path = Path('../data/labels/weighted_emission', run_type)
prep_path = Path('../data/processed/hdf5')
model_path = Path('../models')

In [16]:
# Open the TV image source for the configured run type and list its files.
tv = GetTV(tv_path)
files = tv.list_files()
file_lengths = tv.file_lengths()
# Running total of the file lengths with a leading 0 prepended.
cumulative_lengths = np.insert(np.cumsum(file_lengths), 0, 0)
# Shape of the full 'vid' array of the first listed file.
tv_dim = tv.load(files[0], 'vid').shape

### Fill Out TV Image Data

In [17]:
def _collect_frames_and_labels(file_list, label_path, tv_source):
    """Return (images, points) lists holding every selected frame and its label."""
    images, points = [], []
    for file in file_list:
        # 'frames' holds the indices of the frames to keep from the 'vid' array.
        frames = tv_source.load(file, 'frames').astype('int')
        tv_image = tv_source.load(file, 'vid')[frames]

        label_file = (label_path / file.stem).with_suffix('.pkl')
        # NOTE: pickle can execute arbitrary code; only load trusted label files.
        with open(label_file, 'rb') as f:
            labels = pickle.load(f)

        if len(labels) < len(frames):
            raise ValueError(
                f"{label_file} holds {len(labels)} labels but {len(frames)} frames were selected"
            )

        # Labels are paired with frames by position; extra labels are ignored.
        for i in range(len(frames)):
            images.append(tv_image[i])
            points.append(labels[i])
    return images, points


def file_split_compile_prep(split_ratio,
                      prep_path,
                      prep_filename,
                      label_path,
                      files,
                      random_state=182,
                      tv_source=None):
    """Split the files into train/test sets and save frames + labels to HDF5.

    The split is done per file, so all frames of one file end up on the same
    side of the split.

    Parameters
    ----------
    split_ratio : float or int
        Forwarded to ``train_test_split`` as ``test_size``.
    prep_path : pathlib.Path
        Directory that receives the HDF5 file.
    prep_filename : str
        Output file name without the ``.h5`` extension.
    label_path : pathlib.Path
        Directory holding one ``<file stem>.pkl`` label file per TV file.
    files : sequence of pathlib.Path
        TV files to split.
    random_state : int, optional
        Seed for the split. Defaults to 182, the value that was previously
        hard-coded.
    tv_source : object, optional
        Loader exposing ``load(file, key)``. Defaults to the notebook-level
        ``tv`` object, which is what the function always used before.

    Raises
    ------
    ValueError
        If a label file holds fewer labels than the number of selected frames.

    Side effects
    ------------
    Prints the stems of the train and test files, then writes the datasets
    ``vid_train``, ``points_train``, ``vid_test`` and ``points_test`` to
    ``prep_path / (prep_filename + '.h5')``, overwriting an existing file.
    """
    if tv_source is None:
        tv_source = tv

    # split by file
    train_files, test_files = train_test_split(
        files, test_size=split_ratio, random_state=random_state
    )
    for file in train_files:
        print(file.stem)
    print('-------------------')
    for file in test_files:
        print(file.stem)

    tv_train, points_train = _collect_frames_and_labels(train_files, label_path, tv_source)
    tv_test, points_test = _collect_frames_and_labels(test_files, label_path, tv_source)

    out_file = prep_path / str(prep_filename + '.h5')
    with h5py.File(out_file, 'w') as f:
        f.create_dataset('vid_train', data=tv_train)
        f.create_dataset('points_train', data=points_train)
        f.create_dataset('vid_test', data=tv_test)
        f.create_dataset('points_test', data=points_test)
    print(str(out_file),'has been saved!')

In [18]:
# Build the train/test HDF5 dataset from the configured paths and file list.
file_split_compile_prep(split_ratio, prep_path, prep_filename, label_path, files)

emission_structure_pu_cam240perp_190109
emission_structure_pu_cam240perp_190110
emission_structure_pu_cam240perp_190116
emission_structure_pu_cam240perp_199166
emission_structure_pu_cam240perp_199172
emission_structure_pu_cam240perp_190113
emission_structure_pu_cam240perp_199353
emission_structure_pu_cam240perp_199354
-------------------
emission_structure_pu_cam240perp_190114
emission_structure_pu_cam240perp_190115
../data/processed/hdf5/weighted_outer_dataset_l-mode.h5 has been saved!


# Train Test Deploy

## Linear Regression

### Train

In [69]:
file_name = f"{prep_filename}.h5"

# Read the four prepared datasets fully into memory, in the order they are unpacked.
with h5py.File(prep_path / file_name, 'r') as f:
    vid_train, points_train, vid_test, points_test = (
        f[key][:]
        for key in ('vid_train', 'points_train', 'vid_test', 'points_test')
    )

# First entry of each per-file array, all taken from the first listed file.
files = tv.list_files()
elevation = tv.load(files[0], 'elevation')[0]
radii = tv.load(files[0], 'radii')[0]
vid_shape = tv.load(files[0], 'vid')[0].shape

In [70]:
# Features: every frame flattened into one row. Targets: the labels as stored.
X_train = flatten(vid_train)
X_test = flatten(vid_test)
y_train = points_train # flatten(points_train)
y_test = points_test # flatten(points_test)

In [71]:
# Ordinary least-squares fit from flattened frames to the labels.
mdl = LinearRegression()
mdl.fit(X_train, y_train)

### Test

In [72]:
# mdl = pickle.load(open('../models/weighted_outer_allold_set.pkl', 'rb'))
r_predict = mdl.predict(X_test)
# real_predict = np.array([radii[np.round(r_predict[:,0]).astype(int)],elevation[np.round(r_predict[:,1]).astype(int)]]).T * 100 # from meter to cm (dimension is n x 2)
# real_y = np.array([radii[np.round(y_test[:,0]).astype(int)],elevation[np.round(y_test[:,1]).astype(int)]]).T * 100
err = mean_absolute_error(r_predict,y_test) * 100
print(err)
# z_err = np.abs(real_predict[:,1] - real_y[:,1])
# print(f"RMS (cm) : {np.sqrt(np.square(dist).mean())}")
# print(f"Z RMS (cm) : {np.sqrt(np.square(z_err).mean())}")

7.900937986032498


In [73]:
# Root-mean-square error, scaled by the same factor 100 as the MAE above.
# Arguments follow scikit-learn's (y_true, y_pred) order; the value is unchanged.
print(np.sqrt(mean_squared_error(y_test, r_predict))*100)

11.0915888712356


### Deploy

`old_set` = before FY2024. Set `ml_id` below accordingly; it is appended to the saved model's file name.

In [8]:
# Identifier appended to the model file name and used as the sub-directory
# for saved points; the empty string means no suffix.
ml_id = ''

In [61]:
# Persist the fitted model. The context manager closes the file, so the
# pickle is fully flushed to disk before the next cell reads it back.
with open(f"{model_path / prediction_filename}{ml_id}.pkl", 'wb') as f:
    pickle.dump(mdl, f)

In [9]:
# Directory for saved prediction points, one sub-directory per `ml_id`;
# created together with any missing parents, and left alone if it already exists.
weight_ml_point_save_path = Path('../data/processed/weight_ml_point') / ml_id
weight_ml_point_save_path.mkdir(parents=True, exist_ok=True)

In [10]:
# Reload the saved model; the context manager closes the file handle.
# NOTE: pickle can execute arbitrary code - only load model files you trust.
with open(f"{model_path / prediction_filename}{ml_id}.pkl", 'rb') as f:
    mdl = pickle.load(f)

In [17]:
# Predict a point for every selected frame of every file and write the
# predictions to a text file per shot.
for file in files:

    clear_output(wait=True)

    print('Shot:', file.stem.split('_')[-1])
    # Target of the alternative pickle output mentioned at the end of the loop.
    point_save_name = weight_ml_point_save_path / f"{file.stem}.pkl"
    # The labels themselves are not needed for prediction, so they are no
    # longer unpickled here. `label_file` is still assigned because the
    # following cell re-opens the last one.
    label_file = (label_path / file.stem).with_suffix('.pkl')
    frames = tv.load(file, 'frames').astype('int')
    tv_image = tv.load(file, 'vid')[frames]
    tv_flatten = flatten(tv_image)

    prediction_cartesian = mdl.predict(tv_flatten)
    # NOTE(review): the text file is written into `label_path`, next to the
    # label pickles - confirm that this is the intended destination.
    txt_file = (label_path / file.stem).with_suffix('.txt')
    with open(txt_file, 'w') as f:
        # Each prediction is written via its str() form, followed by a comma.
        f.writelines(f"{point}," for point in prediction_cartesian)
    print(f"Saved {txt_file}")
    # Alternative output: pickle `prediction_cartesian` to `point_save_name`.

Shot: 199354
Saved ../data/labels/weighted_emission/all/emission_structure_pu_cam240perp_199354.txt


In [18]:
# Sanity check: print how many labels one label file holds.
# `label_file` is the variable left over from the last iteration of the
# deploy loop above, so this inspects the last shot only.
with open(label_file, 'rb') as f:
    labels_cartesian = pickle.load(f)
    print(len(labels_cartesian))

119


## Convolutional

In [19]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, InputLayer
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

2024-09-03 18:58:15.900847: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-09-03 18:58:16.178173: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-09-03 18:58:16.250618: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-09-03 18:58:16.280191: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-09-03 18:58:16.454569: I tensorflow/core/platform/cpu_feature_guar

In [None]:
file_name = f"{prep_filename}.h5"

# Same load as in the Linear Regression section: all four datasets are read
# completely into memory.
with h5py.File(prep_path / file_name, 'r') as f:
    vid_train, points_train, vid_test, points_test = (
        f[key][:]
        for key in ('vid_train', 'points_train', 'vid_test', 'points_test')
    )

# First entry of each per-file array, all taken from the first listed file.
files = tv.list_files()
elevation = tv.load(files[0], 'elevation')[0]
radii = tv.load(files[0], 'radii')[0]
vid_shape = tv.load(files[0], 'vid')[0].shape

In [None]:
# Define the model
# The input shape is derived from the data (`vid_shape`, the shape of one
# frame) instead of being hard-coded, so the network always matches the
# frames it is trained on. A channel axis is appended for 2-D frames.
cnn_input_shape = tuple(vid_shape) if len(vid_shape) == 3 else (*vid_shape, 1)

# NOTE(review): this rebinds `model`, which the import cell binds to a module
# from `src.models`; the name is kept because the training cell uses it.
# NOTE(review): for a 720x480x1 input the Flatten output has
# 358 * 238 * 64 = 5,453,056 features, so Dense(128) alone holds ~698M
# weights - consider more pooling/striding before flattening.
model = Sequential([
    InputLayer(input_shape=cnn_input_shape),  # One frame (grayscale if a channel axis was appended)
    Conv2D(32, (3, 3), activation='relu'),  # First Conv layer with ReLU
    Conv2D(64, (3, 3), activation='relu'),  # Second Conv layer with ReLU
    MaxPooling2D(pool_size=(2, 2)),         # Max pooling layer with 2x2 pool size
    Dropout(0.5),                           # Dropout layer with 50% dropout rate
    Flatten(),                              # Flatten the output
    Dense(128, activation='relu'),          # First fully connected layer with ReLU
    Dropout(0.5),                           # Another Dropout layer
    Dense(2)                                # Output layer; NOTE(review): confirm labels carry 2 values per frame
])

# Compile the model using Adam optimizer and Mean Squared Error loss
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='mean_squared_error', 
              metrics=['mse'])

In [None]:
# Set up data generators for training, validation and testing.
# Only rescaling by 1/255 is applied; no augmentation is configured.
# NOTE(review): 1/255 assumes 8-bit pixel values - confirm against the TV data.
train_datagen = ImageDataGenerator(rescale=1./255)
val_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# ImageDataGenerator.flow needs rank-4 image arrays, so the CNN inputs are
# built from the image stacks loaded from the HDF5 file (the flattened
# X_train / X_test of the linear-regression section are 2-D and unusable here).
# A trailing channel axis is appended when the stacks are 3-D.
cnn_train_images = vid_train[..., np.newaxis] if vid_train.ndim == 3 else vid_train
cnn_test_images = vid_test[..., np.newaxis] if vid_test.ndim == 3 else vid_test

# Hold out part of the training frames for validation (no validation set was
# defined anywhere before this cell).
# NOTE(review): this splits by frame, so frames of one shot can land in both
# the training and validation sets, unlike the per-file split used when the
# dataset was built; a per-file validation split would avoid that.
X_train_cnn, X_val, y_train_cnn, y_val = train_test_split(
    cnn_train_images, points_train, test_size=split_ratio, random_state=182
)

# Create data generators
train_generator = train_datagen.flow(X_train_cnn, y_train_cnn, batch_size=32)
val_generator = val_datagen.flow(X_val, y_val, batch_size=32)
test_generator = test_datagen.flow(cnn_test_images, points_test, batch_size=32)

In [None]:
# Train the model
# Runs 10 epochs on the training generator, with the validation generator
# passed as validation data.
history = model.fit(
    train_generator,
    epochs=10,
    validation_data=val_generator
)

# Evaluate the model on the test set
# The model is compiled with an MSE loss plus an 'mse' metric, so evaluate()
# is unpacked into two values here; both should report the same quantity.
test_loss, test_mse = model.evaluate(test_generator)

print(f'Test loss (MSE): {test_loss}')
print(f'Test MSE: {test_mse}')

# Save Video

In [12]:
def nearest_index(array, value):
    """Return the position of the entry in ``array`` that lies closest to ``value``.

    When several entries are equally close, the first one wins (``argmin``
    semantics).
    """
    distances = np.abs(array - value)
    return distances.argmin()

def get_index(arr, coord):
    """Map every value in ``arr`` to the index of its nearest entry in ``coord``.

    Parameters
    ----------
    arr : array_like
        Values to look up. Any shape is accepted, including a scalar; the
        result has the same shape.
    coord : array_like
        One-dimensional coordinate axis, sorted in ascending order (required
        by ``np.searchsorted``).

    Returns
    -------
    numpy.ndarray of int
        Index into ``coord`` of the closest entry for each value. Values
        outside the range of ``coord`` map to the first or last index. On an
        exact tie between two neighbours the higher index is returned.

    Raises
    ------
    ValueError
        If ``coord`` is empty (there is no nearest entry to return).
    """
    values = np.asarray(arr)
    axis = np.asarray(coord)
    if axis.size == 0:
        raise ValueError("coord must contain at least one value")

    # Insertion point = first coord entry >= value; clipping sends values
    # beyond the last entry to the last index.
    right = np.clip(np.searchsorted(axis, values), 0, len(axis) - 1)
    # Left-hand neighbour (clamped at 0, where no left neighbour exists).
    left = np.maximum(right - 1, 0)

    # Step back one index only where the left neighbour is strictly closer.
    prefer_left = (right > 0) & (
        np.abs(values - axis[left]) < np.abs(values - axis[right])
    )
    return np.where(prefer_left, left, right)

In [13]:
def flatten(x):
    """Reshape ``x`` to 2-D: the first axis is kept, all remaining axes are merged."""
    n_rows = len(x)
    return x.reshape(n_rows, -1)

In [18]:
# Re-create the TV source and its per-file bookkeeping for the video section.
tv = GetTV(tv_path)
files = tv.list_files()
file_lengths = tv.file_lengths()
# Running total of the file lengths with a leading 0 prepended.
cumulative_lengths = np.insert(np.cumsum(file_lengths), 0, 0)
tv_dim = tv.load(files[0], 'vid').shape

# Load the saved model.
# NOTE(review): the Deploy section saves/loads "<prediction_filename><ml_id>.pkl";
# this path omits `ml_id`, so a different file is read whenever `ml_id` is
# not empty - confirm that this is intended.
# NOTE: pickle can execute arbitrary code - only load model files you trust.
with open(f"{model_path / prediction_filename}.pkl", 'rb') as f:
    mdl = pickle.load(f)
    
# Output directory for the rendered videos, one sub-directory per run type.
mp4_save_path = Path('../outputs/video/weighted_ml') / run_type
mp4_save_path.mkdir(parents=True, exist_ok=True)

In [23]:
# Render one MP4 per file: the inverted view with the labelled and the
# predicted position drawn as horizontal lines.
for file in files:

    clear_output(wait=True)

    # The shot id is the last underscore-separated token of the file stem.
    shot = file.stem.split('_')[-1]
    print('Shot:', shot)
    mp4_save_name = mp4_save_path / f"{shot}.mp4"
    label_file = (label_path / file.stem).with_suffix('.pkl')
    with open(label_file, 'rb') as f:
        labels_cartesian = pickle.load(f)
    frames = tv.load(file, 'frames').astype('int')
    tv_image = tv.load(file, 'vid')[frames]
    tv_flatten = flatten(tv_image)
    inverted = tv.load(file, 'inverted')
    elevation = tv.load(file, 'elevation')[0]

    prediction_cartesian = mdl.predict(tv_flatten)

    # Convert labels and predictions to indices on the elevation axis so they
    # can be drawn in the pixel coordinates of the image.
    prediction = get_index(prediction_cartesian, elevation)
    labels = get_index(labels_cartesian, elevation)

    # Initialize figure and axes
    print("Animating...")
    fig, ax = plt.subplots(1, 1, figsize=(5, 5))

    img = ax.imshow(inverted[0], origin='lower')
    hline_label = ax.axhline(labels[0], c='lime', label='label')
    hline_prediction = ax.axhline(prediction[0], c='red', label='prediction', ls='--')
    ax.legend(loc='upper right')
    ax.set_title('Inverted View: 0')

    fig.suptitle(f"Shot {shot}")

    # Function to update the plot
    def update(idx):
        """Redraw the image and both marker lines for animation frame ``idx``."""
        img.set_data(inverted[idx])

        hline_label.set_ydata([labels[idx]])
        hline_prediction.set_ydata([prediction[idx]])

        ax.set_title(f'Inverted View: {idx}')

        return img, hline_label, hline_prediction

    # Create the animation using FuncAnimation
    # NOTE(review): the animation runs over every entry of `inverted`, while
    # `labels` / `prediction` hold one entry per selected frame - confirm that
    # both have the same length.
    ani = animation.FuncAnimation(fig, update, frames=range(inverted.shape[0]), blit=True, repeat=False)

    # Save the animation as an MP4 file
    print("Saving MP4...")
    FFwriter = animation.FFMpegWriter(fps=30, extra_args=["-vcodec", "libx264"])
    ani.save(mp4_save_name, writer=FFwriter)

    # Close the figure so figures do not accumulate across shots.
    plt.close(fig)

Shot: 199354
Animating...
Saving MP4...
