# Notebook 5: Model Compression
## Introduction
# Loads models from Notebook 3, compresses for h=6 (chosen for balance), evaluates trade-offs.
# Justification: Dynamic/Float16/Int quantization for LSTM; param reduction/feature selection for RF. Ensures sustainability (lower energy) while maintaining ~95% accuracy.


In [1]:
import pandas as pd
import tensorflow as tf
import joblib
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_selection import SelectFromModel
from sklearn.metrics import mean_absolute_error, mean_squared_error
import os
import time

chosen_h = 6
df = pd.read_csv('/content/drive/MyDrive/sus-lsa/featured_data.csv', index_col=0, parse_dates=True)
train_size = int(len(df) * 0.8)
train_df = df.iloc[:train_size]
test_df = df.iloc[train_size:]
features = [col for col in df.columns if col not in ['pm25_value', 'target']]
X_train = train_df[features]
y_train = train_df['target']
X_test = test_df[features]
y_test = test_df['target']

# LSTM Compression
lstm = tf.keras.models.load_model(f'/content/drive/MyDrive/sus-lsa/lstm_model_h{chosen_h}.h5', custom_objects={'mse': tf.keras.metrics.MeanSquaredError()})
# Dynamic Quant
converter = tf.lite.TFLiteConverter.from_keras_model(lstm)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]
tflite_model = converter.convert()
with open(f'/content/drive/MyDrive/sus-lsa/lstm_dynamic_h{chosen_h}.tflite', 'wb') as f: f.write(tflite_model)
# Float16 Quant
converter = tf.lite.TFLiteConverter.from_keras_model(lstm)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_types = [tf.float16]
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]
tflite_float16 = converter.convert()
with open(f'/content/drive/MyDrive/sus-lsa/lstm_float16_h{chosen_h}.tflite', 'wb') as f: f.write(tflite_float16)
# Int Quant (third for robustness)
converter = tf.lite.TFLiteConverter.from_keras_model(lstm)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
def representative_dataset():
    for _ in range(100): yield [X_test.sample(1).values.astype(np.float32)]
converter.representative_dataset = representative_dataset
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8, tf.lite.OpsSet.SELECT_TF_OPS]
converter.inference_input_type = tf.int8
converter.inference_output_type = tf.int8
tflite_int = converter.convert()
with open(f'/content/drive/MyDrive/sus-lsa/lstm_int_h{chosen_h}.tflite', 'wb') as f: f.write(tflite_int)

# Evaluate (example for dynamic; repeat for others)
interpreter = tf.lite.Interpreter(model_path=f'/content/drive/MyDrive/sus-lsa/lstm_dynamic_h{chosen_h}.tflite')
interpreter.allocate_tensors()
preds = []
start = time.time()
for x in X_test.values.reshape(-1, 1, len(features)):
    interpreter.set_tensor(interpreter.get_input_details()[0]['index'], x.astype(np.float32))
    interpreter.invoke()
    preds.append(interpreter.get_tensor(interpreter.get_output_details()[0]['index']))
mae = mean_absolute_error(y_test, preds)
size = os.path.getsize(f'/content/drive/MyDrive/sus-lsa/lstm_dynamic_h{chosen_h}.tflite') / (1024*1024)
print(f'MAE: {mae}, Size: {size} MB, Time: {time.time() - start}s')

# RF Compression
rf = joblib.load(f'/content/drive/MyDrive/sus-lsa/rf_model_h{chosen_h}.pkl')
# Feature Selection
selector = SelectFromModel(rf, prefit=True)
X_train_sel = selector.transform(X_train)
X_test_sel = selector.transform(X_test)
rf_sel = RandomForestRegressor(n_estimators=50)  # Reduced estimators
rf_sel.fit(X_train_sel, y_train)
joblib.dump(rf_sel, f'/content/drive/MyDrive/sus-lsa/rf_sel_h{chosen_h}.pkl')
# Param Reduction
rf_red = RandomForestRegressor(n_estimators=50, max_depth=10)
rf_red.fit(X_train, y_train)
joblib.dump(rf_red, f'/content/drive/MyDrive/sus-lsa/rf_red_h{chosen_h}.pkl')
# Evaluate similarly

pd.DataFrame(compression_results).to_csv('/content/drive/MyDrive/sus-lsa/compression_results.csv')  # Justification: Compares size/accuracy trade-offs for sustainable AI.




Saved artifact at '/tmp/tmp0a972k12'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 1, 18), dtype=tf.float32, name='input_layer_2')
Output Type:
  TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)
Captures:
  137282919367888: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137282919370768: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137282919370960: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137284215124240: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137282919370384: TensorSpec(shape=(), dtype=tf.resource, name=None)


ConverterError: Could not translate MLIR to FlatBuffer.<unknown>:0: error: loc(callsite(callsite(fused["CudnnRNNV3:", "sequential_2_1/lstm_2_1/CudnnRNNV3@__inference_function_357"] at fused["StatefulPartitionedCall:", "StatefulPartitionedCall@__inference_signature_wrapper_388"]) at fused["StatefulPartitionedCall:", "StatefulPartitionedCall_1"])): 'tf.CudnnRNNV3' op is neither a custom op nor a flex op
<unknown>:0: note: loc(fused["StatefulPartitionedCall:", "StatefulPartitionedCall_1"]): called from
<unknown>:0: note: loc(callsite(callsite(fused["CudnnRNNV3:", "sequential_2_1/lstm_2_1/CudnnRNNV3@__inference_function_357"] at fused["StatefulPartitionedCall:", "StatefulPartitionedCall@__inference_signature_wrapper_388"]) at fused["StatefulPartitionedCall:", "StatefulPartitionedCall_1"])): see current operation: %1:5 = "tf.CudnnRNNV3"(%arg0, %arg1, %arg2, %arg3, %arg4) {T = f32, device = "", direction = "unidirectional", dropout = 0.000000e+00 : f32, input_mode = "linear_input", is_training = true, num_proj = 0 : i64, rnn_mode = "lstm", seed = 0 : i64, seed2 = 0 : i64, time_major = false} : (tensor<?x1x18xf32>, tensor<?x1x50xf32>, tensor<?x1x50xf32>, tensor<14000xf32>, tensor<?xi32>) -> (tensor<?x1x50xf32>, tensor<?x1x50xf32>, tensor<?x1x50xf32>, tensor<*xf32>, tensor<*xi8>)
<unknown>:0: note: loc(callsite(callsite(fused["CudnnRNNV3:", "sequential_2_1/lstm_2_1/CudnnRNNV3@__inference_function_357"] at fused["StatefulPartitionedCall:", "StatefulPartitionedCall@__inference_signature_wrapper_388"]) at fused["StatefulPartitionedCall:", "StatefulPartitionedCall_1"])): Error code: ERROR_NEEDS_CUSTOM_OPS
<unknown>:0: error: failed while converting: 'main': 
Some ops in the model are custom ops, See instructions to implement custom ops: https://www.tensorflow.org/lite/guide/ops_custom 
Custom ops: CudnnRNNV3
Details:
	tf.CudnnRNNV3(tensor<?x1x18xf32>, tensor<?x1x50xf32>, tensor<?x1x50xf32>, tensor<14000xf32>, tensor<?xi32>) -> (tensor<?x1x50xf32>, tensor<?x1x50xf32>, tensor<?x1x50xf32>, tensor<*xf32>, tensor<*xi8>) : {T = f32, device = "", direction = "unidirectional", dropout = 0.000000e+00 : f32, input_mode = "linear_input", is_training = true, num_proj = 0 : i64, rnn_mode = "lstm", seed = 0 : i64, seed2 = 0 : i64, time_major = false}

<unknown>:0: note: see current operation: 
"func.func"() <{arg_attrs = [{tf_saved_model.index_path = ["input_layer_2"]}], function_type = (tensor<?x1x18xf32>) -> tensor<?x1xf32>, res_attrs = [{tf_saved_model.index_path = ["output_0"]}], sym_name = "main"}> ({
^bb0(%arg0: tensor<?x1x18xf32>):
  %0 = "arith.constant"() <{value = dense<[-1, 50]> : tensor<2xi32>}> : () -> tensor<2xi32>
  %1 = "arith.constant"() <{value = dense_resource<__elided__> : tensor<1x50xf32>}> : () -> tensor<1x50xf32>
  %2 = "arith.constant"() <{value = dense_resource<__elided__> : tensor<14000xf32>}> : () -> tensor<14000xf32>
  %3 = "arith.constant"() <{value = dense<0.0462510623> : tensor<1xf32>}> : () -> tensor<1xf32>
  %4 = "arith.constant"() <{value = dense<1> : tensor<i32>}> : () -> tensor<i32>
  %5 = "arith.constant"() <{value = dense<50> : tensor<i32>}> : () -> tensor<i32>
  %6 = "arith.constant"() <{value = dense<0.000000e+00> : tensor<f32>}> : () -> tensor<f32>
  %7 = "arith.constant"() <{value = dense<1> : tensor<1xi32>}> : () -> tensor<1xi32>
  %8 = "arith.constant"() <{value = dense<0> : tensor<1xi32>}> : () -> tensor<1xi32>
  %9 = "tfl.shape"(%arg0) : (tensor<?x1x18xf32>) -> tensor<3xi32>
  %10 = "tfl.strided_slice"(%9, %8, %7, %7) <{begin_mask = 0 : i32, ellipsis_mask = 0 : i32, end_mask = 0 : i32, new_axis_mask = 0 : i32, offset = false, shrink_axis_mask = 1 : i32}> : (tensor<3xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<i32>
  %11 = "tfl.reshape"(%10, %7) : (tensor<i32>, tensor<1xi32>) -> tensor<1xi32>
  %12 = "tfl.fill"(%11, %4) : (tensor<1xi32>, tensor<i32>) -> tensor<?xi32>
  %13 = "tfl.pack"(%10, %5) <{axis = 0 : i32, values_count = 2 : i32}> : (tensor<i32>, tensor<i32>) -> tensor<2xi32>
  %14 = "tfl.fill"(%13, %6) : (tensor<2xi32>, tensor<f32>) -> tensor<?x50xf32>
  %15 = "tfl.expand_dims"(%14, %4) : (tensor<?x50xf32>, tensor<i32>) -> tensor<?x1x50xf32>
  %16:5 = "tfl.custom_tf"(%arg0, %15, %15, %2, %12) ({
  ^bb0(%arg1: tensor<?x1x18xf32>, %arg2: tensor<?x1x50xf32>, %arg3: tensor<?x1x50xf32>, %arg4: tensor<14000xf32>, %arg5: tensor<?xi32>):
    %19:5 = "tf.CudnnRNNV3"(%arg1, %arg2, %arg3, %arg4, %arg5) {T = f32, device = "", direction = "unidirectional", dropout = 0.000000e+00 : f32, input_mode = "linear_input", is_training = true, num_proj = 0 : i64, rnn_mode = "lstm", seed = 0 : i64, seed2 = 0 : i64, time_major = false} : (tensor<?x1x18xf32>, tensor<?x1x50xf32>, tensor<?x1x50xf32>, tensor<14000xf32>, tensor<?xi32>) -> (tensor<?x1x50xf32>, tensor<?x1x50xf32>, tensor<?x1x50xf32>, tensor<*xf32>, tensor<*xi8>)
    "tfl.yield"(%19#0, %19#1, %19#2, %19#3, %19#4) : (tensor<?x1x50xf32>, tensor<?x1x50xf32>, tensor<?x1x50xf32>, tensor<*xf32>, tensor<*xi8>) -> ()
  }) {T = f32, device = "", direction = "unidirectional", dropout = 0.000000e+00 : f32, input_mode = "linear_input", is_training = true, num_proj = 0 : i64, rnn_mode = "lstm", seed = 0 : i64, seed2 = 0 : i64, time_major = false} : (tensor<?x1x18xf32>, tensor<?x1x50xf32>, tensor<?x1x50xf32>, tensor<14000xf32>, tensor<?xi32>) -> (tensor<?x1x50xf32>, tensor<?x1x50xf32>, tensor<?x1x50xf32>, tensor<*xf32>, tensor<*xi8>)
  %17 = "tfl.reshape"(%16#1, %0) : (tensor<?x1x50xf32>, tensor<2xi32>) -> tensor<?x50xf32>
  %18 = "tfl.fully_connected"(%17, %1, %3) <{fused_activation_function = "NONE", keep_num_dims = false, weights_format = "DEFAULT"}> : (tensor<?x50xf32>, tensor<1x50xf32>, tensor<1xf32>) -> tensor<?x1xf32>
  "func.return"(%18) : (tensor<?x1xf32>) -> ()
}) {tf.entry_function = {control_outputs = "", inputs = "serving_default_input_layer_2:0", outputs = "StatefulPartitionedCall_1:0"}, tf_saved_model.exported_names = ["serving_default"]} : () -> ()
