In [1]:
import tensorflow as tf

# Ask TensorFlow which GPU devices are visible to this runtime.
gpus = tf.config.list_physical_devices('GPU')
print(f"GPU Available: {len(gpus) > 0}")

if not gpus:
    # No accelerator attached: print the steps for enabling one in Colab.
    for message in (
        "❌ NO GPU!",
        "Fix: Go to Runtime → Change runtime type → Select 'GPU' → Save",
    ):
        print(message)
else:
    # Show the first device TensorFlow reported.
    print(f"GPU Type: {gpus[0]}")

GPU Available: True
GPU Type: PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')


In [2]:
import os

from google.colab import drive

# Attach Google Drive and work from the Drive root.
drive.mount('/content/drive')
os.chdir('/content/drive/MyDrive')
print("✓ Mounted at:", os.getcwd())

# Create the DeepVision project folder on first run, then move into it.
project_dir = 'DeepVision'
os.makedirs(project_dir, exist_ok=True)
os.chdir(project_dir)
print("✓ Working directory:", os.getcwd())

Mounted at /content/drive
✓ Mounted at: /content/drive/MyDrive
✓ Working directory: /content/drive/MyDrive/DeepVision


In [11]:
# Source of the baseline training script. It is kept in a string so this cell
# can write it into the working directory, where a later cell runs it with
# `!python` in a separate process. Backslashes are doubled ("\\n") so that the
# generated file contains the escape sequence rather than a raw line break.
training_code = '''#!/usr/bin/env python3
"""Baseline CSRNet-style crowd counter: train on density maps, score counts on the test split."""
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # must be set before TensorFlow is imported

import sys
import pickle
import numpy as np
from pathlib import Path

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, regularizers
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

print("\\n" + "="*80)
print("CSRNet - GPU TRAINING (Google Colab)")
print("="*80)

gpus = tf.config.list_physical_devices('GPU')
print(f"\\nGPU Devices: {len(gpus)}")

CONFIG = {
    'seed': 42,
    'batch_size': 32,
    'epochs': 100,
    'learning_rate': 1e-3,
    'weight_decay': 1e-5,
    'data_path': 'processed_dataset_fixed/part_A_fixed.pkl',
    'results_path': 'results/csrnet_gpu_trained',
}

# Seed the Python, NumPy and TensorFlow RNGs so weight initialisation and
# batch shuffling repeat from run to run.
keras.utils.set_random_seed(CONFIG['seed'])

# LOAD DATA
print("\\n[LOADING DATA]")

if not Path(CONFIG['data_path']).exists():
    print(f"ERROR: {CONFIG['data_path']} not found")
    sys.exit(1)

# NOTE: pickle.load can execute arbitrary code; only load files you created yourself.
with open(CONFIG['data_path'], 'rb') as f:
    data = pickle.load(f)

X_train = data['X_train'].astype('float32')
# Add a trailing channel axis so the targets match the model's single-channel output.
y_density_train = data['y_density_train'][..., np.newaxis].astype('float32')
X_test = data['X_test'].astype('float32')
# Flatten so that (N, 1)-shaped counts cannot broadcast against (N,) predictions.
y_count_test = np.asarray(data['y_count_test']).ravel()

print(f"✓ Training: X{X_train.shape}, y{y_density_train.shape}")
print(f"✓ Testing:  X{X_test.shape}")

# BUILD MODEL
print("\\n[BUILDING MODEL]")


def conv_relu(filters, dilation_rate=1):
    """Return a 3x3 same-padded, L2-regularised Conv2D layer with ReLU activation."""
    return layers.Conv2D(
        filters, 3, padding='same', dilation_rate=dilation_rate,
        kernel_regularizer=regularizers.l2(CONFIG['weight_decay']),
        activation='relu',
    )


model = keras.Sequential([
    layers.Input(shape=(256, 256, 3)),

    # Block 1
    conv_relu(64),
    conv_relu(64),
    layers.MaxPooling2D(2),

    # Block 2
    conv_relu(128),
    conv_relu(128),
    layers.MaxPooling2D(2),

    # Block 3 (no pooling afterwards, so the output stays 64x64 to match the ground truth)
    conv_relu(256),
    conv_relu(256),
    conv_relu(256),

    # Block 4
    conv_relu(512),
    conv_relu(512),
    conv_relu(512),

    # Backend (dilated)
    conv_relu(512, dilation_rate=2),
    conv_relu(512, dilation_rate=2),
    conv_relu(512, dilation_rate=2),
    conv_relu(256, dilation_rate=2),
    conv_relu(128, dilation_rate=2),
    conv_relu(64, dilation_rate=2),

    # 1x1 projection to a single-channel, non-negative density map.
    layers.Conv2D(1, 1, padding='same', activation='relu'),
], name='CSRNet')

optimizer = keras.optimizers.Adam(learning_rate=CONFIG['learning_rate'])
model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

print(f"✓ Parameters: {model.count_params():,}")

# CALLBACKS
callbacks = [
    EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True, verbose=1),
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-6, verbose=1),
]

# TRAIN
print("\\n[TRAINING]")
print(f"Batch size: {CONFIG['batch_size']}")
print(f"Max epochs: {CONFIG['epochs']}\\n")

# validation_split holds out the LAST 20% of the arrays, taken before shuffling.
history = model.fit(
    X_train, y_density_train,
    batch_size=CONFIG['batch_size'],
    epochs=CONFIG['epochs'],
    validation_split=0.2,
    callbacks=callbacks,
    verbose=1,
)

# EVALUATE
print("\\n" + "="*80)
print("[EVALUATION]")
print("="*80)

# Predicted count per image = sum over that image's predicted density map.
density_pred = model.predict(X_test, batch_size=50, verbose=0)
predictions = density_pred.reshape(len(density_pred), -1).sum(axis=1)

mae = np.mean(np.abs(predictions - y_count_test))
rmse = np.sqrt(np.mean((predictions - y_count_test) ** 2))

print(f"\\nRESULTS:")
print(f"  MAE:   {mae:.2f} (Target: 70-150)")
print(f"  RMSE:  {rmse:.2f} (Target: 100-200)")

# The targets are closed ranges, so an error BELOW the lower bound is also reported as FAIL.
mae_pass = 70 <= mae <= 150
rmse_pass = 100 <= rmse <= 200

print(f"\\nTARGET ACHIEVEMENT:")
print(f"  MAE  [70-150]:    {'PASS' if mae_pass else 'FAIL'}")
print(f"  RMSE [100-200]:   {'PASS' if rmse_pass else 'FAIL'}")

print(f"\\nFirst 25 predictions:")
for i in range(min(25, len(predictions))):
    error = abs(predictions[i] - y_count_test[i])
    print(f"  {i+1:2d}: pred={predictions[i]:7.0f}, true={y_count_test[i]:7.0f}, error={error:6.0f}")

# SAVE
results_dir = Path(CONFIG['results_path'])
results_dir.mkdir(parents=True, exist_ok=True)

with open(results_dir / 'results.pkl', 'wb') as f:
    pickle.dump({
        'mae': float(mae),
        'rmse': float(rmse),
        'predictions': predictions.tolist(),
        'ground_truth': y_count_test.tolist(),
        'model_params': model.count_params(),
        'mae_pass': bool(mae_pass),
        'rmse_pass': bool(rmse_pass),
    }, f)

model.save(results_dir / 'model.keras')

print(f"\\n✓ Results saved to: {results_dir}")
print("="*80)

if mae_pass and rmse_pass:
    print("\\n✓✓✓ SUCCESS! TARGETS ACHIEVED! ✓✓✓\\n")
else:
    print("\\n⚠ Check results above\\n")
'''

# The script contains non-ASCII characters, so write it as UTF-8 regardless of
# the platform's default encoding.
with open('train_gpu_colab.py', 'w', encoding='utf-8') as f:
    f.write(training_code)

print("✓ Training script created: train_gpu_colab.py")
print("✓ Ready for data upload")

✓ Training script created: train_gpu_colab.py
✓ Ready for data upload


In [6]:
import os

# Location of the preprocessed dataset, relative to the current working directory.
path = 'processed_dataset_fixed/part_A_fixed.pkl'

if not os.path.exists(path):
    # Dataset missing: say where it has to be uploaded.
    print(f"❌ File not found: {path}")
    print("Upload part_A_fixed.pkl to: DeepVision/processed_dataset_fixed/")
else:
    # Report the file size in mebibytes.
    size_mb = os.path.getsize(path) / (1024**2)
    print(f"✓ Data ready! Size: {size_mb:.0f} MB")

✓ Data ready! Size: 369 MB


In [12]:
# Run the generated baseline script (train_gpu_colab.py) through the shell, i.e. in a separate Python process.
!python train_gpu_colab.py


2025-12-06 13:33:13.197237: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1765027993.216847   10407 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1765027993.222953   10407 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1765027993.238684   10407 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1765027993.238708   10407 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1765027993.238714   10407 computation_placer.cc:177] computation placer alr

In [17]:
# Source of the "advanced" training script (BatchNorm + Dropout, mixed
# precision, two-phase training). It is kept in a string so this cell can write
# it into the working directory, where a later cell runs it with `!python`.
# Backslashes are doubled ("\\n") so the generated file contains the escape
# sequence rather than a raw line break.
training_code = '''#!/usr/bin/env python3
"""CSRNet-style crowd counter with BatchNorm/Dropout, mixed precision and two-phase training."""
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # must be set before TensorFlow is imported

import sys
import pickle
import numpy as np
from pathlib import Path
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, regularizers, optimizers
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

print("\\n" + "="*80)
print("CSRNet - ADVANCED GPU TRAINING")
print("="*80)

gpus = tf.config.list_physical_devices('GPU')
print(f"\\nGPU: {len(gpus) > 0}")

# Mixed precision for GPU
policy = tf.keras.mixed_precision.Policy('mixed_float16')
tf.keras.mixed_precision.set_global_policy(policy)
print("Mixed precision: ENABLED")

CONFIG = {
    'seed': 42,
    'batch_size': 16,
    'epochs': 200,  # total budget, split evenly between the two phases below
    'learning_rate': 1e-4,
    'weight_decay': 5e-5,
    'data_path': 'processed_dataset_fixed/part_A_fixed.pkl',
    'results_path': 'results/csrnet_advanced_gpu',
}
EPOCHS_PER_PHASE = CONFIG['epochs'] // 2

# Seed the Python, NumPy and TensorFlow RNGs so weight initialisation, dropout
# and batch shuffling repeat from run to run.
keras.utils.set_random_seed(CONFIG['seed'])

# LOAD DATA
print("\\n[LOADING DATA]")

if not Path(CONFIG['data_path']).exists():
    print(f"ERROR: {CONFIG['data_path']} not found")
    sys.exit(1)

# NOTE: pickle.load can execute arbitrary code; only load files you created yourself.
with open(CONFIG['data_path'], 'rb') as f:
    data = pickle.load(f)

X_train = data['X_train'].astype('float32')
# Add a trailing channel axis so the targets match the model's single-channel output.
y_density_train = data['y_density_train'][..., np.newaxis].astype('float32')
X_test = data['X_test'].astype('float32')
# Flatten so that (N, 1)-shaped counts cannot broadcast against (N,) predictions.
y_count_test = np.asarray(data['y_count_test']).ravel()

# Split data
X_tr, X_val, y_tr, y_val = train_test_split(
    X_train, y_density_train, test_size=0.15, random_state=CONFIG['seed'])

print(f"✓ Train: {X_tr.shape}, Val: {X_val.shape}")
print(f"✓ Test: {X_test.shape}")

# BUILD MODEL with BatchNorm
print("\\n[BUILDING MODEL]")


def conv_bn_relu(filters, dilation_rate=1):
    """Return [Conv2D, BatchNormalization, ReLU] for one 3x3 same-padded, L2-regularised stage."""
    return [
        layers.Conv2D(
            filters, 3, padding='same', dilation_rate=dilation_rate,
            kernel_regularizer=regularizers.l2(CONFIG['weight_decay']),
            activation=None,
        ),
        layers.BatchNormalization(),
        layers.Activation('relu'),
    ]


model = keras.Sequential([
    layers.Input(shape=(256, 256, 3)),

    # Block 1
    *conv_bn_relu(64),
    *conv_bn_relu(64),
    layers.MaxPooling2D(2),
    layers.Dropout(0.2),

    # Block 2
    *conv_bn_relu(128),
    *conv_bn_relu(128),
    layers.MaxPooling2D(2),
    layers.Dropout(0.2),

    # Block 3 (no pooling afterwards, so the output stays 64x64 to match the ground truth)
    *conv_bn_relu(256),
    *conv_bn_relu(256),
    *conv_bn_relu(256),
    layers.Dropout(0.2),

    # Block 4
    *conv_bn_relu(512),
    *conv_bn_relu(512),
    *conv_bn_relu(512),

    # Backend - Dilated
    *conv_bn_relu(512, dilation_rate=2),
    *conv_bn_relu(512, dilation_rate=2),
    *conv_bn_relu(512, dilation_rate=2),
    *conv_bn_relu(256, dilation_rate=2),
    *conv_bn_relu(128, dilation_rate=2),
    *conv_bn_relu(64, dilation_rate=2),

    # Under the mixed_float16 policy the output layer must run in float32;
    # otherwise the density map, the loss and the per-image sums used for
    # counting are all computed in half precision.
    layers.Conv2D(1, 1, padding='same', activation='relu', dtype='float32'),
], name='CSRNet-Advanced')


# Custom loss: MSE + MAE
def combined_loss(y_true, y_pred):
    """Weighted blend of the two losses: 0.7 * MSE + 0.3 * MAE."""
    mse = tf.keras.losses.mse(y_true, y_pred)
    mae = tf.keras.losses.mae(y_true, y_pred)
    return 0.7 * mse + 0.3 * mae


optimizer = optimizers.Adam(learning_rate=CONFIG['learning_rate'])
model.compile(optimizer=optimizer, loss=combined_loss, metrics=['mae', 'mse'])
print(f"✓ Parameters: {model.count_params():,}")

# PHASE 1: Warm-up
print("\\n[PHASE 1: WARM-UP TRAINING]")

callbacks1 = [
    EarlyStopping(monitor='val_mae', patience=30, restore_best_weights=True, verbose=1),
    ReduceLROnPlateau(monitor='val_mae', factor=0.7, patience=15, min_lr=1e-7, verbose=1),
]

history1 = model.fit(
    X_tr, y_tr,
    validation_data=(X_val, y_val),
    batch_size=CONFIG['batch_size'],
    epochs=EPOCHS_PER_PHASE,
    callbacks=callbacks1,
    verbose=1,
)

# PHASE 2: Fine-tuning with lower LR
print("\\n[PHASE 2: FINE-TUNING]")

# Re-compiling keeps the trained weights and installs a new optimizer with a 5x lower learning rate.
optimizer2 = optimizers.Adam(learning_rate=CONFIG['learning_rate'] / 5)
model.compile(optimizer=optimizer2, loss=combined_loss, metrics=['mae', 'mse'])

callbacks2 = [
    EarlyStopping(monitor='val_mae', patience=50, restore_best_weights=True, verbose=1),
    ReduceLROnPlateau(monitor='val_mae', factor=0.8, patience=20, min_lr=1e-8, verbose=1),
]

history2 = model.fit(
    X_tr, y_tr,
    validation_data=(X_val, y_val),
    batch_size=CONFIG['batch_size'] * 2,
    epochs=EPOCHS_PER_PHASE,
    callbacks=callbacks2,
    verbose=1,
)

# EVALUATE
print("\\n" + "="*80)
print("[EVALUATION]")
print("="*80)

# Predicted count per image = sum over that image's predicted density map, floored at zero.
density_pred = model.predict(X_test, batch_size=32, verbose=0)
predictions = np.maximum(density_pred.reshape(len(density_pred), -1).sum(axis=1), 0)

mae = np.mean(np.abs(predictions - y_count_test))
rmse = np.sqrt(np.mean((predictions - y_count_test) ** 2))

print(f"\\nRESULTS:")
print(f"  MAE:   {mae:.2f} (Target: 70-150)")
print(f"  RMSE:  {rmse:.2f} (Target: 100-200)")

# The targets are closed ranges, so an error BELOW the lower bound is also reported as FAIL.
mae_pass = 70 <= mae <= 150
rmse_pass = 100 <= rmse <= 200

print(f"\\nTARGET ACHIEVEMENT:")
print(f"  MAE  [70-150]:    {'PASS' if mae_pass else 'FAIL'}")
print(f"  RMSE [100-200]:   {'PASS' if rmse_pass else 'FAIL'}")

print(f"\\nFirst 25 predictions:")
for i in range(min(25, len(predictions))):
    error = abs(predictions[i] - y_count_test[i])
    print(f"  {i+1:2d}: pred={predictions[i]:7.0f}, true={y_count_test[i]:7.0f}, error={error:6.0f}")

# SAVE
results_dir = Path(CONFIG['results_path'])
results_dir.mkdir(parents=True, exist_ok=True)

with open(results_dir / 'results.pkl', 'wb') as f:
    pickle.dump({
        'mae': float(mae),
        'rmse': float(rmse),
        'predictions': predictions.tolist(),
        'ground_truth': y_count_test.tolist(),
        'model_params': model.count_params(),
        'mae_pass': bool(mae_pass),
        'rmse_pass': bool(rmse_pass),
    }, f)

model.save(results_dir / 'model.keras')

print(f"\\n✓ Results saved to: {results_dir}")
print("="*80)

if mae_pass and rmse_pass:
    print("\\n✓✓✓ SUCCESS! TARGETS ACHIEVED! ✓✓✓\\n")
else:
    print("\\n⚠ Check results - may need further optimization\\n")
'''

# The script contains non-ASCII characters, so write it as UTF-8 regardless of
# the platform's default encoding.
with open('train_gpu_advanced.py', 'w', encoding='utf-8') as f:
    f.write(training_code)

print("✓ Advanced training script created")

✓ Advanced training script created


In [18]:
# Run the generated "advanced" script (train_gpu_advanced.py) through the shell, i.e. in a separate Python process.
!python train_gpu_advanced.py

2025-12-06 14:16:43.214285: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1765030603.248765   23207 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1765030603.259955   23207 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1765030603.284005   23207 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1765030603.284043   23207 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1765030603.284050   23207 computation_placer.cc:177] computation placer alr

In [23]:
# Source of the "ultimate" training script (per-image input standardisation,
# scaled density targets, Huber+MAE loss). It is kept in a string so this cell
# can write it into the working directory, where a later cell runs it with
# `!python`. Backslashes are doubled ("\\n") so the generated file contains the
# escape sequence rather than a raw line break.
training_code = '''#!/usr/bin/env python3
"""CSRNet-style crowd counter trained on standardised images and globally scaled density maps."""
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # must be set before TensorFlow is imported

import sys, pickle, numpy as np
from pathlib import Path
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, regularizers, optimizers
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

print("\\n" + "="*80)
print("CSRNet - ULTIMATE SOLUTION")
print("="*80)

gpus = tf.config.list_physical_devices('GPU')
print(f"\\nGPU: {len(gpus) > 0}")

CONFIG = {
    'seed': 42,
    'batch_size': 8,
    'epochs': 300,
    'learning_rate': 5e-5,
    'weight_decay': 1e-4,
    'data_path': 'processed_dataset_fixed/part_A_fixed.pkl',
    'results_path': 'results/csrnet_ultimate_ensemble',
}

# Seed the Python, NumPy and TensorFlow RNGs so weight initialisation, dropout
# and batch shuffling repeat from run to run.
keras.utils.set_random_seed(CONFIG['seed'])


def standardize_per_image(images):
    """Standardise each channel of each image with that image's own mean and std.

    `images` is indexed as (image, row, column, channel); the result has the
    same shape. The 1e-5 term keeps constant channels from dividing by zero.
    """
    mean = images.mean(axis=(1, 2), keepdims=True)
    std = images.std(axis=(1, 2), keepdims=True)
    return (images - mean) / (std + 1e-5)


# LOAD DATA
print("\\n[LOADING DATA]")

if not Path(CONFIG['data_path']).exists():
    print(f"ERROR: {CONFIG['data_path']} not found")
    sys.exit(1)

# NOTE: pickle.load can execute arbitrary code; only load files you created yourself.
with open(CONFIG['data_path'], 'rb') as f:
    data = pickle.load(f)

X_train_raw = data['X_train'].astype('float32')
# Add a trailing channel axis so the targets match the model's single-channel output.
y_density_train_raw = data['y_density_train'][..., np.newaxis].astype('float32')
X_test_raw = data['X_test'].astype('float32')
# Flatten so that (N, 1)-shaped counts cannot broadcast against (N,) predictions.
y_count_test = np.asarray(data['y_count_test']).ravel()

# Per-image normalization
print("Normalizing per-image...")
X_train = standardize_per_image(X_train_raw)
X_test = standardize_per_image(X_test_raw)

# Scale the targets by ONE factor taken from the training set. A per-image
# factor cannot be undone at test time, because the maximum of a test image's
# true density map is not known when predicting.
DENSITY_SCALE = float(y_density_train_raw.max())
if DENSITY_SCALE <= 0:
    print("ERROR: training density maps contain no positive values")
    sys.exit(1)
y_density_train = y_density_train_raw / DENSITY_SCALE

print(f"✓ Train: {X_train.shape}, Test: {X_test.shape}")

X_tr, X_val, y_tr, y_val = train_test_split(
    X_train, y_density_train, test_size=0.1, random_state=CONFIG['seed'])
print(f"✓ Split: train={X_tr.shape[0]}, val={X_val.shape[0]}")

# BUILD MODEL
print("\\n[BUILDING MODEL]")


def conv_bn_relu(filters, dilation_rate=1):
    """Return [Conv2D, BatchNormalization, ReLU] for one 3x3 same-padded, L2-regularised stage."""
    return [
        layers.Conv2D(
            filters, 3, padding='same', dilation_rate=dilation_rate,
            kernel_regularizer=regularizers.l2(CONFIG['weight_decay']),
            activation=None,
        ),
        layers.BatchNormalization(),
        layers.Activation('relu'),
    ]


model = keras.Sequential([
    layers.Input(shape=(256, 256, 3)),

    # Block 1
    *conv_bn_relu(64),
    *conv_bn_relu(64),
    layers.MaxPooling2D(2),
    layers.Dropout(0.3),

    # Block 2
    *conv_bn_relu(128),
    *conv_bn_relu(128),
    layers.MaxPooling2D(2),
    layers.Dropout(0.3),

    # Block 3 (no pooling afterwards, so the output stays at 1/4 of the input resolution)
    *conv_bn_relu(256),
    *conv_bn_relu(256),
    *conv_bn_relu(256),
    layers.Dropout(0.3),

    # Block 4
    *conv_bn_relu(512),
    *conv_bn_relu(512),
    *conv_bn_relu(512),

    # Backend
    *conv_bn_relu(512, dilation_rate=2),
    layers.Dropout(0.2),
    *conv_bn_relu(512, dilation_rate=2),
    layers.Dropout(0.2),
    *conv_bn_relu(512, dilation_rate=2),
    *conv_bn_relu(256, dilation_rate=2),
    *conv_bn_relu(128, dilation_rate=2),
    *conv_bn_relu(64, dilation_rate=2),

    # 1x1 projection to a single-channel, non-negative density map.
    layers.Conv2D(1, 1, padding='same', activation='relu'),
], name='CSRNet-Ultimate')

# Robust loss. The Huber object is built once instead of on every loss call.
_huber = tf.keras.losses.Huber(delta=0.5)


def robust_loss(y_true, y_pred):
    """Weighted blend of the two losses: 0.8 * Huber(delta=0.5) + 0.2 * MAE."""
    return 0.8 * _huber(y_true, y_pred) + 0.2 * tf.keras.losses.mae(y_true, y_pred)


optimizer = optimizers.Adam(learning_rate=CONFIG['learning_rate'])
model.compile(optimizer=optimizer, loss=robust_loss, metrics=['mae', 'mse'])
print(f"✓ Parameters: {model.count_params():,}")

# TRAIN
print("\\n[ULTRA-CONSERVATIVE TRAINING]")
print(f"Batch: {CONFIG['batch_size']}, LR: {CONFIG['learning_rate']}, Epochs: {CONFIG['epochs']}\\n")

callbacks = [
    EarlyStopping(monitor='val_mae', patience=50, restore_best_weights=True, verbose=1),
    ReduceLROnPlateau(monitor='val_mae', factor=0.9, patience=20, min_lr=1e-8, verbose=1),
]

history = model.fit(
    X_tr, y_tr,
    validation_data=(X_val, y_val),
    batch_size=CONFIG['batch_size'],
    epochs=CONFIG['epochs'],
    callbacks=callbacks,
    verbose=1,
)

# EVALUATE
print("\\n" + "="*80)
print("[EVALUATION WITH POST-PROCESSING]")
print("="*80)

# Sum each predicted (scaled) density map, floored at zero.
density_pred = model.predict(X_test, batch_size=16, verbose=0)
predictions_normalized = np.maximum(
    density_pred.reshape(len(density_pred), -1).sum(axis=1), 0)

# Denormalize: undo the global target scaling to get head counts.
predictions = predictions_normalized * DENSITY_SCALE

# Post-processing: clip predicted counts to the Tukey fences of the predictions.
# NOTE(review): this makes every prediction depend on the rest of the test set
# and caps genuinely dense scenes; confirm it is still wanted now that the
# denormalisation no longer produces spurious outliers.
q25 = np.percentile(predictions, 25)
q75 = np.percentile(predictions, 75)
iqr = q75 - q25
predictions_clipped = np.clip(predictions, q25 - 1.5*iqr, q75 + 1.5*iqr)

mae = np.mean(np.abs(predictions_clipped - y_count_test))
rmse = np.sqrt(np.mean((predictions_clipped - y_count_test) ** 2))

print(f"\\nRESULTS:")
print(f"  MAE:   {mae:.2f} (Target: 70-150)")
print(f"  RMSE:  {rmse:.2f} (Target: 100-200)")

# The targets are closed ranges, so an error BELOW the lower bound is also reported as FAIL.
mae_pass = 70 <= mae <= 150
rmse_pass = 100 <= rmse <= 200

print(f"\\nTARGET ACHIEVEMENT:")
print(f"  MAE  [70-150]:    {'PASS' if mae_pass else 'FAIL'}")
print(f"  RMSE [100-200]:   {'PASS' if rmse_pass else 'FAIL'}")

print(f"\\nFirst 25 predictions:")
for i in range(min(25, len(predictions_clipped))):
    error = abs(predictions_clipped[i] - y_count_test[i])
    print(f"  {i+1:2d}: pred={predictions_clipped[i]:7.0f}, true={y_count_test[i]:7.0f}, error={error:6.0f}")

# SAVE
results_dir = Path(CONFIG['results_path'])
results_dir.mkdir(parents=True, exist_ok=True)

with open(results_dir / 'results.pkl', 'wb') as f:
    pickle.dump({
        'mae': float(mae),
        'rmse': float(rmse),
        'predictions': predictions_clipped.tolist(),
        'ground_truth': y_count_test.tolist(),
        'model_params': model.count_params(),
        'mae_pass': bool(mae_pass),
        'rmse_pass': bool(rmse_pass),
    }, f)

model.save(results_dir / 'model.keras')

print(f"\\n✓ Results saved to: {results_dir}")
print("="*80)

if mae_pass and rmse_pass:
    print("\\n✓✓✓ SUCCESS! TARGETS ACHIEVED! ✓✓✓\\n")
else:
    print(f"\\nResults: MAE {mae:.2f}, RMSE {rmse:.2f}\\n")
'''

# The script contains non-ASCII characters, so write it as UTF-8 regardless of
# the platform's default encoding.
with open('train_gpu_ultimate.py', 'w', encoding='utf-8') as f:
    f.write(training_code)

print("✓ Ultimate training script created")

✓ Ultimate training script created


In [24]:
# Run the generated "ultimate" script (train_gpu_ultimate.py) through the shell, i.e. in a separate Python process.
!python train_gpu_ultimate.py

2025-12-06 16:05:22.263601: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1765037122.292898   52812 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1765037122.300908   52812 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1765037122.316851   52812 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1765037122.316886   52812 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1765037122.316891   52812 computation_placer.cc:177] computation placer alr

In [28]:
training_code = '''#!/usr/bin/env python3
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import sys, pickle, numpy as np
from pathlib import Path
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, regularizers, optimizers
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

print("\\n" + "="*80)
print("CSRNet - OPTIMIZED ADVANCED")
print("="*80)

gpus = tf.config.list_physical_devices('GPU')
print(f"\\nGPU: {len(gpus) > 0}")

CONFIG = {
    'batch_size': 16,
    'epochs': 150,
    'learning_rate': 8e-5,
    'weight_decay': 5e-5,
    'data_path': 'processed_dataset_fixed/part_A_fixed.pkl',
    'results_path': 'results/csrnet_optimized_advanced',
}

# LOAD
print("\\n[LOADING DATA]")
with open(CONFIG['data_path'], 'rb') as f:
    data = pickle.load(f)

X_train = data['X_train'].astype('float32')
y_density_train = data['y_density_train'][..., np.newaxis].astype('float32')
X_test = data['X_test'].astype('float32')
y_count_test = data['y_count_test']

X_tr, X_val, y_tr, y_val = train_test_split(X_train, y_density_train, test_size=0.15, random_state=42)

print(f"✓ Train: {X_tr.shape}, Val: {X_val.shape}, Test: {X_test.shape}")

# MODEL
print("\\n[BUILDING MODEL]")

model = keras.Sequential([
    layers.Input(shape=(256, 256, 3)),

    # Block 1
    layers.Conv2D(64, 3, padding='same', kernel_regularizer=regularizers.l2(CONFIG['weight_decay']), activation=None),
    layers.BatchNormalization(),
    layers.Activation('relu'),
    layers.Conv2D(64, 3, padding='same', kernel_regularizer=regularizers.l2(CONFIG['weight_decay']), activation=None),
    layers.BatchNormalization(),
    layers.Activation('relu'),
    layers.MaxPooling2D(2),
    layers.Dropout(0.2),

    # Block 2
    layers.Conv2D(128, 3, padding='same', kernel_regularizer=regularizers.l2(CONFIG['weight_decay']), activation=None),
    layers.BatchNormalization(),
    layers.Activation('relu'),
    layers.Conv2D(128, 3, padding='same', kernel_regularizer=regularizers.l2(CONFIG['weight_decay']), activation=None),
    layers.BatchNormalization(),
    layers.Activation('relu'),
    layers.MaxPooling2D(2),
    layers.Dropout(0.2),

    # Block 3
    layers.Conv2D(256, 3, padding='same', kernel_regularizer=regularizers.l2(CONFIG['weight_decay']), activation=None),
    layers.BatchNormalization(),
    layers.Activation('relu'),
    layers.Conv2D(256, 3, padding='same', kernel_regularizer=regularizers.l2(CONFIG['weight_decay']), activation=None),
    layers.BatchNormalization(),
    layers.Activation('relu'),
    layers.Conv2D(256, 3, padding='same', kernel_regularizer=regularizers.l2(CONFIG['weight_decay']), activation=None),
    layers.BatchNormalization(),
    layers.Activation('relu'),
    # layers.MaxPooling2D(2), # REMOVED THIS LAYER TO MATCH 64x64 GROUND TRUTH
    layers.Dropout(0.2),

    # Block 4
    layers.Conv2D(512, 3, padding='same', kernel_regularizer=regularizers.l2(CONFIG['weight_decay']), activation=None),
    layers.BatchNormalization(),
    layers.Activation('relu'),
    layers.Conv2D(512, 3, padding='same', kernel_regularizer=regularizers.l2(CONFIG['weight_decay']), activation=None),
    layers.BatchNormalization(),
    layers.Activation('relu'),
    layers.Conv2D(512, 3, padding='same', kernel_regularizer=regularizers.l2(CONFIG['weight_decay']), activation=None),
    layers.BatchNormalization(),
    layers.Activation('relu'),

    # Backend
    layers.Conv2D(512, 3, padding='same', dilation_rate=2, kernel_regularizer=regularizers.l2(CONFIG['weight_decay']), activation=None),
    layers.BatchNormalization(),
    layers.Activation('relu'),
    layers.Dropout(0.15),
    layers.Conv2D(512, 3, padding='same', dilation_rate=2, kernel_regularizer=regularizers.l2(CONFIG['weight_decay']), activation=None),
    layers.BatchNormalization(),
    layers.Activation('relu'),
    layers.Dropout(0.15),
    layers.Conv2D(512, 3, padding='same', dilation_rate=2, kernel_regularizer=regularizers.l2(CONFIG['weight_decay']), activation=None),
    layers.BatchNormalization(),
    layers.Activation('relu'),
    layers.Conv2D(256, 3, padding='same', dilation_rate=2, kernel_regularizer=regularizers.l2(CONFIG['weight_decay']), activation=None),
    layers.BatchNormalization(),
    layers.Activation('relu'),
    layers.Conv2D(128, 3, padding='same', dilation_rate=2, kernel_regularizer=regularizers.l2(CONFIG['weight_decay']), activation=None),
    layers.BatchNormalization(),
    layers.Activation('relu'),
    layers.Conv2D(64, 3, padding='same', dilation_rate=2, kernel_regularizer=regularizers.l2(CONFIG['weight_decay']), activation=None),
    layers.BatchNormalization(),
    layers.Activation('relu'),

    layers.Conv2D(1, 1, padding='same', activation='relu'),
], name='CSRNet-Optimized-Advanced')

def combined_loss(y_true, y_pred):
    mse = tf.keras.losses.mse(y_true, y_pred)
    mae = tf.keras.losses.mae(y_true, y_pred)
    return 0.7 * mse + 0.3 * mae

optimizer = optimizers.Adam(learning_rate=CONFIG['learning_rate'])
model.compile(optimizer=optimizer, loss=combined_loss, metrics=['mae', 'mse'])
print(f"✓ Parameters: {model.count_params():,}")

# PHASE 1
print("\\n[PHASE 1: INITIAL TRAINING]\\n")

callbacks = [
    EarlyStopping(monitor='val_mae', patience=25, restore_best_weights=True, verbose=1),
    ReduceLROnPlateau(monitor='val_mae', factor=0.6, patience=12, min_lr=1e-6, verbose=1),
]

history1 = model.fit(
    X_tr, y_tr,
    validation_data=(X_val, y_val),
    batch_size=CONFIG['batch_size'],
    epochs=75,
    callbacks=callbacks,
    verbose=1,
)

# PHASE 2
print("\\n[PHASE 2: FINE-TUNING]\\n")

optimizer2 = optimizers.Adam(learning_rate=CONFIG['learning_rate'] / 3)
model.compile(optimizer=optimizer2, loss=combined_loss, metrics=['mae', 'mse'])

callbacks2 = [
    EarlyStopping(monitor='val_mae', patience=30, restore_best_weights=True, verbose=1),
    ReduceLROnPlateau(monitor='val_mae', factor=0.7, patience=15, min_lr=1e-7, verbose=1),
]

history2 = model.fit(
    X_tr, y_tr,
    validation_data=(X_val, y_val),
    batch_size=CONFIG['batch_size'],
    epochs=75,
    callbacks=callbacks2,
    verbose=1,
)

# EVALUATE
print("\\n" + "="*80)
print("[EVALUATION ON TEST SET]")
print("="*80)

predictions = []
for i in range(0, len(X_test), 50):
    batch = X_test[i:i+50]
    pred_batch = model.predict(batch, verbose=0)
    predictions.extend([max(0, p.sum()) for p in pred_batch])

predictions = np.array(predictions)

mae = np.mean(np.abs(predictions - y_count_test))
rmse = np.sqrt(np.mean((predictions - y_count_test) ** 2))

print(f"\\nRESULTS:")
print(f"  MAE:   {mae:.2f} (Target: 70-150)")
print(f"  RMSE:  {rmse:.2f} (Target: 100-200)")

mae_pass = 70 <= mae <= 150
rmse_pass = 100 <= rmse <= 200

print(f"\\nTARGET ACHIEVEMENT:")
print(f"  MAE  [70-150]:    {'PASS' if mae_pass else 'FAIL'}")
print(f"  RMSE [100-200]:   {'PASS' if rmse_pass else 'FAIL'}")

# SAVE
results_dir = Path(CONFIG['results_path'])
results_dir.mkdir(parents=True, exist_ok=True)

with open(results_dir / 'results.pkl', 'wb') as f:
    pickle.dump({
        'mae': float(mae),
        'rmse': float(rmse),
        'predictions': predictions.tolist(),
        'ground_truth': y_count_test.tolist(),
        'model_params': model.count_params(),
        'mae_pass': bool(mae_pass),
        'rmse_pass': bool(rmse_pass),
    }, f)

model.save(results_dir / 'model.keras')

print(f"\\n✓ Results saved to: {results_dir}")
print("="*80)

if mae_pass and rmse_pass:
    print("\\n✓✓✓ SUCCESS! TARGETS ACHIEVED! ✓✓✓\\n")
else:
    print(f"\\nResults: MAE {mae:.2f}, RMSE {rmse:.2f}\\n")
'''

# Persist the generated training script next to the notebook so it can be run
# as a separate process. The encoding is pinned to UTF-8 because the script
# text contains non-ASCII characters (e.g. the check mark in its status
# messages) and Python reads source files as UTF-8 by default; relying on the
# locale default could fail to write, or write a file that does not parse.
with open('train_gpu_advanced_v2.py', 'w', encoding='utf-8') as f:
    f.write(training_code)

print("✓ Optimized Advanced training script created")

✓ Optimized Advanced training script created


In [29]:
# Run the generated script train_gpu_advanced_v2.py as a separate Python
# process through the IPython shell escape; its console output appears below.
!python train_gpu_advanced_v2.py

2025-12-06 16:41:38.272004: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1765039298.291575   63569 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1765039298.297644   63569 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1765039298.312443   63569 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1765039298.312472   63569 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1765039298.312476   63569 computation_placer.cc:177] computation placer alr

In [31]:
# Source text of a standalone training script ("CSRNet-Direct"). It is kept as
# a string so a later statement can write it to disk and run it in its own
# process. Backslash escapes are doubled so that they survive into the file.
training_code = '''#!/usr/bin/env python3
"""CSRNet direct-count variant.

Loads the pickled dataset named in CONFIG, derives one scalar count per
training image by summing its density map, trains a small CNN to regress that
count directly, evaluates MAE / RMSE against the test counts, and saves the
metrics and the model under CONFIG['results_path'].
"""
import os
# Set before TensorFlow is imported so it applies to its native-library logging.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import pickle
from pathlib import Path

import numpy as np
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, optimizers
from tensorflow.keras.callbacks import EarlyStopping

print("\\n" + "="*80)
print("CSRNet - DIRECT APPROACH")
print("="*80)

gpus = tf.config.list_physical_devices('GPU')
print(f"GPU: {len(gpus) > 0}\\n")

CONFIG = {
    'batch_size': 32,
    'epochs': 200,
    'learning_rate': 1e-4,
    'seed': 42,
    'data_path': 'processed_dataset_fixed/part_A_fixed.pkl',
    'results_path': 'results/csrnet_direct',
}

# Seed Python, NumPy and TensorFlow together so weight initialisation, dropout
# and shuffling are repeatable, matching the already-seeded train/val split.
keras.utils.set_random_seed(CONFIG['seed'])

print("[LOADING DATA]")
# Fail with a readable message instead of a traceback when the dataset is absent.
if not Path(CONFIG['data_path']).exists():
    raise SystemExit(f"ERROR: {CONFIG['data_path']} not found")

# NOTE: pickle.load can execute arbitrary code; only load dataset files that
# come from a trusted source.
with open(CONFIG['data_path'], 'rb') as f:
    data = pickle.load(f)

# NOTE(review): the 1/255 scaling assumes the pickle stores 0-255 pixel values;
# confirm against the preprocessing step that produced the file.
X_train = data['X_train'].astype('float32') / 255.0
y_density_train = data['y_density_train'].astype('float32')
X_test = data['X_test'].astype('float32') / 255.0
# Flatten to 1-D so the metric arithmetic below cannot broadcast an (N, 1)
# array against the (N,) predictions into an (N, N) matrix.
y_count_test = data['y_count_test'].astype('float32').reshape(-1)

# One scalar target per image: the sum of every value in its density map.
y_count_train = y_density_train.reshape(len(y_density_train), -1).sum(axis=1)

print(f"Train: {X_train.shape}, counts: {y_count_train.min():.0f}-{y_count_train.max():.0f}")

X_tr, X_val, y_tr, y_val = train_test_split(
    X_train, y_count_train, test_size=0.2, random_state=CONFIG['seed']
)

print(f"Split: train={X_tr.shape[0]}, val={X_val.shape[0]}\\n")

print("[BUILDING MODEL]")

model = keras.Sequential([
    layers.Input(shape=(256, 256, 3)),

    # Block 1
    layers.Conv2D(32, 3, padding='same', activation='relu'),
    layers.BatchNormalization(),
    layers.Conv2D(32, 3, padding='same', activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D(2),

    # Block 2
    layers.Conv2D(64, 3, padding='same', activation='relu'),
    layers.BatchNormalization(),
    layers.Conv2D(64, 3, padding='same', activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D(2),

    # Block 3
    layers.Conv2D(128, 3, padding='same', activation='relu'),
    layers.BatchNormalization(),
    layers.Conv2D(128, 3, padding='same', activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D(2),

    # Block 4
    layers.Conv2D(256, 3, padding='same', activation='relu'),
    layers.BatchNormalization(),
    layers.Conv2D(256, 3, padding='same', activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D(2),

    # Block 5 (no pooling; global average pooling follows)
    layers.Conv2D(512, 3, padding='same', activation='relu'),
    layers.BatchNormalization(),
    layers.Conv2D(512, 3, padding='same', activation='relu'),
    layers.BatchNormalization(),

    # Regression head
    layers.GlobalAveragePooling2D(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.3),
    # Linear output: a rectified single output unit that starts in its zero
    # region receives no gradient and can stay at 0 for the whole run.
    # Non-negativity is enforced after prediction instead.
    layers.Dense(1, activation='linear'),
], name='CSRNet-Direct')

optimizer = optimizers.Adam(learning_rate=CONFIG['learning_rate'])
model.compile(optimizer=optimizer, loss='mae', metrics=['mae', 'mse'])

print(f"Parameters: {model.count_params():,}\\n")

print("[TRAINING]\\n")

callbacks = [
    # Stop once validation MAE has not improved for 40 epochs and roll back
    # to the best weights seen.
    EarlyStopping(monitor='val_mae', patience=40, restore_best_weights=True, verbose=1),
]

history = model.fit(
    X_tr, y_tr,
    validation_data=(X_val, y_val),
    batch_size=CONFIG['batch_size'],
    epochs=CONFIG['epochs'],
    callbacks=callbacks,
    verbose=1,
)

print("\\n" + "="*80)
print("[EVALUATION]")
print("="*80)

predictions = model.predict(X_test, verbose=0).flatten()
# A crowd count cannot be negative; clamp the linear output.
predictions = np.maximum(predictions, 0)

mae = np.mean(np.abs(predictions - y_count_test))
rmse = np.sqrt(np.mean((predictions - y_count_test) ** 2))

print(f"\\nRESULTS:")
print(f"  MAE:   {mae:.2f} (Target: 70-150)")
print(f"  RMSE:  {rmse:.2f} (Target: 100-200)")

# NOTE(review): because of the lower bounds, an error below the target range
# is reported as FAIL; confirm that this is intended.
mae_pass = 70 <= mae <= 150
rmse_pass = 100 <= rmse <= 200

print(f"\\nTARGET ACHIEVEMENT:")
print(f"  MAE  [70-150]:    {'PASS' if mae_pass else 'FAIL'}")
print(f"  RMSE [100-200]:   {'PASS' if rmse_pass else 'FAIL'}")

results_dir = Path(CONFIG['results_path'])
results_dir.mkdir(parents=True, exist_ok=True)

# Values are converted to plain Python types so the pickle can be read
# without NumPy.
with open(results_dir / 'results.pkl', 'wb') as f:
    pickle.dump({
        'mae': float(mae),
        'rmse': float(rmse),
        'predictions': predictions.tolist(),
        'ground_truth': y_count_test.tolist(),
        'model_params': model.count_params(),
        'mae_pass': bool(mae_pass),
        'rmse_pass': bool(rmse_pass),
    }, f)

model.save(results_dir / 'model.keras')

print(f"\\n✓ Saved to: {results_dir}")
print("="*80)

if mae_pass and rmse_pass:
    print("\\n✓✓✓ SUCCESS! TARGETS ACHIEVED! ✓✓✓\\n")
else:
    print(f"\\nMAE {mae:.2f}, RMSE {rmse:.2f}\\n")
'''

# Persist the generated training script next to the notebook so it can be run
# as a separate process. The encoding is pinned to UTF-8 because the script
# text contains non-ASCII characters (e.g. the check mark in its status
# messages) and Python reads source files as UTF-8 by default; relying on the
# locale default could fail to write, or write a file that does not parse.
with open('train_gpu_direct.py', 'w', encoding='utf-8') as f:
    f.write(training_code)

print("✓ Direct training script created")

✓ Direct training script created


In [32]:
# Run the generated script train_gpu_direct.py as a separate Python process
# through the IPython shell escape; its console output appears below.
!python train_gpu_direct.py

2025-12-06 17:52:57.810582: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1765043577.831343   82786 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1765043577.837527   82786 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1765043577.853833   82786 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1765043577.853858   82786 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1765043577.853864   82786 computation_placer.cc:177] computation placer alr