In [None]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.applications import ResNet50, resnet50
from tensorflow.keras.preprocessing import image
from tqdm import tqdm

In [None]:
# CONFIG
# Root folder of the training split on the mounted Drive. The trailing slash is
# kept because other cells build paths with plain string concatenation.
path = '/content/drive/MyDrive/14648881b93c11f0/dataset/Train/'
CSV_PATH = os.path.join(path, "sliders.csv")   # table loaded with pd.read_csv
IMAGE_DIR = os.path.join(path, "images")       # searched for <id>.tiff / <id>.tif files

IMG_SIZE = 256     # images are resized to IMG_SIZE x IMG_SIZE before the network
BATCH_SIZE = 32    # number of images sent to predict() at once
AUTOTUNE = tf.data.AUTOTUNE

In [None]:
import os
# Folder inside the training directory that receives one <id>.npy embedding per image.
OUT_EMB_DIR = os.path.join(path, 'embeddings_tf')

# exist_ok=True makes re-running this cell harmless when the folder is already there.
os.makedirs(OUT_EMB_DIR, exist_ok=True)


In [None]:
# Load the table and collect every id_global value as a string.
df = pd.read_csv(CSV_PATH)
ids = list(df['id_global'].astype(str))

In [None]:
# Preview the first rows of the table loaded from CSV_PATH.
df.head()

Unnamed: 0,copyCreationTime,captureTime,touchTime,id_global,grayscale,aperture,flashFired,focalLength,isoSpeedRating,shutterSpeed,Temperature,Tint,currTemp,currTint
0,-63113817600,2024-01-14T16:56:50.67,741426608.1,C68C8010-495C-4427-9F4D-664C2D71EFAD,0,2.970854,1,100.0,1250,7.965784,4150,2,6317,4
1,-63113817600,2023-05-28T20:08:51.87,741426607.1,8EFC0EC0-0936-41CC-81BD-513B35D2CB23,0,7.61471,1,24.0,500,3.0,4700,4,5767,13
2,-63113817600,2023-06-09T20:54:30.13,741426607.2,4A28220F-024E-4637-80ED-B4533578AFEB,0,3.61471,1,125.0,1000,7.321928,5000,12,5496,6
3,-63113817600,2022-06-10T13:02:12.98,741426606.5,05A76E40-9B2C-40FD-95D4-EF976598640C,0,2.970854,0,40.0,320,7.965784,3150,9,3730,12
4,-63113817600,2023-09-30T17:08:05,741426607.8,B63A179E-232C-4133-BB24-8784B60DECEE,0,2.970854,0,55.0,800,7.643856,3633,4,3661,-6


In [None]:
# ImageNet-pretrained ResNet50 without its classification head; pooling='avg'
# makes the network output one pooled feature vector per image.
base_model = ResNet50(
    include_top=False,
    weights='imagenet',
    pooling='avg',
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
)
# The network is only used to compute features, so its weights stay frozen.
base_model.trainable = False

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
from PIL import Image
def load_tiff_image(img_path):
    """
    Open an image with Pillow and return it as a ResNet50-ready float32 array.

    The image is converted to RGB when it is in any other mode (for example
    grayscale), resized to (IMG_SIZE, IMG_SIZE), cast to float32 and passed
    through `resnet50.preprocess_input`.

    Parameters
    ----------
    img_path : path (or file object) accepted by `PIL.Image.open`.

    Returns
    -------
    The preprocessed pixel array for one image (IMG_SIZE x IMG_SIZE x 3).
    """
    # Image.open keeps the file handle open until the image is closed; the
    # context manager releases it as soon as the pixels have been decoded,
    # instead of leaving thousands of handles to the garbage collector.
    with Image.open(img_path) as src:
        rgb = src if src.mode == 'RGB' else src.convert('RGB')
        # resize() returns a new, fully loaded image that stays valid after
        # the source file is closed.
        resized = rgb.resize((IMG_SIZE, IMG_SIZE))
    img_array = np.array(resized, dtype=np.float32)
    img_array = resnet50.preprocess_input(img_array)
    return img_array

In [None]:
# Display the id list built from the id_global column.
# NOTE(review): this prints every id; a slice such as ids[:5] would keep the output short.
ids

['C68C8010-495C-4427-9F4D-664C2D71EFAD',
 '8EFC0EC0-0936-41CC-81BD-513B35D2CB23',
 '4A28220F-024E-4637-80ED-B4533578AFEB',
 '05A76E40-9B2C-40FD-95D4-EF976598640C',
 'B63A179E-232C-4133-BB24-8784B60DECEE',
 'D625DC86-F48E-47B1-B0CB-9C047392F51C',
 'CCEAEBCF-2519-4B65-A823-F56B7298BBA7',
 '8042A8A4-2248-40F6-A2A6-7F4CD0A3E777',
 '6757810D-5447-49C8-8FAA-171EA8FC7886',
 '8AC002DD-18F9-481F-B9C0-447C639727F5',
 'B242DC6E-8AE6-4C20-8D54-AC9ECD8DEA39',
 '35844446-9880-44B3-9489-2FEC5C7A3F7E',
 '975F13D1-2EA2-4F85-A7DB-C0AFBC9E60FC',
 '10F7C552-211B-4D2A-B5A9-AE89CA471F10',
 '59B525CD-A83C-4BBF-A16C-FFAF03EC4C1D',
 '420FD19F-A144-47D4-AE21-3E9754AA6599',
 '0E7927BC-3BBD-4944-9881-1257008CCC58',
 '7F254DE9-579B-4689-AC37-094C0112FAA9',
 'F9239C10-58EF-41F9-BFA5-19A7DDE90C39',
 '4C31447A-4871-4B73-B2B4-0E9E48C45711',
 'C24E2B55-3542-4ABE-AC30-4261A2DBCCF6',
 'C7F0A894-145B-487C-8498-EAC598A13CC1',
 '547D4043-1BD5-47D0-98A9-112423E5E814',
 'FDD73466-ED4B-4A82-8526-CF58737856A1',
 '1CDE6C4A-1491-

In [None]:
# Feature extractor for the embedding loop. This repeats the ResNet50
# construction from the earlier cell with the same arguments.
_resnet_kwargs = dict(
    weights='imagenet',
    include_top=False,
    pooling='avg',
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
)
base_model = ResNet50(**_resnet_kwargs)
base_model.trainable = False  # inference only, weights stay fixed

In [None]:
# Pair every id with its image file, trying the ".tiff" name first and
# falling back to ".tif"; ids with neither file are reported and left out.
id_paths = []
for id_ in ids:
    candidates = [os.path.join(IMAGE_DIR, f"{id_}{ext}") for ext in (".tiff", ".tif")]
    found = next((p for p in candidates if os.path.exists(p)), None)
    if found is None:
        print(f"[WARN] No TIFF image found for id {id_}, skipping.")
        continue
    id_paths.append((id_, found))

In [None]:
# Embed the images BATCH_SIZE at a time and write one <id>.npy file per image.
for start in tqdm(range(0, len(id_paths), BATCH_SIZE)):
    chunk = id_paths[start:start + BATCH_SIZE]
    chunk_ids = [pair[0] for pair in chunk]
    chunk_paths = [pair[1] for pair in chunk]
    pixels = np.stack([load_tiff_image(p) for p in chunk_paths], axis=0)  # (B, H, W, 3)
    vectors = base_model.predict(pixels, verbose=0)
    for id_, emb in zip(chunk_ids, vectors):
        np.save(os.path.join(OUT_EMB_DIR, f"{id_}.npy"), emb)

print(f"✅ Done! Saved embeddings for {len(id_paths)} TIFF images in {OUT_EMB_DIR}")

100%|██████████| 80/80 [11:28<00:00,  8.61s/it]

✅ Done! Saved embeddings for 2538 TIFF images in /content/drive/MyDrive/14648881b93c11f0/dataset/Train/embeddings_tf





In [None]:
import os
import numpy as np
import pandas as pd
from glob import glob
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import mean_absolute_error, mean_squared_error
import joblib


In [None]:
# Locations used by the modelling part of the notebook.
CSV_PATH = os.path.join(path, "sliders.csv")
EMB_DIR = os.path.join(path, "embeddings_tf")   # same folder the extraction loop saved the .npy files to
MODEL_DIR = os.path.join(path, "models_tf")     # fitted PCA, scaler and models are written here
os.makedirs(MODEL_DIR, exist_ok=True)


In [None]:
# Reload the table and add `id`, a string copy of id_global used to look up embeddings.
df = pd.read_csv(CSV_PATH)
df = df.assign(id=df['id_global'].astype(str))

In [None]:
# Load every saved embedding, keyed by the file name without its extension.
emb_files = glob(os.path.join(EMB_DIR, "*.npy"))
emb_map = {}
for npy_path in emb_files:
    stem, _ext = os.path.splitext(os.path.basename(npy_path))
    emb_map[stem] = np.load(npy_path)


In [None]:
# Stop early when nothing was loaded; otherwise read the vector length from any entry.
if not emb_map:
    raise ValueError("No embeddings found in EMB_DIR")
first_emb = next(iter(emb_map.values()))
emb_dim = first_emb.shape[0]

In [None]:
def get_emb_for_id(id_):
    """Return the loaded embedding for `id_`, or a zero vector of length `emb_dim` when none exists."""
    return emb_map.get(id_, np.zeros(emb_dim, dtype=np.float32))  # or np.nan and drop rows


row_ids = df['id'].astype(str).tolist()

# Rows without a saved embedding are filled with zeros and still go into the
# PCA fit and the training set, so make that visible instead of silent.
missing_ids = [i for i in row_ids if i not in emb_map]
if missing_ids:
    print(f"[WARN] {len(missing_ids)} of {len(row_ids)} rows have no saved embedding; using zero vectors.")

embs = np.stack([get_emb_for_id(i) for i in row_ids], axis=0)  # shape (N, emb_dim)


In [None]:
# Fit a PCA_DIM-component PCA on the embeddings, but keep only the first 10
# projected columns as model features. The fitted PCA is saved for later reuse.
# Note: this cell overwrites `embs`, so it must not be run twice in a row.
PCA_DIM = 256
pca = PCA(n_components=PCA_DIM, random_state=42)
embs = pca.fit_transform(embs)[:, :10]
joblib.dump(pca, os.path.join(MODEL_DIR, "pca_emb.pkl"))
emb_dim = PCA_DIM

In [None]:
# One named column per retained embedding component.
emb_cols = ["img_emb_%d" % k for k in range(embs.shape[1])]
emb_df = pd.DataFrame(embs, columns=emb_cols)

# Place the tabular columns and the embedding columns side by side; both
# indexes are reset so rows line up by position.
frames = [df.reset_index(drop=True), emb_df.reset_index(drop=True)]
df_embed = pd.concat(frames, axis=1)

# Rows without both targets cannot be used for training.
df_embed = df_embed.dropna(subset=['Temperature', 'Tint'])

In [None]:
# Columns that must not be used as model inputs: identifiers, the two targets
# and the timestamp columns. 'id' is the string copy of id_global added
# when the table was reloaded; leaving it in puts text into X and makes
# StandardScaler fail with "could not convert string to float".
exclude = {'id_global', 'id', 'Temperature', 'Tint', 'copyCreationTime', 'captureTime'}
tabular_cols = [c for c in df.columns if c not in exclude]
# NOTE(review): the validation table preview further down shows no touchTime
# column - confirm that touchTime should remain a training feature.
feature_cols = emb_cols + tabular_cols

# Feature matrix (embedding components first, then tabular columns) and the
# two-column target array [Temperature, Tint].
X = df_embed[feature_cols].fillna(0).values
y = df_embed[['Temperature', 'Tint']].values


In [None]:
# Display the scaled validation features.
# NOTE(review): X_val is first assigned in the train/validation split cell that
# follows this one, so on a fresh kernel this cell has to run after it.
X_val

array([[-0.63591257, -1.70000911, -1.63241631, ...,  0.03133029,
        -1.24777576, -0.39728228],
       [-0.52502156, -1.15592084, -0.49384917, ..., -0.6985239 ,
        -0.66654511,  0.55333864],
       [-0.47471726,  2.05060952, -1.80470961, ..., -0.6985239 ,
        -0.69247362,  1.0286491 ],
       ...,
       [-0.44939442, -1.9687015 ,  0.03734817, ..., -0.34304236,
        -0.06910915,  0.1968558 ],
       [ 3.16995708,  1.04270642, -1.27254097, ...,  1.13555714,
        -0.50989373,  0.1968558 ],
       [ 0.72154376,  1.66808851,  1.43546827, ..., -0.34304236,
         1.21111077,  1.50395956]])

In [None]:

# Hold out 20% of the rows for validation (fixed seed for a repeatable split).
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardise with statistics from the training part only, apply the same
# transform to the held-out part, and persist the fitted scaler.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_val = scaler.transform(X_val)
joblib.dump(scaler, os.path.join(MODEL_DIR, "scaler.pkl"))

['/content/drive/MyDrive/14648881b93c11f0/dataset/Train/models_tf/scaler.pkl']

In [None]:
import xgboost as xgb

# Shared hyper-parameters for both regressors.
params = dict(
    objective="reg:squarederror",
    tree_method="hist",
    learning_rate=0.05,
    max_depth=6,
    n_estimators=1000,
    random_state=42,
)

# y holds [Temperature, Tint] in columns 0 and 1; one model is fitted per target.
temp_train, tint_train = y_train[:, 0], y_train[:, 1]
temp_val, tint_val = y_val[:, 0], y_val[:, 1]

model_temp = xgb.XGBRegressor(**params)
model_temp.fit(X_train, temp_train, eval_set=[(X_val, temp_val)], verbose=50)

model_tint = xgb.XGBRegressor(**params)
model_tint.fit(X_train, tint_train, eval_set=[(X_val, tint_val)], verbose=50)

# Held-out predictions and mean absolute error per target.
pred_temp = model_temp.predict(X_val)
pred_tint = model_tint.predict(X_val)
mae_temp = mean_absolute_error(temp_val, pred_temp)
mae_tint = mean_absolute_error(tint_val, pred_tint)
print(f"XGB -> Temp MAE: {mae_temp:.4f}")
print(f"XGB -> Tint MAE: {mae_tint:.4f}")

# Persist both fitted models next to the scaler and PCA.
joblib.dump(model_temp, os.path.join(MODEL_DIR, "xgb_temp.pkl"))
joblib.dump(model_tint, os.path.join(MODEL_DIR, "xgb_tint.pkl"))

[0]	validation_0-rmse:2241.58605
[50]	validation_0-rmse:2094.73858
[100]	validation_0-rmse:2086.10413
[150]	validation_0-rmse:2082.46704
[200]	validation_0-rmse:2081.91307
[250]	validation_0-rmse:2081.69218
[300]	validation_0-rmse:2079.47316
[350]	validation_0-rmse:2077.69919
[400]	validation_0-rmse:2076.49528
[450]	validation_0-rmse:2076.34868
[500]	validation_0-rmse:2076.12239
[550]	validation_0-rmse:2075.81497
[600]	validation_0-rmse:2075.63230
[650]	validation_0-rmse:2075.17839
[700]	validation_0-rmse:2074.91147
[750]	validation_0-rmse:2074.35369
[800]	validation_0-rmse:2074.16629
[850]	validation_0-rmse:2074.21882
[900]	validation_0-rmse:2074.03227
[950]	validation_0-rmse:2074.15116
[999]	validation_0-rmse:2074.25817
[0]	validation_0-rmse:9.88230
[50]	validation_0-rmse:7.34207
[100]	validation_0-rmse:7.15674
[150]	validation_0-rmse:7.12962
[200]	validation_0-rmse:7.12998
[250]	validation_0-rmse:7.12861
[300]	validation_0-rmse:7.11424
[350]	validation_0-rmse:7.10648
[400]	validatio

['/content/drive/MyDrive/14648881b93c11f0/dataset/Train/models_tf/xgb_tint.pkl']

In [None]:
# Locations for the validation split.
path2 = '/content/drive/MyDrive/14648881b93c11f0/dataset/Validation/'
VAL_IMAGE_DIR = os.path.join(path2, 'images')
VAL_DATA = os.path.join(path2, 'sliders_input.csv')
EMBEDDING = os.path.join(path2, 'embeddings_tf_val')   # per-id .npy embeddings for this split
os.makedirs(EMBEDDING, exist_ok=True)

# Same values as the training configuration, restated for this section.
IMG_SIZE = 256
BATCH_SIZE = 32
AUTOTUNE = tf.data.AUTOTUNE

In [None]:
# Load the validation table; `ids` now refers to the validation ids.
df1 = pd.read_csv(VAL_DATA)
ids = list(df1['id_global'].astype(str))

In [None]:
# Pair each validation id with its image file (".tiff" preferred, ".tif" as
# fallback); ids without an image are reported and left out.
id_paths = []
for id_ in ids:
    stem = os.path.join(VAL_IMAGE_DIR, id_)
    if os.path.exists(stem + ".tiff"):
        id_paths.append((id_, stem + ".tiff"))
    elif os.path.exists(stem + ".tif"):
        id_paths.append((id_, stem + ".tif"))
    else:
        print(f"[WARN] No TIFF image found for id {id_}, skipping.")

In [None]:
# Run the validation images through the network in batches and save one
# <id>.npy embedding per image into EMBEDDING.
batch_starts = range(0, len(id_paths), BATCH_SIZE)
for offset in tqdm(batch_starts):
    batch = id_paths[offset:offset + BATCH_SIZE]
    stacked = np.stack([load_tiff_image(img_file) for _id, img_file in batch], axis=0)  # (B, H, W, 3)
    feats = base_model.predict(stacked, verbose=0)
    for row, emb in enumerate(feats):
        id_ = batch[row][0]
        np.save(os.path.join(EMBEDDING, f"{id_}.npy"), emb)

print(f"✅ Done! Saved embeddings for {len(id_paths)} TIFF images in {EMBEDDING}")

100%|██████████| 16/16 [03:52<00:00, 14.54s/it]

✅ Done! Saved embeddings for 493 TIFF images in /content/drive/MyDrive/14648881b93c11f0/dataset/Validation/embeddings_tf_val





In [None]:

# Load the validation embeddings, keyed by file name without the extension.
emb_files = glob(os.path.join(EMBEDDING, "*.npy"))
emb_names = [os.path.splitext(os.path.basename(p))[0] for p in emb_files]
emb_map = dict(zip(emb_names, (np.load(p) for p in emb_files)))

In [None]:
# Stop early when no validation embedding was loaded. The message names the
# folder that was actually searched (EMBEDDING, not the training EMB_DIR).
if len(emb_map) == 0:
    raise ValueError(f"No embeddings found in {EMBEDDING}")
# Length of one embedding vector, read from any loaded entry.
emb_dim = next(iter(emb_map.values())).shape[0]

In [None]:
def get_emb_for_id(id_):
    """Look up the embedding for `id_`; ids with no saved file get a zero vector of length `emb_dim`."""
    if id_ in emb_map:
        return emb_map[id_]
    return np.zeros(emb_dim, dtype=np.float32)

# One row per validation record, in df1 order.
val_row_ids = df1['id_global'].astype(str).tolist()
embs = np.stack([get_emb_for_id(row_id) for row_id in val_row_ids], axis=0)  # shape (N, emb_dim)


In [None]:
# Project the validation embeddings with the PCA that was fitted on the
# TRAINING embeddings (`pca` from the training section). Fitting a new PCA
# here would produce components on different axes than the ones the models
# were trained on, and dumping it would overwrite the training pca_emb.pkl,
# so this cell only transforms and saves nothing.
PCA_DIM = 256
embs = pca.transform(embs)
emb_dim = PCA_DIM
# Keep the same first 10 components that were kept for training.
embs = embs[:, :10]

In [None]:
# Preview the first two rows of the validation table.
df1.head(2)

Unnamed: 0,copyCreationTime,captureTime,id_global,grayscale,hasDevelopAdjustmentsEx,aperture,flashFired,focalLength,isoSpeedRating,shutterSpeed,currTemp,currTint
0,-63113817600,2025-09-27T16:14:53,EB5BEE31-8D4F-450A-8BDD-27C762C75AA6,0,1,4.0,0,21.2,800,4.906891,6613,14
1,-63113817600,2025-09-27T16:14:57,DE666E1F-0433-4958-AEC0-9A0CC0F81036,0,1,4.0,0,16.0,800,4.906891,6613,14


In [None]:
# Name the embedding columns and attach them to the validation table by row position.
emb_cols = ["img_emb_" + str(k) for k in range(embs.shape[1])]
emb_df = pd.DataFrame(data=embs, columns=emb_cols)

val_frames = (df1.reset_index(drop=True), emb_df.reset_index(drop=True))
df_embed = pd.concat(val_frames, axis=1)



In [None]:
# Assemble the inference matrix with the SAME columns, column order and
# scaling the XGBoost models were fitted on. `feature_cols` and `scaler` are
# the objects created in the training section and are reused on purpose:
#   * rebuilding the column list from df1 yields a different feature set (the
#     table previews show touchTime only in the training data and
#     hasDevelopAdjustmentsEx only in the validation data);
#   * fitting a fresh scaler on the validation rows, and then transforming the
#     already-scaled result a second time, feeds the models values on a scale
#     they were never trained on.
missing_cols = [c for c in feature_cols if c not in df_embed.columns]
if missing_cols:
    print(f"[WARN] Training features absent from validation data: {missing_cols}")

# reindex() selects the training columns in training order; columns that do
# not exist here come back as NaN and are filled together with other gaps.
X = df_embed.reindex(columns=feature_cols).fillna(0).values
X_val = scaler.transform(X)

# A feature the validation file does not provide is set to 0 AFTER scaling,
# i.e. to its training mean, rather than to whatever a raw 0 would scale to.
for col in missing_cols:
    X_val[:, feature_cols.index(col)] = 0.0

In [None]:
# Check the (rows, features) shape of the matrix that is passed to the models.
X_val.shape

(493, 19)

In [None]:
# Predict both targets for the validation rows with the two fitted regressors.
pred_temp, pred_tint = (regressor.predict(X_val) for regressor in (model_temp, model_tint))

Unnamed: 0,copyCreationTime,captureTime,id_global,grayscale,hasDevelopAdjustmentsEx,aperture,flashFired,focalLength,isoSpeedRating,shutterSpeed,currTemp,currTint
0,-63113817600,2025-09-27T16:14:53,EB5BEE31-8D4F-450A-8BDD-27C762C75AA6,0,1,4.0,0,21.2,800,4.906891,6613,14


In [None]:
# Ids in df1 row order, and predictions rounded to the nearest whole number.
a = df1['id_global'].to_numpy(copy=True)
pred_temp = np.rint(pred_temp)
pred_tint = np.rint(pred_tint)

In [None]:
# Column-name -> values mapping for the result table; predictions are stored as integers.
data = dict(
    id_global=a,
    Temperature=pred_temp.astype(int),
    Tint=pred_tint.astype(int),
)

In [None]:
# Display the dict of ids and integer predictions built in the previous cell.
data

{'id_global': array(['EB5BEE31-8D4F-450A-8BDD-27C762C75AA6',
        'DE666E1F-0433-4958-AEC0-9A0CC0F81036',
        'F6A6EA9C-A5C2-4BBA-9812-5CE52B818CB6',
        'BCC39DEF-598C-491A-A3CA-14A249717F36',
        '390ED94E-0066-4822-99B9-8F1568BDFBF5',
        '4577FF1A-9D78-403E-939E-76A3D9893757',
        '6523B2F7-4E3B-41A1-B35D-D550857AC1C5',
        '4AA9F823-799F-4B28-AF63-2C582D8C6806',
        '8B6439B2-38EE-458C-9FF6-92712B83E524',
        '95B5422D-7FCB-4676-AAA3-BFA1C675D888',
        '2B1B20FF-A8FC-437E-9339-87EC0516B2E6',
        'B54CF8B5-A7AD-4AEA-AD55-EAAEEC0C9EE8',
        '12B4F594-8845-46C1-AADD-552E7C9228EF',
        '86307902-19DE-4634-97B9-585CE3DF8FB2',
        'C6237A6E-15BD-455E-9FFC-2FD5AE030403',
        '6C8B2D5C-8F68-4D74-A239-4DABC65EAD11',
        'D85BBA71-1B41-45EB-8529-ED8844D0C0DE',
        '533A3C08-82E1-472D-9BA8-4DB407155EEE',
        'ACCA07C0-86BB-4265-95D6-D29D6A7E7B8D',
        '17B5609F-1326-4495-A6F3-BB2E1A70160B',
        'F72A0A20-88C6-4CBA

In [None]:
# Turn the column mapping into a DataFrame (the name `data` is rebound from dict to frame).
data = pd.DataFrame.from_dict(data)

In [None]:
# Display the result table (id_global, Temperature, Tint).
data

Unnamed: 0,id_global,Temperature,Tint
0,EB5BEE31-8D4F-450A-8BDD-27C762C75AA6,4377,-2
1,DE666E1F-0433-4958-AEC0-9A0CC0F81036,4207,-3
2,F6A6EA9C-A5C2-4BBA-9812-5CE52B818CB6,4236,-2
3,BCC39DEF-598C-491A-A3CA-14A249717F36,4294,-3
4,390ED94E-0066-4822-99B9-8F1568BDFBF5,4127,-4
...,...,...,...
488,DCB1B4A6-1ECC-4C78-AAC7-9B414C0E363A,4379,-4
489,38C6BE37-8BC4-4CB2-ADBA-EE1169CA7C44,4437,-2
490,2BE3C9DA-B26A-42E0-98A6-D5FFB471FAA6,4438,-2
491,51EC18AF-040F-4CE9-A565-EABFA6C88240,4337,-3


In [None]:
# Write the predictions to a CSV in the current working directory, without the row index.
data.to_csv(path_or_buf='aftershoot_final_result.csv', index=False)