In [1]:
# ==========================================
# 1. SETUP & MOUNT DRIVE
# ==========================================
import os
from google.colab import drive

# Install ML Libraries
print("üì¶ Installing Dependencies...")
!pip install -q sentence-transformers scikit-learn joblib matplotlib seaborn

# Mount Google Drive
print("üîó Mounting Google Drive...")
drive.mount('/content/drive')

# Create Project Folder
PROJECT_PATH = "/content/drive/MyDrive/Projects/Image-Editing-by-Natural-Language-Constraints"
if not os.path.exists(PROJECT_PATH):
    os.makedirs(PROJECT_PATH)
    print(f"üìÇ Created project folder: {PROJECT_PATH}")
else:
    print(f"üìÇ Found project folder: {PROJECT_PATH}")

üì¶ Installing Dependencies...
üîó Mounting Google Drive...
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
üìÇ Found project folder: /content/drive/MyDrive/Projects/Image-Editing-by-Natural-Language-Constraints


In [2]:
# ==========================================
# 2. GENERATE SYNTHETIC EXPERT DATA (FINAL TUNED VERSION)
# ==========================================
import random
import pandas as pd

# Define Logic Rules
# Params: [Strength, Guide, CNet_Scale, Use_CNet(1/0), Dilate, Blur, Mask_Strat_ID]
# Mask_Strat_ID: 0=Standard, 1=Box, 2=Inverse

templates = [
    # 1. REMOVAL -> High Dilation (20px), Standard Mask
    # Logic: We need to cover the object + edges to let AI fill it empty
    {"text": "remove the {obj}", "type": "obj", "params": [1.0, 7.5, 0.0, 0, 20, 5, 0]},
    {"text": "delete {obj}", "type": "obj", "params": [1.0, 7.5, 0.0, 0, 20, 5, 0]},
    {"text": "clean background", "type": "bg", "params": [1.0, 7.5, 0.0, 0, 20, 5, 0]},
    {"text": "empty space", "type": "bg", "params": [1.0, 7.5, 0.0, 0, 20, 5, 0]},

    # 2. DESTRUCTION/REPLACEMENT -> Box Mask, No ControlNet
    # Logic: Box mask destroys the shape completely (good for "obliterate")
    {"text": "bitten {obj}", "type": "obj", "params": [1.0, 8.0, 0.0, 0, 0, 0, 1]},
    {"text": "broken {obj}", "type": "obj", "params": [1.0, 8.0, 0.0, 0, 0, 0, 1]},
    {"text": "obliterate {obj}", "type": "obj", "params": [1.0, 9.0, 0.0, 0, 0, 0, 1]},
    {"text": "crushed {obj}", "type": "obj", "params": [1.0, 9.0, 0.0, 0, 0, 0, 1]},

    # 3. BACKGROUND CHANGE -> Inverse Mask
    # Logic: Select object, INVERT it to get background. High strength to replace scene.
    {"text": "{obj} on the moon", "type": "bg", "params": [1.0, 9.0, 0.0, 0, 2, 5, 2]},
    {"text": "{obj} standing on mars", "type": "bg", "params": [1.0, 9.0, 0.0, 0, 2, 5, 2]},
    {"text": "{obj} in a snowy forest", "type": "bg", "params": [1.0, 9.0, 0.0, 0, 2, 5, 2]},
    {"text": "change background to city", "type": "bg", "params": [1.0, 9.0, 0.0, 0, 2, 5, 2]},
    {"text": "background of a beach", "type": "bg", "params": [1.0, 9.0, 0.0, 0, 2, 5, 2]},

    # 4. SPECIES SWAP -> Lower ControlNet (Was 0.15, Now 0.10)
    # FIX: "Weird Dog" - Lowering CNet gives SDXL more freedom to reshape the animal.
    {"text": "turn {obj} into a dog", "type": "obj", "params": [0.95, 7.5, 0.10, 1, 0, 5, 0]},
    {"text": "make {obj} a cat", "type": "obj", "params": [0.95, 7.5, 0.10, 1, 0, 5, 0]},
    {"text": "transform {obj} into a lion", "type": "obj", "params": [0.95, 7.5, 0.10, 1, 0, 5, 0]},
    {"text": "swap {obj} with a bear", "type": "obj", "params": [0.95, 7.5, 0.05, 1, 0, 5, 0]}, # Explicit swap

    # 5. TEXTURE/COLOR -> Very Low ControlNet (Was 0.35, Now 0.10)
    # FIX: "Stubborn Color" - 0.35 was still too high for pure color shifts on smooth objects.
    # We lower it to 0.10 to allow the paint to change while keeping the vague shape.
    {"text": "blue {obj}", "type": "obj", "params": [0.92, 7.5, 0.10, 1, 0, 5, 0]},
    {"text": "metallic {obj}", "type": "obj", "params": [0.92, 7.5, 0.10, 1, 0, 5, 0]},
    {"text": "shiny gold {obj}", "type": "obj", "params": [0.92, 7.5, 0.10, 1, 0, 5, 0]},
    {"text": "{obj} made of crystal", "type": "obj", "params": [0.92, 7.5, 0.10, 1, 0, 5, 0]},
    {"text": "red {obj}", "type": "obj", "params": [0.92, 7.5, 0.10, 1, 0, 5, 0]},
    {"text": "change {obj} color to green", "type": "obj", "params": [0.92, 7.5, 0.10, 1, 0, 5, 0]},
]

objects = ["cat", "dog", "car", "apple", "man", "woman", "house", "chair"]

print("üé≤ Generating 15,000 examples (Final Tuned Version)...")
X_raw = []
y_raw = []

for _ in range(15000):
    t = random.choice(templates)
    obj = random.choice(objects)

    # 1. Fill the template (e.g., "Man on the moon")
    if "{obj}" in t["text"]:
        prompt_text = t['text'].replace('{obj}', obj)
    else:
        prompt_text = t['text']

    # 2. Determine Target for Brain
    # If type is 'bg', we force the brain to see 'target: background'
    if t["type"] == "bg":
        target_token = "background"
    else:
        target_token = obj

    input_str = f"{prompt_text} | target: {target_token}"

    X_raw.append(input_str)
    y_raw.append(t["params"])

# Preview Data
df_preview = pd.DataFrame(y_raw, columns=["Str", "Guide", "CNet", "UseCNet", "Dilate", "Blur", "MaskID"])
df_preview["Input Text"] = X_raw
print(f"‚úÖ Generated {len(df_preview)} rows.")
print("\nüîé Data Sample:")
display(df_preview.sample(5))

üé≤ Generating 15,000 examples (Final Tuned Version)...
‚úÖ Generated 15000 rows.

üîé Data Sample:


Unnamed: 0,Str,Guide,CNet,UseCNet,Dilate,Blur,MaskID,Input Text
13688,1.0,8.0,0.0,0,0,0,1,broken man | target: man
7965,1.0,7.5,0.0,0,20,5,0,clean background | target: background
13869,1.0,9.0,0.0,0,2,5,2,car on the moon | target: background
4081,0.95,7.5,0.1,1,0,5,0,transform cat into a lion | target: cat
9251,1.0,9.0,0.0,0,0,0,1,crushed cat | target: cat


In [3]:
# ==========================================
# 3. VECTORIZE TEXT (THE "EYES")
# ==========================================
from sentence_transformers import SentenceTransformer
import numpy as np
from tqdm import tqdm

print("‚è≥ Loading Embedding Model...")
embedder = SentenceTransformer('all-MiniLM-L6-v2')

print("üî¢ Converting text to vectors...")
# Batch encode to show progress bar
batch_size = 500
X_vectors = []

for i in tqdm(range(0, len(X_raw), batch_size)):
    batch_texts = X_raw[i : i + batch_size]
    batch_vecs = embedder.encode(batch_texts)
    X_vectors.append(batch_vecs)

X_vectors = np.vstack(X_vectors)
y_all = np.array(y_raw)

print(f"\n‚úÖ Vectorization Complete. Shape: {X_vectors.shape}")



‚è≥ Loading Embedding Model...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


üî¢ Converting text to vectors...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 30/30 [01:16<00:00,  2.54s/it]


‚úÖ Vectorization Complete. Shape: (15000, 384)





In [4]:
# ==========================================
# 4. TRAIN & VALIDATE MODELS
# ==========================================
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, accuracy_score

# --- SPLITTING TARGETS ---
# We have 7 columns in y_all. We must split them for the two brains.

# Brain 1 (Regressor): Predicts continuous numbers
# Columns: 0(Str), 1(Guide), 2(CNet), 4(Dilate), 5(Blur)
y_cont = y_all[:, [0, 1, 2, 4, 5]]

# Brain 2 (Classifier): Predicts discrete choices
# Columns: 3(UseCNet), 6(MaskID)
y_disc = y_all[:, [3, 6]]

# Split Data (80% Train, 20% Test)
X_train, X_test, y_cont_train, y_cont_test, y_disc_train, y_disc_test = train_test_split(
    X_vectors, y_cont, y_disc, test_size=0.2, random_state=42
)

print("üèÉ Training Random Forest Models...")
regressor = RandomForestRegressor(n_estimators=50, n_jobs=-1)
classifier = RandomForestClassifier(n_estimators=50, n_jobs=-1)

regressor.fit(X_train, y_cont_train)
classifier.fit(X_train, y_disc_train)

# --- VALIDATION ---
print("\nüìä Validation Results:")

# 1. Continuous Metrics
cont_preds = regressor.predict(X_test)
cont_score = r2_score(y_cont_test, cont_preds)
print(f"   üìà Continuous Params (Str, CNet, etc) R¬≤:  {cont_score:.4f} (Target: > 0.95)")

# 2. Discrete Metrics
disc_preds = classifier.predict(X_test)

# Check 'Use ControlNet' (Column 0 of disc_preds)
acc_cnet = accuracy_score(y_disc_test[:, 0], disc_preds[:, 0])
print(f"   üéØ Use ControlNet (T/F) Accuracy:         {acc_cnet:.4f} (Target: > 0.99)")

# Check 'Mask Strategy' (Column 1 of disc_preds)
acc_mask = accuracy_score(y_disc_test[:, 1], disc_preds[:, 1])
print(f"   üé≠ Mask Strategy (Std/Box/Inv) Accuracy:  {acc_mask:.4f} (Target: > 0.99)")

üèÉ Training Random Forest Models...

üìä Validation Results:
   üìà Continuous Params (Str, CNet, etc) R¬≤:  1.0000 (Target: > 0.95)
   üéØ Use ControlNet (T/F) Accuracy:         1.0000 (Target: > 0.99)
   üé≠ Mask Strategy (Std/Box/Inv) Accuracy:  1.0000 (Target: > 0.99)


In [7]:
# ==========================================
# 5. SANITY CHECK (TEST THE BRAIN)
# ==========================================
test_prompts = [
    # 1. REMOVAL (Expect: Standard Mask, High Dilation > 15px, CNet ~0.0)
    ("obliterate the apple", "apple"),      # Should trigger Box mask logic if "obliterate" is destruction
    ("remove the chair", "chair"),
    ("delete the right cat", "cat"),

    # 2. BACKGROUND (Expect: Inverse Mask, Str=1.0, CNet ~0.0)
    ("man standing on mars", "background"),
    ("change background to a neon city", "background"),
    ("cat in a snowy forest", "background"),

    # 3. TEXTURE/COLOR (Expect: Standard Mask, Low CNet ~0.10)
    # CRITICAL CHECK: Ensure CNet is NOT high (0.8) anymore!
    ("make the apple red", "apple"),
    ("shiny gold car", "car"),
    ("turn the cat blue", "cat"),

    # 4. SPECIES SWAP (Expect: Standard Mask, Low CNet ~0.10)
    ("turn the dog into a cat", "dog"),
    ("swap the apple with an orange", "apple"),
    ("transform the man into a bear", "man"),

    # 5. DESTRUCTION (Expect: Box Mask, Str=1.0)
    ("crushed car", "car"),
    ("bitten apple", "apple")
]

print("ü§ñ TESTING NEW BRAIN PREDICTIONS:\n")

for p, t in test_prompts:
    # 1. Encode
    vec = embedder.encode([f"{p} | target: {t}"])

    # 2. Predict
    cont = regressor.predict(vec)[0]
    disc = classifier.predict(vec)[0]

    # 3. Decode
    strat_name = ["Standard", "Box", "Inverse"][int(disc[1])]

    print(f"Prompt: '{p}'")
    print(f"Target: '{t}'")
    print(f"   -> Mask:       {strat_name}")
    print(f"   -> Strength:   {cont[0]:.2f}")
    print(f"   -> ControlNet: {cont[2]:.2f} (Goal: Low for Swaps/Colors)")
    print(f"   -> Dilation:   {int(cont[3])}px")
    print("---")

ü§ñ TESTING NEW BRAIN PREDICTIONS:

Prompt: 'obliterate the apple'
Target: 'apple'
   -> Mask:       Box
   -> Strength:   0.97
   -> ControlNet: 0.03 (Goal: Low for Swaps/Colors)
   -> Dilation:   0px
---
Prompt: 'remove the chair'
Target: 'chair'
   -> Mask:       Standard
   -> Strength:   1.00
   -> ControlNet: 0.00 (Goal: Low for Swaps/Colors)
   -> Dilation:   20px
---
Prompt: 'delete the right cat'
Target: 'cat'
   -> Mask:       Standard
   -> Strength:   1.00
   -> ControlNet: 0.00 (Goal: Low for Swaps/Colors)
   -> Dilation:   20px
---
Prompt: 'man standing on mars'
Target: 'background'
   -> Mask:       Inverse
   -> Strength:   1.00
   -> ControlNet: 0.00 (Goal: Low for Swaps/Colors)
   -> Dilation:   2px
---
Prompt: 'change background to a neon city'
Target: 'background'
   -> Mask:       Inverse
   -> Strength:   0.99
   -> ControlNet: 0.01 (Goal: Low for Swaps/Colors)
   -> Dilation:   1px
---
Prompt: 'cat in a snowy forest'
Target: 'background'
   -> Mask:       Invers

In [8]:
# ==========================================
# 6. SAVE TO GOOGLE DRIVE
# ==========================================
import pickle
import shutil

# Local Save
print("üíæ Saving locally...")
with open("brain_regressor_hybrid.pkl", "wb") as f: pickle.dump(regressor, f)
with open("brain_classifier_hybrid.pkl", "wb") as f: pickle.dump(classifier, f)

# Drive Save
print(f"üöö Copying to {PROJECT_PATH}...")
shutil.copy("brain_regressor_hybrid.pkl", f"{PROJECT_PATH}/brain_regressor_hybrid.pkl")
shutil.copy("brain_classifier_hybrid.pkl", f"{PROJECT_PATH}/brain_classifier_hybrid.pkl")

# Verify
if os.path.exists(f"{PROJECT_PATH}/brain_regressor_hybrid.pkl"):
    print("\n‚úÖ SUCCESS! Brain files are safe in Google Drive.")
else:
    print("\n‚ùå ERROR: Copy failed.")

üíæ Saving locally...
üöö Copying to /content/drive/MyDrive/Projects/Image-Editing-by-Natural-Language-Constraints...

‚úÖ SUCCESS! Brain files are safe in Google Drive.
