In [1]:
# ==========================================
# 1. SETUP & MOUNT DRIVE
# ==========================================
import os
from google.colab import drive

# Install ML Libraries
print("üì¶ Installing Dependencies...")
!pip install -q sentence-transformers scikit-learn joblib matplotlib seaborn

# Mount Google Drive
print("üîó Mounting Google Drive...")
drive.mount('/content/drive')

# Create Project Folder
PROJECT_PATH = "/content/drive/MyDrive/Projects/Image-Editing-by-Natural-Language-Constraints"
if not os.path.exists(PROJECT_PATH):
    os.makedirs(PROJECT_PATH)
    print(f"üìÇ Created project folder: {PROJECT_PATH}")
else:
    print(f"üìÇ Found project folder: {PROJECT_PATH}")

üì¶ Installing Dependencies...
üîó Mounting Google Drive...
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
üìÇ Found project folder: /content/drive/MyDrive/Projects/Image-Editing-by-Natural-Language-Constraints


In [2]:
# ==========================================
# 2. GENERATE SYNTHETIC EXPERT DATA (TUNED FIX)
# ==========================================
import random
import pandas as pd

# Define Logic Rules
# Params: [Strength, Guide, CNet_Scale, Use_CNet(1/0), Dilate, Blur, Mask_Strat_ID]
# Mask_Strat_ID: 0=Standard, 1=Box, 2=Inverse

templates = [
    # 1. REMOVAL -> High Dilation (20px)
    {"text": "remove the {obj}", "type": "obj", "params": [1.0, 7.5, 0.0, 0, 20, 5, 0]},
    {"text": "delete {obj}", "type": "obj", "params": [1.0, 7.5, 0.0, 0, 20, 5, 0]},
    {"text": "clean background", "type": "bg", "params": [1.0, 7.5, 0.0, 0, 20, 5, 0]},
    {"text": "empty space", "type": "bg", "params": [1.0, 7.5, 0.0, 0, 20, 5, 0]},

    # 2. DESTRUCTION -> Box Mask (For "obliterate", "crush")
    {"text": "bitten {obj}", "type": "obj", "params": [1.0, 8.0, 0.0, 0, 0, 0, 1]},
    {"text": "obliterate {obj}", "type": "obj", "params": [1.0, 9.0, 0.0, 0, 0, 0, 1]},
    {"text": "crushed {obj}", "type": "obj", "params": [1.0, 9.0, 0.0, 0, 0, 0, 1]},

    # 3. BACKGROUND -> Inverse Mask (CRITICAL UPDATES HERE)
    # Logic: We include "{obj}" so the brain learns that sentences starting with "Man..." or "Cat..."
    # can STILL be background tasks if the user intent is "location".
    {"text": "{obj} on the moon", "type": "bg", "params": [1.0, 9.0, 0.0, 0, 2, 5, 2]},
    {"text": "{obj} standing on mars", "type": "bg", "params": [1.0, 9.0, 0.0, 0, 2, 5, 2]},
    {"text": "{obj} in a snowy forest", "type": "bg", "params": [1.0, 9.0, 0.0, 0, 2, 5, 2]},
    {"text": "change background to city", "type": "bg", "params": [1.0, 9.0, 0.0, 0, 2, 5, 2]},
    {"text": "background of a beach", "type": "bg", "params": [1.0, 9.0, 0.0, 0, 2, 5, 2]},

    # 4. SPECIES SWAP -> Lower ControlNet (Was 0.15, Now 0.10)
    # FIX: "Weird Dog" - Lowering CNet gives SDXL more freedom to reshape the animal.
    {"text": "turn {obj} into a dog", "type": "obj", "params": [0.95, 7.5, 0.10, 1, 0, 5, 0]},
    {"text": "make {obj} a cat", "type": "obj", "params": [0.95, 7.5, 0.10, 1, 0, 5, 0]},
    {"text": "transform {obj} into a lion", "type": "obj", "params": [0.95, 7.5, 0.10, 1, 0, 5, 0]},
    {"text": "swap {obj} with a bear", "type": "obj", "params": [0.95, 7.5, 0.05, 1, 0, 5, 0]}, # Explicit swap

    # 5. TEXTURE -> Moderate ControlNet (Was 0.8, Now 0.35)
    # FIX: "Stubborn Apple" - 0.8 locked the color. 0.35 keeps the shape but allows color change.
    {"text": "blue {obj}", "type": "obj", "params": [0.90, 7.5, 0.35, 1, 0, 5, 0]},
    {"text": "metallic {obj}", "type": "obj", "params": [0.90, 7.5, 0.35, 1, 0, 5, 0]},
    {"text": "shiny gold {obj}", "type": "obj", "params": [0.90, 7.5, 0.35, 1, 0, 5, 0]},
    {"text": "{obj} made of crystal", "type": "obj", "params": [0.90, 7.5, 0.35, 1, 0, 5, 0]},
]

objects = ["cat", "dog", "car", "apple", "man", "woman", "house", "chair"]

print("üé≤ Generating 15,000 examples (Tuned for Flexibility)...")
X_raw = []
y_raw = []

for _ in range(15000):
    t = random.choice(templates)
    obj = random.choice(objects)

    # 1. Fill the template (e.g., "Man on the moon")
    if "{obj}" in t["text"]:
        prompt_text = t['text'].replace('{obj}', obj)
    else:
        prompt_text = t['text']

    # 2. Determine Target for Brain
    # If type is 'bg', we force the brain to see 'target: background'
    if t["type"] == "bg":
        target_token = "background"
    else:
        target_token = obj

    input_str = f"{prompt_text} | target: {target_token}"

    X_raw.append(input_str)
    y_raw.append(t["params"])

# Preview Data
df_preview = pd.DataFrame(y_raw, columns=["Str", "Guide", "CNet", "UseCNet", "Dilate", "Blur", "MaskID"])
df_preview["Input Text"] = X_raw
print(f"‚úÖ Generated {len(df_preview)} rows.")
print("\nüîé Data Sample:")
display(df_preview.sample(5))

üé≤ Generating 15,000 examples (Tuned for Flexibility)...
‚úÖ Generated 15000 rows.

üîé Data Sample:


Unnamed: 0,Str,Guide,CNet,UseCNet,Dilate,Blur,MaskID,Input Text
13200,1.0,7.5,0.0,0,20,5,0,remove the man | target: man
6314,0.95,7.5,0.1,1,0,5,0,turn car into a dog | target: car
8707,0.9,7.5,0.35,1,0,5,0,metallic cat | target: cat
1426,0.95,7.5,0.05,1,0,5,0,swap car with a bear | target: car
2258,0.9,7.5,0.35,1,0,5,0,blue dog | target: dog


In [3]:
# ==========================================
# 3. VECTORIZE TEXT (THE "EYES")
# ==========================================
from sentence_transformers import SentenceTransformer
import numpy as np
from tqdm import tqdm

print("‚è≥ Loading Embedding Model...")
embedder = SentenceTransformer('all-MiniLM-L6-v2')

print("üî¢ Converting text to vectors...")
# Batch encode to show progress bar
batch_size = 500
X_vectors = []

for i in tqdm(range(0, len(X_raw), batch_size)):
    batch_texts = X_raw[i : i + batch_size]
    batch_vecs = embedder.encode(batch_texts)
    X_vectors.append(batch_vecs)

X_vectors = np.vstack(X_vectors)
y_all = np.array(y_raw)

print(f"\n‚úÖ Vectorization Complete. Shape: {X_vectors.shape}")



‚è≥ Loading Embedding Model...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


üî¢ Converting text to vectors...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 30/30 [01:18<00:00,  2.60s/it]


‚úÖ Vectorization Complete. Shape: (15000, 384)





In [4]:
# ==========================================
# 4. TRAIN & VALIDATE MODELS
# ==========================================
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, accuracy_score

# --- SPLITTING TARGETS ---
# We have 7 columns in y_all. We must split them for the two brains.

# Brain 1 (Regressor): Predicts continuous numbers
# Columns: 0(Str), 1(Guide), 2(CNet), 4(Dilate), 5(Blur)
y_cont = y_all[:, [0, 1, 2, 4, 5]]

# Brain 2 (Classifier): Predicts discrete choices
# Columns: 3(UseCNet), 6(MaskID)
y_disc = y_all[:, [3, 6]]

# Split Data (80% Train, 20% Test)
X_train, X_test, y_cont_train, y_cont_test, y_disc_train, y_disc_test = train_test_split(
    X_vectors, y_cont, y_disc, test_size=0.2, random_state=42
)

print("üèÉ Training Random Forest Models...")
regressor = RandomForestRegressor(n_estimators=50, n_jobs=-1)
classifier = RandomForestClassifier(n_estimators=50, n_jobs=-1)

regressor.fit(X_train, y_cont_train)
classifier.fit(X_train, y_disc_train)

# --- VALIDATION ---
print("\nüìä Validation Results:")

# 1. Continuous Metrics
cont_preds = regressor.predict(X_test)
cont_score = r2_score(y_cont_test, cont_preds)
print(f"   üìà Continuous Params (Str, CNet, etc) R¬≤:  {cont_score:.4f} (Target: > 0.95)")

# 2. Discrete Metrics
disc_preds = classifier.predict(X_test)

# Check 'Use ControlNet' (Column 0 of disc_preds)
acc_cnet = accuracy_score(y_disc_test[:, 0], disc_preds[:, 0])
print(f"   üéØ Use ControlNet (T/F) Accuracy:         {acc_cnet:.4f} (Target: > 0.99)")

# Check 'Mask Strategy' (Column 1 of disc_preds)
acc_mask = accuracy_score(y_disc_test[:, 1], disc_preds[:, 1])
print(f"   üé≠ Mask Strategy (Std/Box/Inv) Accuracy:  {acc_mask:.4f} (Target: > 0.99)")

üèÉ Training Random Forest Models...

üìä Validation Results:
   üìà Continuous Params (Str, CNet, etc) R¬≤:  1.0000 (Target: > 0.95)
   üéØ Use ControlNet (T/F) Accuracy:         1.0000 (Target: > 0.99)
   üé≠ Mask Strategy (Std/Box/Inv) Accuracy:  1.0000 (Target: > 0.99)


In [5]:
# ==========================================
# 5. SANITY CHECK (TEST THE BRAIN)
# ==========================================
test_prompts = [
    ("obliterate the apple", "apple"),    # Expect: Box Mask, High Strength
    ("vanish the cat", "cat"),            # Expect: Standard Mask, High Dilation
    ("man standing on mars", "background"), # Expect: Inverse Mask
    ("make the car shiny", "car")         # Expect: Standard Mask, High CNet
]

print("ü§ñ TESTING NEW BRAIN:\n")

for p, t in test_prompts:
    # 1. Encode
    vec = embedder.encode([f"{p} | target: {t}"])

    # 2. Predict
    cont = regressor.predict(vec)[0]
    disc = classifier.predict(vec)[0]

    # 3. Decode
    strat_name = ["Standard", "Box", "Inverse"][int(disc[1])]

    print(f"Prompt: '{p}'")
    print(f"   -> Mask: {strat_name}")
    print(f"   -> Dilation: {int(cont[3])}px")
    print(f"   -> ControlNet: {cont[2]:.2f}")
    print("---")

ü§ñ TESTING NEW BRAIN:

Prompt: 'obliterate the apple'
   -> Mask: Box
   -> Dilation: 0px
   -> ControlNet: 0.03
---
Prompt: 'vanish the cat'
   -> Mask: Standard
   -> Dilation: 5px
   -> ControlNet: 0.08
---
Prompt: 'man standing on mars'
   -> Mask: Inverse
   -> Dilation: 2px
   -> ControlNet: 0.00
---
Prompt: 'make the car shiny'
   -> Mask: Standard
   -> Dilation: 0px
   -> ControlNet: 0.32
---


In [7]:
# ==========================================
# 6. SAVE TO GOOGLE DRIVE
# ==========================================
import pickle
import shutil

# Local Save
print("üíæ Saving locally...")
with open("brain_regressor_hybrid.pkl", "wb") as f: pickle.dump(regressor, f)
with open("brain_classifier_hybrid.pkl", "wb") as f: pickle.dump(classifier, f)

# Drive Save
print(f"üöö Copying to {PROJECT_PATH}...")
shutil.copy("brain_regressor_hybrid.pkl", f"{PROJECT_PATH}/brain_regressor_hybrid.pkl")
shutil.copy("brain_classifier_hybrid.pkl", f"{PROJECT_PATH}/brain_classifier_hybrid.pkl")

# Verify
if os.path.exists(f"{PROJECT_PATH}/brain_regressor_hybrid.pkl"):
    print("\n‚úÖ SUCCESS! Brain files are safe in Google Drive.")
else:
    print("\n‚ùå ERROR: Copy failed.")

üíæ Saving locally...
üöö Copying to /content/drive/MyDrive/Projects/Image-Editing-by-Natural-Language-Constraints...

‚úÖ SUCCESS! Brain files are safe in Google Drive.
