<a href="https://colab.research.google.com/github/Ag230602/ani/blob/main/Untitled5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# --- Reproducibility Lab: ULIP (Unified Language-Image-Point cloud Pretraining) ---

# ------------------------------------------------------------
# Step 0: Setup directories
# ------------------------------------------------------------
import os, pathlib

# Working directories for this notebook. Creation is idempotent
# (exist_ok=True), so the cell can be re-run without errors.
for _workdir in ("runs", "logs", "data/ModelNet40_PC", "data/texts", "pretrained"):
    os.makedirs(_workdir, exist_ok=True)

# Absolute path of the directory the notebook is running in.
BASE = pathlib.Path(".").resolve()

# ------------------------------------------------------------
# Step 1: Clone & install
# ------------------------------------------------------------
import subprocess
import sys

REPO_URL = "https://github.com/salesforce/ULIP.git"
REPO_DIR = BASE / "ULIP"

# Clone only when the checkout is missing so the cell can be re-run safely.
if not REPO_DIR.exists():
    print("Cloning ULIP repo...")
    subprocess.run(["git", "clone", REPO_URL, str(REPO_DIR)], check=True)
else:
    print("Repo already exists:", REPO_DIR)

# Install dependencies (ULIP is PyTorch-based).
# pip is invoked as "<kernel interpreter> -m pip" so packages land in the
# environment this notebook is running in, not in whichever `pip` executable
# happens to be first on PATH.
PIP_CMD = [sys.executable, "-m", "pip"]
subprocess.run(PIP_CMD + ["install", "-U", "pip"], check=True)
subprocess.run(PIP_CMD + ["install", "wheel"], check=True)

# The requirements install is deliberately non-fatal (check=False) so that
# pip's own error output stays visible and the rest of the cell still runs.
# A failure is reported explicitly instead of being silently ignored.
requirements_install = subprocess.run(
    PIP_CMD + ["install", "-r", "requirements.txt"],
    cwd=REPO_DIR,
    check=False,
)
if requirements_install.returncode != 0:
    print(
        "WARNING: installing ULIP requirements failed "
        f"(pip exit code {requirements_install.returncode}); "
        "later steps may hit missing packages."
    )

# ------------------------------------------------------------
# Step 2: Log environment metadata
# ------------------------------------------------------------
import platform, sys, json, torch

# Query CUDA availability once; the CUDA-specific fields default to the
# string "None" and are filled in only when a GPU is usable.
_has_cuda = torch.cuda.is_available()

meta = {
    "python_version": " ".join(sys.version.split("\n")),
    "platform": platform.platform(),
    "cuda_available": _has_cuda,
    "torch_version": torch.__version__,
    "cuda_version": "None",
    "gpu_name": "None",
}
if _has_cuda:
    meta["cuda_version"] = torch.version.cuda
    meta["gpu_name"] = torch.cuda.get_device_name(0)

# Record the checked-out ULIP commit. Any failure is stored as text in the
# metadata instead of aborting the cell.
try:
    commit = subprocess.check_output(
        ["git", "-C", str(REPO_DIR), "rev-parse", "HEAD"],
        stderr=subprocess.PIPE,
    ).decode().strip()
    meta["git_commit"] = commit
except Exception as err:
    meta["git_commit"] = f"Error getting commit: {err}"

# Serialise once, then both persist and display the same text.
_meta_json = json.dumps(meta, indent=2)
with open("env_metadata.json", "w") as meta_file:
    meta_file.write(_meta_json)

print("Environment metadata:")
print(_meta_json)

# ------------------------------------------------------------
# Step 3: Set seeds
# ------------------------------------------------------------
import random, numpy as np

SEED = 42

# Seed Python's `random`, NumPy's global generator and PyTorch with the same value.
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    _seed_fn(SEED)

if torch.cuda.is_available():
    # GPU-only settings: seed every CUDA device and switch cuDNN to
    # deterministic mode with benchmarking disabled.
    torch.cuda.manual_seed_all(SEED)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

print("Random seed set to {}".format(SEED))

# ------------------------------------------------------------
# Step 4: Download data and pretrained model
# ------------------------------------------------------------
print("Downloading data and pretrained model...")
# NOTE: Nothing is downloaded by this step yet. Every command below is a
# commented-out placeholder whose URL still has to be filled in for the
# ModelNet40 point cloud data, the text data, and the ULIP pretrained checkpoint.
# Refer to the ULIP GitHub repository or its documentation for the real locations.

# Example placeholder commands (replace with actual download methods like wget or curl):
# subprocess.run(["wget", "-O", "data/ModelNet40_PC/modelnet40_pc.zip", "YOUR_MODELNET40_PC_URL"], check=True)
# subprocess.run(["unzip", "data/ModelNet40_PC/modelnet40_pc.zip", "-d", "data/ModelNet40_PC"], check=True)
# subprocess.run(["wget", "-O", "data/texts/texts.zip", "YOUR_TEXT_DATA_URL"], check=True)
# subprocess.run(["unzip", "data/texts/texts.zip", "-d", "data/texts"], check=True)
# subprocess.run(["wget", "-O", "pretrained/ulip_ckpt.pth", "YOUR_PRETRAINED_CKPT_URL"], check=True)

# IMPORTANT: Replace the placeholder URLs and commands with the actual ones.
# If the data ships in a different format (e.g., h5), different processing steps may be needed.
# Refer to the ULIP documentation for specific data preparation instructions.

print("Data and pretrained model download steps added (requires actual URLs).")


# ------------------------------------------------------------
# Step 5: Baseline run
# ------------------------------------------------------------
# ULIP provides scripts in scripts/ for pretraining or zero-shot eval.
# Example baseline: run zero-shot classification on ModelNet40 dataset.

import shlex
import sys

# Flip to True once the data and checkpoint from Step 4 are in place.
# While it is False the commands are only recorded, never executed, and the
# logs and printed messages say so explicitly.
RUN_EXPERIMENTS = False

# The script is launched with cwd=REPO_DIR, but the inputs live under BASE.
# Absolute paths are used because a bare "data/..." would resolve inside the
# ULIP checkout rather than in the directories created in Step 0.
# NOTE(review): confirm that main_zero_shot.py and these flags exist at the
# cloned ULIP commit; this notebook has never executed the command.
baseline_cmd = [
    sys.executable, "main_zero_shot.py",
    "--dataset", "modelnet40",
    "--pc_root", str(BASE / "data" / "ModelNet40_PC"),
    "--text_root", str(BASE / "data" / "texts"),
    "--ckpt", str(BASE / "pretrained" / "ulip_ckpt.pth"),
]

# Variation: change batch size (only one parameter change). Deriving it from
# the baseline guarantees that nothing else differs between the two runs.
variation_cmd = baseline_cmd + ["--batch_size", "128"]   # changed parameter


def _run_and_log(label, description, cmd, log_path):
    """Record one experiment command in a log file and optionally execute it.

    Args:
        label: Short lowercase name used in the printed messages, e.g. "baseline".
        description: First line written to the log file.
        cmd: Command as an argument list; executed with cwd=REPO_DIR when
            RUN_EXPERIMENTS is True.
        log_path: Path of the log file to (over)write.

    Raises:
        subprocess.CalledProcessError: If the command runs and exits non-zero.
    """
    if RUN_EXPERIMENTS:
        print(f"Running {label} experiment...")
        subprocess.run(cmd, cwd=REPO_DIR, check=True)
        status = "executed successfully"
    else:
        status = "not executed (RUN_EXPERIMENTS is False)"

    with open(log_path, "w") as log_file:
        log_file.write(description + "\n")
        # shlex.join quotes arguments, so the logged command can be pasted
        # into a shell even when a path contains spaces.
        log_file.write("Command: " + shlex.join(cmd) + "\n")
        log_file.write("Status: " + status + "\n")

    if RUN_EXPERIMENTS:
        print(f"{label.capitalize()} run complete → {log_path}")
    else:
        print(f"{label.capitalize()} command logged, NOT executed → {log_path}")


_run_and_log(
    "baseline",
    "Baseline run: ULIP zero-shot on ModelNet40",
    baseline_cmd,
    "logs/baseline_log.txt",
)
_run_and_log(
    "variation",
    "Variation run: ULIP zero-shot on ModelNet40 with batch_size=128",
    variation_cmd,
    "logs/variation_log.txt",
)

Repo already exists: /content/ULIP
Environment metadata:
{
  "python_version": "3.12.11 (main, Jun  4 2025, 08:56:18) [GCC 11.4.0]",
  "platform": "Linux-6.1.123+-x86_64-with-glibc2.35",
  "cuda_available": false,
  "torch_version": "2.8.0+cu126",
  "cuda_version": "None",
  "gpu_name": "None",
  "git_commit": "5e7c0da470fc16717030ec4116b0f81d4d2b4823"
}
Random seed set to 42
Downloading data and pretrained model...
Data and pretrained model download steps added (requires actual URLs).
Running baseline experiment...
Baseline run complete → logs/baseline_log.txt
Running variation experiment...
Variation run complete → logs/variation_log.txt


In [None]:
# --- Reproducibility Lab: ULIP (Unified Language-Image-Point cloud Pretraining) ---

# ------------------------------------------------------------
# Step 0: Setup directories
# ------------------------------------------------------------
import os, pathlib

# Working directories for this notebook. Creation is idempotent
# (exist_ok=True), so the cell can be re-run without errors.
for _workdir in ("runs", "logs", "data/ModelNet40_PC", "data/texts", "pretrained"):
    os.makedirs(_workdir, exist_ok=True)

# Absolute path of the directory the notebook is running in.
BASE = pathlib.Path(".").resolve()

# ------------------------------------------------------------
# Step 1: Clone & install
# ------------------------------------------------------------
import subprocess
import sys

REPO_URL = "https://github.com/salesforce/ULIP.git"
REPO_DIR = BASE / "ULIP"

# Clone only when the checkout is missing so the cell can be re-run safely.
if not REPO_DIR.exists():
    print("Cloning ULIP repo...")
    subprocess.run(["git", "clone", REPO_URL, str(REPO_DIR)], check=True)
else:
    print("Repo already exists:", REPO_DIR)

# Install dependencies (ULIP is PyTorch-based).
# pip is invoked as "<kernel interpreter> -m pip" so packages land in the
# environment this notebook is running in, not in whichever `pip` executable
# happens to be first on PATH.
PIP_CMD = [sys.executable, "-m", "pip"]
subprocess.run(PIP_CMD + ["install", "-U", "pip"], check=True)
# check=True: a failed requirements install raises CalledProcessError and stops the cell.
subprocess.run(PIP_CMD + ["install", "-r", "requirements.txt"], cwd=REPO_DIR, check=True)

# ------------------------------------------------------------
# Step 2: Log environment metadata
# ------------------------------------------------------------
import platform, sys, json, torch

# Query CUDA availability once; the CUDA-specific fields default to the
# string "None" and are filled in only when a GPU is usable.
_has_cuda = torch.cuda.is_available()

meta = {
    "python_version": " ".join(sys.version.split("\n")),
    "platform": platform.platform(),
    "cuda_available": _has_cuda,
    "torch_version": torch.__version__,
    "cuda_version": "None",
    "gpu_name": "None",
}
if _has_cuda:
    meta["cuda_version"] = torch.version.cuda
    meta["gpu_name"] = torch.cuda.get_device_name(0)

# Record the checked-out ULIP commit. Any failure is stored as text in the
# metadata instead of aborting the cell.
try:
    commit = subprocess.check_output(
        ["git", "-C", str(REPO_DIR), "rev-parse", "HEAD"],
        stderr=subprocess.PIPE,
    ).decode().strip()
    meta["git_commit"] = commit
except Exception as err:
    meta["git_commit"] = f"Error getting commit: {err}"

# Serialise once, then both persist and display the same text.
_meta_json = json.dumps(meta, indent=2)
with open("env_metadata.json", "w") as meta_file:
    meta_file.write(_meta_json)

print("Environment metadata:")
print(_meta_json)

# ------------------------------------------------------------
# Step 3: Set seeds
# ------------------------------------------------------------
import random, numpy as np

SEED = 42

# Seed Python's `random`, NumPy's global generator and PyTorch with the same value.
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    _seed_fn(SEED)

if torch.cuda.is_available():
    # GPU-only settings: seed every CUDA device and switch cuDNN to
    # deterministic mode with benchmarking disabled.
    torch.cuda.manual_seed_all(SEED)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

print("Random seed set to {}".format(SEED))

# ------------------------------------------------------------
# Step 4: Download data and pretrained model
# ------------------------------------------------------------
print("Downloading data and pretrained model...")
# NOTE: Nothing is downloaded by this step yet. Every command below is a
# commented-out placeholder whose URL still has to be filled in for the
# ModelNet40 point cloud data, the text data, and the ULIP pretrained checkpoint.
# Refer to the ULIP GitHub repository or its documentation for the real locations.

# Example placeholder commands (replace with actual download methods like wget or curl):
# subprocess.run(["wget", "-O", "data/ModelNet40_PC/modelnet40_pc.zip", "YOUR_MODELNET40_PC_URL"], check=True)
# subprocess.run(["unzip", "data/ModelNet40_PC/modelnet40_pc.zip", "-d", "data/ModelNet40_PC"], check=True)
# subprocess.run(["wget", "-O", "data/texts/texts.zip", "YOUR_TEXT_DATA_URL"], check=True)
# subprocess.run(["unzip", "data/texts/texts.zip", "-d", "data/texts"], check=True)
# subprocess.run(["wget", "-O", "pretrained/ulip_ckpt.pth", "YOUR_PRETRAINED_CKPT_URL"], check=True)

# IMPORTANT: Replace the placeholder URLs and commands with the actual ones.
# If the data ships in a different format (e.g., h5), different processing steps may be needed.
# Refer to the ULIP documentation for specific data preparation instructions.

print("Data and pretrained model download steps added (requires actual URLs).")


# ------------------------------------------------------------
# Step 5: Baseline run
# ------------------------------------------------------------
import shlex
import sys

# Flip to True once the data and checkpoint from Step 4 are in place.
# While it is False the commands are only recorded, never executed, and the
# logs and printed messages say so explicitly.
RUN_EXPERIMENTS = False

# The script is launched with cwd=REPO_DIR, but the inputs live under BASE.
# Absolute paths are used because a bare "data/..." would resolve inside the
# ULIP checkout rather than in the directories created in Step 0.
# NOTE(review): confirm that main_zero_shot.py and these flags exist at the
# cloned ULIP commit; this notebook has never executed the command.
baseline_cmd = [
    sys.executable, "main_zero_shot.py",
    "--dataset", "modelnet40",
    "--pc_root", str(BASE / "data" / "ModelNet40_PC"),
    "--text_root", str(BASE / "data" / "texts"),
    "--ckpt", str(BASE / "pretrained" / "ulip_ckpt.pth"),
]

# ------------------------------------------------------------
# Step 6: Controlled variation
# ------------------------------------------------------------
# Only the batch size changes. Deriving the command from the baseline
# guarantees that nothing else differs between the two runs.
variation_cmd = baseline_cmd + ["--batch_size", "128"]   # changed parameter


def _run_and_log(label, description, cmd, log_path):
    """Record one experiment command in a log file and optionally execute it.

    Args:
        label: Short lowercase name used in the printed messages, e.g. "baseline".
        description: First line written to the log file.
        cmd: Command as an argument list; executed with cwd=REPO_DIR when
            RUN_EXPERIMENTS is True.
        log_path: Path of the log file to (over)write.

    Raises:
        subprocess.CalledProcessError: If the command runs and exits non-zero.
    """
    if RUN_EXPERIMENTS:
        print(f"Running {label} experiment...")
        subprocess.run(cmd, cwd=REPO_DIR, check=True)
        status = "executed successfully"
    else:
        status = "not executed (RUN_EXPERIMENTS is False)"

    with open(log_path, "w") as log_file:
        log_file.write(description + "\n")
        # shlex.join quotes arguments, so the logged command can be pasted
        # into a shell even when a path contains spaces.
        log_file.write("Command: " + shlex.join(cmd) + "\n")
        log_file.write("Status: " + status + "\n")

    if RUN_EXPERIMENTS:
        print(f"{label.capitalize()} run complete → {log_path}")
    else:
        print(f"{label.capitalize()} command logged, NOT executed → {log_path}")


_run_and_log(
    "baseline",
    "Baseline run: ULIP zero-shot on ModelNet40",
    baseline_cmd,
    "logs/baseline_log.txt",
)
_run_and_log(
    "variation",
    "Variation run: ULIP zero-shot on ModelNet40 with batch_size=128",
    variation_cmd,
    "logs/variation_log.txt",
)

# ------------------------------------------------------------
# Step 7: Auto-generate Reproducibility.md
# ------------------------------------------------------------
md_content = f"""# Reproducibility Report: ULIP (Unified Language-Image-Point cloud Pretraining)

## Environment

Repo already exists: /content/ULIP
Environment metadata:
{
  "python_version": "3.12.11 (main, Jun  4 2025, 08:56:18) [GCC 11.4.0]",
  "platform": "Linux-6.1.123+-x86_64-with-glibc2.35",
  "cuda_available": false,
  "torch_version": "2.8.0+cu126",
  "cuda_version": "None",
  "gpu_name": "None",
  "git_commit": "5e7c0da470fc16717030ec4116b0f81d4d2b4823"
}
Random seed set to 42
Downloading data and pretrained model...
Data and pretrained model download steps added (requires actual URLs).
Running baseline experiment...
Baseline run complete → logs/baseline_log.txt
Running variation experiment...
Variation run complete → logs/variation_log.txt


In [None]:
%%bash
# Write run_all.sh: a standalone script that repeats the baseline and the
# batch-size variation runs outside the notebook. The heredoc delimiter is
# quoted ('EOF'), so nothing below is expanded while the file is written.
cat > run_all.sh <<'EOF'
#!/bin/bash
# Abort on the first failing command, on use of an unset variable, and on a
# failure anywhere in a pipeline.
set -euo pipefail

# Resolve every path from the script's own location so it can be launched from
# any directory; a bare `cd ULIP` only works when the caller is already in the
# directory that contains the checkout.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR/ULIP"

# Baseline run
python main_zero_shot.py \
    --dataset modelnet40 \
    --pc_root "$SCRIPT_DIR/data/ModelNet40_PC" \
    --text_root "$SCRIPT_DIR/data/texts" \
    --ckpt "$SCRIPT_DIR/pretrained/ulip_ckpt.pth"

# Variation run: identical to the baseline except for --batch_size
python main_zero_shot.py \
    --dataset modelnet40 \
    --pc_root "$SCRIPT_DIR/data/ModelNet40_PC" \
    --text_root "$SCRIPT_DIR/data/texts" \
    --ckpt "$SCRIPT_DIR/pretrained/ulip_ckpt.pth" \
    --batch_size 128
EOF


In [None]:
# Mark run_all.sh as executable so it can be invoked directly as ./run_all.sh.
!chmod +x run_all.sh


In [None]:
%%bash
cat > Reproducibility.md <<'EOF'
# Reproducibility Report: ULIP (Unified Language-Image-Point cloud Pretraining)

## Environment
- Python version: [fill in]
- Torch version: [fill in]
- CUDA available: [True/False]
- GPU: [e.g., Tesla T4]
- Git commit: [auto-logged in env_metadata.json]

## Baseline Run
- **Command:**
  ```bash
  python main_zero_shot.py --dataset modelnet40 \
      --pc_root data/ModelNet40_PC \
      --text_root data/texts \
      --ckpt pretrained/ulip_ckpt.pth


