<a href="https://colab.research.google.com/github/JonFeli/supervised-soup-project/blob/main/notebooks/colab_setup.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# @title Setup Code
# @markdown This cell is to set up paths and dependencies and clone our repo. 
# This cell can be copy and pasted to the start of every new colab notebook. 
# Note on the latest changes: I have added a ColabCache folder on our shared drive, to save the dependency files. 
# This way the install of the requirements should be much faster, even across sessions. 

# mounting google drive to access the training data
from google.colab import drive
drive.mount('/content/drive')

# path for shared dataset
import os
os.environ["DATA_PATH"] = '/content/drive/MyDrive/SupervisedSoupData/ImageNetSubset'
DATA_PATH = os.getenv("DATA_PATH")

# verify path
if os.path.exists(DATA_PATH):
    print("Dataset found at:", DATA_PATH)
    print("Contents:", os.listdir(DATA_PATH))
else:
    print("Dataset path not found. Please check if you have setup your Drive shortcut properly (see guide on confluence: https://stud-team-rn9zsvdn.atlassian.net/wiki/pages/resumedraft.action?draftId=6586396&draftShareId=6aea0c7c-2591-45b1-a0f8-f3db9e25e222).")


# integrating github by cloning our repo
!git clone https://github.com/NeuralSpiral/supervised-soup-project.git
%cd /content/supervised-soup-project

# to install the dependencies
# !pip install -r requirements.txt
CACHE_PATH = "/content/drive/MyDrive/SupervisedSoupData/ColabCache/pip"

!pip install --no-index --find-links={CACHE_PATH} -r requirements.txt
!pip install -e .

# run short import test
!python tests/setup_test.py

# add path to our src folder
# import sys
# sys.path.append('/content/supervised-soup-project/supervised_soup')

# now we can import the code from our main folder if we need it for the notebook (e.g. dataloader, model), e.g.:
# from supervised_soup import dataloader



In [None]:
# Report whether a CUDA GPU is visible to PyTorch and which device will be used.
import torch

has_cuda = torch.cuda.is_available()
# Only query the device name when CUDA is present; otherwise report the CPU.
device_name = torch.cuda.get_device_name(0) if has_cuda else "CPU"

print("CUDA available:", has_cuda)
print("Device:", device_name)


In [None]:
# @title Test Shared Drive Access
import os
from PIL import Image
import matplotlib.pyplot as plt

# Use the DATA_PATH environment variable when it is set; otherwise fall back
# to the default location of the dataset on the shared drive.
DATA_PATH = os.environ.get("DATA_PATH", "/content/drive/MyDrive/SupervisedSoupData/ImageNetSubset")

print(f"Checking dataset access at: {DATA_PATH}")

# First check the dataset root, then look for its 'train' subfolder.
if os.path.exists(DATA_PATH):
    print("Data folder found.")
    train_dir = os.path.join(DATA_PATH, "train")

    train_dir_missing = not os.path.exists(train_dir)
    if train_dir_missing:
        print("'train' folder not found.")
else:
    print(" Data folder not found. Did you set up the drive shortcut?")


In [None]:
# @title Pull git updates
# Fetch the `main` branch from the `origin` remote and merge it into the branch
# that is currently checked out. The command runs in the current working
# directory, so run the setup cell first (it changes into the repo folder).
!git pull origin main


From https://github.com/JonFeli/supervised-soup-project
 * branch            main       -> FETCH_HEAD
Already up to date.


In [None]:
# wandb login
# Log this session in to Weights & Biases by calling wandb.login().
# NOTE(review): presumably this prompts for an API key when none is stored yet;
# confirm, and never paste the key into the notebook itself.
import wandb
wandb.login()


# Saving Changes
If you want to save changes made in a notebook on Colab, you can either:
- go to File → Save a copy in GitHub,
- or run the cell below.

In [None]:
# @title Push changes to github
# Stage, commit and push the changes found under the current working directory.
# Before running, replace the two placeholders below:
#   - "Your message": a real commit message
#   - branch_name:    the branch you want to push to
# `git add .` stages every change under the current directory, so it is worth
# checking `!git status` first.
!git add .
!git commit -m "Your message"
!git push origin branch_name

In [None]:
# @title How to cache dependencies on drive for faster install time
# Fills the pip cache folder on the shared drive that the setup cell installs
# from (via --find-links). Requires the drive to be mounted and the repo to be cloned.


# requirements.txt is given as a relative path, so run from the repo root
%cd /content/supervised-soup-project

# download the packages listed in requirements.txt into the cache folder (-d = destination directory)
!pip download -r requirements.txt -d "/content/drive/MyDrive/SupervisedSoupData/ColabCache/pip"


In [None]:
# @title How to update cache if dependencies change

!pip download -r requirements.txt -d "/content/drive/MyDrive/SupervisedSoupData/ColabCache/pip" --upgrade


In [None]:
# Print the version of the `python` executable available in this runtime.
!python --version