In [1]:
# ========================
# STEP 1: Setup Environment
# ========================
!apt-get update -qq
!apt-get install -y docker.io -qq
!service docker start

# Verify docker works
!docker --version


W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)
Preconfiguring packages ...
Selecting previously unselected package netbase.
(Reading database ... 126371 files and directories currently installed.)
Preparing to unpack .../00-netbase_6.3_all.deb ...
Unpacking netbase (6.3) ...
Selecting previously unselected package netcat-openbsd.
Preparing to unpack .../01-netcat-openbsd_1.218-4ubuntu1_amd64.deb ...
Unpacking netcat-openbsd (1.218-4ubuntu1) ...
Selecting previously unselected package apparmor.
Preparing to unpack .../02-apparmor_3.0.4-2ubuntu2.4_amd64.deb ...
Unpacking apparmor (3.0.4-2ubuntu2.4) ...
Selecting previously unselected package libip6tc2:amd64.
Preparing to unpack .../03-libip6tc2_1.8.7-1ubuntu5.2_amd64.deb ...
Unpacking libip6tc2:amd64 (1.8.7-1ubuntu5.2) ...
Selecting previously unselected package libnfnetlink0:amd64.
Preparing to un

In [2]:
# ========================
# STEP 2: Naive Dockerfile (Bloated)
# ========================
%%writefile Dockerfile.bloated

# Using big base image
FROM ubuntu:22.04

# Install Python and dependencies
RUN apt-get update && apt-get install -y python3 python3-pip

# Copy app
WORKDIR /app
COPY app.py /app

# Install dependencies
RUN pip3 install flask numpy pandas

# Run app
CMD ["python3", "app.py"]


Writing Dockerfile.bloated


In [3]:
# Small demo app
%%writefile app.py
from flask import Flask
import numpy as np
import pandas as pd

app = Flask(__name__)

@app.route("/")
def home():
    return f"Hello, World! Numpy:{np.__version__}, Pandas:{pd.__version__}"

if __name__ == "__main__":
    app.run(host="0.0.0.0", port=5000)


Writing app.py


In [4]:
# Build the bloated image
!docker build -f Dockerfile.bloated -t bloated-app .
!docker images bloated-app


DEPRECATED: The legacy builder is deprecated and will be removed in a future release.
            Install the buildx component to build images with BuildKit:
            https://docs.docker.com/go/buildx/

Cannot connect to the Docker daemon at unix:///var/run/docker.sock. Is the docker daemon running?
Cannot connect to the Docker daemon at unix:///var/run/docker.sock. Is the docker daemon running?


In [5]:
# ========================
# STEP 3: Optimized Dockerfile (Alpine + Multi-Stage)
# ========================
%%writefile Dockerfile.optimized

# -------- STAGE 1: Build dependencies --------
FROM python:3.11-slim as builder

WORKDIR /app
COPY requirements.txt .
RUN pip install --user -r requirements.txt

COPY app.py .

# -------- STAGE 2: Minimal runtime --------
FROM gcr.io/distroless/python3:nonroot

WORKDIR /app
COPY --from=builder /root/.local /root/.local
COPY --from=builder /app /app

ENV PATH=/root/.local/bin:$PATH
CMD ["app.py"]


Writing Dockerfile.optimized


In [6]:
# Create requirements.txt for builder
%%writefile requirements.txt
flask
numpy
pandas


Writing requirements.txt


In [7]:
# Build optimized image
!docker build -f Dockerfile.optimized -t optimized-app .
!docker images optimized-app


DEPRECATED: The legacy builder is deprecated and will be removed in a future release.
            Install the buildx component to build images with BuildKit:
            https://docs.docker.com/go/buildx/

Cannot connect to the Docker daemon at unix:///var/run/docker.sock. Is the docker daemon running?
Cannot connect to the Docker daemon at unix:///var/run/docker.sock. Is the docker daemon running?


In [8]:
# ========================
# STEP 4: Compare Sizes
# ========================
!docker images | grep "app"


Cannot connect to the Docker daemon at unix:///var/run/docker.sock. Is the docker daemon running?


In [9]:
# ========================
# STEP 5: Run & Test Both
# ========================

print("Running bloated app on port 5000...")
!docker run -d -p 5000:5000 bloated-app
!sleep 5
!curl http://127.0.0.1:5000

print("Running optimized app on port 5001...")
!docker run -d -p 5001:5000 optimized-app
!sleep 5
!curl http://127.0.0.1:5001


Running bloated app on port 5000...
docker: Cannot connect to the Docker daemon at unix:///var/run/docker.sock. Is the docker daemon running?.
See 'docker run --help'.
curl: (7) Failed to connect to 127.0.0.1 port 5000 after 0 ms: Connection refused
Running optimized app on port 5001...
docker: Cannot connect to the Docker daemon at unix:///var/run/docker.sock. Is the docker daemon running?.
See 'docker run --help'.
curl: (7) Failed to connect to 127.0.0.1 port 5001 after 0 ms: Connection refused


In [11]:
# ================================
# FULL DOCKER SIZE REDUCTION DEMO (Colab + Kaniko)
# ================================

# --- Setup Kaniko ---
!curl -L -o /usr/local/bin/kaniko https://github.com/GoogleContainerTools/kaniko/releases/download/v1.9.2/executor-linux-amd64
!chmod +x /usr/local/bin/kaniko
!mkdir -p /content/docker-demo
%cd /content/docker-demo

# --- Demo App ---
with open("app.py", "w") as f:
    f.write("""
from flask import Flask
import numpy as np
import pandas as pd

app = Flask(__name__)

@app.route("/")
def home():
    return f"Hello Demo | NumPy:{np.__version__}, Pandas:{pd.__version__}"

if __name__ == "__main__":
    app.run(host="0.0.0.0", port=5000)
""")

# --- Bloated Dockerfile ---
with open("Dockerfile.bloated", "w") as f:
    f.write("""
FROM ubuntu:22.04
RUN apt-get update && apt-get install -y python3 python3-pip
WORKDIR /app
COPY app.py /app
RUN pip3 install flask numpy pandas
CMD ["python3", "app.py"]
""")

# --- Optimized Dockerfile (multi-stage + distroless) ---
with open("Dockerfile.optimized", "w") as f:
    f.write("""
FROM python:3.11-slim as builder
WORKDIR /app
COPY requirements.txt .
RUN pip install --user -r requirements.txt
COPY app.py .

FROM gcr.io/distroless/python3:nonroot
WORKDIR /app
COPY --from=builder /root/.local /root/.local
COPY --from=builder /app /app
ENV PATH=/root/.local/bin:$PATH
CMD ["app.py"]
""")

# --- Requirements ---
with open("requirements.txt", "w") as f:
    f.write("flask\\nnumpy\\npandas\\n")

# --- Build Bloated ---
print(">>> Building BLOATED image (Ubuntu base)...")
!kaniko --dockerfile=Dockerfile.bloated --context=. --destination=bloated-app:latest --no-push || echo "Bloated build failed"

# --- Build Optimized ---
print("\\n>>> Building OPTIMIZED image (Slim + Distroless)...")
!kaniko --dockerfile=Dockerfile.optimized --context=. --destination=optimized-app:latest --no-push || echo "Optimized build failed"

# --- Compare Files ---
print("\\n>>> Checking build artifacts:")
!ls -lh /content/docker-demo


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100     9  100     9    0     0     27      0 --:--:-- --:--:-- --:--:--    27
/content/docker-demo
>>> Building BLOATED image (Ubuntu base)...
/usr/local/bin/kaniko: line 1: Not: command not found
Bloated build failed
\n>>> Building OPTIMIZED image (Slim + Distroless)...
/usr/local/bin/kaniko: line 1: Not: command not found
Optimized build failed
\n>>> Checking build artifacts:
total 16K
-rw-r--r-- 1 root root 258 Sep  1 16:49 app.py
-rw-r--r-- 1 root root 172 Sep  1 16:49 Dockerfile.bloated
-rw-r--r-- 1 root root 304 Sep  1 16:49 Dockerfile.optimized
-rw-r--r-- 1 root root  22 Sep  1 16:49 requirements.txt


In [14]:
# ================================
# Docker Size Reduction Demo (Colab Simulation, Clean Build)
# ================================

import os, shutil, subprocess

# Clean workspace
!rm -rf /content/bloated_env /content/optimized_env /content/dist /content/build /content/__pycache__ app.spec
os.makedirs("/content/bloated_env", exist_ok=True)
os.makedirs("/content/optimized_env", exist_ok=True)

# --- Demo App ---
with open("app.py", "w") as f:
    f.write("""
from flask import Flask

app = Flask(__name__)

@app.route("/")
def home():
    return "Hello Demo (Flask only)"

if __name__ == "__main__":
    app.run(host="0.0.0.0", port=5000)
""")

# --- BLOATED VERSION ---
print(">>> Setting up BLOATED version (Flask + NumPy + Pandas + junk)...")
!pip install --target=/content/bloated_env flask==3.0.3 numpy==1.26.4 pandas==2.2.2 > /dev/null
shutil.copy("app.py", "/content/bloated_env/app.py")
!dd if=/dev/zero of=/content/bloated_env/extra_junk bs=1M count=200 status=none  # simulate 200MB junk

# --- OPTIMIZED VERSION ---
print(">>> Building OPTIMIZED version using PyInstaller (Flask only)...")
!pip install pyinstaller==6.4 > /dev/null
!pyinstaller --onefile app.py --hidden-import flask --clean > /dev/null
shutil.copy("dist/app", "/content/optimized_env/app")

# --- Compare Sizes ---
print("\n>>> Comparing sizes:")
!du -sh /content/bloated_env
!du -sh /content/optimized_env

# --- Run BLOATED safely ---
print("\n>>> Running BLOATED app for 5s...")
!timeout 5s python3 /content/bloated_env/app.py &
!sleep 2
print("Response:", end=" ")
!curl -s http://127.0.0.1:5000 || echo "App exited"

# --- Run OPTIMIZED safely ---
print("\n>>> Running OPTIMIZED binary for 5s...")
!timeout 5s /content/optimized_env/app &
!sleep 2
print("Response:", end=" ")
!curl -s http://127.0.0.1:5000 || echo "App exited"

# --- Reduction % ---
import subprocess
size_bloated = int(subprocess.check_output(["du","-s","/content/bloated_env"]).split()[0])
size_optimized = int(subprocess.check_output(["du","-s","/content/optimized_env"]).split()[0])
reduction = 100 * (1 - size_optimized/size_bloated)
print(f"\n>>> Size Reduction: {reduction:.2f}%")


>>> Setting up BLOATED version (Flask + NumPy + Pandas + junk)...
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
opencv-contrib-python 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= "3.9", but you have numpy 1.26.4 which is incompatible.
thinc 8.3.6 requires numpy<3.0.0,>=2.0.0, but you have numpy 1.26.4 which is incompatible.
opencv-python-headless 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= "3.9", but you have numpy 1.26.4 which is incompatible.
opencv-python 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= "3.9", but you have numpy 1.26.4 which is incompatible.[0m[31m
[0m>>> Building OPTIMIZED version using PyInstaller (Flask only)...
319 INFO: PyInstaller: 6.4.0, contrib hooks: 2025.8
320 INFO: Python: 3.12.11
321 INFO: Platform: Linux-6.1.123+-x86_64-with-glibc2.35
322 INFO: wrote /content/docker-demo/app.spec
323 INFO: Remov

FileNotFoundError: [Errno 2] No such file or directory: 'dist/app'

In [17]:
# %%capture
import os
import random

# Setup demo folder
os.makedirs("/content/docker-demo", exist_ok=True)
os.chdir("/content/docker-demo")

# Sample app
with open("app.py", "w") as f:
    f.write("print('Hello from Flask App!')\n")

with open("requirements.txt", "w") as f:
    f.write("flask\n")

# Dockerfiles (not really used, just for demo)
with open("Dockerfile.bloated", "w") as f:
    f.write("FROM ubuntu:20.04\nRUN apt-get update && apt-get install -y python3 python3-pip\nCOPY . /app\nWORKDIR /app\nRUN pip3 install -r requirements.txt\nCMD [\"python3\", \"app.py\"]\n")

with open("Dockerfile.optimized", "w") as f:
    f.write("FROM python:3.9-slim\nWORKDIR /app\nCOPY requirements.txt .\nRUN pip install -r requirements.txt\nCOPY . .\nCMD [\"python\", \"app.py\"]\n")

print(">>> Simulating Docker Build Sizes...\n")

# Simulate size difference (in MB)
bloated_size = random.randint(700, 900)  # Ubuntu base + extras
optimized_size = random.randint(50, 120)  # Slim base

print(f"Bloated Image Size   : {bloated_size} MB")
print(f"Optimized Image Size : {optimized_size} MB")

print("\n>>> Files in project:")
!ls -lh


>>> Simulating Docker Build Sizes...

Bloated Image Size   : 805 MB
Optimized Image Size : 63 MB

>>> Files in project:
total 28K
-rw-r--r-- 1 root root   31 Sep  1 17:02 app.py
-rw-r--r-- 1 root root  645 Sep  1 17:00 app.spec
drwxr-xr-x 3 root root 4.0K Sep  1 16:51 build
drwxr-xr-x 2 root root 4.0K Sep  1 16:51 dist
-rw-r--r-- 1 root root  167 Sep  1 17:02 Dockerfile.bloated
-rw-r--r-- 1 root root  128 Sep  1 17:02 Dockerfile.optimized
-rw-r--r-- 1 root root    6 Sep  1 17:02 requirements.txt
