In [1]:
import cv2
import numpy as np

In [2]:
# Path of the input image that the cells below pass to cv2.imread.
pic = "img/PROFILE.jpg"

# RGB Image (note: OpenCV loads the channels in BGR order)

In [3]:
# cv2.imread does not raise on a missing/unreadable file; it returns None.
# Fail here with a clear message instead of a cryptic error inside imshow.
img = cv2.imread(pic)
if img is None:
    raise FileNotFoundError(f"Could not read image: {pic}")

cv2.namedWindow("output", cv2.WINDOW_NORMAL)
cv2.resizeWindow("output", 900, 900)

# Show the image and block until a key is pressed in the window.
cv2.imshow("output", img)
cv2.waitKey(0)
cv2.destroyAllWindows()

# np.unique scans and sorts the whole array, so compute it once and reuse it.
img_unique = np.unique(img)

# NOTE: the labels say "RGB", but OpenCV stores the channels in BGR order.
print("type(img):", type(img))
print("shape of RGB:", img.shape)
print("dtype of RGB:", img.dtype)
print("unique values in RGB:", img_unique)
print("unique values sum in RGB:", len(img_unique))
print("size (pixels):", img.size)


type(img): <class 'numpy.ndarray'>
shape of RGB: (4032, 3024, 3)
dtype of RGB: uint8
unique values in RGB: [  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35
  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53
  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71
  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89
  90  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107
 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197
 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215
 216 217 218 219 

# What Does "Bit" Mean in an Image?

In [4]:
# 1 bit -> can store 2 values (0,1)
# 8 bit -> can store 256 values (0-255)
# 16 bit -> can store 65536 values (0-65535)
# 32 bit -> can store 4294967296 values (0-4294967295)

# Bit depth

In [5]:
# Bit depth in images = the number of bits used to represent the color of a single pixel.
# Why it matters: it determines the range of colors and shades that can be represented in an image. A higher bit depth allows for more precise color representation and smoother gradients.
# Common Bit Depths:
# 1-bit: Black and white images (2 colors).
# 8-bit: Grayscale images (256 shades of gray) or indexed color images (256 colors).
# 24-bit: True color images (16.7 million colors, 8 bits per channel for RGB).
# 48-bit: High color depth images (over 281 trillion colors, 16 bits per channel for RGB).  
# Use Cases:
# 1-bit: Simple graphics, icons, and binary images.
# 8-bit: Web graphics, simple photographs, and images with limited color palettes.
# 24-bit: Standard photographs, digital images, and most computer graphics.
# 48-bit: Professional photography, medical imaging, and scientific visualization where color accuracy is crucial.  
#

# Black and White (0, 1)

In [6]:
# Load as single-channel grayscale; imread returns None if the file is unreadable.
img_binary = cv2.imread(pic, cv2.IMREAD_GRAYSCALE)
if img_binary is None:
    raise FileNotFoundError(f"Could not read image: {pic}")

# cv2.threshold returns (threshold_used, image); only the image is needed.
# Indexing avoids assigning to `_`, which IPython uses for the last output.
binary_img = cv2.threshold(img_binary, 127, 255, cv2.THRESH_BINARY)[1]

# 0/1 version of the mask (float64, because of the true division).
binary_img_01 = binary_img / 255

cv2.namedWindow("output", cv2.WINDOW_NORMAL)
cv2.resizeWindow("output", 900, 900)

# Display the uint8 0/255 mask directly. imshow multiplies floating-point
# images by 255, so showing `binary_img_01 * 255` only worked by saturation.
cv2.imshow("output", binary_img)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Compute the unique values once and reuse them for both prints.
binary_unique = np.unique(binary_img_01)

print("binary shape:", binary_img_01.shape)
print("binary dtype:", binary_img_01.dtype)
print("unique values in binary:", binary_unique)
print("unique values in binary:", len(binary_unique))
print("size (pixels) in binary:", binary_img_01.size)


binary shape: (4032, 3024)
binary dtype: float64
unique values in binary: [0. 1.]
unique values in binary: 2
size (pixels) in binary: 12192768


# GrayScale

In [7]:
# Load as single-channel grayscale; imread returns None if the file is unreadable.
Gray_img = cv2.imread(pic, cv2.IMREAD_GRAYSCALE)
if Gray_img is None:
    raise FileNotFoundError(f"Could not read image: {pic}")

cv2.namedWindow("output", cv2.WINDOW_NORMAL)
cv2.resizeWindow("output", 900, 900)

# Show the image and block until a key is pressed in the window.
cv2.imshow("output", Gray_img)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Compute the unique values once and reuse them for both prints.
gray_unique = np.unique(Gray_img)

print("Gray shape:", Gray_img.shape)
print("Gray dtype:", Gray_img.dtype)
print("unique values in Gray:", gray_unique)
print("unique values in Gray:", len(gray_unique))
print("size (pixels) in Gray:", Gray_img.size)


Gray shape: (4032, 3024)
Gray dtype: uint8
unique values in Gray: [  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35
  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53
  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71
  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89
  90  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107
 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197
 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215
 216 217 218 219 220 221 222 223 224 225 226 227 228 229 2

# Pixel Size Difference Between These 3 Images

In [8]:
# Total element counts (.size) of the colour, grayscale and binary arrays.
element_counts = (img.size, Gray_img.size, binary_img_01.size)
print("pixel size of RGB, Gray, binary:", *element_counts)

pixel size of RGB, Gray, binary: 36578304 12192768 12192768


In [9]:
# Pixel count = height * width, taken from the array shape instead of
# dividing .size by a hard-coded channel count of 3.
num = img.shape[0] * img.shape[1]
print("num of pixels in RGB image:", num)
print("This show that each pixel in RGB image has 3 channels (R,G,B), and 3 times number of the other 2 channels).")

num of pixels in RGB image: 12192768
This show that each pixel in RGB image has 3 channels (R,G,B), and 3 times number of the other 2 channels).


# Play With the pixel by changing the number

In [10]:
# Target shape for the experiment: L rows, B columns, C channels.
L, B, C = 200, 720, 3  # Don't put channel = 2

# Source arrays to try; swap the first argument of np.resize to experiment.
Bin, Gray, RGB = binary_img_01, Gray_img, img

# np.resize does not interpolate: it flattens the data and truncates or
# repeats it to fill the requested shape.
target_shape = (L, B, C)
im = np.resize(RGB, target_shape)

window = "output"
cv2.namedWindow(window, cv2.WINDOW_NORMAL)
cv2.resizeWindow(window, 900, 900)
cv2.imshow(window, im)
cv2.waitKey(0)
cv2.destroyAllWindows()

print(im.shape)

(200, 720, 3)


In [11]:
# Take each channel plane of `img` by its index on the last axis
# (index 0 -> imB, index 1 -> imG, index 2 -> imR).
imB, imG, imR = img[..., 0], img[..., 1], img[..., 2]

In [12]:
# Place the three single-channel planes side by side (widths add up).
planes = (imB, imG, imR)
new_img = np.hstack(planes)

window = "output"
cv2.namedWindow(window, cv2.WINDOW_NORMAL)
cv2.resizeWindow(window, 900, 900)
cv2.imshow(window, new_img)
cv2.waitKey(0)
cv2.destroyAllWindows()

print(new_img.shape)



(4032, 9072)


# See the different RGB color in the image

In [13]:
# Read the image again, then make a copy with the channel order converted
# from BGR to RGB.
im__ = cv2.imread(pic)
img_ = cv2.cvtColor(im__, code=cv2.COLOR_BGR2RGB)

In [14]:
# img_ was converted with COLOR_BGR2RGB, so its channel order is R, G, B:
# index 0 is red and index 2 is blue. The original names were swapped.
imR = img_[:, :, 0]
imG = img_[:, :, 1]
imB = img_[:, :, 2]

# cv2.imshow interprets 3-channel arrays as BGR, so each plane goes into
# its BGR slot: blue -> 0, green -> 1, red -> 2.
zeros = np.zeros_like(imG)
imR_3c = cv2.merge([zeros, zeros, imR])
imG_3c = cv2.merge([zeros, imG, zeros])
imB_3c = cv2.merge([imB, zeros, zeros])

# Left to right: red, green, blue (same picture as before, correct names now).
new_img = np.hstack((imR_3c, imG_3c, imB_3c))
cv2.namedWindow("output", cv2.WINDOW_NORMAL)
cv2.resizeWindow("output", 1800, 600)
cv2.imshow("output", new_img)
cv2.waitKey(0)
cv2.destroyAllWindows()
print(new_img.shape)
print(f"Shape of concatenated image: {new_img.shape}")
print(f"Original shape: {img_.shape}")
print(f"Individual channel shapes: {imR.shape}, {imG.shape}, {imB.shape}")

(4032, 9072, 3)
Shape of concatenated image: (4032, 9072, 3)
Original shape: (4032, 3024, 3)
Individual channel shapes: (4032, 3024), (4032, 3024), (4032, 3024)


# Resize the image 

In [15]:
# cv2.resize(src, dsize[, dst[, fx[, fy[, interpolation]]]]): the third
# positional argument is `dst`, not the interpolation flag, so the flag must
# be passed by keyword or it is not applied.
# dsize is (width, height); this target does not keep the aspect ratio.
resize_img = cv2.resize(im__, (3300, 800), interpolation=cv2.INTER_LANCZOS4)
cv2.namedWindow("output", cv2.WINDOW_NORMAL)
cv2.resizeWindow("output", 900, 900)
cv2.imshow("output", resize_img)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Filters

- Filters modify pixel values based on their neighbors
- Blurring, Sharpening, Edge Detection, Embossing, etc.

# Mean/Average Filter
- Replace each pixel with the average of surrounding pixels.

    - Effect:
        - Smooths the image
        - Removes noise 
        - Blurs edges


# Gaussian Blur
- Uses a Gaussian (bell-shaped) kernel to blur the image
    - Effect:
        - More natural blur than mean filter
        - Best for noise removal
        - preserves edges better

# Median Blur
- Replace each pixel with the median value of its neighbors.
    - Best for:
        - Removing salt & pepper noise
        - Preserving edges clearly

# Bilateral Filter 
- Smooths the image while keeping edges sharp.
    - Best for:
        - Face smoothing 
        - Cartoon effect
        - Edge-preserving denoising
        

In [16]:
# Apply the four smoothing filters to the same input image.
blur = cv2.blur(im__, ksize=(5, 5))                                     # mean filter, 5x5 window
gaussian = cv2.GaussianBlur(im__, ksize=(5, 5), sigmaX=1)               # Gaussian kernel, 5x5
median = cv2.medianBlur(im__, ksize=5)                                  # median of a 5x5 window
bilateral = cv2.bilateralFilter(im__, d=9, sigmaColor=75, sigmaSpace=75)

# Side-by-side strip: original, Gaussian, mean, median, bilateral.
comparison = (im__, gaussian, blur, median, bilateral)
new_img = np.hstack(comparison)

window = "output"
cv2.namedWindow(window, cv2.WINDOW_NORMAL)
cv2.resizeWindow(window, 900, 900)
cv2.imshow(window, new_img)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Blur the image

In [17]:
cv2.namedWindow("output", cv2.WINDOW_NORMAL)
cv2.resizeWindow("output", 900, 900)

# Region of interest: top-left corner (x, y), width w, height h.
x, y, w, h = 1100, 1300, 900, 900

# Work on a copy so the shared `im__` array is not modified; writing the
# blurred patch back into `im__` made this cell blur the region again on
# every re-run.
im_roi_blurred = im__.copy()
im_crop = im__[y:y+h, x:x+w]
blurred_roi = cv2.blur(im_crop, (50, 50))
im_roi_blurred[y:y+h, x:x+w] = blurred_roi

cv2.imshow("output", im_roi_blurred)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Edge Detection
- To identify the sharp changes in brightness that typically signify object boundaries, edges, lines, or textures.
- Edges = rapid changes in intensity
    - Used in :
        - Object detection,segmentation,tracking

# Sobel Operator
- An edge detection filter that computes the gradient (intensity change) in the X and Y directions
    - Use case:
        - Edge detection as a preprocessing step for a model

# Laplacian Operator
- Also an edge detection filter; unlike Sobel, it is non-directional and detects edges in all directions

In [18]:
# Re-read the image from disk so the following cells start from a fresh array.
im__= cv2.imread(pic)

In [19]:
# CV_64F keeps the signed derivative values (negative slopes are not clipped).
sobelx = cv2.Sobel(im__, cv2.CV_64F, 1, 0, ksize=7)
sobely = cv2.Sobel(im__, cv2.CV_64F, 0, 1, ksize=7)
laplacian = cv2.Laplacian(im__, cv2.CV_64F, ksize=13)

# Blend the absolute X and Y responses equally; without the absolute value,
# gradients of opposite sign would cancel out or be dropped.
sobel_combined = cv2.addWeighted(np.abs(sobelx), 0.5, np.abs(sobely), 0.5, 0)

# cv2.imshow multiplies floating-point images by 255 (it expects [0, 1]), so
# raw gradients would saturate. Rescale the magnitudes to 0..255 uint8 first.
sobel_u8 = cv2.normalize(sobel_combined, None, 0, 255, cv2.NORM_MINMAX, dtype=cv2.CV_8U)
laplacian_u8 = cv2.normalize(np.abs(laplacian), None, 0, 255, cv2.NORM_MINMAX, dtype=cv2.CV_8U)

new_img = np.hstack((sobel_u8, laplacian_u8))
cv2.namedWindow("output", cv2.WINDOW_NORMAL)
cv2.resizeWindow("output", 900, 900)
cv2.imshow("output", new_img)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Canny 
- An edge detection filter
    - Steps:
        - Noise reduction (Gaussian blur)
        - Gradient calculation (Sobel X, Y)
        - Non-maximum suppression (thins the edges)
        - Hysteresis thresholding (threshold1, threshold2)
    - Code example: `edges = cv2.Canny(img, 50, 150)`
        - threshold1 = 50 (lower threshold)
        - threshold2 = 150 (upper threshold)

# How to choose thresholds:
- threshold1: Lower bound (weak edges)

- threshold2: Upper bound (strong edges)

- Edges with gradient > threshold2: Kept

- Edges between threshold1 & threshold2: Kept if connected to strong edges

- Edges < threshold1: Discarded

# Why Canny is best:
- Low error rate (few missed edges)

- Good localization (edges close to true edges)

- Single response (one edge per actual edge)

In [20]:
# Derive the Canny thresholds automatically from the image median
def auto_canny(image, sigma=0.33):
    """Run Canny with thresholds placed in a band around the median intensity.

    Args:
        image: Array handed to both np.median and cv2.Canny.
        sigma: Fractional half-width of the band around the median
            (lower = (1 - sigma) * median, upper = (1 + sigma) * median).

    Returns:
        The result of cv2.Canny(image, lower, upper).
    """
    median_value = np.median(image)
    low_candidate = (1.0 - sigma) * median_value
    high_candidate = (1.0 + sigma) * median_value
    # Keep both thresholds inside the 0..255 range and make them integers.
    lower = int(max(0, low_candidate))
    upper = int(min(255, high_candidate))
    return cv2.Canny(image, lower, upper)

# Usage: edges = auto_canny(image)

In [21]:
# Canny pipeline: grayscale -> Gaussian smoothing -> edge map.
gray__ = cv2.cvtColor(im__, code=cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray__, ksize=(5, 5), sigmaX=1)
edges = cv2.Canny(blur, threshold1=100, threshold2=200)

window = "output"
cv2.namedWindow(window, cv2.WINDOW_NORMAL)
cv2.resizeWindow(window, 900, 900)
cv2.imshow(window, edges)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Thresholding 
- Thresholding is a basic image processing technique that converts an image into pure black & white(binary).

- It decides:
    - Pixels above the threshold -> white(255)
    - Pixels below the threshold -> black(0)

- It is mainly used for:
    - Removing background
    - Preparing images for OCR
    - Detecting shapes
    - Scanned documents
    - Simple segmentation
    

# Global Thresholding 
- Use one single threshold value for the entire image.
- Example: threshold = 127
    - Pixel > 127 -> white
    - Pixel <= 127 -> black
- Good FOR:
    - Images with even lighting
    - Simple backgrounds
    - Clear contrast

- Bad for:
    - Shadows
    - Uneven lighting
    - Dark areas and bright areas mixed

# Adaptive Thresholding 
- Instead of one value for the whole image, the image is divided into small regions and each region gets its own threshold.
- Perfect when lighting is not consistent across the image.
- Best for:
    - Scanned documents
    - Paper with shadows
    - Text on uneven lighting
    - Old,stained papers

# Why it works:
- It calculates threshold = (mean value of the neighborhood pixels) - C
- So local lighting changes don't cause problems



# Otsu's Thresholding 
- Otsu's Method automatically finds the best global threshold based on histogram
- You don't decide the number ; Otsu does it.

# When to use:
- when you don't know the correct threshold
- Image has two clear peaks in histogram (object + background)

# How it works(simple)
- Otsu analyzes the grayscale histogram and finds the point where:
    - The separation between object pixels and background pixels is maximum (largest between-class variance)

In [24]:
gray = cv2.cvtColor(im__, cv2.COLOR_BGR2GRAY)

# cv2.threshold returns (threshold_used, image). Avoid binding the unused
# value to `__`: IPython uses `_`/`__` for its output history.
# Global threshold: one fixed cut-off (127) for the whole image.
th = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)[1]

# Adaptive threshold: per-pixel cut-off from a Gaussian-weighted 11x11
# neighbourhood, minus the constant 2.
adaptive_th = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)

# Otsu: the threshold passed in (0) is ignored and the computed one is
# returned; keep it in `otsu_value` so it can be inspected.
otsu_value, otsu_th = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

# Left to right: global, adaptive, Otsu.
new_img = np.hstack((th, adaptive_th, otsu_th))
cv2.namedWindow("output", cv2.WINDOW_NORMAL)
cv2.resizeWindow("output", 900, 900)
cv2.imshow("output", new_img)
cv2.waitKey(0)
cv2.destroyAllWindows()
