In [1]:
import cv2
import numpy as np
from os import listdir
from os.path import isfile, join
from sklearn.cluster import KMeans
import math
import time

In [2]:
# Tunable parameters shared by the cells below.
learning_rate = 0.001  # alpha: blending factor applied to the mixture weights on every frame
k = 4                  # number of K-Means clusters, i.e. Gaussians kept per pixel
num_of_frames = 999    # depth (frame capacity) of the pre-allocated frame buffer
threshold = 0.85       # cumulative-weight cutoff used to pick the background Gaussians

In [3]:
# Input video details
video_path = 'umcp.mpg'
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # A missing or undecodable file does not raise on construction; without
    # this check width/height silently become 0 and every later cell
    # operates on empty data.
    raise IOError(f"Could not open input video '{video_path}'")
width = round(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = round(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Output video details: XVID-encoded AVI at 30 fps, written as single-channel
# frames (isColor=0) because the foreground masks are greyscale.
for_video = cv2.VideoWriter("output.avi", cv2.VideoWriter_fourcc(*'XVID'), 30, (width, height), 0)


In [4]:
#Function to extract 3-dimension frame np array
def make_frames(cap):
    """Decode the video into a stack of greyscale frames.

    Parameters
    ----------
    cap : cv2.VideoCapture
        Opened capture. It is read from its current position until it is
        exhausted or ``num_of_frames`` frames have been stored.

    Returns
    -------
    numpy.ndarray
        float64 array of shape ``(height, width, n)`` where ``n`` is the number
        of frames actually decoded (at most ``num_of_frames``). Frame ``t`` is
        ``result[:, :, t]``.

    Raises
    ------
    ValueError
        If not a single frame could be read from ``cap``.

    Notes
    -----
    Relies on the notebook-level globals ``height``, ``width`` and
    ``num_of_frames``.
    """
    frame_matrix = np.zeros((height, width, num_of_frames))
    frame_count = 0
    # Stop at the buffer capacity: a video longer than num_of_frames would
    # otherwise index past the last slot and raise IndexError.
    while frame_count < num_of_frames:
        success, frame = cap.read()
        if not success:
            break
        # VideoCapture.read() returns frames in BGR channel order, so the
        # BGR conversion code is required to get correct luma weights.
        frame_matrix[:, :, frame_count] = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        frame_count += 1

    if frame_count == 0:
        raise ValueError("No frames could be read from the video capture")

    # Drop the unused tail so that all-zero padding frames are never treated
    # as real video frames by later processing.
    return frame_matrix[:, :, :frame_count]

In [5]:
def kmean_init(frame_matrix, random_state=None):
    """Initialise the per-pixel Gaussian mixtures from K-Means on the first frame.

    The first frame's intensities are clustered into ``k`` groups. Every pixel
    starts with the same ``k`` Gaussians:

    * mean     -> the cluster centres,
    * variance -> K-Means inertia divided by the pixel count (mean squared
                  distance to the assigned centre; deliberately broad),
    * weight   -> fraction of pixels assigned to each cluster.

    Parameters
    ----------
    frame_matrix : numpy.ndarray
        Frame stack indexed as ``frame_matrix[:, :, t]``.
    random_state : int, RandomState instance or None, optional
        Forwarded to ``KMeans`` so the initialisation can be made reproducible.
        The default ``None`` keeps the previous unseeded behaviour.

    Returns
    -------
    (means, weights, variances) : tuple of numpy.ndarray
        Three float arrays of shape ``(n_pixels, k)`` where ``n_pixels`` is the
        number of pixels in one frame, in row-major (flattened) order.

    Notes
    -----
    Relies on the notebook-level global ``k``.
    """
    first_frame = frame_matrix[:, :, 0].reshape(-1, 1)
    # Derive the pixel count from the data instead of hard-coding 84480
    # (240*352), which only holds for one particular video resolution.
    n_pixels = first_frame.shape[0]

    # Using the Scikit k means library. n_init is pinned to the value the
    # library was already using by default, which silences the FutureWarning
    # about that default changing.
    kmodel = KMeans(n_clusters=k, n_init=10, random_state=random_state)
    kmodel.fit(first_frame)

    centres = kmodel.cluster_centers_.ravel()
    mean_sq_distance = kmodel.inertia_ / n_pixels
    # minlength guarantees k entries even if a cluster ends up empty.
    cluster_share = np.bincount(kmodel.labels_, minlength=k) / n_pixels

    # Broadcast the k per-cluster values to every pixel row.
    means = np.empty((n_pixels, k))
    means[:] = centres
    variances = np.full((n_pixels, k), float(mean_sq_distance))
    weights = np.empty((n_pixels, k))
    weights[:] = cluster_share
    return means, weights, variances

In [6]:
def adaptive_GMM(frame_matrix,means,weights,variances):
    """Segment foreground from background with a per-pixel adaptive Gaussian mixture.

    For every frame after the first, each pixel is compared with its ``k``
    Gaussians. A Gaussian "matches" when the pixel lies within 2.5 standard
    deviations of its mean. Matched Gaussians have their weight, mean and
    variance pulled towards the pixel; unmatched ones only lose weight. If
    nothing matches, the lowest ranked Gaussian is replaced by a wide one
    centred on the pixel. Gaussians are ranked by weight/sigma, and the leading
    ones whose cumulative weight first exceeds ``threshold`` form the
    background model. A pixel matching any of them is written as 0 (black),
    otherwise 255 (white).

    Parameters
    ----------
    frame_matrix : numpy.ndarray
        Frame stack indexed as ``frame_matrix[:, :, t]``. Frame 0 is assumed to
        have been used for initialisation and is not segmented.
    means, weights, variances : numpy.ndarray
        Float arrays of shape ``(n_pixels, k)`` holding the mixture parameters,
        rows in row-major pixel order. They are updated IN PLACE.

    Returns
    -------
    numpy.ndarray or None
        The foreground mask (values 0.0 / 255.0, same height and width as a
        frame) of the last processed frame, or ``None`` when ``frame_matrix``
        holds fewer than two frames.

    Side effects
    ------------
    Writes ``fg_frame_folder/<t>.jpg`` for every processed frame and appends
    each mask to the notebook-level ``for_video`` writer. Also reads the
    globals ``learning_rate`` and ``threshold``.
    """
    import os  # local import: the notebook's import cell only pulls individual names out of os

    out_dir = 'fg_frame_folder'
    # cv2.imwrite returns False instead of raising when the target directory
    # is missing, so make sure it exists before the first write.
    os.makedirs(out_dir, exist_ok=True)

    frame_height, frame_width, n_frames = frame_matrix.shape
    # Normalisation constant of the 1-D Gaussian density, sqrt(2*pi).
    pdf_norm = math.sqrt(2.0 * math.pi)
    fg_frame = None

    # Every frame after the initialisation frame, including the last one.
    for t in range(1, n_frames):
        # Column vector of pixel intensities; broadcasts against (n_pixels, k).
        pixels = frame_matrix[:, :, t].reshape(-1, 1)

        # Matching condition: |pixel - mean| < 2.5 * sigma
        sigma = np.sqrt(variances)
        deviation = pixels - means
        matched = np.abs(deviation) < 2.5 * sigma

        # rho = alpha * N(pixel | mean, variance), evaluated only where matched.
        # It stays 0 elsewhere so the updates below leave unmatched Gaussians
        # untouched.
        rho = np.zeros(means.shape)
        rho[matched] = (
            learning_rate
            * np.exp(-(deviation[matched] ** 2) / (2.0 * variances[matched]))
            / (pdf_norm * sigma[matched])
        )

        # Weights decay everywhere and are boosted by alpha where matched.
        weights[...] = (1 - learning_rate) * weights + learning_rate * matched
        means[...] = (1 - rho) * means + rho * pixels
        # The variance update deliberately uses the already-updated mean.
        variances[...] = (1 - rho) * variances + rho * (pixels - means) ** 2

        # Normalising the weights so each pixel's mixture sums to 1.
        weights /= weights.sum(axis=1, keepdims=True)

        # Rank Gaussians per pixel by weight/sigma, most probable first.
        fitness = weights / np.sqrt(variances)
        ranking = np.argsort(-fitness, axis=1, kind='stable')

        # If no match found, replace the least probable Gaussian with
        # mean=pixel value and a high variance; its weight is already small.
        unmatched_px = np.flatnonzero(~matched.any(axis=1))
        weakest = ranking[unmatched_px, -1]
        means[unmatched_px, weakest] = pixels[unmatched_px, 0]
        variances[unmatched_px, weakest] = 999

        # Background = the leading Gaussians up to AND INCLUDING the first one
        # whose cumulative weight exceeds the threshold. A Gaussian at rank r
        # belongs to it when the cumulative weight of the ranks before it has
        # not yet exceeded the threshold (rank 0 always belongs).
        ranked_weights = np.take_along_axis(weights, ranking, axis=1)
        cumulative = np.cumsum(ranked_weights, axis=1)
        in_background = np.ones(ranking.shape, dtype=bool)
        in_background[:, 1:] = cumulative[:, :-1] <= threshold

        # If the pixel matches any background Gaussian it is set to 0 (black),
        # else 255 (white).
        ranked_means = np.take_along_axis(means, ranking, axis=1)
        ranked_sigma = np.sqrt(np.take_along_axis(variances, ranking, axis=1))
        explains_pixel = np.abs(pixels - ranked_means) < 2.5 * ranked_sigma
        is_background = (in_background & explains_pixel).any(axis=1)

        fg_frame = np.where(is_background, 0.0, 255.0).reshape(frame_height, frame_width)

        # Obtaining frames and video
        cv2.imwrite(f'{out_dir}/{t}.jpg', fg_frame)
        for_video.write(np.uint8(fg_frame))
    return fg_frame

In [None]:
#main function:
try:
    # creating frame matrix
    frame_matrix = make_frames(cap)
    #initializing KMeans
    means,weights,variances = kmean_init(frame_matrix)
    #Creating foreground frames using adaptive GMM algo.
    fg_frame = adaptive_GMM(frame_matrix,means,weights,variances)
finally:
    # Release the handles even if processing fails or is interrupted: the
    # writer has to be released for output.avi to be finalised on disk.
    # Re-run the video setup cell before running this cell again.
    cap.release()
    for_video.release()

  super()._check_params_vs_input(X, default_n_init=10)


0.0595171332359314 1
0.120111350218455 2
0.19096364180246989 3
0.2595369021097819 4
0.3256421605745951 5
0.38780066967010496 6
0.4585893670717875 7
0.6554091493288676 8
0.9218544562657675 9
1.007665999730428 10
1.0728747248649597 11
1.138648529847463 12
1.2435908595720926 13
1.3622647841771445 14
1.4731018582979838 15
1.6146745125452677 16
1.696442198753357 17
1.7820087154706319 18
1.8588404337565103 19
1.9473477244377135 20
2.060035792986552 21
2.1554732879002887 22
2.3723522901535032 23
2.698834494749705 24
3.0528764923413596 25
3.3083728233973186 26
3.442226541042328 27
3.804072117805481 28
3.9208229541778565 29
4.039537219206492 30
4.1255381345748905 31
4.2082224726676944 32
4.329567539691925 33
4.51127081712087 34
4.6392142136891685 35
4.773836839199066 36
4.869250146547953 37
4.957652350266774 38
5.0808455149332685 39
5.147391418615977 40
5.244740919272105 41
5.32785484790802 42
5.427608776092529 43
5.53380331993103 44
5.616519320011139 45
5.725404528776805 46
5.812382558981578 4

49.75254168113073 372
49.99231122334798 373
50.314113732179 374
50.68771344820659 375
50.995925629138945 376
51.34498243331909 377
51.72547304232915 378
52.076753449440005 379
52.361246355374654 380
52.49927715460459 381
52.63139249881109 382
52.78077935775121 383
52.870444452762605 384
52.98349858125051 385
53.06147762934367 386
53.13379682699839 387
53.205701073010765 388
53.2857072075208 389
53.357593242327376 390
53.43118324279785 391
53.504791561762495 392
53.59230372111003 393
53.67363024950028 394
53.8424529115359 395
54.19675358136495 396
54.67645820379257 397
54.992229135831195 398
55.293946504592896 399
55.59579476912816 400
55.945334164301556 401
56.248671571413674 402
56.5678808093071 403
56.87740463813146 404
57.1533191760381 405
57.44538024266561 406
57.7605384906133 407
57.85702774524689 408
57.95972136259079 409
58.05107369820277 410
58.150080716609956 411
58.22085426648458 412
58.320047740141554 413
58.443280716737114 414
58.64872791369756 415
58.864095799128215 416
59