## Streamlit App with K-means and Continual Learning

In [None]:
# Install the app's dependencies into the kernel's own environment.
# %pip (rather than !pip) guarantees the packages land where this notebook
# imports from; -q keeps the install log out of the saved notebook.
# The recorded run of this notebook resolved streamlit to version 1.21.0.
%pip install -q streamlit pyngrok

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting streamlit
  Downloading streamlit-1.21.0-py2.py3-none-any.whl (9.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.7/9.7 MB[0m [31m45.7 MB/s[0m eta [36m0:00:00[0m
Collecting gitpython!=3.1.19
  Downloading GitPython-3.1.31-py3-none-any.whl (184 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m184.3/184.3 kB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting blinker>=1.0.0
  Downloading blinker-1.6.2-py3-none-any.whl (13 kB)
Collecting pydeck>=0.1.dev5
  Downloading pydeck-0.8.1b0-py2.py3-none-any.whl (4.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.8/4.8 MB[0m [31m50.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pympler>=0.9
  Downloading Pympler-1.0.1-py3-none-any.whl (164 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m164.8/164.8 kB[0m [31m11.7 MB/s[0m eta [36m0:00:00[

In [None]:
import csv
# Create (or reset to empty) the storage file for features of unseen images.
# Nothing is written here; the Streamlit app appends one row per uploaded image.
# newline='' matches how the app opens this same CSV file.
with open('storage_new_imgs.csv', mode='w', newline=''):
    pass

In [None]:
%%writefile app.py
import streamlit as st
import csv
import argparse
import random
import sys
import os
import time
import math
import logging
import numpy as np
import pickle
import tensorflow
from PIL import Image


# Importing the model and methods for transfer learning
from keras.applications.vgg16 import VGG16
from keras.models import Model
from keras.applications.vgg16 import preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array, load_img

@st.cache_resource
def _load_feature_extractor():
    """
    Builds the VGG16-based feature extractor.
    Cached with st.cache_resource so the network is constructed once and
    reused across Streamlit reruns instead of being rebuilt per upload.
    return: Keras model whose output is the second-to-last VGG16 layer
    """
    base_model = VGG16()
    # Drop the final classification layer: expose the layer before it as output
    return Model(inputs=base_model.inputs, outputs=base_model.layers[-2].output)


def extract_features(target_file):
    """
    This function preprocesses the input target image
    and runs it through the VGG16 feature extractor.
    target_file: image path or file-like object accepted by load_img
    return: Extracted features with dimensions 4096
    """
    # DL model for transfer learning (built once, then served from the cache)
    model = _load_feature_extractor()
    # load the image as a 224x224 array
    img = load_img(target_file, target_size=(224, 224))
    # convert from image to numpy array
    img = img_to_array(img)
    # reshape the data for the model reshape(num_of_samples, dim 1, dim 2, channels)
    reshaped_img = img.reshape(1, 224, 224, 3)
    # prepare image for model
    imgx = preprocess_input(reshaped_img)
    # get the feature vector; use_multiprocessing is not passed because it only
    # applies to generator/Sequence inputs and newer Keras versions reject it
    features = model.predict(imgx)
    return features

def preprocess_input_img_for_kmeans(input_target_img_path):
    """
    This function uses the extract_features function to preprocess
    and extract the features from the target input image, then applies
    the pickled PCA transform to reduce the high dimensional features
    (4096) to 50.
    input_target_img_path: image path or file-like object to process
    return: Reduced features with dimensions 50
    """
    # Extracting the features from the target input image
    img_feat = extract_features(input_target_img_path)
    # Loading PCA pkl file; the context manager closes the handle promptly.
    # NOTE: pickle.load executes arbitrary code, so only load trusted files.
    pca_pkl = "pca_luciferase.pkl"
    with open(pca_pkl, "rb") as pca_file:
        pca = pickle.load(pca_file)
    # Reducing high dimensionality
    reduced_feat = pca.transform(img_feat)
    return reduced_feat

def load_model():
    """
    Reads the pickled ML model from 'kmeans_luciferase_cl.pkl'.
    return: the unpickled ML model
    """
    with open('kmeans_luciferase_cl.pkl', 'rb') as model_file:
        clustering_model = pickle.load(model_file)
    return clustering_model

def save_new_model(model):
    """
    This function writes the updated ML model to 'kmeans_luciferase_cl.pkl',
    replacing the file's previous contents.
    model: the (picklable) model object to persist
    return: None
    raises: whatever pickle raises for an unpicklable object; in that case the
            existing file is left untouched
    """
    # Serialise before opening the file: 'wb' truncates on open, so pickling
    # first ensures a pickling failure cannot wipe the previously saved model.
    payload = pickle.dumps(model)
    with open('kmeans_luciferase_cl.pkl', 'wb') as f:
        f.write(payload)

# Streamlit app's code starts below:
# Flow: show the uploaded image, reduce it to PCA features, store those
# features in a CSV, and either predict with the current model or, once enough
# new images have been collected, refit the model on them before predicting.
from scipy.optimize import linear_sum_assignment

NEW_IMGS_CSV = 'storage_new_imgs.csv'
# Number of stored unseen images that triggers a model update
RETRAIN_THRESHOLD = 3
# Share of the previous centroids kept when blending with the refit centroids
WEIGHT_DECAY = 0.5


def _read_pending_features(csv_path=NEW_IMGS_CSV):
    """Return the stored feature rows as lists of floats, ignoring blank rows."""
    with open(csv_path, newline='') as csv_file:
        return [[float(value) for value in row]
                for row in csv.reader(csv_file) if row]


def _align_to_previous(prev_centers, new_centers):
    """
    Reorder new_centers so that row i is the refit centroid matched to
    prev_centers[i]. A fresh fit numbers its clusters arbitrarily, so the
    centroids must be paired up (minimum total distance) before they can be
    blended index by index.
    """
    cost = np.linalg.norm(prev_centers[:, None, :] - new_centers[None, :, :], axis=2)
    prev_idx, new_idx = linear_sum_assignment(cost)
    aligned = np.empty_like(new_centers)
    aligned[prev_idx] = new_centers[new_idx]
    return aligned


st.title('Unsupervised Clustering of Luciferase Images')
st.markdown("Streamlit web application to identify cluster ID of a given plant image 🌱🌿🍃 using K-means")
uploaded_file = st.file_uploader('upload image', type = 'jpg')
if uploaded_file is not None:
    # Displays uploaded image
    image = Image.open(uploaded_file)
    st.image(image, caption='Uploaded Image', use_column_width=True)
    # Preprocess input image for K-Means
    st.markdown('Preprocessing uploaded image...')
    reduced_feat = preprocess_input_img_for_kmeans(uploaded_file)
    # Append this image's features to the storage of unseen images
    with open(NEW_IMGS_CSV, 'a', newline='') as csvfile:
        csv.writer(csvfile).writerow(reduced_feat[0])
    model = load_model()
    # Checks number of unseen images stored (including the one just added)
    # and takes required action
    new_img_feat_lst = _read_pending_features()
    new_img_feat_count = len(new_img_feat_lst)
    # Denominator shown next to the predicted cluster ID.
    # NOTE(review): presumably the highest cluster index of a 3-cluster model — confirm.
    cnt_of_predictions = 2

    if new_img_feat_count < RETRAIN_THRESHOLD:
        # Cast to the centroids' dtype so prediction also works after an
        # update has changed the dtype of the stored centroids
        query_feat = np.asarray(reduced_feat, dtype=model.cluster_centers_.dtype)
        output_prediction = model.predict(query_feat)[0]
        distance_from_centroids = model.transform(query_feat)
        success_msg = f'''
                    Prediction generated from original model!

                    Current number of new images collected: {new_img_feat_count}

                    Distance from centroids: {distance_from_centroids}

                    Uploaded image belongs to cluster: {output_prediction}/{cnt_of_predictions}
                    '''
        st.success(success_msg)

    # model update happens once the number of unseen images reaches the threshold
    else:
        prev_weights = model.cluster_centers_.copy()
        st.markdown('Intiating continual learning...')
        time.sleep(0.01)
        st.markdown('Learning from new set of images...')
        # updating model on the stored unseen images
        model.fit(new_img_feat_lst)
        # pair each refit centroid with its previous counterpart, then blend
        new_weights = _align_to_previous(prev_weights, model.cluster_centers_)
        penalized_weights = (1 - WEIGHT_DECAY) * new_weights + WEIGHT_DECAY * prev_weights
        model.cluster_centers_ = penalized_weights

        # saving new version of model; save_new_model opens the file in 'wb'
        # mode, which replaces the old contents, so no explicit delete is needed
        save_new_model(model)

        # loading new version of model
        model = load_model()

        # clears the stored new-image features; the file is truncated without
        # writing a row, because a blank row would be counted as a stored image
        with open(NEW_IMGS_CSV, 'w', newline=''):
            pass

        # outputs prediction result
        query_feat = np.asarray(reduced_feat, dtype=model.cluster_centers_.dtype)
        output_prediction = model.predict(query_feat)[0]
        distance_from_centroids = model.transform(query_feat)

        success_msg = f'''
                    Prediction generated from updated model trained on unseen set of 3 images

                    Distance from centroids: {distance_from_centroids}

                    Uploaded image belongs to cluster: {output_prediction}/{cnt_of_predictions}
                    '''
        st.success(success_msg)


Writing app.py


In [None]:
# Start the Streamlit app in the background, discarding its stdout and stderr.
!streamlit run app.py&>/dev/null&

In [None]:
!ngrok authtoken 2NRXnARWJZFxh7ReAePMyKr33CQ_7gdZTBPyz4SWTpBpY1itv

Authtoken saved to configuration file: /root/.ngrok2/ngrok.yml


In [None]:
# Download the ngrok binary archive. -nc skips the download when the archive is
# already present, so re-running the notebook does not create duplicate copies.
!wget -nc https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip

--2023-04-26 18:09:33--  https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
Resolving bin.equinox.io (bin.equinox.io)... 18.205.222.128, 54.237.133.81, 54.161.241.46, ...
Connecting to bin.equinox.io (bin.equinox.io)|18.205.222.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 13921656 (13M) [application/octet-stream]
Saving to: ‘ngrok-stable-linux-amd64.zip’


2023-04-26 18:09:35 (14.7 MB/s) - ‘ngrok-stable-linux-amd64.zip’ saved [13921656/13921656]



In [None]:
# Extract the ngrok binary. -o overwrites an existing file without prompting,
# so a re-run does not stall waiting for interactive confirmation.
!unzip -o ngrok-stable-linux-amd64.zip

Archive:  ngrok-stable-linux-amd64.zip
  inflating: ngrok                   


In [None]:
# Launch the ngrok binary in the background, tunnelling HTTP traffic to local port 8501.
get_ipython().system_raw('./ngrok http 8501 &')

In [None]:
# Query ngrok's local API on port 4040 and print the public URL of the first tunnel.
! curl -s http://localhost:4040/api/tunnels | python3 -c "import sys, json; print(json.load(sys.stdin)['tunnels'][0]['public_url'])"

https://968d-34-82-216-147.ngrok-free.app


In [None]:
# Run the app in the foreground so its log output is shown in this cell.
# NOTE(review): the recorded output reports port 8502, whereas the ngrok tunnel
# forwards port 8501 — presumably the background instance started earlier
# already holds 8501; confirm which instance is meant to be served.
!streamlit run /content/app.py


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to False.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8502[0m
[34m  External URL: [0m[1mhttp://34.82.216.147:8502[0m
[0m
