<a href="https://colab.research.google.com/github/MarMarhoun/freelance_work/blob/main/side_projects/NLP_projs/eda_streamlit/outlier_detect.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Outlier detection using Streamlit and TensorFlow

Using Streamlit and TensorFlow, you can create a user-friendly outlier-detection app that allows users to upload a dataset, preprocess the data, train an autoencoder for anomaly detection, and display the results. Here's an example of how you can create such an app using the previously provided code as a starting point:

First, install Streamlit if you haven't already: `!pip install streamlit`


Next, create a new Python script and import the required libraries:

In [None]:
import os
import sys
import glob
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.metrics import pairwise_distances
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.utils import to_categorical
from PIL import Image
import streamlit as st
import warnings

warnings.filterwarnings("ignore")

Define helper functions for loading data and creating the autoencoder:


In [None]:
def load_data(data_dir):
    """Load ``data.csv`` from ``data_dir`` as a float32 feature matrix.

    Only numeric columns are kept, and rows containing missing values are
    dropped, so the result can be fed straight to a Keras model. Any further
    preprocessing (scaling, encoding, ...) is left to the caller.

    Args:
        data_dir: Directory containing the uploaded ``data.csv``.

    Returns:
        A 2-D ``numpy.ndarray`` of dtype ``float32`` with one row per sample.

    Raises:
        FileNotFoundError: If ``data.csv`` does not exist in ``data_dir``.
        ValueError: If the file has no complete numeric rows.
    """
    csv_path = os.path.join(data_dir, "data.csv")
    frame = pd.read_csv(csv_path)

    # Non-numeric columns cannot be reconstructed by a dense autoencoder.
    numeric = frame.select_dtypes(include=[np.number]).dropna()
    if numeric.empty:
        raise ValueError(f"No complete numeric rows found in {csv_path}")
    return numeric.to_numpy(dtype=np.float32)

def create_autoencoder(input_shape, latent_dim):
    """Build and compile a single-hidden-layer dense autoencoder.

    The input is flattened, squeezed through a ``latent_dim``-unit ReLU
    bottleneck, expanded back to the original number of values, and reshaped
    to ``input_shape`` so the model can be trained with the input as its own
    target.

    Args:
        input_shape: Shape of one sample, without the batch dimension.
        latent_dim: Number of units in the bottleneck layer.

    Returns:
        A compiled ``tf.keras`` ``Model`` (Adam optimizer, MSE loss).
    """
    sample_shape = tuple(input_shape)
    flat_dim = int(np.prod(sample_shape))

    inputs = Input(shape=sample_shape)
    flattened = tf.keras.layers.Flatten()(inputs)
    encoded = Dense(latent_dim, activation="relu")(flattened)
    # Linear output + MSE makes no assumption about the feature range.
    decoded = Dense(flat_dim, activation="linear")(encoded)
    outputs = tf.keras.layers.Reshape(sample_shape)(decoded)

    autoencoder = Model(inputs, outputs)
    autoencoder.compile(optimizer="adam", loss="mse")
    return autoencoder

In [None]:
def main_app():
    """Streamlit page: upload a dataset, train an autoencoder, rank outliers.

    The uploaded file is saved as ``./data/data.csv``, loaded through
    ``load_data``, split 80/20, and used to train the model returned by
    ``create_autoencoder``. Each test sample is then scored by the Euclidean
    norm of its reconstruction residual, and the ten highest-scoring samples
    are listed.
    """
    st.title("Outlier Detection using Autoencoder and TensorFlow")

    # File upload. `type` expects a list of extensions, not a "a|b" pattern.
    # NOTE(review): a .zip upload is still written to data.csv below, so
    # load_data must be able to handle it -- confirm the intended behavior.
    uploaded_file = st.file_uploader("Upload dataset (csv or image folder)", type=["csv", "zip"])
    if uploaded_file is None:
        return

    data_dir = "./data"
    os.makedirs(data_dir, exist_ok=True)
    with open(os.path.join(data_dir, "data.csv"), "wb") as f:
        f.write(uploaded_file.getbuffer())

    # Load data
    data = load_data(data_dir)

    # Preprocess data
    # Add your preprocessing steps here

    # Train-test split
    X_train, X_test = train_test_split(data, test_size=0.2, random_state=42)
    X_train = np.asarray(X_train, dtype="float32")
    X_test = np.asarray(X_test, dtype="float32")

    # Train autoencoder
    input_shape = X_train.shape[1:]
    latent_dim = 16
    autoencoder = create_autoencoder(input_shape, latent_dim)
    autoencoder.fit(X_train, X_train, epochs=50, batch_size=32, shuffle=True, validation_data=(X_test, X_test))

    # Evaluate the autoencoder: one error per sample (row-wise residual norm),
    # not the full n-by-n matrix of distances between unrelated samples.
    X_test_reconstructed = np.asarray(autoencoder.predict(X_test))
    residual = (X_test - X_test_reconstructed).reshape(len(X_test), -1)
    errors = np.linalg.norm(residual, axis=1)

    # Display results
    st.header("Results")
    st.write("Mean reconstruction error:", np.mean(errors))
    st.write("Standard deviation of the reconstruction error:", np.std(errors))

    st.subheader("Top 10 images with the highest reconstruction errors:")
    image_dir = os.path.join(data_dir, "images")
    for idx in np.argsort(errors)[::-1][:10]:
        original_path = os.path.join(image_dir, f"{idx}.png")
        reconstructed_path = os.path.join(image_dir, f"{idx}_reconstructed.png")
        if os.path.exists(original_path) and os.path.exists(reconstructed_path):
            st.image(Image.open(original_path), caption=f"Original image")
            st.image(Image.open(reconstructed_path), caption=f"Reconstructed image")
        else:
            # No rendered images for this sample: report the score instead
            # of crashing on a missing file.
            st.write(f"Sample {idx}: reconstruction error {errors[idx]:.4f}")

if __name__ == "__main__":
    main_app()

Run the Streamlit app:


In [None]:
!streamlit run app.py

This example provides a simple Streamlit app for outlier detection using an autoencoder. You can further customize the app by adding more features, such as different preprocessing options, model architectures, and visualizations.



Based on the provided content, I can see that the initial code focused on creating a Streamlit app for a REST API for outlier detection. I'll build upon that code and add advanced features using TensorFlow for outlier detection.

First, make sure you have TensorFlow installed: `pip install tensorflow`.

Next, create a new directory named `tf_outlier_detection` and, inside it, create a Python script named `app.py`. After that, create another directory named `models`.

Here's the enhanced Streamlit app code:

In [None]:
import streamlit as st
import requests
import pandas as pd
import json
import tensorflow as tf
import numpy as np
import plotly.express as px
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import pairwise_distances

# Initialize model and load saved weights
@st.cache_resource
def load_model(model_path):
    """Build the 784 -> 16 -> 784 dense autoencoder and load its weights.

    Cached with ``st.cache_resource`` so the model object is created once and
    shared across Streamlit reruns instead of being rebuilt (or pickled) on
    every interaction.

    Args:
        model_path: Path to the saved weights file.

    Returns:
        The ``tf.keras`` ``Model`` with the weights loaded.
    """
    # Layers must be called on tensors, so the graph is wired step by step.
    inputs = Input(shape=(784,))
    encoded = Dense(16, activation='relu')(inputs)
    # The output needs 784 units so it can be reshaped to 28x28 downstream.
    # NOTE(review): assumes the saved weights come from this architecture.
    decoded = Dense(784, activation='sigmoid')(encoded)
    model = Model(inputs=inputs, outputs=decoded)
    model.load_weights(model_path)
    return model

# Detect outliers using TensorFlow Autoencoder
def detect_outliers_tf(model, data):
    """Return the reconstruction error of a single 784-value sample.

    The sample is flattened to shape ``(1, 784)``, passed through
    ``model.predict``, and compared with the prediction using the Euclidean
    distance between the two flattened vectors.

    Args:
        model: Object exposing ``predict(batch)`` that returns 784 values
            for a ``(1, 784)`` batch.
        data: Array-like holding exactly 784 values (e.g. a 28x28 image).

    Returns:
        The Euclidean distance between input and reconstruction as a float.

    Raises:
        ValueError: If ``data`` or the prediction does not hold 784 values.
    """
    sample = np.asarray(data, dtype=np.float32).reshape(1, 784)
    reconstructed = np.asarray(model.predict(sample), dtype=np.float32).reshape(1, 784)
    # Compare like with like: both sides are flat 784-vectors.
    return float(np.linalg.norm(sample - reconstructed))

# Define Streamlit app
def main():
    """Streamlit page: score an uploaded image with the pre-trained autoencoder.

    The uploaded image is converted to 28x28 grayscale, scaled to [0, 1],
    scored with ``detect_outliers_tf``, and shown next to the model's
    reconstruction.
    """
    # Local import: the import cell for this app does not bring in PIL.
    from PIL import Image

    st.title("Outlier Detection using Autoencoder and TensorFlow")

    # Method selection
    method = st.selectbox(label="Choose the method", options=["TensorFlow Autoencoder"])
    if method != "TensorFlow Autoencoder":
        return

    st.write("### TensorFlow Autoencoder")

    # Load pre-trained model
    model_path = "./models/tf_autoencoder_model.h5"
    model = load_model(model_path)

    # Image upload. `type` expects a list of extensions, not a "a|b" pattern.
    uploaded_image = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
    if uploaded_image is None:
        return

    img = Image.open(uploaded_image)
    # Single channel + 28x28 guarantees exactly 784 values, whatever the
    # mode of the uploaded file (RGB, RGBA, palette, ...).
    gray = img.convert("L").resize((28, 28))
    # Scale 8-bit pixels to [0, 1] to match the sigmoid output range.
    img_array = np.asarray(gray, dtype=np.float32).reshape(-1, 784) / 255.0

    # Detect outliers and display result
    distance = detect_outliers_tf(model, img_array)
    st.subheader("Result")
    st.write(f"Reconstruction error: {distance:.3f}")

    # Display image and the model's actual reconstruction
    reconstructed = np.clip(np.asarray(model.predict(img_array)).reshape(28, 28), 0.0, 1.0)
    col1, col2 = st.columns(2)
    with col1:
        st.write("Original image")
        st.image(img, caption="Uploaded Image", use_column_width=True)
    with col2:
        st.write("Reconstructed image")
        st.image(reconstructed, caption="Reconstructed Image", use_column_width=True)

if __name__ == "__main__":
    main()

Now create the TensorFlow autoencoder model and save the weights:



In [None]:
import os
import numpy as np
import tensorflow as tf
import streamlit as st
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint

# Download pre-trained model weights
def download_weights(url, filename):
    """Download model weights from ``url`` to ``filename`` unless already present.

    Args:
        url: HTTP(S) location of the weights file.
        filename: Local path to write; an existing file is left untouched.

    Raises:
        requests.HTTPError: If the server answers with an error status. The
            file is not written in that case, so an error page is never
            mistaken for cached weights on the next run.
    """
    if os.path.exists(filename):
        return

    # Local import: the import cell for this script does not bring in requests.
    import requests

    response = requests.get(url, timeout=60)
    response.raise_for_status()
    with open(filename, "wb") as f:
        f.write(response.content)

# Download dataset
def download_dataset(url, filename):
    """Download a dataset from ``url`` to ``filename`` unless already present.

    Args:
        url: HTTP(S) location of the dataset file.
        filename: Local path to write; an existing file is left untouched.

    Raises:
        requests.HTTPError: If the server answers with an error status. The
            file is not written in that case, so an error page is never
            mistaken for a cached dataset on the next run.
    """
    if os.path.exists(filename):
        return

    # Local import: the import cell for this script does not bring in requests.
    import requests

    response = requests.get(url, timeout=60)
    response.raise_for_status()
    with open(filename, "wb") as f:
        f.write(response.content)

# Load dataset
def load_dataset(filename):
    """Read the ``images`` and ``labels`` arrays from an ``.npz`` archive.

    Args:
        filename: Path to the ``.npz`` file.

    Returns:
        A ``(images, labels)`` tuple of ``numpy.ndarray``.

    Raises:
        KeyError: If the archive lacks an ``images`` or ``labels`` entry.
    """
    # np.load on an .npz keeps the file open; the context manager closes it
    # once both arrays have been read into memory.
    with np.load(filename) as data:
        return data['images'], data['labels']

# Build the autoencoder model
def build_autoencoder(input_shape):
    """Build a dense autoencoder whose output has the same shape as its input.

    The input is flattened before the dense layers so that they act on the
    whole sample rather than only on its last axis, then the decoder output
    is reshaped back to ``input_shape``.

    Args:
        input_shape: Shape of one sample without the batch dimension,
            e.g. ``(28, 28, 1)`` or ``(784,)``.

    Returns:
        An uncompiled ``tf.keras`` ``Model``.
    """
    sample_shape = tuple(input_shape)
    # Count every value in the sample, including the channel axis.
    flat_dim = int(np.prod(sample_shape))

    input_layer = Input(shape=sample_shape)
    flattened = tf.keras.layers.Flatten()(input_layer)
    encoded = Dense(16, activation='relu')(flattened)
    decoded = Dense(flat_dim, activation='sigmoid')(encoded)
    output_layer = tf.keras.layers.Reshape(sample_shape)(decoded)
    return Model(input_layer, output_layer)

# Train the autoencoder model
def train_autoencoder(model, dataset, batch_size):
    """Compile and fit ``model`` to reconstruct its own training inputs.

    The best checkpoint (by validation loss) is saved to
    ``autoencoder_weights.h5``.

    Args:
        model: Keras model to train; its ``input_shape`` decides how the
            training array is reshaped.
        dataset: ``(x_train, y_train)`` tuple; only ``x_train`` is used.
        batch_size: Mini-batch size passed to ``fit``.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    x_train, _ = dataset

    # Reshape to whatever the model expects instead of a hard-coded 784.
    sample_shape = tuple(model.input_shape[1:])
    x_train = np.asarray(x_train, dtype="float32").reshape((-1,) + sample_shape)

    # Binary cross-entropy needs targets in [0, 1].
    # NOTE(review): assumes values above 1 are 8-bit pixel data -- confirm.
    if x_train.size and x_train.max() > 1.0:
        x_train = x_train / 255.0

    callback = ModelCheckpoint('autoencoder_weights.h5', save_best_only=True)
    model.compile(loss='binary_crossentropy', optimizer='adam')
    return model.fit(x_train, x_train, epochs=50, batch_size=batch_size, callbacks=[callback], validation_split=0.2)

# Detect outliers
def detect_outliers(model, image):
    """Score one image by its mean absolute reconstruction error.

    A leading batch axis is added to ``image``, the batch is passed to
    ``model.predict``, and both input and prediction are viewed as stacks of
    28x28 frames before the absolute differences are averaged.

    Args:
        model: Object exposing ``predict(batch)``.
        image: Array-like holding a multiple of 784 values.

    Returns:
        The mean absolute difference between input and reconstruction.
    """
    batch = np.asanyarray(image)[np.newaxis, ...]
    prediction = model.predict(batch)
    residual = batch.reshape(-1, 28, 28) - prediction.reshape(-1, 28, 28)
    return np.abs(residual).mean()

# Streamlit app
def main():
    """Streamlit page: train (once) an autoencoder and score an uploaded image.

    Training only runs when ``autoencoder_weights.h5`` is missing, because
    Streamlit re-executes the script on every interaction. The uploaded image
    is converted to 28x28 grayscale, scaled to [0, 1], and flagged as an
    outlier when its reconstruction error exceeds 0.2.
    """
    # Local imports: the import cell for this script does not bring these in.
    from PIL import Image
    from sklearn.model_selection import train_test_split

    # Title
    st.title("Outlier Detection using Streamlit and TensorFlow")

    # Build autoencoder model
    input_shape = (28, 28, 1)
    model = build_autoencoder(input_shape)

    weights_path = 'autoencoder_weights.h5'
    if not os.path.exists(weights_path):
        # Download dataset
        url = "https://raw.githubusercontent.com/susanli2016/us-accident-data/master/accidents.npz"
        filename = "accidents.npz"
        download_dataset(url, filename)

        # Load dataset
        images, labels = load_dataset(filename)
        train_data, _ = train_test_split(images, test_size=0.2, random_state=42)

        # Train autoencoder model
        batch_size = 32
        train_autoencoder(model, (train_data, train_data), batch_size)
    model.load_weights(weights_path)

    # Upload image
    st.header("Upload a grayscale image of 28x28 pixels")
    uploaded_image = st.file_uploader("Choose an image", type="jpg")
    if uploaded_image is None:
        return

    # Display uploaded image
    image = Image.open(uploaded_image)
    image = image.convert('L')
    image = image.resize((28, 28))
    st.image(image, caption='Uploaded Image', use_column_width=True)

    # Scale 8-bit pixels to [0, 1]: the model ends in a sigmoid and the 0.2
    # threshold below is only meaningful on that scale.
    pixels = np.asarray(image, dtype=np.float32) / 255.0

    # Detect outliers and show result
    distance = float(detect_outliers(model, np.expand_dims(pixels, axis=-1)))
    st.write("")
    st.write("### Outlier Detection Result")
    st.write("The reconstruction error is:", round(distance, 3))

    # Show whether the image is an outlier or not
    if distance > 0.2:
        st.write("The image is an outlier!")
    else:
        st.write("The image is not an outlier!")

if __name__ == "__main__":
    main()