In [1]:
# Install Streamlit and required ML/audio libraries
!pip install -q streamlit
!pip install -q scikit-learn pandas numpy librosa joblib

[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m9.0/9.0 MB[0m [31m79.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m6.9/6.9 MB[0m [31m87.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [5]:
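# app.py - save this cell's code as app.py and launch it with `streamlit run app.py`; executing it directly in the notebook kernel only produces the "without `streamlit run`" warnings shown in the cell output.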

import streamlit as st
import numpy as np
import pandas as pd
import joblib
import librosa
import os
import io

# --- Configuration & Model Loading ---

# Load pre-trained models and preprocessing objects (Replace with your actual paths)
@st.cache_resource
def load_models():
    """Load the models and preprocessing objects for both prediction tasks.

    For each task ('audio' and 'defect') the files
    ``<task>_lr_model.joblib``, ``<task>_svm_model.joblib``,
    ``<task>_perceptron_model.joblib``, ``<task>_dnn_model.joblib`` and
    ``<task>_feature_scaler.joblib`` are read from the working directory.

    Loading is all-or-nothing per task: if any file of a task is missing, an
    error is shown via ``st.error`` and that task's dict stays empty, so the UI
    never offers a model whose scaler (or label list) could not be loaded.

    Returns:
        dict: ``{'audio': {...}, 'defect': {...}}``. A fully loaded task dict
        maps the display names 'Logistic Regression', 'SVM', 'Perceptron' and
        'DNN' to the loaded objects and also contains a ``'scaler'`` entry;
        the defect dict additionally contains a ``'labels'`` list.
    """
    # Display name shown in the UI -> slug used in '<task>_<slug>_model.joblib'.
    model_slugs = {
        'Logistic Regression': 'lr',
        'SVM': 'svm',
        'Perceptron': 'perceptron',
        'DNN': 'dnn',
    }
    # Example defect labels (replace with your actual labels)
    defect_labels = ['Bug', 'Regression', 'Enhancement', 'Security']

    # Model Dictionary: {Task: {ModelName: ModelObject}}
    models = {
        'audio': {},
        'defect': {}
    }

    for task in models:
        # Stage into a temporary dict so a file missing part-way through does
        # not leave a half-populated (and therefore unusable) task entry.
        staged = {}
        try:
            # NOTE(security): joblib.load is pickle-based and can run arbitrary
            # code while loading; only point this at trusted artifact files.
            for display_name, slug in model_slugs.items():
                staged[display_name] = joblib.load(f'{task}_{slug}_model.joblib')
            staged['scaler'] = joblib.load(f'{task}_feature_scaler.joblib')
        except FileNotFoundError:
            st.error(f"{task.capitalize()} models or scaler not found. Please ensure joblib files are in the directory.")
            continue

        if task == 'defect':
            staged['labels'] = list(defect_labels)
        models[task] = staged

    return models

# --- Core Prediction Functions ---

def extract_audio_features(uploaded_file, scaler):
    """Turn an uploaded audio file into one scaled feature row.

    The first 2 seconds are decoded at the file's native sample rate, 13 MFCCs
    are computed and averaged over time, and the resulting vector is passed
    through ``scaler.transform``. These features must match what the models
    were trained on.

    Args:
        uploaded_file: Object exposing ``getvalue()`` that returns the raw
            audio bytes.
        scaler: Object exposing ``transform`` for a single-row 2-D array.

    Returns:
        The scaled features reshaped to a single row, or ``None`` when any
        step fails (the error is reported through ``st.error``).
    """
    try:
        # Decode straight from the in-memory bytes of the upload.
        waveform, sample_rate = librosa.load(
            io.BytesIO(uploaded_file.getvalue()),
            sr=None,
            duration=2,
        )

        # One MFCC column per frame; average across frames per coefficient.
        mfcc_matrix = librosa.feature.mfcc(y=waveform, sr=sample_rate, n_mfcc=13)
        mean_mfcc = np.mean(mfcc_matrix.T, axis=0)

        # The scaler expects a 2-D (1 x n_features) input.
        single_row = mean_mfcc.reshape(1, -1)
        scaled_row = scaler.transform(single_row)
        return scaled_row.reshape(1, -1)
    except Exception as err:
        st.error(f"Error processing audio file: {err}")
        return None

def predict_audio(model, features):
    """Predict Bonafide or Deepfake for one feature row and report confidence.

    Args:
        model: Classifier exposing ``predict`` and either ``predict_proba`` or
            ``decision_function``.
        features: Single-row 2-D feature array accepted by the model.

    Returns:
        tuple: ``(result, confidence, all_scores)`` where ``result`` is
        'Bonafide' or 'Deepfake', ``confidence`` is the score of the predicted
        class as a float, and ``all_scores`` maps both class names to floats.
    """
    # Assuming 0 is Bonafide, 1 is Deepfake
    classes = ['Bonafide', 'Deepfake']

    if hasattr(model, 'predict_proba'):
        # Models with probability estimates (LogReg, DNN): the predicted label
        # doubles as the index into the probability row.
        prediction = int(model.predict(features)[0])
        proba = model.predict_proba(features)[0]
        result = classes[prediction]
        confidence = float(proba[prediction])
        all_scores = {name: float(p) for name, p in zip(classes, proba)}
    else:
        # Models without predict_proba (SVM, Perceptron): squash the signed
        # distance from the decision boundary with a sigmoid. This is only an
        # approximation of a probability, not a calibrated one.
        decision_value = float(model.decision_function(features)[0])
        deepfake_score = float(1 / (1 + np.exp(-decision_value)))
        all_scores = {
            'Bonafide': 1.0 - deepfake_score,
            'Deepfake': deepfake_score,
        }
        result = "Bonafide" if decision_value < 0 else "Deepfake"
        # Report the score of the class actually predicted; sigmoid(d) alone is
        # the Deepfake score and would read below 0.5 for a Bonafide result.
        confidence = all_scores[result]

    return result, confidence, all_scores

def predict_defect(model, feature_vector, defect_labels):
    """Predict multi-label defect types for one feature row and score each label.

    Args:
        model: Classifier exposing ``predict``; ``predict_proba`` or
            ``decision_function`` is used for scores when available.
        feature_vector: Pre-processed single-row 2-D array/list.
        defect_labels: Label names, in the same order as the model's outputs.

    Returns:
        tuple: ``(predicted_labels, all_scores)`` where ``predicted_labels``
        lists the labels whose prediction equals 1 and ``all_scores`` maps
        label names to float scores.
    """
    # Prediction (will be a multi-hot encoded array for multi-label)
    predictions = model.predict(feature_vector)[0]
    predicted_labels = [label for i, label in enumerate(defect_labels) if predictions[i] == 1]

    # Placeholder used when the model offers no usable per-label score.
    raw_scores = [0.0] * len(defect_labels)

    if hasattr(model, 'predict_proba'):
        raw_scores = model.predict_proba(feature_vector)[0]
    elif hasattr(model, 'decision_function'):
        # Same sigmoid-of-margin approximation predict_audio uses for models
        # without predict_proba; only applied when there is exactly one margin
        # per label, otherwise the zero placeholder is kept.
        margins = np.asarray(model.decision_function(feature_vector)[0], dtype=float)
        if margins.shape == (len(defect_labels),):
            raw_scores = 1 / (1 + np.exp(-margins))

    # Plain Python floats keep downstream formatting and widgets dtype-agnostic.
    all_scores = {label: float(score) for label, score in zip(defect_labels, raw_scores)}

    return predicted_labels, all_scores

# --- Streamlit UI Implementation ---

st.set_page_config(
    page_title="AI Model Selector & Prediction App",
    layout="wide",
    initial_sidebar_state="expanded"
)

st.title("🗣️ Multi-Task AI Prediction Platform")
st.markdown("A unified app for **Audio Deepfake Detection** and **Software Defect Prediction** with runtime model selection.")

# Load all models once
MODELS = load_models()

# Entries stored next to the estimators that must never be offered as models.
NON_MODEL_KEYS = ('scaler', 'labels')


def _selectable_models(task_artifacts):
    """Return a task's model display names in load order, skipping scaler/labels."""
    return [name for name in task_artifacts if name not in NON_MODEL_KEYS]


def _as_progress(score):
    """Coerce a score to a float clamped to 0.0-1.0 before handing it to st.progress."""
    return min(max(float(score), 0.0), 1.0)


# Sidebar for Model Selection (Must be Deployed feature)
st.sidebar.title("⚙️ Model Configuration")
task_selection = st.sidebar.selectbox(
    "Select Prediction Task",
    ("Audio Deepfake Detection", "Software Defect Prediction")
)

# --- Main Content Layout ---

if task_selection == "Audio Deepfake Detection":
    # The model picker is rendered per task so only models of the active task
    # (and never the scaler entry) can be chosen.
    model_options = _selectable_models(MODELS.get('audio', {}))
    selected_model_name = st.sidebar.selectbox(
        "Select Model at Runtime (Model)",
        model_options
    )

    st.header("Upload an Audio File: Bonafide or Deepfake")
    st.info("Upload a `.wav` or `.mp3` file to predict if it is genuine (Bonafide) or synthetic (Deepfake).")

    # 1. Upload an audio file
    uploaded_file = st.file_uploader("Choose an Audio File", type=['wav', 'mp3'])

    if uploaded_file is not None:
        # Play the audio file using the MIME type reported for the upload
        # (falls back to WAV when none is reported).
        st.audio(uploaded_file, format=uploaded_file.type or 'audio/wav')

        # Get the selected model and scaler
        model = MODELS.get('audio', {}).get(selected_model_name)
        scaler = MODELS.get('audio', {}).get('scaler')

        # Explicit None checks: estimator objects are not guaranteed to be truthy.
        if model is not None and scaler is not None:
            if st.button("Predict Audio Class"):
                with st.spinner('Extracting features and predicting...'):
                    # 2. Extract features
                    features = extract_audio_features(uploaded_file, scaler)

                    if features is not None:
                        # 3. Predict
                        result, confidence, all_scores = predict_audio(model, features)

                        st.subheader(f"Prediction Result (Model: {selected_model_name})")

                        if result == 'Deepfake':
                            st.error(f"⚠️ Predicted Class: **{result}**")
                        else:
                            st.success(f"✅ Predicted Class: **{result}**")

                        # 4. Display prediction confidence/probability scores
                        st.markdown("### Prediction Confidence Scores")

                        # Display all class probabilities
                        for cls, score in all_scores.items():
                            st.metric(f"Probability of {cls}", f"{score:.4f}")
                            st.progress(_as_progress(score))
        else:
            st.warning("Please ensure all audio models are correctly loaded.")

elif task_selection == "Software Defect Prediction":
    st.header("Enter Software Defect Feature Vector: Predict Multiple Labels")
    st.info("Enter the pre-processed feature vector (e.g., 5-10 numerical values) to predict multiple defect labels.")

    # Get the selected model (options keep their load order)
    model_options_defect = _selectable_models(MODELS.get('defect', {}))
    selected_model_name_defect = st.sidebar.selectbox(
        "Select Model at Runtime (Defect)",
        model_options_defect
    )

    model = MODELS.get('defect', {}).get(selected_model_name_defect)
    # NOTE: For simplicity, the user enters the already pre-processed VECTOR, so
    # the loaded defect scaler is not applied here. If they entered raw text,
    # you would need to load a TfidfVectorizer here.

    # 1. Enter software defect feature vector
    st.markdown("Enter your feature vector as comma-separated numbers (e.g., `0.5, 1.2, -0.8, ...`)")

    feature_input = st.text_input("Defect Feature Vector", value="0.1, 0.9, 0.4, 0.2, 0.7")

    if st.button("Predict Defect Labels"):
        # Parse first, in its own try-block, so that only genuine parsing
        # problems are reported as an input-format error.
        feature_vector = None
        try:
            if not feature_input.strip():
                raise ValueError("empty feature vector")
            # Convert string input to a numpy array (1xN vector)
            feature_vector_list = [float(x.strip()) for x in feature_input.split(',')]
            feature_vector = np.array(feature_vector_list).reshape(1, -1)
        except ValueError:
            st.error("Invalid input format. Please enter comma-separated numbers only.")

        if feature_vector is not None:
            defect_labels = MODELS.get('defect', {}).get('labels', [])

            if model is not None and defect_labels:
                with st.spinner('Predicting defect labels...'):
                    try:
                        # 2. Predict multiple defect labels
                        predicted_labels, all_scores = predict_defect(model, feature_vector, defect_labels)
                    except ValueError as exc:
                        # Raised by the model itself, e.g. when the vector has
                        # a different number of features than it was trained on.
                        st.error(f"Prediction failed: {exc}")
                    else:
                        st.subheader(f"Predicted Defect Labels (Model: {selected_model_name_defect})")

                        if predicted_labels:
                            st.success("Detected Labels: " + ", ".join([f"**{label}**" for label in predicted_labels]))
                        else:
                            st.info("No primary defect labels predicted.")

                        # 3. Display prediction confidence/probability scores
                        st.markdown("### Label Probability Scores")

                        # First half of the labels in the left column, rest on the right.
                        col1, col2 = st.columns(2)
                        for i, (label, score) in enumerate(all_scores.items()):
                            col = col1 if i < len(all_scores) / 2 else col2
                            col.metric(f"P({label})", f"{score:.4f}")
                            col.progress(_as_progress(score))
            else:
                st.warning("Please ensure all defect models and labels are correctly loaded.")

# --- Clean and professional UI is mandatory ---
st.sidebar.markdown("---")
st.sidebar.markdown("Built with **Streamlit** for a Clean UI.")

2025-12-07 18:25:46.182 
  command:

    streamlit run /usr/local/lib/python3.12/dist-packages/colab_kernel_launcher.py [ARGUMENTS]
2025-12-07 18:25:46.317 Session state does not function when running a script without `streamlit run`


DeltaGenerator(_root_container=1, _parent=DeltaGenerator())