In [None]:
!pip install flask matplotlib scikit-learn




In [None]:
from flask import Flask, render_template_string, request, jsonify
from google.colab.output import eval_js
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
import numpy as np
import matplotlib.pyplot as plt
from io import BytesIO
import base64
import threading
from sklearn.utils.extmath import row_norms
from sklearn.utils.validation import check_random_state
from sklearn.metrics.pairwise import euclidean_distances
##Q&A section
# Add Q&A functionality to the Flask application.
from flask import Flask, render_template_string, request, jsonify
##

# Create the Flask application object; the @app.route decorators further down
# register their view functions on it.
app = Flask(__name__)




# Multiple-choice quiz data rendered by the Q&A page.
# Each entry is a dict with three keys:
#   "question" - the prompt text shown as the heading,
#   "options"  - the candidate answers, rendered as radio buttons,
#   "answer"   - the correct choice. The page compares it with the selected
#                option using strict string equality, so it must match one of
#                the "options" strings exactly.
qa_data = [
    {
        "question": "What is K-means?",
        "options": [
            "A clustering algorithm used to partition data into groups.",
            "A supervised learning algorithm for classification.",
            "A method for dimensionality reduction.",
            "A technique for regression analysis."
        ],
        "answer": "A clustering algorithm used to partition data into groups."
    },
    {
        "question": "How does K-means work?",
        "options": [
            "Assigns data points to clusters and updates centroids iteratively.",
            "Performs hierarchical clustering based on distance.",
            "Uses principal components to separate clusters.",
            "Calculates regression lines for each group."
        ],
        "answer": "Assigns data points to clusters and updates centroids iteratively."
    },
    {
        "question": "What is clustering?",
        "options": [
            "A supervised learning method for classification.",
            "An unsupervised learning technique to group similar data.",
            "A method for reducing data size.",
            "A statistical test for hypothesis validation."
        ],
        "answer": "An unsupervised learning technique to group similar data."
    },
    {
        "question": "How to choose the number of clusters?",
        "options": [
            "Using the elbow method or silhouette analysis.",
            "By randomly assigning values.",
            "Using regression analysis.",
            "By minimizing variance directly."
        ],
        "answer": "Using the elbow method or silhouette analysis."
    }
]

# Landing page

# HTML template for the landing page: a static page (no template variables)
# that links to the Q&A quiz at /qna and the K-means demo at /clustering.
LANDING_PAGE_TEMPLATE = '''
<!DOCTYPE html>
<html>
<head>
    <title>Landing Page</title>
</head>
<body>
    <h1>Welcome to the Machine Learning Application</h1>
    <p>Select a feature to explore:</p>
    <ul>
        <li><a href="/qna">Test Your Knowledge (Q&A)</a></li>
        <li><a href="/clustering">Perform K-Means Clustering</a></li>
    </ul>
</body>
</html>
'''



# HTML template for the Q&A page with multiple-choice questions.
#
# Template variable:
#   questions - list of dicts with "question", "options" and "answer" keys.
#
# The list is embedded with the `tojson` filter so the browser receives real
# JSON. Rendering the Python repr of the list only worked while no string
# contained an apostrophe and no value was None/True/False.
#
# The quiz runs entirely client-side: one question is shown at a time, the
# selected option is compared to the stored answer, and the next question is
# loaded after a two-second pause.
QNA_TEMPLATE = '''
<!DOCTYPE html>
<html>
<head>
    <title>Q&A Page</title>
    <script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
</head>
<body>
    <h1>Test Your Knowledge</h1>
    <div id="question-container"></div>
    <h2>Your Answer:</h2>
    <div id="result" style="font-weight: bold; color: green;"></div>

    <script>
        const questions = {{ questions | tojson }};
        let currentQuestionIndex = 0;

        function loadQuestion() {
            const question = questions[currentQuestionIndex];
            let optionsHtml = '';
            question.options.forEach((option, index) => {
                optionsHtml += `<div>
                    <input type="radio" name="option" value="${option}" id="option${index}">
                    <label for="option${index}">${option}</label>
                </div>`;
            });

            $('#question-container').html(`
                <h2>${question.question}</h2>
                ${optionsHtml}
                <button onclick="submitAnswer()">Submit</button>
            `);
        }

        function submitAnswer() {
            const selectedOption = $('input[name="option"]:checked').val();
            const question = questions[currentQuestionIndex];

            if (!selectedOption) {
                alert('Please select an answer!');
                return;
            }

            // Lock the controls so a second click during the delay below
            // cannot be graded against (and skip past) the next question.
            $('#question-container input, #question-container button').prop('disabled', true);

            if (selectedOption === question.answer) {
                $('#result').text('Correct!');
            } else {
                $('#result').text(`Wrong! The correct answer is: ${question.answer}`);
            }

            currentQuestionIndex++;

            if (currentQuestionIndex < questions.length) {
                setTimeout(() => {
                    $('#result').text('');
                    loadQuestion();
                }, 2000);
            } else {
                $('#question-container').html('<h2>You have completed the quiz!</h2>');
            }
        }

        $(document).ready(function() {
            loadQuestion();
        });
    </script>
</body>
</html>
'''





# Most recently generated dataset, shared between requests.
# Assigned by generate_data(); read by the /perform_clustering route, which
# reports an error while this is still None.
raw_data = None

# Generate random blob data whose spread depends on the difficulty level
def generate_data(n_samples=300, n_features=2, centers=3, difficulty=1.0):
    """Generate a blob dataset and remember it in the global ``raw_data``.

    Args:
        n_samples: Total number of points to generate.
        n_features: Number of columns per point. Forwarded to ``make_blobs``;
            the plotting helpers in this file only draw the first two columns.
        centers: Number of blob centers passed to ``make_blobs``.
        difficulty: Used as ``cluster_std`` for ``make_blobs``. Values above
            1.0 additionally multiply every coordinate by a constant factor
            (see below).

    Returns:
        The generated array of shape ``(n_samples, n_features)``. The same
        object is stored in the module-level ``raw_data``.
    """
    global raw_data
    data, _ = make_blobs(
        n_samples=n_samples,
        n_features=n_features,  # previously accepted but silently ignored
        centers=centers,
        cluster_std=difficulty,
        random_state=42,  # fixed seed: same arguments give the same dataset
    )

    # Stretch the coordinate range for the harder levels. This multiplies all
    # coordinates by one factor, so it changes the axis scale of the plot but
    # not the relative layout of the points.
    if difficulty > 1.5:  # Extreme difficulty
        data *= difficulty * 500
    elif difficulty > 1.0:  # Hard difficulty
        data *= difficulty * 200

    raw_data = data
    return raw_data



def initialize_kmeans_pp(data, n_clusters, random_state=None):
    """Pick ``n_clusters`` starting centroids from ``data`` using k-means++ seeding.

    The first centroid is a uniformly random data point. Each further centroid
    is a data point drawn with probability proportional to its squared
    distance from the nearest centroid chosen so far.

    Args:
        data: 2-D array of shape ``(n_samples, n_features)``.
        n_clusters: Number of centroids to select.
        random_state: Seed, ``RandomState`` instance or ``None``; normalised
            with ``check_random_state``.

    Returns:
        Array of shape ``(n_clusters, n_features)`` with the same dtype as
        ``data``; every row is a copy of one of the input points.
    """
    rng = check_random_state(random_state)
    n_samples, n_features = data.shape

    # The first centroid is chosen uniformly at random.
    centroids = np.empty((n_clusters, n_features), dtype=data.dtype)
    centroids[0] = data[rng.randint(n_samples)]

    # Squared distance from every point to its nearest chosen centroid so far.
    closest_dist_sq = euclidean_distances(
        centroids[0, np.newaxis], data, squared=True
    ).ravel()

    for c in range(1, n_clusters):
        potential = closest_dist_sq.sum()
        if potential > 0:
            probabilities = closest_dist_sq / potential
        else:
            # Every point coincides with an already chosen centroid (duplicate
            # data). Dividing by zero would hand NaNs to choice(), which
            # raises, so sample uniformly instead.
            probabilities = None

        new_idx = rng.choice(n_samples, p=probabilities)
        centroids[c] = data[new_idx]

        # Fold the new centroid into the nearest-centroid distances.
        new_dist_sq = euclidean_distances(
            centroids[c, np.newaxis], data, squared=True
        ).ravel()
        closest_dist_sq = np.minimum(closest_dist_sq, new_dist_sq)

    return centroids

def perform_kmeans(data, n_clusters, init_method='random', max_iter=300):
    """Run K-means and report the centroids it actually started from.

    The starting centroids are computed here and passed to ``KMeans`` as an
    explicit ``init`` array. Previously they were computed separately from the
    ones scikit-learn picked internally, so the returned "initial centroids"
    were not the points the fit began with.

    Args:
        data: 2-D array of shape ``(n_samples, n_features)``.
        n_clusters: Number of clusters to fit.
        init_method: ``'random'`` (distinct data points chosen at random) or
            ``'k-means++'`` (seeding via ``initialize_kmeans_pp``).
        max_iter: Maximum number of K-means iterations.

    Returns:
        Tuple ``(final_centroids, labels, initial_centroids)``.

    Raises:
        ValueError: If ``init_method`` is not one of the two supported names.
    """
    if init_method == 'k-means++':
        initial_centroids = initialize_kmeans_pp(data, n_clusters, random_state=42)
    elif init_method == 'random':
        # Seeded generator so repeated runs on the same data are reproducible,
        # matching the fixed random_state used for the fit itself.
        rng = np.random.RandomState(42)
        chosen = rng.choice(data.shape[0], n_clusters, replace=False)
        initial_centroids = data[chosen]
    else:
        raise ValueError(
            f"Unsupported init_method {init_method!r}; expected 'random' or 'k-means++'."
        )

    # n_init=1 because an explicit init array defines a single starting point.
    kmeans = KMeans(
        n_clusters=n_clusters,
        init=initial_centroids,
        max_iter=max_iter,
        n_init=1,
        random_state=42,
    )
    kmeans.fit(data)
    return kmeans.cluster_centers_, kmeans.labels_, initial_centroids




# Plot raw data
def plot_data(data, title):
    """Render a gray scatter plot of ``data`` and return it as a base64 PNG.

    Args:
        data: 2-D array; column 0 is drawn on the x axis, column 1 on the y axis.
        title: Title text shown above the plot.

    Returns:
        The PNG image encoded as a base64 ``str``, ready to embed in a
        ``data:image/png;base64,...`` URL.
    """
    # Imported locally. Building a Figure directly avoids pyplot's global
    # "current figure" state, which is shared between request threads and
    # leaks the figure if saving fails before plt.close() is reached.
    from matplotlib.figure import Figure

    fig = Figure(figsize=(6, 4))
    ax = fig.subplots()
    ax.scatter(data[:, 0], data[:, 1], c='gray', s=20)
    ax.set_title(title)
    ax.set_xlabel("Feature 1")
    ax.set_ylabel("Feature 2")

    # Serialise the figure to PNG in memory and base64-encode the bytes.
    with BytesIO() as buf:
        fig.savefig(buf, format="png")
        return base64.b64encode(buf.getvalue()).decode('utf-8')

# Plot clusters and centroids
def plot_initial_and_final_centroids(data, initial_centroids, final_centroids, labels, title):
    """Render clustered points with initial and final centroids as a base64 PNG.

    Args:
        data: 2-D array of points; columns 0 and 1 are plotted.
        initial_centroids: 2-D array of starting centroids (orange crosses).
        final_centroids: 2-D array of fitted centroids (red circles).
        labels: One integer cluster label per row of ``data``; label ``k`` is
            drawn in matplotlib's cycle color ``"C<k>"``.
        title: Title text shown above the plot.

    Returns:
        The PNG image encoded as a base64 ``str``.
    """
    # Imported locally. Building a Figure directly avoids pyplot's global
    # "current figure" state, which is shared between request threads.
    from matplotlib.figure import Figure

    fig = Figure(figsize=(8, 6))
    ax = fig.subplots()

    # Draw all points in one call with a per-point color list, instead of
    # creating a separate scatter artist for every sample.
    points = np.asarray(data)
    point_colors = [f"C{label}" for label in labels]
    ax.scatter(points[:, 0], points[:, 1], c=point_colors, s=20)

    # Plot initial centroids
    ax.scatter(initial_centroids[:, 0], initial_centroids[:, 1], c='orange', marker='x', s=200, label='Initial Centroids')

    # Plot final centroids
    ax.scatter(final_centroids[:, 0], final_centroids[:, 1], c='red', marker='o', s=200, label='Final Centroids')

    ax.set_title(title)
    ax.legend()
    ax.set_xlabel("Feature 1")
    ax.set_ylabel("Feature 2")

    # Serialise the figure to PNG in memory and base64-encode the bytes.
    with BytesIO() as buf:
        fig.savefig(buf, format="png")
        return base64.b64encode(buf.getvalue()).decode('utf-8')


# HTML template for the K-means clustering page.
#
# The page posts the form values to /generate_data and /perform_clustering
# with jQuery and shows the returned base64 PNGs side by side.
#
# Fixes relative to the earlier markup:
#   * the controls are wrapped in a real <form> (there was a closing </form>
#     with no opening tag); onsubmit returns false so the page never reloads,
#   * failed requests (HTTP 4xx/5xx, network errors) show an alert and always
#     hide the spinner, which previously stayed on screen forever,
#   * generateRawData() honors an "error" field in the JSON response, as
#     performClustering() already did.
CLUSTERING_PAGE_TEMPLATE = '''
<!DOCTYPE html>
<html>
<head>
    <title>K-means Clustering</title>
    <script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
    <style>
        #spinner {
            display: none;
            position: fixed;
            top: 50%;
            left: 50%;
            transform: translate(-50%, -50%);
            z-index: 1000;
        }
        #spinner img {
            width: 100px;
        }
        .form-row {
            display: flex;
            align-items: center;
            margin-bottom: 10px;
        }
        .form-row label {
            margin-right: 10px;
        }
        .slider-container {
            flex-grow: 1;
            margin-left: 10px;
        }
        .slider-label {
            display: flex;
            justify-content: space-between;
            margin-top: 5px;
        }
    </style>
</head>
<body>
    <h1>K-means Clustering Application</h1>
    <form onsubmit="return false;">
      <div class="form-row">
          <label for="n_samples">Number of Data Points:</label>
          <input type="number" id="n_samples" name="n_samples" value="300" style="width: 80px;">
    </div>
      <div class="form-row">
          <label for="difficulty">Noise Level:</label>
          <div id="difficulty-options">
              <label><input type="radio" name="difficulty" value="0.5" checked> Stage 1</label>
              <label><input type="radio" name="difficulty" value="1.0"> Stage 2</label>
              <label><input type="radio" name="difficulty" value="1.5"> Stage 3</label>
              <label><input type="radio" name="difficulty" value="2.0"> Stage 4</label>
          </div>
      </div>
        <label for="n_clusters">Number of Clusters (k):</label>
        <input type="number" id="n_clusters" name="n_clusters" value="3"><br><br>
        <label for="init_method">Initialization Method:</label>
        <select id="init_method" name="init_method">
            <option value="random">Random</option>
            <option value="k-means++">K-means++</option>
        </select><br><br>
        <button type="button" onclick="generateRawData()">Generate Raw Data</button>
        <button type="button" onclick="performClustering()">Perform K-means Clustering</button>
    </form>
    <h2>Output:</h2>
    <div style="display: flex; justify-content: space-around;">
        <div id="raw-data"></div>
        <div id="clustered-data"></div>
    </div>
    <div id="spinner">
        <img src="https://i.gifer.com/ZZ5H.gif" alt="Loading...">
    </div>

    <script>
        function showSpinner() {
            $('#spinner').show();
        }

        function hideSpinner() {
            $('#spinner').hide();
        }

        function showRequestError(xhr) {
            let message = 'Request failed. Please check your input and try again.';
            if (xhr.responseJSON && xhr.responseJSON.error) {
                message = xhr.responseJSON.error;
            }
            alert(message);
        }

        function generateRawData() {
          showSpinner();
          let n_samples = $('#n_samples').val();
          let difficulty = $('input[name="difficulty"]:checked').val(); // Get selected difficulty
          $.post('/generate_data', {n_samples: n_samples, difficulty: difficulty}, function(response) {
              if (response.error) {
                  alert(response.error);
                  return;
              }
              $('#raw-data').html('<h3>Raw Data</h3><img src="data:image/png;base64,' + response.image + '" alt="Raw Data Visualization">');
              $('#clustered-data').html(''); // Clear clustered data
          }).fail(showRequestError).always(hideSpinner);
      }


        function performClustering() {
            showSpinner();
            let n_clusters = $('#n_clusters').val();
            let init_method = $('#init_method').val();
            $.post('/perform_clustering', {n_clusters: n_clusters, init_method: init_method}, function(response) {
                if (response.error) {
                    alert(response.error);
                } else {
                    $('#clustered-data').html('<h3>K-means Clustering</h3><img src="data:image/png;base64,' + response.image + '" alt="Clustered Data Visualization">');
                }
            }).fail(showRequestError).always(hideSpinner);
        }
    </script>
</body>
</html>
'''

# Landing page route
@app.route('/', methods=['GET'])
def landing_page():
    """Serve the landing page rendered from LANDING_PAGE_TEMPLATE."""
    page_html = render_template_string(LANDING_PAGE_TEMPLATE)
    return page_html


# Q&A page route (multiple-choice quiz)
@app.route('/qna', methods=['GET'])
def qna_page():
    """Serve the quiz page, passing qa_data to the template as ``questions``."""
    template_context = {"questions": qa_data}
    return render_template_string(QNA_TEMPLATE, **template_context)


# Clustering page route
@app.route('/clustering', methods=['GET'])
def clustering_page():
    """Serve the clustering page rendered from CLUSTERING_PAGE_TEMPLATE."""
    page_html = render_template_string(CLUSTERING_PAGE_TEMPLATE)
    return page_html


@app.route('/generate_data', methods=['POST'])
def generate_raw_data():
    """Generate a new dataset from the posted form values and return its plot.

    Form fields:
        n_samples: Number of points; must parse as an integer >= 1.
        difficulty: Cluster spread; must parse as a finite number >= 0.

    Returns:
        JSON ``{"image": <base64 PNG>}`` on success. Missing or invalid fields
        produce JSON ``{"error": <message>}`` with HTTP status 400, instead of
        an unhandled exception surfacing as HTTP 500.
    """
    # With ``type=``, .get() returns None when the field is missing or cannot
    # be converted, so one check covers both cases.
    n_samples = request.form.get('n_samples', type=int)
    if n_samples is None or n_samples < 1:
        return jsonify(error="Number of data points must be a positive integer."), 400

    difficulty = request.form.get('difficulty', type=float)
    # float() accepts "nan" and "inf", so finiteness is checked explicitly.
    if difficulty is None or not np.isfinite(difficulty) or difficulty < 0:
        return jsonify(error="Noise level must be a non-negative number."), 400

    # generate_data() also stores the dataset in the module-level raw_data.
    data = generate_data(n_samples=n_samples, difficulty=difficulty)
    img_base64 = plot_data(data, title="Raw Data")
    return jsonify(image=img_base64)

@app.route('/perform_clustering', methods=['POST'])
def perform_clustering():
    """Cluster the most recently generated dataset and return the result plot.

    Form fields:
        n_clusters: Number of clusters; must be an integer between 1 and the
            number of generated points.
        init_method: ``'random'`` or ``'k-means++'``.

    Returns:
        JSON ``{"image": <base64 PNG>}`` on success, or
        ``{"error": <message>}`` when no data exists yet or a field is
        invalid. Errors keep the default 200 status so the page's existing
        ``response.error`` check displays them.
    """
    # Take one snapshot of the shared dataset so a concurrent /generate_data
    # request cannot swap it out halfway through this request.
    data = raw_data
    if data is None:
        return jsonify(error="Please generate raw data first!")

    n_clusters = request.form.get('n_clusters', type=int)
    if n_clusters is None or not 1 <= n_clusters <= len(data):
        return jsonify(
            error=f"Number of clusters must be an integer between 1 and {len(data)}."
        )

    init_method = request.form.get('init_method')
    if init_method not in ('random', 'k-means++'):
        return jsonify(error="Initialization method must be 'random' or 'k-means++'.")

    final_centroids, labels, initial_centroids = perform_kmeans(
        data, n_clusters=n_clusters, init_method=init_method
    )

    # Plot initial and final centroids
    img_base64 = plot_initial_and_final_centroids(
        data,
        initial_centroids,
        final_centroids,
        labels,
        title=f"K-means Clustering ({init_method} Initialization)"
    )
    return jsonify(image=img_base64)


# Run Flask app in a separate thread
def run_app():
    """Start the Flask development server on port 5000 with debug mode off."""
    app.run(port=5000, debug=False)

# Start the server on a background thread so the statements below (and the
# rest of the notebook) keep running while it serves requests.
thread = threading.Thread(target=run_app)
thread.start()

# Display the app's proxy URL
# eval_js runs the given JavaScript in the Colab frontend; proxyPort(5000)
# yields the externally reachable URL for local port 5000.
print("Your app is running. Access it through the following URL:")
print(eval_js("google.colab.kernel.proxyPort(5000)"))


Your app is running. Access it through the following URL:
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:Press CTRL+C to quit


https://z93jukbo9wq-496ff2e9c6d22116-5000-colab.googleusercontent.com/
