# RusTorch WebGPU Machine Learning Demo
# RusTorch WebGPU機械学習デモ

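Advanced machine learning operations in the browser. The cells below are plain-JavaScript (CPU) reference implementations that serve as the baseline for WebGPU acceleration (see Next Steps).

ブラウザ上での高度な機械学習演算。以下のセルは、WebGPUによる高速化のベースラインとなる、プレーンなJavaScript（CPU）によるリファレンス実装です（「次のステップ」を参照）。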

## 1. Neural Network Layer Operations
## 1. ニューラルネットワークレイヤー演算

In [None]:
%%html
<div id="nn-layers">
    <h3>🧠 Neural Network Forward Pass Demo</h3>
    
    <div style="margin: 20px 0;">
        <label>Network Architecture:</label>
        <select id="nn-architecture">
            <option value="simple">Simple (784 → 128 → 10)</option>
            <option value="deep">Deep (784 → 256 → 128 → 64 → 10)</option>
            <option value="conv">ConvNet (28×28 → Conv → Pool → FC)</option>
        </select>
        
        <button onclick="runNNDemo()" style="margin-left: 10px;">Run Forward Pass</button>
    </div>
    
    <div id="nn-results"></div>
</div>

<script>
/**
 * Allocates a flat Float32Array holding rows * cols small random values.
 *
 * @param {number} rows - Number of rows.
 * @param {number} cols - Number of columns.
 * @returns {Float32Array} rows * cols entries, each drawn as
 *     (Math.random() - 0.5) * 0.1, i.e. roughly uniform in [-0.05, 0.05).
 */
function createRandomMatrix(rows, cols) {
    return new Float32Array(rows * cols).map(() => (Math.random() - 0.5) * 0.1);
}

/**
 * Rectified linear unit: passes positive values through, clamps the rest to 0.
 *
 * @param {number} x - Pre-activation value.
 * @returns {number} The larger of x and 0 (NaN stays NaN).
 */
function relu(x) {
    const rectified = Math.max(x, 0);
    return rectified;
}

/**
 * Converts a list of logits into probabilities that sum to 1.
 *
 * The largest logit is subtracted before exponentiating so that large
 * inputs do not overflow Math.exp.
 *
 * @param {number[]} arr - Logits.
 * @returns {number[]} One probability per logit, in the same order.
 */
function softmax(arr) {
    const peak = Math.max(...arr);
    const exps = arr.map((logit) => Math.exp(logit - peak));

    let total = 0;
    exps.forEach((e) => {
        total += e;
    });

    return exps.map((e) => e / total);
}

/**
 * Runs a timed CPU forward pass through the architecture selected in
 * #nn-architecture and renders per-layer timings, throughput figures and
 * the first sample's class probabilities into #nn-results.
 *
 * Every layer transition is evaluated as a dense (fully connected) layer
 * with freshly randomised weights and bias; ReLU is applied to all but the
 * last layer and softmax to the final logits. Only the multiply-accumulate
 * loop of each layer is timed (weight creation is excluded).
 *
 * @returns {Promise<void>} Resolves once the results, or an error message,
 *     have been written to the page.
 */
async function runNNDemo() {
    const architecture = document.getElementById('nn-architecture').value;
    const resultsDiv = document.getElementById('nn-results');
    
    resultsDiv.innerHTML = '<p>Running neural network forward pass...</p>';
    
    try {
        // Yield to the event loop before the heavy synchronous work so the
        // browser gets a chance to show the status message above; without
        // this the message is overwritten before it is ever painted.
        await new Promise((resolve) => setTimeout(resolve, 0));
        
        let layers;
        const timings = [];
        
        // Define architecture
        switch (architecture) {
            case 'simple':
                layers = [
                    { name: 'Input', size: 784 },
                    { name: 'Hidden 1', size: 128 },
                    { name: 'Output', size: 10 }
                ];
                break;
            case 'deep':
                layers = [
                    { name: 'Input', size: 784 },
                    { name: 'Hidden 1', size: 256 },
                    { name: 'Hidden 2', size: 128 },
                    { name: 'Hidden 3', size: 64 },
                    { name: 'Output', size: 10 }
                ];
                break;
            case 'conv':
                // NOTE: these entries are still processed by the dense loop
                // below, so Conv1 -> Pool1 alone needs a 21632 x 5408 weight
                // matrix (~117M floats). Expect this preset to be very slow.
                layers = [
                    { name: 'Input', size: 784 },
                    { name: 'Conv1', size: 32 * 26 * 26 },
                    { name: 'Pool1', size: 32 * 13 * 13 },
                    { name: 'FC1', size: 128 },
                    { name: 'Output', size: 10 }
                ];
                break;
            default:
                throw new Error(`Unknown architecture: ${architecture}`);
        }
        
        // Create input (batch of 32 images)
        const batchSize = 32;
        let input = createRandomMatrix(batchSize, layers[0].size);
        let totalOps = 0;
        
        // Process through layers
        for (let i = 1; i < layers.length; i++) {
            const inputSize = layers[i - 1].size;
            const outputSize = layers[i].size;
            const isLastLayer = i === layers.length - 1;
            
            // Create weight matrix (inputSize x outputSize, row-major) and bias
            const weights = createRandomMatrix(inputSize, outputSize);
            const bias = createRandomMatrix(1, outputSize);
            
            const start = performance.now();
            
            // Matrix multiplication + bias + activation
            const output = new Float32Array(batchSize * outputSize);
            
            for (let b = 0; b < batchSize; b++) {
                for (let j = 0; j < outputSize; j++) {
                    let sum = bias[j];
                    for (let k = 0; k < inputSize; k++) {
                        sum += input[b * inputSize + k] * weights[k * outputSize + j];
                    }
                    // Apply ReLU (except last layer, whose raw logits feed softmax)
                    output[b * outputSize + j] = isLastLayer ? sum : relu(sum);
                }
            }
            
            const time = performance.now() - start;
            timings.push({ layer: layers[i].name, time });
            
            // Count operations
            totalOps += batchSize * inputSize * outputSize * 2; // multiply-add
            
            input = output;
        }
        
        // Apply softmax to final layer; the row width comes from the output
        // layer itself rather than a hard-coded class count.
        const numClasses = layers[layers.length - 1].size;
        const finalOutput = [];
        for (let b = 0; b < batchSize; b++) {
            const offset = b * numClasses;
            const logits = Array.from(input.subarray(offset, offset + numClasses));
            finalOutput.push(softmax(logits));
        }
        
        // Calculate total time and GFLOPS. Coarse timers can report 0 ms, so
        // guard the divisions instead of printing "Infinity".
        const totalTime = timings.reduce((sum, t) => sum + t.time, 0);
        const hasElapsed = totalTime > 0;
        const gflops = hasElapsed ? (totalOps / (totalTime * 1e6)).toFixed(2) : 'n/a';
        const samplesPerSec = hasElapsed ? (batchSize / (totalTime / 1000)).toFixed(0) : 'n/a';
        
        // Display results
        resultsDiv.innerHTML = `
            <h4>✅ Forward Pass Complete</h4>
            
            <h5>⏱️ Layer Timings:</h5>
            <table style="width: 100%; border-collapse: collapse;">
                <tr style="background: #f0f0f0;">
                    <th style="padding: 8px; text-align: left;">Layer</th>
                    <th style="padding: 8px; text-align: right;">Time (ms)</th>
                </tr>
                ${timings.map(t => `
                    <tr>
                        <td style="padding: 8px;">${t.layer}</td>
                        <td style="padding: 8px; text-align: right;">${t.time.toFixed(3)}</td>
                    </tr>
                `).join('')}
                <tr style="background: #e8f4f8; font-weight: bold;">
                    <td style="padding: 8px;">Total</td>
                    <td style="padding: 8px; text-align: right;">${totalTime.toFixed(3)}</td>
                </tr>
            </table>
            
            <h5>📊 Performance Metrics:</h5>
            <ul>
                <li><strong>Batch Size:</strong> ${batchSize} samples</li>
                <li><strong>Total Operations:</strong> ${(totalOps / 1e9).toFixed(2)} billion</li>
                <li><strong>Throughput:</strong> ${gflops} GFLOPS</li>
                <li><strong>Samples/sec:</strong> ${samplesPerSec}</li>
            </ul>
            
            <h5>🎯 Sample Output (First Sample):</h5>
            <div style="background: #f0f0f0; padding: 10px; border-radius: 5px;">
                ${finalOutput[0].map((p, i) => 
                    `Class ${i}: ${(p * 100).toFixed(1)}%`
                ).join(' | ')}
            </div>
        `;
        
    } catch (error) {
        // textContent keeps the message from being parsed as markup.
        resultsDiv.textContent = '❌ Error: ' + error.message;
    }
}
</script>

## 2. Convolution Operations
## 2. 畳み込み演算

In [None]:
%%html
<div id="conv-ops">
    <h3>🔍 2D Convolution Demo</h3>
    
    <div style="margin: 20px 0;">
        <label>Input Size:</label>
        <select id="conv-input-size">
            <option value="28">28×28 (MNIST)</option>
            <option value="32">32×32 (CIFAR)</option>
            <option value="64">64×64</option>
            <option value="128">128×128</option>
        </select>
        
        <label style="margin-left: 20px;">Kernel Size:</label>
        <select id="conv-kernel-size">
            <option value="3">3×3</option>
            <option value="5">5×5</option>
            <option value="7">7×7</option>
        </select>
        
        <label style="margin-left: 20px;">Filters:</label>
        <select id="conv-filters">
            <option value="16">16</option>
            <option value="32">32</option>
            <option value="64">64</option>
        </select>
        
        <button onclick="runConvDemo()" style="margin-left: 10px;">Run Convolution</button>
    </div>
    
    <div id="conv-results"></div>
</div>

<script>
/**
 * Single-channel 2D "valid" convolution (no padding, stride 1) over a
 * square input with a square kernel. The kernel is applied without
 * flipping, i.e. this is a cross-correlation.
 *
 * @param {Float32Array|number[]} input - Row-major inputSize x inputSize plane.
 * @param {Float32Array|number[]} kernel - Row-major kernelSize x kernelSize weights.
 * @param {number} inputSize - Side length of the input plane.
 * @param {number} kernelSize - Side length of the kernel.
 * @returns {Float32Array} Row-major output plane with side length
 *     inputSize - kernelSize + 1; empty when the kernel does not fit.
 */
function conv2d(input, kernel, inputSize, kernelSize) {
    const outputSize = inputSize - kernelSize + 1;
    
    // A kernel larger than the input has no valid position. Bail out
    // explicitly: squaring a negative outputSize would otherwise allocate a
    // positive-length, zero-filled buffer that looks like a real result.
    if (outputSize <= 0) {
        return new Float32Array(0);
    }
    
    const output = new Float32Array(outputSize * outputSize);
    
    for (let y = 0; y < outputSize; y++) {
        for (let x = 0; x < outputSize; x++) {
            let sum = 0;
            
            for (let ky = 0; ky < kernelSize; ky++) {
                // Offsets of the current input row and kernel row.
                const inputRow = (y + ky) * inputSize + x;
                const kernelRow = ky * kernelSize;
                
                for (let kx = 0; kx < kernelSize; kx++) {
                    sum += input[inputRow + kx] * kernel[kernelRow + kx];
                }
            }
            
            output[y * outputSize + x] = sum;
        }
    }
    
    return output;
}

/**
 * Runs a timed multi-filter 2D convolution on a random 3-channel image using
 * the sizes selected in the #conv-* controls, then renders configuration,
 * throughput and output statistics into #conv-results.
 *
 * For each filter, the per-channel conv2d results are summed and passed
 * through ReLU.
 *
 * @returns {Promise<void>} Resolves once the results, or an error message,
 *     have been written to the page.
 */
async function runConvDemo() {
    const inputSize = Number.parseInt(document.getElementById('conv-input-size').value, 10);
    const kernelSize = Number.parseInt(document.getElementById('conv-kernel-size').value, 10);
    const numFilters = Number.parseInt(document.getElementById('conv-filters').value, 10);
    const resultsDiv = document.getElementById('conv-results');
    
    resultsDiv.innerHTML = '<p>Running convolution...</p>';
    
    try {
        // Yield to the event loop before the synchronous work so the browser
        // gets a chance to show the status message above.
        await new Promise((resolve) => setTimeout(resolve, 0));
        
        // Create input image (3 channels for RGB), stored channel after channel
        const channels = 3;
        const planeSize = inputSize * inputSize;
        const input = new Float32Array(planeSize * channels);
        for (let i = 0; i < input.length; i++) {
            input[i] = Math.random();
        }
        
        // One view per channel plane, built once. subarray shares the
        // underlying buffer, so nothing is copied per filter inside the
        // timed loop.
        const channelInputs = [];
        for (let c = 0; c < channels; c++) {
            channelInputs.push(input.subarray(c * planeSize, (c + 1) * planeSize));
        }
        
        // Create kernels: kernels[f][c] is the kernel of filter f for channel c
        const kernels = [];
        for (let f = 0; f < numFilters; f++) {
            const kernel = [];
            for (let c = 0; c < channels; c++) {
                kernel.push(createRandomMatrix(kernelSize, kernelSize));
            }
            kernels.push(kernel);
        }
        
        const start = performance.now();
        
        // Perform convolution
        const outputSize = inputSize - kernelSize + 1;
        const outputs = [];
        
        for (let f = 0; f < numFilters; f++) {
            const filterOutput = new Float32Array(outputSize * outputSize);
            
            for (let c = 0; c < channels; c++) {
                const channelOutput = conv2d(
                    channelInputs[c],
                    kernels[f][c],
                    inputSize,
                    kernelSize
                );
                
                // Accumulate
                for (let i = 0; i < filterOutput.length; i++) {
                    filterOutput[i] += channelOutput[i];
                }
            }
            
            // Apply ReLU
            for (let i = 0; i < filterOutput.length; i++) {
                filterOutput[i] = Math.max(0, filterOutput[i]);
            }
            
            outputs.push(filterOutput);
        }
        
        const time = performance.now() - start;
        
        // Calculate operations (one multiply + one add per kernel tap).
        // Coarse timers can report 0 ms for small configurations, so guard
        // the division instead of printing "Infinity".
        const ops = numFilters * channels * outputSize * outputSize * kernelSize * kernelSize * 2;
        const gflops = time > 0 ? (ops / (time * 1e6)).toFixed(2) : 'n/a';
        
        resultsDiv.innerHTML = `
            <h4>✅ Convolution Complete</h4>
            
            <h5>📐 Configuration:</h5>
            <ul>
                <li><strong>Input:</strong> ${inputSize}×${inputSize}×${channels}</li>
                <li><strong>Kernel:</strong> ${kernelSize}×${kernelSize}</li>
                <li><strong>Filters:</strong> ${numFilters}</li>
                <li><strong>Output:</strong> ${outputSize}×${outputSize}×${numFilters}</li>
            </ul>
            
            <h5>⚡ Performance:</h5>
            <ul>
                <li><strong>Time:</strong> ${time.toFixed(2)}ms</li>
                <li><strong>Operations:</strong> ${(ops / 1e9).toFixed(2)} billion</li>
                <li><strong>Throughput:</strong> ${gflops} GFLOPS</li>
            </ul>
            
            <h5>📊 Output Statistics:</h5>
            <ul>
                <li><strong>Total output values:</strong> ${outputSize * outputSize * numFilters}</li>
                <li><strong>Non-zero (after ReLU):</strong> ${outputs.reduce((sum, out) => 
                    sum + out.filter(x => x > 0).length, 0)}</li>
                <li><strong>Memory used:</strong> ${((inputSize * inputSize * channels + 
                    outputSize * outputSize * numFilters) * 4 / 1024 / 1024).toFixed(2)} MB</li>
            </ul>
        `;
        
    } catch (error) {
        // textContent keeps the message from being parsed as markup.
        resultsDiv.textContent = '❌ Error: ' + error.message;
    }
}
</script>

## 3. Gradient Computation
## 3. 勾配計算

In [None]:
%%html
<div id="gradient-comp">
    <h3>📈 Automatic Differentiation Demo</h3>
    
    <div style="margin: 20px 0;">
        <label>Function:</label>
        <select id="grad-function">
            <option value="quadratic">f(x,y) = x² + 2xy + y²</option>
            <option value="neural">Neural Network Loss</option>
            <option value="complex">f(x,y,z) = sin(x)cos(y) + e^z</option>
        </select>
        
        <button onclick="computeGradient()" style="margin-left: 10px;">Compute Gradient</button>
    </div>
    
    <div id="gradient-results"></div>
</div>

<script>
/**
 * Estimates the gradient of f at x with central finite differences:
 * (f(x + h*e_i) - f(x - h*e_i)) / (2h) for each coordinate i.
 *
 * @param {function(number[]): number} f - Scalar function of a vector.
 * @param {number[]} x - Point at which to evaluate the gradient (not mutated).
 * @param {number} [h=1e-5] - Step size used in each direction.
 * @returns {number[]} One partial-derivative estimate per entry of x.
 */
function numericalGradient(f, x, h = 1e-5) {
    // Copy of x with coordinate `axis` nudged by `delta`.
    const nudged = (axis, delta) => {
        const copy = [...x];
        copy[axis] += delta;
        return copy;
    };

    return Array.from({ length: x.length }, (_, axis) => {
        const forward = nudged(axis, h);
        const backward = nudged(axis, -h);
        return (f(forward) - f(backward)) / (2 * h);
    });
}

/**
 * Gradient-check demo: for the function chosen in #grad-function, compares a
 * hand-derived analytical gradient with the finite-difference estimate from
 * numericalGradient at a fixed point, and renders the evaluation point,
 * function value, per-component comparison and timing into #gradient-results.
 *
 * @returns {Promise<void>} Resolves once the results, or an error message,
 *     have been written to the page.
 */
async function computeGradient() {
    const selected = document.getElementById('grad-function').value;
    const resultsDiv = document.getElementById('gradient-results');
    
    resultsDiv.innerHTML = '<p>Computing gradients...</p>';
    
    try {
        let func;
        let point;
        let analyticalGrad;
        
        if (selected === 'quadratic') {
            // f(x,y) = x² + 2xy + y², so both partials equal 2x + 2y.
            func = ([x, y]) => x*x + 2*x*y + y*y;
            point = [3.0, 2.0];
            analyticalGrad = [2*point[0] + 2*point[1], 2*point[0] + 2*point[1]];
        } else if (selected === 'neural') {
            // Mean squared error of a toy linear model: weight i predicts
            // w_i * (i + 1) against the target 2 * (i + 1).
            func = (weights) => {
                let squaredErrors = 0;
                for (let i = 0; i < weights.length; i++) {
                    const scale = i + 1;
                    const prediction = weights[i] * scale;
                    const target = 2 * scale;
                    squaredErrors += Math.pow(prediction - target, 2);
                }
                return squaredErrors / weights.length;
            };
            point = [1.5, 1.8, 1.2, 1.9];
            analyticalGrad = point.map((w, i) => {
                const scale = i + 1;
                return 2 * (w * scale - 2 * scale) * scale / point.length;
            });
        } else if (selected === 'complex') {
            // f(x,y,z) = sin(x)cos(y) + e^z
            func = ([x, y, z]) => Math.sin(x) * Math.cos(y) + Math.exp(z);
            point = [Math.PI/4, Math.PI/3, 0.5];
            const [px, py, pz] = point;
            analyticalGrad = [
                Math.cos(px) * Math.cos(py),
                -Math.sin(px) * Math.sin(py),
                Math.exp(pz)
            ];
        }
        
        // Time only the finite-difference estimate.
        const startedAt = performance.now();
        const numericalGrad = numericalGradient(func, point);
        const time = performance.now() - startedAt;
        
        // Absolute difference per component, plus the worst case.
        const errors = analyticalGrad.map((exact, i) => Math.abs(exact - numericalGrad[i]));
        const maxError = Math.max(...errors);
        
        resultsDiv.innerHTML = `
            <h4>✅ Gradient Computed</h4>
            
            <h5>📍 Evaluation Point:</h5>
            <pre style="background: #f0f0f0; padding: 10px; border-radius: 5px;">
${point.map((v, i) => `x[${i}] = ${v.toFixed(4)}`).join('\n')}
            </pre>
            
            <h5>🎯 Function Value:</h5>
            <p><strong>f(x) = ${func(point).toFixed(6)}</strong></p>
            
            <h5>📊 Gradient Comparison:</h5>
            <table style="width: 100%; border-collapse: collapse;">
                <tr style="background: #f0f0f0;">
                    <th style="padding: 8px;">Component</th>
                    <th style="padding: 8px;">Analytical</th>
                    <th style="padding: 8px;">Numerical</th>
                    <th style="padding: 8px;">Error</th>
                </tr>
                ${analyticalGrad.map((a, i) => `
                    <tr>
                        <td style="padding: 8px;">∂f/∂x[${i}]</td>
                        <td style="padding: 8px;">${a.toFixed(6)}</td>
                        <td style="padding: 8px;">${numericalGrad[i].toFixed(6)}</td>
                        <td style="padding: 8px; color: ${errors[i] < 1e-4 ? 'green' : 'orange'};">
                            ${errors[i].toExponential(2)}
                        </td>
                    </tr>
                `).join('')}
            </table>
            
            <h5>⚡ Performance:</h5>
            <ul>
                <li><strong>Computation Time:</strong> ${time.toFixed(3)}ms</li>
                <li><strong>Max Error:</strong> ${maxError.toExponential(2)}</li>
                <li><strong>Accuracy:</strong> ${maxError < 1e-4 ? '✅ Excellent' : '⚠️ Good'}</li>
            </ul>
        `;
        
    } catch (error) {
        resultsDiv.innerHTML = '❌ Error: ' + error.message;
    }
}
</script>

## 4. Training Visualization
## 4. トレーニング可視化

In [None]:
%%html
<div id="training-viz">
    <h3>📉 Mini-Batch SGD Training Simulation</h3>
    
    <div style="margin: 20px 0;">
        <label>Learning Rate:</label>
        <input type="range" id="learning-rate" min="0.001" max="0.1" step="0.001" value="0.01">
        <span id="lr-value">0.01</span>
        
        <button onclick="startTraining()" style="margin-left: 20px;">Start Training</button>
        <button onclick="stopTraining()">Stop</button>
    </div>
    
    <canvas id="loss-chart" width="600" height="300" style="border: 1px solid #ddd;"></canvas>
    
    <div id="training-stats"></div>
</div>

<script>
// Shared state of the training simulation.
let lossHistory = [];          // recorded loss values, oldest first
let epoch = 0;                 // number of simulated steps so far
let trainingInterval = null;   // setInterval handle while running, else null

// Mirror the slider position into the label next to it.
document.getElementById('learning-rate').addEventListener('input', ({ target }) => {
    document.getElementById('lr-value').textContent = target.value;
});

/**
 * Redraws the loss curve on the #loss-chart canvas from lossHistory.
 *
 * The canvas is always cleared; axes, curve and labels are drawn only when
 * at least two loss values exist. The curve is scaled so the smallest
 * recorded loss sits on the x-axis and the largest 240px above it.
 */
function drawChart() {
    const canvas = document.getElementById('loss-chart');
    const ctx = canvas.getContext('2d');
    
    ctx.clearRect(0, 0, canvas.width, canvas.height);
    
    if (lossHistory.length < 2) return;
    
    // Draw axes. lineWidth is context state that survives between calls, so
    // reset it here; otherwise the width set for the curve below makes the
    // axes thicker on every redraw after the first.
    ctx.strokeStyle = '#333';
    ctx.lineWidth = 1;
    ctx.beginPath();
    ctx.moveTo(40, 10);
    ctx.lineTo(40, 260);
    ctx.lineTo(580, 260);
    ctx.stroke();
    
    // Draw loss curve
    ctx.strokeStyle = '#4CAF50';
    ctx.lineWidth = 2;
    ctx.beginPath();
    
    const maxLoss = Math.max(...lossHistory);
    const minLoss = Math.min(...lossHistory);
    // Fall back to 1 when all values are equal to avoid dividing by zero.
    const range = maxLoss - minLoss || 1;
    
    lossHistory.forEach((loss, i) => {
        const x = 40 + (i / (lossHistory.length - 1)) * 540;
        const y = 260 - ((loss - minLoss) / range) * 240;
        
        if (i === 0) {
            ctx.moveTo(x, y);
        } else {
            ctx.lineTo(x, y);
        }
    });
    
    ctx.stroke();
    
    // Draw labels
    ctx.fillStyle = '#333';
    ctx.font = '12px Arial';
    ctx.fillText('Loss', 5, 135);
    ctx.fillText('Epoch', 300, 280);
    ctx.fillText(minLoss.toFixed(3), 5, 260);
    ctx.fillText(maxLoss.toFixed(3), 5, 20);
}

/**
 * Advances the simulated training by one step: generates a noisy,
 * exponentially decaying loss, records it (keeping at most the 100 most
 * recent values), redraws the chart and refreshes #training-stats.
 * Calls stopTraining once the loss drops below 0.02.
 *
 * The learning-rate slider value is only displayed; the simulated loss
 * depends on the step count and random noise alone.
 */
function simulateTrainingStep() {
    const lr = parseFloat(document.getElementById('learning-rate').value);
    
    // Decaying base loss plus uniform noise in [-0.05, 0.05), floored at 0.01.
    const decayed = Math.exp(-0.05 * epoch);
    const jitter = (Math.random() - 0.5) * 0.1;
    const loss = Math.max(0.01, decayed + jitter);
    
    // Sliding window over the most recent 100 values.
    lossHistory.push(loss);
    if (lossHistory.length > 100) {
        lossHistory.shift();
    }
    
    epoch += 1;
    
    drawChart();
    
    const statsDiv = document.getElementById('training-stats');
    let lossTotal = 0;
    lossHistory.forEach((value) => {
        lossTotal += value;
    });
    const avgLoss = lossTotal / lossHistory.length;
    const convergenceLabel = loss < 0.05 ? '✅ Achieved' : '⏳ In Progress';
    
    statsDiv.innerHTML = `
        <h5>📊 Training Statistics:</h5>
        <ul>
            <li><strong>Epoch:</strong> ${epoch}</li>
            <li><strong>Current Loss:</strong> ${loss.toFixed(4)}</li>
            <li><strong>Average Loss:</strong> ${avgLoss.toFixed(4)}</li>
            <li><strong>Learning Rate:</strong> ${lr}</li>
            <li><strong>Convergence:</strong> ${convergenceLabel}</li>
        </ul>
    `;
    
    // Nothing more to do until the loss is low enough to stop.
    if (!(loss < 0.02)) return;
    
    stopTraining();
    statsDiv.innerHTML += '<p style="color: green; font-weight: bold;">✅ Training Converged!</p>';
}

/**
 * Starts the simulated training loop from scratch: resets the step counter
 * and loss history, then schedules simulateTrainingStep every 100 ms.
 * Does nothing if a loop is already running.
 */
function startTraining() {
    const alreadyRunning = Boolean(trainingInterval);
    if (!alreadyRunning) {
        lossHistory = [];
        epoch = 0;
        trainingInterval = setInterval(simulateTrainingStep, 100);
    }
}

/**
 * Stops the simulated training loop if one is running and clears the stored
 * timer handle so that training can be started again.
 */
function stopTraining() {
    if (!trainingInterval) return;

    clearInterval(trainingInterval);
    trainingInterval = null;
}
</script>

## Summary
## まとめ

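This notebook demonstrates machine learning operations targeted for WebGPU acceleration, currently implemented as plain-JavaScript reference code, including:

このノートブックでは、WebGPUによる高速化の対象となる機械学習演算（現時点ではプレーンなJavaScriptによるリファレンス実装）として、以下をデモンストレーションします：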

1. **Neural Network Operations / ニューラルネットワーク演算**
   - Forward pass computation / 順伝播計算
   - Activation functions / 活性化関数
   - Batch processing / バッチ処理

2. **Convolution Operations / 畳み込み演算**
   - 2D convolutions / 2D畳み込み
   - Multiple filters / 複数フィルタ
   - Performance metrics / パフォーマンスメトリクス

3. **Gradient Computation / 勾配計算**
   - Numerical differentiation / 数値微分
   - Automatic differentiation concepts / 自動微分の概念
   - Error analysis / 誤差分析

4. **Training Visualization / トレーニング可視化**
   - Loss curve tracking / 損失曲線の追跡
   - Real-time updates / リアルタイム更新
   - Convergence monitoring / 収束監視

### Next Steps / 次のステップ

- Implement actual WebGPU compute shaders / 実際のWebGPU計算シェーダーの実装
- Add support for larger models / より大きなモデルのサポートを追加
- Optimize memory transfers / メモリ転送の最適化
- Benchmark against native implementations / ネイティブ実装とのベンチマーク