# In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere beneath /kaggle/input.
for root, _, names in os.walk('/kaggle/input'):
    for name in names:
        full_path = os.path.join(root, name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# In [None]:
import cupy as cp
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

# Activation functions (CuPy-compatible)
def sigmoid(x):
    """Return the element-wise logistic sigmoid ``1 / (1 + exp(-x))``.

    ``x`` is clipped to ``[-500, 500]`` first, so the value handed to
    ``exp`` is always bounded regardless of how large the input is.
    """
    bounded = cp.clip(x, -500, 500)
    denominator = 1 + cp.exp(-bounded)
    return 1 / denominator

def sigmoid_derivative(x):
    """Return ``x * (1 - x)``.

    This is the derivative of the sigmoid expressed in terms of the
    sigmoid's *output*: pass the activation value, not the pre-activation.
    """
    complement = 1 - x
    return x * complement

def softmax(x):
    """Return the softmax of ``x`` computed along axis 1.

    The maximum along axis 1 is subtracted before exponentiating; this does
    not change the result but keeps the exponentials from growing large.
    """
    row_max = cp.max(x, axis=1, keepdims=True)
    shifted_exp = cp.exp(x - row_max)
    normalizer = cp.sum(shifted_exp, axis=1, keepdims=True)
    return shifted_exp / normalizer

# Cross-entropy loss (CuPy-compatible)
def cross_entropy_loss(y_true, y_pred):
    """Return the cross-entropy between ``y_true`` and ``y_pred``.

    The element-wise products ``y_true * log(y_pred + 1e-9)`` are summed over
    the whole array, negated, and divided by ``y_true.shape[0]``. The
    ``1e-9`` offset keeps ``log`` away from zero.
    """
    n_samples = y_true.shape[0]
    log_probs = cp.log(y_pred + 1e-9)
    total = cp.sum(y_true * log_probs)
    return -total / n_samples

# Neural Network class (GPU-accelerated with CuPy)
class NeuralNetwork:
    """Fully connected classifier trained with mini-batch gradient descent.

    Every layer except the last applies ``sigmoid``; the last layer applies
    ``softmax``. Parameters are allocated through ``cp``.

    Attributes:
        layers: Sequence of layer sizes, input size first, output size last.
        weights: ``weights[i]`` has shape ``(layers[i + 1], layers[i])``.
        biases: ``biases[i]`` has shape ``(layers[i + 1], 1)``.
        activations: Set by ``forward``; per-layer outputs stored as
            ``(units, batch)`` arrays, starting with the transposed input.
        z_values: Set by ``forward``; per-layer pre-activation values.
    """

    def __init__(self, layers):
        """Store the layer sizes and create the initial parameters."""
        self.layers = layers
        self.weights = []
        self.biases = []
        self.initialize_parameters()

    def initialize_parameters(self):
        """Create fresh weights and biases for every layer transition.

        Weights are drawn from a standard normal scaled by 0.01 and biases
        start at zero. Any existing parameters are discarded first, so the
        method can be called again to reset the network without the
        parameter lists growing.
        """
        self.weights = []
        self.biases = []
        for fan_in, fan_out in zip(self.layers[:-1], self.layers[1:]):
            self.weights.append(cp.random.randn(fan_out, fan_in) * 0.01)
            self.biases.append(cp.zeros((fan_out, 1)))

    def forward(self, X):
        """Run a forward pass and cache the intermediate values.

        Args:
            X: Input array with one sample per row.

        Returns:
            The output-layer activations with one sample per row.
        """
        # Internally everything is kept as (units, batch), hence the transpose.
        self.activations = [X.T]
        self.z_values = []
        last = len(self.weights) - 1
        for i, (weight, bias) in enumerate(zip(self.weights, self.biases)):
            z = cp.dot(weight, self.activations[-1]) + bias
            self.z_values.append(z)
            if i < last:
                a = sigmoid(z)
            else:
                # softmax normalises along axis 1, so transpose in and out.
                a = softmax(z.T).T
            self.activations.append(a)
        return self.activations[-1].T

    def backward(self, X, y, learning_rate):
        """Update the parameters in place with one gradient-descent step.

        Uses the activations cached by the most recent ``forward`` call, so
        ``forward`` must have been run on the same batch beforehand.

        Args:
            X: The batch that was fed to ``forward``; only its row count is
                used, to average the gradients.
            y: Target array with one sample per row, matching the output
                layer's width.
            learning_rate: Step size applied to every gradient.
        """
        m = X.shape[0]
        # Output-layer error term: activations minus targets.
        delta = self.activations[-1] - y.T
        for i in reversed(range(len(self.weights))):
            dw = cp.dot(delta, self.activations[i].T) / m
            db = cp.sum(delta, axis=1, keepdims=True) / m
            if i > 0:
                # Propagate the error through the *pre-update* weights.
                delta = cp.dot(self.weights[i].T, delta) * sigmoid_derivative(self.activations[i])
            self.weights[i] -= learning_rate * dw
            self.biases[i] -= learning_rate * db

    def train(self, X, y, epochs, learning_rate, batch_size, log_every=100):
        """Train on ``(X, y)`` and return the loss recorded after each epoch.

        Batches are taken in order as consecutive slices of ``X`` and ``y``.
        After every epoch the loss over the full ``X`` is computed and stored.

        Args:
            X: Training inputs, one sample per row.
            y: Training targets, one sample per row.
            epochs: Number of passes over the data.
            learning_rate: Step size passed to ``backward``.
            batch_size: Number of rows per batch; must be positive.
            log_every: Print the loss every this many epochs. A falsy value
                (``0`` or ``None``) disables printing.

        Returns:
            A list of Python floats, one loss value per epoch.

        Raises:
            ValueError: If ``batch_size`` is not positive.
        """
        if batch_size <= 0:
            raise ValueError("batch_size must be a positive integer")

        losses = []
        n_samples = X.shape[0]
        for epoch in range(epochs):
            for start in range(0, n_samples, batch_size):
                stop = start + batch_size
                X_batch = X[start:stop]
                y_batch = y[start:stop]
                # forward() is called for its cached activations only.
                self.forward(X_batch)
                self.backward(X_batch, y_batch, learning_rate)
            # Convert to a Python float once and reuse it for both the
            # history and the log line.
            loss_value = float(cross_entropy_loss(y, self.forward(X)))
            losses.append(loss_value)
            if log_every and epoch % log_every == 0:
                print(f"Epoch {epoch}, Loss: {loss_value:.4f}")
        return losses

# Load MNIST data from Kaggle dataset
train_df = pd.read_csv('/kaggle/input/digit-recognizer/train.csv')
test_df = pd.read_csv('/kaggle/input/digit-recognizer/test.csv')

# Prepare training data
X = train_df.drop('label', axis=1).values / 255.0  # Normalize pixel values
y = train_df['label'].values

# One-hot encode labels. The encoder's default sparse result is densified
# explicitly instead of passing `sparse=False`: that keyword was renamed to
# `sparse_output` and later removed, so this form works on any version.
encoder = OneHotEncoder()
y = encoder.fit_transform(y.reshape(-1, 1)).toarray()

# Split into training and validation sets while the data is still on the
# host; train_test_split works on NumPy arrays, so splitting first avoids
# copying the full dataset to the GPU and back.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert data to CuPy arrays for GPU
X = cp.array(X)
y = cp.array(y)
X_train, X_val, y_train, y_val = cp.array(X_train), cp.array(X_val), cp.array(y_train), cp.array(y_val)

# Initialize and train neural network
nn = NeuralNetwork([784, 128, 10])  # 784 input, 128 hidden, 10 output
losses = nn.train(X_train, y_train, epochs=2500, learning_rate=0.1, batch_size=256)

# Evaluate on validation set
y_pred = nn.forward(X_val)
val_accuracy = cp.mean(cp.argmax(y_pred, axis=1) == cp.argmax(y_val, axis=1))
print(f"Validation Accuracy: {float(val_accuracy):.4f}")

# Generate predictions for test set (Kaggle submission)
X_test = cp.array(test_df.values / 255.0)
y_test_pred = nn.forward(X_test)
y_test_labels = cp.argmax(y_test_pred, axis=1).get()  # Convert to NumPy for submission

# Create submission file. ImageId is a plain 1-based counter, so it is built
# on the host rather than on the GPU.
submission = pd.DataFrame({'ImageId': range(1, len(y_test_labels) + 1), 'Label': y_test_labels})
submission.to_csv('/kaggle/working/submission.csv', index=False)
print("Submission file created: /kaggle/working/submission.csv")