In [2]:
import zipfile
import pandas as pd
import os
from google.colab import drive

# Step 1: Mount Google Drive
drive.mount('/content/drive')

# Step 2: Define path to ZIP file in your Google Drive
zip_path = '/content/drive/My Drive/sudoku.csv.zip'
extract_path = '/content/sudoku_data'  # where we'll extract the zip

# Step 3: Unzip the file
os.makedirs(extract_path, exist_ok=True)
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

# Step 4: Find and read the extracted CSV
csv_file_path = os.path.join(extract_path, 'sudoku.csv')
sudoku_games = pd.read_csv(csv_file_path)

# Step 5: Display dataset info
print(f"\n✅ Dataset Info:")
print(f"Shape: {sudoku_games.shape}")
print(f"Columns: {sudoku_games.columns.tolist()}")
print(f"Memory usage: {sudoku_games.memory_usage(deep=True).sum() / 1024**2:.2f} MB")

# Step 6: Show first few rows
print(f"\n🧩 First 5 rows:")
sudoku_games.head()

Mounted at /content/drive

✅ Dataset Info:
Shape: (9000000, 2)
Columns: ['puzzle', 'solution']
Memory usage: 2368.93 MB

🧩 First 5 rows:


Unnamed: 0,puzzle,solution
0,0700000430400096108006349000940520003584600200...,6795182435437296188216349577943521863584617292...
1,3010865040465210705000000014008000020803479000...,3719865248465213795924738614638197522853479167...
2,0483015603600080909106700030200009355090102006...,7483915623652487919126754834217869355894132766...
3,0083170000042051090000400703271609049014500000...,2983176457642851391539462783271689549814537266...
4,0408906300001368208007405190004670524500207002...,1428956379751368248367425193984671524513287962...


You want:
python# Sparse: digit 2 = [0,2,0,0,0,0,0,0,0,0]  # Put 2 at index 2
# Sparse: digit 4 = [0,0,0,0,4,0,0,0,0,0]  # Put 4 at index 4

for one msising element in one row


In [3]:
import numpy as np
from sklearn.model_selection import train_test_split

# Step 7: Process solutions into row dataset (limit to ~11k puzzles for 100k rows)
def create_row_dataset(sudoku_df, max_samples=100000):
    X, y = [], []
    samples_needed = max_samples // 9  # Each puzzle gives 9 rows

    for i, solution in enumerate(sudoku_df['solution']):
        if i >= samples_needed:
            break

        # Process each 9x9 grid row by row
        for row_start in range(0, 81, 9):
            row = [int(d) for d in solution[row_start:row_start+9]]
            input_row = [0] + row[1:]
            target = row[0]

            X.append(input_row)
            y.append(target)

    return np.array(X), np.array(y)

# Create dataset (100k samples)
X, y = create_row_dataset(sudoku_games, 100000)

# Convert to sparse encoding
X_sparse = np.zeros((len(X), 9, 10))
y_onehot = np.zeros((len(y), 9))

for i, row in enumerate(X):
    for j, num in enumerate(row):
        X_sparse[i, j, num] = num  # Put the digit value at its index position
    y_onehot[i, y[i]-1] = 1

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X_sparse, y_onehot, test_size=0.2, random_state=42
)

print(f"✅ Dataset created: {len(X)} samples")

✅ Dataset created: 99999 samples


In [4]:
import tensorflow as tf
from tensorflow.keras import layers, models

# Simple Neural Network Model
model = models.Sequential([
   layers.Flatten(input_shape=(9, 10)),  # Flatten 9x10 to 90
   layers.Dense(128, activation='relu'),
   layers.Dense(64, activation='relu'),
   layers.Dense(9, activation='softmax')  # Output 9 classes (1-9)
])

# Compile
model.compile(
   optimizer='adam',
   loss='categorical_crossentropy',
   metrics=['accuracy']
)

# Train
history = model.fit(
   X_train, y_train,
   epochs=10,
   batch_size=32,
   validation_data=(X_test, y_test),
   verbose=1
)

# Evaluate
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
print(f"\n✅ Test Accuracy: {test_acc:.4f}")

  super().__init__(**kwargs)


Epoch 1/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.8072 - loss: 0.5893 - val_accuracy: 1.0000 - val_loss: 4.6889e-04
Epoch 2/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 1.0000 - loss: 2.9193e-04 - val_accuracy: 1.0000 - val_loss: 6.3196e-05
Epoch 3/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 3ms/step - accuracy: 1.0000 - loss: 4.4526e-05 - val_accuracy: 1.0000 - val_loss: 1.3116e-05
Epoch 4/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 1.0000 - loss: 9.3824e-06 - val_accuracy: 1.0000 - val_loss: 3.0033e-06
Epoch 5/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 3ms/step - accuracy: 1.0000 - loss: 2.1670e-06 - val_accuracy: 1.0000 - val_loss: 7.1964e-07
Epoch 6/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 1.0000 - loss: 5.2091e-07 - val_accuracy:

In [5]:
# Test Block - See how model performs
import random

def test_model_predictions(model, X_test, y_test, num_samples=10):
   print("🧪 Testing Model Predictions:\n")

   # Get random test samples
   indices = random.sample(range(len(X_test)), num_samples)

   for i, idx in enumerate(indices):
       # Get input and true answer
       input_row = X_test[idx]
       true_answer = np.argmax(y_test[idx]) + 1  # Convert back to 1-9

       # Convert one-hot back to numbers for display
       display_row = []
       for pos in input_row:
           display_row.append(np.argmax(pos))

       # Make prediction
       prediction = model.predict(input_row.reshape(1, 9, 10), verbose=0)
       predicted_answer = np.argmax(prediction) + 1
       confidence = np.max(prediction) * 100

       # Display result
       print(f"Test {i+1}:")
       print(f"Input:     {display_row}")
       print(f"True:      {true_answer}")
       print(f"Predicted: {predicted_answer} (confidence: {confidence:.1f}%)")
       print(f"{'✅ CORRECT' if predicted_answer == true_answer else '❌ WRONG'}")
       print("-" * 40)

# Run test
test_model_predictions(model, X_test, y_test)

# Overall accuracy
predictions = model.predict(X_test, verbose=0)
predicted_classes = np.argmax(predictions, axis=1) + 1
true_classes = np.argmax(y_test, axis=1) + 1
accuracy = np.mean(predicted_classes == true_classes)
print(f"\n🎯 Overall Test Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)")

🧪 Testing Model Predictions:

Test 1:
Input:     [np.int64(0), np.int64(9), np.int64(8), np.int64(6), np.int64(1), np.int64(3), np.int64(2), np.int64(5), np.int64(7)]
True:      4
Predicted: 4 (confidence: 100.0%)
✅ CORRECT
----------------------------------------
Test 2:
Input:     [np.int64(0), np.int64(1), np.int64(2), np.int64(8), np.int64(3), np.int64(4), np.int64(7), np.int64(9), np.int64(5)]
True:      6
Predicted: 6 (confidence: 100.0%)
✅ CORRECT
----------------------------------------
Test 3:
Input:     [np.int64(0), np.int64(5), np.int64(3), np.int64(2), np.int64(9), np.int64(7), np.int64(6), np.int64(4), np.int64(8)]
True:      1
Predicted: 1 (confidence: 100.0%)
✅ CORRECT
----------------------------------------
Test 4:
Input:     [np.int64(0), np.int64(8), np.int64(9), np.int64(1), np.int64(4), np.int64(7), np.int64(5), np.int64(2), np.int64(3)]
True:      6
Predicted: 6 (confidence: 100.0%)
✅ CORRECT
----------------------------------------
Test 5:
Input:     [np.int64(0

for two missing number in 2 rows


In [6]:
# Step 8: Process data for 2 missing numbers per row
def create_2_missing_row_dataset(sudoku_df, max_samples=100000):
    X, y = [], []
    samples_needed = max_samples // 9

    for i, solution in enumerate(sudoku_df['solution']):
        if i >= samples_needed:
            break

        for row_start in range(0, 81, 9):
            row = [int(d) for d in solution[row_start:row_start+9]]

            # Randomly choose 2 positions to make empty
            empty_positions = random.sample(range(9), 2)
            missing_numbers = [row[pos] for pos in empty_positions]

            # Create input with 2 zeros
            input_row = row.copy()
            for pos in empty_positions:
                input_row[pos] = 0

            X.append(input_row)
            y.append(missing_numbers)  # Now we have 2 targets

    return np.array(X), np.array(y)

# Create new dataset
X_2missing, y_2missing = create_2_missing_row_dataset(sudoku_games, 100000)

# Convert to sparse encoding
X_2missing_sparse = np.zeros((len(X_2missing), 9, 10))
y_2missing_onehot = np.zeros((len(y_2missing), 2, 9))  # 2 outputs, each 9 classes

for i, row in enumerate(X_2missing):
    for j, num in enumerate(row):
        X_2missing_sparse[i, j, num] = num  # Put digit value at its index position

    # Two targets
    for k, target in enumerate(y_2missing[i]):
        y_2missing_onehot[i, k, target-1] = 1

# Reshape y for model (flatten 2x9 to 18)
y_2missing_flat = y_2missing_onehot.reshape(len(y_2missing_onehot), 18)

# Train/test split
X_train_2, X_test_2, y_train_2, y_test_2 = train_test_split(
    X_2missing_sparse, y_2missing_flat, test_size=0.2, random_state=42
)

print(f"✅ 2-Missing Dataset: {len(X_2missing)} samples")
print(f"Input shape: {X_train_2.shape}")
print(f"Output shape: {y_train_2.shape}")  # Should be (samples, 18)

✅ 2-Missing Dataset: 99999 samples
Input shape: (79999, 9, 10)
Output shape: (79999, 18)


In [7]:
# New Model for 2 Missing Numbers
model_2missing = models.Sequential([
   layers.Input(shape=(9, 10)),
   layers.Flatten(),
   layers.Dense(256, activation='relu'),  # Bigger since more complex
   layers.Dropout(0.3),
   layers.Dense(128, activation='relu'),
   layers.Dropout(0.2),
   layers.Dense(64, activation='relu'),
   layers.Dense(18, activation='sigmoid')  # 18 outputs (2 numbers × 9 classes each)
])

# Compile with different loss for multi-output
model_2missing.compile(
   optimizer='adam',
   loss='binary_crossentropy',  # Better for multi-label
   metrics=['accuracy']
)

# Train
early_stop = tf.keras.callbacks.EarlyStopping(
   monitor='val_accuracy', patience=5, restore_best_weights=True
)

history_2 = model_2missing.fit(
   X_train_2, y_train_2,
   epochs=25,
   batch_size=64,
   validation_data=(X_test_2, y_test_2),
   callbacks=[early_stop],
   verbose=1
)

# Evaluate
test_loss, test_acc = model_2missing.evaluate(X_test_2, y_test_2, verbose=0)
print(f"\n✅ Test Accuracy: {test_acc:.4f}")

Epoch 1/25
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 6ms/step - accuracy: 0.1724 - loss: 0.2844 - val_accuracy: 0.2526 - val_loss: 0.1582
Epoch 2/25
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.2472 - loss: 0.1613 - val_accuracy: 0.2328 - val_loss: 0.1564
Epoch 3/25
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.2458 - loss: 0.1574 - val_accuracy: 0.2299 - val_loss: 0.1553
Epoch 4/25
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.2536 - loss: 0.1563 - val_accuracy: 0.2251 - val_loss: 0.1548
Epoch 5/25
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.2516 - loss: 0.1558 - val_accuracy: 0.2586 - val_loss: 0.1550
Epoch 6/25
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.2536 - loss: 0.1554 - val_accuracy: 0.2489 - val_loss: 0.1549
Epoch 7/25
[1m

In [10]:
# Test Block for 2 Missing Numbers
def test_2missing_predictions(model, X_test, y_test, num_samples=10):
   print("🧪 Testing 2-Missing Model Predictions:\n")

   indices = random.sample(range(len(X_test)), num_samples)

   for i, idx in enumerate(indices):
       # Get input
       input_row = X_test[idx]

       # Convert one-hot back to numbers for display
       display_row = []
       empty_positions = []
       for pos, one_hot in enumerate(input_row):
           num = np.argmax(one_hot)
           display_row.append(num)
           if num == 0:
               empty_positions.append(pos)

       # Get true answers (reshape from flat 18 back to 2x9)
       true_answers_onehot = y_test[idx].reshape(2, 9)
       true_answers = [np.argmax(true_answers_onehot[0]) + 1,
                      np.argmax(true_answers_onehot[1]) + 1]

       # Make prediction
       prediction = model.predict(input_row.reshape(1, 9, 10), verbose=0)
       pred_reshaped = prediction.reshape(2, 9)

       predicted_answers = [np.argmax(pred_reshaped[0]) + 1,
                          np.argmax(pred_reshaped[1]) + 1]
       confidences = [np.max(pred_reshaped[0]) * 100,
                     np.max(pred_reshaped[1]) * 100]

       # Check if correct
       correct = set(predicted_answers) == set(true_answers)

       print(f"Test {i+1}:")
       print(f"Input:      {display_row}")
       print(f"Empty at:   positions {empty_positions}")
       print(f"True:       {sorted(true_answers)}")
       print(f"Predicted:  {sorted(predicted_answers)} (conf: {confidences[0]:.1f}%, {confidences[1]:.1f}%)")
       print(f"{'✅ CORRECT' if correct else '❌ WRONG'}")
       print("-" * 50)

# Run test
test_2missing_predictions(model_2missing, X_test_2, y_test_2)

# Overall accuracy (both numbers must be correct)
predictions = model_2missing.predict(X_test_2, verbose=0)
pred_reshaped = predictions.reshape(-1, 2, 9)
true_reshaped = y_test_2.reshape(-1, 2, 9)

correct_count = 0
for i in range(len(pred_reshaped)):
   pred_nums = [np.argmax(pred_reshaped[i][0]) + 1, np.argmax(pred_reshaped[i][1]) + 1]
   true_nums = [np.argmax(true_reshaped[i][0]) + 1, np.argmax(true_reshaped[i][1]) + 1]
   if set(pred_nums) == set(true_nums):
       correct_count += 1

accuracy = correct_count / len(pred_reshaped)
print(f"\n🎯 Overall Test Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)")

🧪 Testing 2-Missing Model Predictions:

Test 1:
Input:      [np.int64(7), np.int64(0), np.int64(2), np.int64(5), np.int64(8), np.int64(6), np.int64(0), np.int64(3), np.int64(9)]
Empty at:   positions [1, 6]
True:       [np.int64(1), np.int64(4)]
Predicted:  [np.int64(1), np.int64(4)] (conf: 56.6%, 49.2%)
✅ CORRECT
--------------------------------------------------
Test 2:
Input:      [np.int64(9), np.int64(3), np.int64(6), np.int64(0), np.int64(0), np.int64(2), np.int64(8), np.int64(4), np.int64(7)]
Empty at:   positions [3, 4]
True:       [np.int64(1), np.int64(5)]
Predicted:  [np.int64(1), np.int64(5)] (conf: 50.8%, 51.0%)
✅ CORRECT
--------------------------------------------------
Test 3:
Input:      [np.int64(7), np.int64(1), np.int64(2), np.int64(6), np.int64(9), np.int64(0), np.int64(8), np.int64(0), np.int64(5)]
Empty at:   positions [5, 7]
True:       [np.int64(3), np.int64(4)]
Predicted:  [np.int64(3), np.int64(4)] (conf: 55.6%, 46.1%)
✅ CORRECT
------------------------------

✅ Goal
For each sample:

Input = 2 Sudoku rows (each with 1 digit missing, i.e., [0, d2, ..., d9])

Target = the missing digit of the first row

In [11]:
import numpy as np
import random
from sklearn.model_selection import train_test_split

def create_two_row_dataset(sudoku_df, max_samples=100000):
    X, y = [], []
    samples_needed = max_samples // 36  # Each puzzle generates 36 samples (9 rows × 4 pairs each)

    for i, solution in enumerate(sudoku_df['solution']):
        if i >= samples_needed:
            break

        # Convert solution to 9x9 grid
        rows = []
        for row_start in range(0, 81, 9):
            row = [int(d) for d in solution[row_start:row_start+9]]
            rows.append(row)

        # Create pairs from consecutive rows
        for j in range(8):  # rows 0-1, 1-2, ..., 7-8
            row1 = rows[j].copy()
            row2 = rows[j + 1].copy()

            # Generate multiple samples per row pair
            for _ in range(4):  # 4 samples per pair
                # Hide random positions
                pos1 = random.randint(0, 8)
                pos2 = random.randint(0, 8)

                # Store original values
                missing1 = row1[pos1]
                missing2 = row2[pos2]

                # Create input with hidden digits
                input_row1 = row1.copy()
                input_row2 = row2.copy()
                input_row1[pos1] = 0
                input_row2[pos2] = 0

                # Combine both rows (18 elements)
                combined_input = input_row1 + input_row2

                # Target: both missing digits
                target = [missing1, missing2]

                X.append(combined_input)
                y.append(target)

    return np.array(X), np.array(y)

# Create dataset
print("🔄 Creating two-row dataset...")
X, y = create_two_row_dataset(sudoku_games, max_samples=50000)
print(f"Dataset shape: X={X.shape}, y={y.shape}")

# Sparse encode inputs (18 positions, 10 classes each: 0-9)
X_sparse = np.zeros((len(X), 18, 10))
for i, row in enumerate(X):
    for j, num in enumerate(row):
        X_sparse[i, j, num] = num  # Put digit value at its index position

# One-hot encode targets (2 digits, 9 classes each: 1-9)
y_onehot = np.zeros((len(y), 2, 9))
for i, targets in enumerate(y):
    y_onehot[i, 0, targets[0] - 1] = 1  # First missing digit (1-9 → 0-8)
    y_onehot[i, 1, targets[1] - 1] = 1  # Second missing digit (1-9 → 0-8)

# Reshape y for model training (flatten to 18 outputs)
y_flat = y_onehot.reshape(len(y_onehot), 18)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X_sparse, y_flat, test_size=0.2, random_state=42
)

print(f"✅ Training data: X={X_train.shape}, y={y_train.shape}")
print(f"✅ Test data: X={X_test.shape}, y={y_test.shape}")
print(f"Sample input shape: {X_train[0].shape}")
print(f"Sample target shape: {y_train[0].shape}")

🔄 Creating two-row dataset...
Dataset shape: X=(44416, 18), y=(44416, 2)
✅ Training data: X=(35532, 18, 10), y=(35532, 18)
✅ Test data: X=(8884, 18, 10), y=(8884, 18)
Sample input shape: (18, 10)
Sample target shape: (18,)


In [12]:
import tensorflow as tf
from tensorflow.keras import layers, models

# Neural Network for Two Sudoku Rows (predicts 2 missing digits)
model = models.Sequential([
    layers.Input(shape=(18, 10)),           # Input: 2 rows × 9 digits (each one-hot of size 10)
    layers.Flatten(),                       # Flatten 18x10 = 180 features
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(18, activation='sigmoid')  # Output: 18 values (2 rows × 9 digits each)
])

# Compile the model
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',  # Changed from categorical_crossentropy
    metrics=['accuracy']
)

# Early stopping to prevent overfitting
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    patience=3,
    restore_best_weights=True
)

# Train the model
print("🚀 Training model...")
history = model.fit(
    X_train, y_train,
    epochs=20,
    batch_size=64,
    validation_data=(X_test, y_test),
    callbacks=[early_stop],
    verbose=1
)

# Evaluate model performance
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
print(f"\n✅ Test Accuracy: {test_acc:.4f}")

print("\n📊 Model Summary:")
model.summary()

🚀 Training model...
Epoch 1/20
[1m556/556[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 7ms/step - accuracy: 0.1473 - loss: 0.3628 - val_accuracy: 0.4844 - val_loss: 0.0897
Epoch 2/20
[1m556/556[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - accuracy: 0.4774 - loss: 0.0897 - val_accuracy: 0.4518 - val_loss: 0.0068
Epoch 3/20
[1m556/556[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.4808 - loss: 0.0153 - val_accuracy: 0.4499 - val_loss: 2.7556e-04
Epoch 4/20
[1m556/556[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.4853 - loss: 0.0043 - val_accuracy: 0.3913 - val_loss: 5.5838e-05

✅ Test Accuracy: 0.4844

📊 Model Summary:


In [13]:
# Test Block - See dual predictions
import random
import numpy as np

def test_dual_predictions(model, X_test, y_test, num_samples=10):
    print("🧪 Testing Dual Prediction Model:\n")

    indices = random.sample(range(len(X_test)), num_samples)

    for i, idx in enumerate(indices):
        # Get input and true answers
        input_rows = X_test[idx]  # Shape: (18, 10)
        true_targets = y_test[idx]  # Shape: (18,) - flattened

        # Reshape true targets back to (2, 9)
        true_reshaped = true_targets.reshape(2, 9)
        true_digit1 = np.argmax(true_reshaped[0]) + 1  # First row missing digit
        true_digit2 = np.argmax(true_reshaped[1]) + 1  # Second row missing digit

        # Convert input back to display format
        row1 = [np.argmax(input_rows[j]) for j in range(9)]
        row2 = [np.argmax(input_rows[j]) for j in range(9, 18)]

        # Find missing positions (where digit is 0)
        missing_pos1 = row1.index(0) if 0 in row1 else -1
        missing_pos2 = row2.index(0) if 0 in row2 else -1

        # Make prediction
        prediction = model.predict(input_rows.reshape(1, 18, 10), verbose=0)
        pred_reshaped = prediction.reshape(2, 9)

        # Get predicted digits
        pred_digit1 = np.argmax(pred_reshaped[0]) + 1
        pred_digit2 = np.argmax(pred_reshaped[1]) + 1

        # Get confidence scores
        conf1 = np.max(pred_reshaped[0]) * 100
        conf2 = np.max(pred_reshaped[1]) * 100

        # Display results
        print(f"Test {i+1}:")
        print(f"Row 1:     {row1} (missing pos {missing_pos1})")
        print(f"Row 2:     {row2} (missing pos {missing_pos2})")
        print(f"True:      [{true_digit1}, {true_digit2}]")
        print(f"Predicted: [{pred_digit1}, {pred_digit2}]")
        print(f"Confidence: [{conf1:.1f}%, {conf2:.1f}%]")

        # Check correctness
        correct1 = pred_digit1 == true_digit1
        correct2 = pred_digit2 == true_digit2
        both_correct = correct1 and correct2

        print(f"Row 1: {'✅' if correct1 else '❌'} | Row 2: {'✅' if correct2 else '❌'} | Both: {'✅' if both_correct else '❌'}")
        print("-" * 50)

# Run test
test_dual_predictions(model, X_test, y_test)

# Overall accuracy for dual predictions
print("\n🎯 Overall Accuracy Analysis:")
predictions = model.predict(X_test, verbose=0)
pred_reshaped = predictions.reshape(-1, 2, 9)
true_reshaped = y_test.reshape(-1, 2, 9)

# Individual row accuracies
row1_pred = np.argmax(pred_reshaped[:, 0, :], axis=1) + 1
row1_true = np.argmax(true_reshaped[:, 0, :], axis=1) + 1
row1_acc = np.mean(row1_pred == row1_true)

row2_pred = np.argmax(pred_reshaped[:, 1, :], axis=1) + 1
row2_true = np.argmax(true_reshaped[:, 1, :], axis=1) + 1
row2_acc = np.mean(row2_pred == row2_true)

# Both correct accuracy
both_correct = (row1_pred == row1_true) & (row2_pred == row2_true)
both_acc = np.mean(both_correct)

print(f"Row 1 Accuracy: {row1_acc:.4f} ({row1_acc*100:.2f}%)")
print(f"Row 2 Accuracy: {row2_acc:.4f} ({row2_acc*100:.2f}%)")
print(f"Both Correct:   {both_acc:.4f} ({both_acc*100:.2f}%)")
print(f"Average:        {(row1_acc + row2_acc)/2:.4f} ({(row1_acc + row2_acc)*50:.2f}%)")

🧪 Testing Dual Prediction Model:

Test 1:
Row 1:     [np.int64(3), np.int64(2), np.int64(1), np.int64(0), np.int64(8), np.int64(4), np.int64(6), np.int64(5), np.int64(7)] (missing pos 3)
Row 2:     [np.int64(2), np.int64(3), np.int64(9), np.int64(6), np.int64(5), np.int64(1), np.int64(4), np.int64(0), np.int64(8)] (missing pos 7)
True:      [9, 7]
Predicted: [9, 7]
Confidence: [99.5%, 98.5%]
Row 1: ✅ | Row 2: ✅ | Both: ✅
--------------------------------------------------
Test 2:
Row 1:     [np.int64(3), np.int64(2), np.int64(4), np.int64(5), np.int64(0), np.int64(8), np.int64(7), np.int64(1), np.int64(9)] (missing pos 4)
Row 2:     [np.int64(6), np.int64(1), np.int64(7), np.int64(0), np.int64(3), np.int64(5), np.int64(2), np.int64(8), np.int64(4)] (missing pos 3)
True:      [6, 9]
Predicted: [6, 9]
Confidence: [94.1%, 99.5%]
Row 1: ✅ | Row 2: ✅ | Both: ✅
--------------------------------------------------
Test 3:
Row 1:     [np.int64(3), np.int64(5), np.int64(8), np.int64(4), np.int64(0

here is one row and one column combind like a L shape and of those total 17 numbers one is misisng

In [15]:
import numpy as np
import random
from sklearn.model_selection import train_test_split

def create_l_shape_dataset(sudoku_df, max_samples=100000):
    X, y = [], []
    samples_needed = max_samples // 81  # Each puzzle generates 81 samples (9x9 possible L-shapes)

    for i, solution in enumerate(sudoku_df['solution']):
        if i >= samples_needed:
            break

        # Convert solution to 9x9 grid
        grid = []
        for row_start in range(0, 81, 9):
            row = [int(d) for d in solution[row_start:row_start+9]]
            grid.append(row)

        # Generate L-shapes for each position in the grid
        for row_idx in range(9):
            for col_idx in range(9):
                # Get the complete row
                row_data = grid[row_idx].copy()

                # Get the complete column
                col_data = [grid[r][col_idx] for r in range(9)]

                # Create L-shape: row + column (intersection counted once)
                # Row: positions 0-8, Column: positions 9-16 (skip intersection at row_idx)
                l_shape = row_data + col_data[:row_idx] + col_data[row_idx+1:]
                # Result: 9 (row) + 8 (column excluding intersection) = 17 elements

                # Choose random position to hide (0-16)
                missing_pos = random.randint(0, 16)
                missing_digit = l_shape[missing_pos]

                # Create input with missing digit
                input_l = l_shape.copy()
                input_l[missing_pos] = 0

                X.append(input_l)
                y.append(missing_digit)

    return np.array(X), np.array(y)

# Create dataset
print("🔄 Creating L-shape dataset...")
X, y = create_l_shape_dataset(sudoku_games, max_samples=50000)
print(f"Dataset shape: X={X.shape}, y={y.shape}")

# Sparse encode inputs (17 positions, 10 classes each: 0-9)
X_sparse = np.zeros((len(X), 17, 10))
for i, l_shape in enumerate(X):
    for j, num in enumerate(l_shape):
        X_sparse[i, j, num] = num  # Put digit value at its index position

# One-hot encode targets (9 classes: 1-9)
y_onehot = np.zeros((len(y), 9))
for i, target in enumerate(y):
    y_onehot[i, target - 1] = 1  # Map 1-9 to indices 0-8

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X_sparse, y_onehot, test_size=0.2, random_state=42
)

print(f"✅ Training data: X={X_train.shape}, y={y_train.shape}")
print(f"✅ Test data: X={X_test.shape}, y={y_test.shape}")
print(f"Sample input shape: {X_train[0].shape}")  # Should be (17, 10)
print(f"Sample target shape: {y_train[0].shape}")  # Should be (9,)

🔄 Creating L-shape dataset...
Dataset shape: X=(49977, 17), y=(49977,)
✅ Training data: X=(39981, 17, 10), y=(39981, 9)
✅ Test data: X=(9996, 17, 10), y=(9996, 9)
Sample input shape: (17, 10)
Sample target shape: (9,)


In [16]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.optimizers import Adam

# Build L-Shape Model
model = Sequential([
    # Input: (17, 10) sparse encoded L-shape
    Flatten(input_shape=(17, 10)),  # Flatten to 170 features

    # Hidden layers
    Dense(256, activation='relu'),
    Dropout(0.3),

    Dense(128, activation='relu'),
    Dropout(0.2),

    Dense(64, activation='relu'),

    # Output: 9 classes (digits 1-9)
    Dense(9, activation='softmax')
])

# Compile model
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Model summary
print("🏗️ L-Shape Model Architecture:")
model.summary()

# Train model
print("\n🚀 Training L-Shape Model...")
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=15,
    batch_size=256,
    verbose=1
)

# Evaluate
print("\n📊 Final Evaluation:")
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)")

# Save model
model.save('l_shape_sudoku_model.h5')
print("✅ Model saved as 'l_shape_sudoku_model.h5'")

🏗️ L-Shape Model Architecture:



🚀 Training L-Shape Model...
Epoch 1/15
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 23ms/step - accuracy: 0.1299 - loss: 2.3011 - val_accuracy: 0.2766 - val_loss: 2.0529
Epoch 2/15
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.2810 - loss: 1.9484 - val_accuracy: 0.5501 - val_loss: 1.2536
Epoch 3/15
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.5097 - loss: 1.3462 - val_accuracy: 0.6346 - val_loss: 0.9185
Epoch 4/15
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.6163 - loss: 1.0195 - val_accuracy: 0.6788 - val_loss: 0.7319
Epoch 5/15
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.6832 - loss: 0.8062 - val_accuracy: 0.7128 - val_loss: 0.6148
Epoch 6/15
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7281 - loss: 0.6671 - val_accuracy: 0.7362 - val_loss: 0.525



Test Accuracy: 0.7606 (76.06%)
✅ Model saved as 'l_shape_sudoku_model.h5'


In [17]:
import numpy as np
import random

def test_l_shape_model(model, sudoku_df, num_tests=10):
    print("🧪 Testing L-Shape Model Predictions:\n")

    correct_predictions = 0

    for test_num in range(num_tests):
        # Pick random puzzle
        puzzle_idx = random.randint(0, len(sudoku_df) - 1)
        solution = sudoku_df.iloc[puzzle_idx]['solution']

        # Convert to 9x9 grid
        grid = []
        for row_start in range(0, 81, 9):
            row = [int(d) for d in solution[row_start:row_start+9]]
            grid.append(row)

        # Pick random L-shape position
        row_idx = random.randint(0, 8)
        col_idx = random.randint(0, 8)

        # Create L-shape
        row_data = grid[row_idx].copy()
        col_data = [grid[r][col_idx] for r in range(9)]
        l_shape = row_data + col_data[:row_idx] + col_data[row_idx+1:]

        # Pick random position to hide
        missing_pos = random.randint(0, 16)
        true_digit = l_shape[missing_pos]

        # Create test input
        test_input = l_shape.copy()
        test_input[missing_pos] = 0

        # Sparse encode
        X_test_sparse = np.zeros((1, 17, 10))
        for j, num in enumerate(test_input):
            X_test_sparse[0, j, num] = num

        # Predict
        prediction = model.predict(X_test_sparse, verbose=0)
        predicted_digit = np.argmax(prediction[0]) + 1  # Convert back to 1-9
        confidence = np.max(prediction[0]) * 100

        # Check correctness
        is_correct = predicted_digit == true_digit
        if is_correct:
            correct_predictions += 1

        # Display results
        print(f"Test {test_num + 1}:")
        print(f"L-Shape Row {row_idx}, Col {col_idx} (missing pos {missing_pos})")

        # Show L-shape with missing position marked
        display_l = test_input.copy()
        display_l[missing_pos] = 0

        print(f"Row part:    {display_l[:9]}")
        print(f"Column part: {display_l[9:]}")
        print(f"True:        {true_digit}")
        print(f"Predicted:   {predicted_digit} (confidence: {confidence:.1f}%)")
        print(f"Result:      {'✅ CORRECT' if is_correct else '❌ WRONG'}")
        print("-" * 50)

    # Overall accuracy
    accuracy = correct_predictions / num_tests
    print(f"\n🎯 Overall Test Accuracy: {accuracy:.4f} ({accuracy*100:.1f}%)")
    return accuracy

# Run tests
test_accuracy = test_l_shape_model(model, sudoku_games, num_tests=10)

🧪 Testing L-Shape Model Predictions:

Test 1:
L-Shape Row 0, Col 3 (missing pos 16)
Row part:    [7, 5, 4, 1, 9, 3, 2, 6, 8]
Column part: [8, 7, 4, 2, 6, 5, 3, 0]
True:        9
Predicted:   1 (confidence: 79.4%)
Result:      ❌ WRONG
--------------------------------------------------
Test 2:
L-Shape Row 8, Col 8 (missing pos 2)
Row part:    [1, 7, 0, 2, 3, 5, 8, 9, 6]
Column part: [4, 9, 8, 1, 7, 5, 3, 2]
True:        4
Predicted:   4 (confidence: 100.0%)
Result:      ✅ CORRECT
--------------------------------------------------
Test 3:
L-Shape Row 5, Col 2 (missing pos 4)
Row part:    [1, 8, 4, 6, 0, 3, 9, 7, 2]
Column part: [7, 8, 3, 6, 2, 9, 1, 5]
True:        5
Predicted:   5 (confidence: 99.8%)
Result:      ✅ CORRECT
--------------------------------------------------
Test 4:
L-Shape Row 6, Col 2 (missing pos 4)
Row part:    [9, 7, 4, 5, 0, 2, 3, 6, 1]
Column part: [2, 8, 9, 7, 1, 3, 5, 6]
True:        8
Predicted:   8 (confidence: 100.0%)
Result:      ✅ CORRECT
--------------------

now common row and column sleeemnts misisng
\

In [18]:
import numpy as np
import random
from sklearn.model_selection import train_test_split

def create_l_shape_intersection_dataset(sudoku_df, max_samples=100000):
    X, y = [], []
    samples_needed = max_samples // 81  # Each puzzle generates 81 samples (9x9 positions)

    for i, solution in enumerate(sudoku_df['solution']):
        if i >= samples_needed:
            break

        # Convert solution to 9x9 grid
        grid = []
        for row_start in range(0, 81, 9):
            row = [int(d) for d in solution[row_start:row_start+9]]
            grid.append(row)

        # Generate L-shapes for each intersection position
        for row_idx in range(9):
            for col_idx in range(9):
                # Get the intersection digit (this will be our target)
                intersection_digit = grid[row_idx][col_idx]

                # Get row data with intersection missing
                row_data = grid[row_idx].copy()
                row_data[col_idx] = 0  # Hide intersection in row

                # Get column data with intersection missing
                col_data = [grid[r][col_idx] for r in range(9)]
                col_data[row_idx] = 0  # Hide intersection in column

                # Create L-shape: row + column (both missing intersection)
                # Remove intersection from column part to avoid duplication
                l_shape = row_data + col_data[:row_idx] + col_data[row_idx+1:]
                # Result: 9 (row with 0 at intersection) + 8 (column excluding intersection) = 17 elements

                X.append(l_shape)
                y.append(intersection_digit)

    return np.array(X), np.array(y)

# Create dataset
print("🔄 Creating L-shape intersection dataset...")
X, y = create_l_shape_intersection_dataset(sudoku_games, max_samples=50000)
print(f"Dataset shape: X={X.shape}, y={y.shape}")

# Sparse encode inputs (17 positions, 10 classes each: 0-9)
X_sparse = np.zeros((len(X), 17, 10))
for i, l_shape in enumerate(X):
    for j, num in enumerate(l_shape):
        X_sparse[i, j, num] = num  # Put digit value at its index position

# One-hot encode targets (9 classes: 1-9)
y_onehot = np.zeros((len(y), 9))
for i, target in enumerate(y):
    y_onehot[i, target - 1] = 1  # Map 1-9 to indices 0-8

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X_sparse, y_onehot, test_size=0.2, random_state=42
)

print(f"✅ Training data: X={X_train.shape}, y={y_train.shape}")
print(f"✅ Test data: X={X_test.shape}, y={y_test.shape}")
print(f"Sample input shape: {X_train[0].shape}")  # Should be (17, 10)
print(f"Sample target shape: {y_train[0].shape}")  # Should be (9,)
print(f"✅ Dataset ready - Missing intersection digit task!")

🔄 Creating L-shape intersection dataset...
Dataset shape: X=(49977, 17), y=(49977,)
✅ Training data: X=(39981, 17, 10), y=(39981, 9)
✅ Test data: X=(9996, 17, 10), y=(9996, 9)
Sample input shape: (17, 10)
Sample target shape: (9,)
✅ Dataset ready - Missing intersection digit task!


In [19]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam

# Build L-Shape Intersection Model
model = Sequential([
    # Input: (17, 10) sparse encoded L-shape
    Flatten(input_shape=(17, 10)),  # Flatten to 170 features

    # Hidden layers - slightly deeper for intersection reasoning
    Dense(512, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),

    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.2),

    Dense(128, activation='relu'),
    Dropout(0.2),

    Dense(64, activation='relu'),

    # Output: 9 classes (digits 1-9)
    Dense(9, activation='softmax')
])

# Compile model
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Model summary
print("🏗️ L-Shape Intersection Model Architecture:")
model.summary()

# Train model
print("\n🚀 Training L-Shape Intersection Model...")
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=20,
    batch_size=256,
    verbose=1
)

# Evaluate
print("\n📊 Final Evaluation:")
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)")

# Save model
model.save('l_shape_intersection_model.h5')
print("✅ Model saved as 'l_shape_intersection_model.h5'")

🏗️ L-Shape Intersection Model Architecture:



🚀 Training L-Shape Intersection Model...
Epoch 1/20
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 28ms/step - accuracy: 0.4177 - loss: 1.6144 - val_accuracy: 0.8798 - val_loss: 0.2575
Epoch 2/20
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9737 - loss: 0.0769 - val_accuracy: 1.0000 - val_loss: 2.1907e-04
Epoch 3/20
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9977 - loss: 0.0078 - val_accuracy: 1.0000 - val_loss: 1.5799e-05
Epoch 4/20
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9990 - loss: 0.0036 - val_accuracy: 1.0000 - val_loss: 1.2120e-06
Epoch 5/20
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9995 - loss: 0.0016 - val_accuracy: 1.0000 - val_loss: 2.9665e-06
Epoch 6/20
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.9999 - loss: 8.3885e-04 - val_



Test Accuracy: 1.0000 (100.00%)
✅ Model saved as 'l_shape_intersection_model.h5'


In [20]:
import numpy as np
import random

def test_l_shape_intersection_model(model, sudoku_df, num_tests=10):
    print("🧪 Testing L-Shape Intersection Model:\n")

    correct_predictions = 0

    for test_num in range(num_tests):
        # Pick random puzzle
        puzzle_idx = random.randint(0, len(sudoku_df) - 1)
        solution = sudoku_df.iloc[puzzle_idx]['solution']

        # Convert to 9x9 grid
        grid = []
        for row_start in range(0, 81, 9):
            row = [int(d) for d in solution[row_start:row_start+9]]
            grid.append(row)

        # Pick random intersection position
        row_idx = random.randint(0, 8)
        col_idx = random.randint(0, 8)

        # The intersection digit is our target
        true_digit = grid[row_idx][col_idx]

        # Create L-shape with intersection missing
        row_data = grid[row_idx].copy()
        row_data[col_idx] = 0  # Hide intersection in row

        col_data = [grid[r][col_idx] for r in range(9)]
        col_data[row_idx] = 0  # Hide intersection in column

        # Build L-shape input
        l_shape = row_data + col_data[:row_idx] + col_data[row_idx+1:]

        # Sparse encode
        X_test_sparse = np.zeros((1, 17, 10))
        for j, num in enumerate(l_shape):
            X_test_sparse[0, j, num] = num

        # Predict
        prediction = model.predict(X_test_sparse, verbose=0)
        predicted_digit = np.argmax(prediction[0]) + 1  # Convert back to 1-9
        confidence = np.max(prediction[0]) * 100

        # Check correctness
        is_correct = predicted_digit == true_digit
        if is_correct:
            correct_predictions += 1

        # Display results
        print(f"Test {test_num + 1}:")
        print(f"Intersection at Row {row_idx}, Col {col_idx}")

        # Show row and column with missing intersection (marked as 0)
        display_row = row_data.copy()
        display_col = [grid[r][col_idx] if r != row_idx else 0 for r in range(9)]

        print(f"Row {row_idx}:     {display_row} (missing at pos {col_idx})")
        print(f"Col {col_idx}:     {display_col} (missing at pos {row_idx})")
        print(f"True intersection:    {true_digit}")
        print(f"Predicted:           {predicted_digit} (confidence: {confidence:.1f}%)")
        print(f"Result:              {'✅ CORRECT' if is_correct else '❌ WRONG'}")
        print("-" * 55)

    # Overall accuracy
    accuracy = correct_predictions / num_tests
    print(f"\n🎯 Overall Intersection Accuracy: {accuracy:.4f} ({accuracy*100:.1f}%)")
    return accuracy

# Run tests
test_accuracy = test_l_shape_intersection_model(model, sudoku_games, num_tests=10)

🧪 Testing L-Shape Intersection Model:

Test 1:
Intersection at Row 6, Col 7
Row 6:     [4, 2, 8, 5, 3, 1, 9, 0, 7] (missing at pos 7)
Col 7:     [9, 1, 4, 2, 8, 7, 0, 5, 3] (missing at pos 6)
True intersection:    6
Predicted:           6 (confidence: 100.0%)
Result:              ✅ CORRECT
-------------------------------------------------------
Test 2:
Intersection at Row 1, Col 1
Row 1:     [1, 0, 3, 8, 9, 2, 5, 7, 4] (missing at pos 1)
Col 1:     [7, 0, 2, 4, 5, 9, 1, 3, 8] (missing at pos 1)
True intersection:    6
Predicted:           6 (confidence: 100.0%)
Result:              ✅ CORRECT
-------------------------------------------------------
Test 3:
Intersection at Row 0, Col 4
Row 0:     [4, 8, 6, 9, 0, 7, 2, 1, 3] (missing at pos 4)
Col 4:     [0, 1, 8, 7, 3, 2, 6, 9, 4] (missing at pos 0)
True intersection:    5
Predicted:           5 (confidence: 100.0%)
Result:              ✅ CORRECT
-------------------------------------------------------
Test 4:
Intersection at Row 7, Col 3


ow common row and column sleeemnts misisngand plus one element from one row and oen element from one columns whidhc totls us to total 3 leemtns missing

In [21]:
import numpy as np
import random
from sklearn.model_selection import train_test_split

def create_triple_missing_dataset(sudoku_df, max_samples=100000):
    X, y = [], []
    samples_needed = max_samples // 81

    for i, solution in enumerate(sudoku_df['solution']):
        if i >= samples_needed:
            break

        # Convert to 9x9 grid
        grid = []
        for row_start in range(0, 81, 9):
            row = [int(d) for d in solution[row_start:row_start+9]]
            grid.append(row)

        # Generate samples for each intersection
        for row_idx in range(9):
            for col_idx in range(9):
                # Target: intersection digit
                intersection_digit = grid[row_idx][col_idx]

                # Create row with 2 missing (intersection + 1 random)
                row_data = grid[row_idx].copy()
                row_data[col_idx] = 0  # Hide intersection
                row_missing_pos = random.choice([j for j in range(9) if j != col_idx])
                row_data[row_missing_pos] = 0  # Hide additional element

                # Create column with 2 missing (intersection + 1 random)
                col_data = [grid[r][col_idx] for r in range(9)]
                col_data[row_idx] = 0  # Hide intersection
                col_missing_pos = random.choice([j for j in range(9) if j != row_idx])
                col_data[col_missing_pos] = 0  # Hide additional element

                # L-shape: row + column (excluding shared intersection)
                l_shape = row_data + col_data[:row_idx] + col_data[row_idx+1:]
                # Result: 9 (row) + 8 (column excluding intersection) = 17 positions

                X.append(l_shape)
                y.append(intersection_digit)

    return np.array(X), np.array(y)

# Create dataset
print("🔄 Creating triple-missing L-shape dataset...")
X, y = create_triple_missing_dataset(sudoku_games, max_samples=50000)
print(f"Dataset shape: X={X.shape}, y={y.shape}")

# Sparse encode inputs (17 positions, 10 classes each: 0-9)
X_sparse = np.zeros((len(X), 17, 10))
for i, sample in enumerate(X):
    for j, num in enumerate(sample):
        X_sparse[i, j, num] = 1

# One-hot encode targets
y_onehot = np.zeros((len(y), 9))
for i, target in enumerate(y):
    y_onehot[i, target - 1] = 1

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X_sparse, y_onehot, test_size=0.2, random_state=42
)

print(f"✅ Training: X={X_train.shape}, y={y_train.shape}")
print(f"✅ Test: X={X_test.shape}, y={y_test.shape}")
print(f"✅ Triple-missing L-shape (17 positions) ready!")

🔄 Creating triple-missing L-shape dataset...
Dataset shape: X=(49977, 17), y=(49977,)
✅ Training: X=(39981, 17, 10), y=(39981, 9)
✅ Test: X=(9996, 17, 10), y=(9996, 9)
✅ Triple-missing L-shape (17 positions) ready!


In [22]:
import tensorflow as tf
from tensorflow.keras import layers, models

def create_triple_missing_model():
   model = models.Sequential([
       # Input: (17, 10) - 17 positions with 10-class encoding
       layers.Input(shape=(17, 10)),

       # Flatten the input
       layers.Flatten(),  # (17*10 = 170,)

       # Dense layers
       layers.Dense(128, activation='relu'),
       layers.Dropout(0.3),
       layers.Dense(64, activation='relu'),
       layers.Dropout(0.2),

       # Output: 9 classes (digits 1-9)
       layers.Dense(9, activation='softmax')
   ])

   return model

# Create and compile model
model = create_triple_missing_model()
model.compile(
   optimizer='adam',
   loss='categorical_crossentropy',
   metrics=['accuracy']
)

# Model summary
model.summary()

# Train model
print("🔄 Training model...")
history = model.fit(
   X_train, y_train,
   batch_size=32,
   epochs=20,
   validation_data=(X_test, y_test),
   verbose=1
)

# Evaluate
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
print(f"✅ Test Accuracy: {test_acc:.4f}")

🔄 Training model...
Epoch 1/20
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 5ms/step - accuracy: 0.4343 - loss: 1.5311 - val_accuracy: 0.9331 - val_loss: 0.1422
Epoch 2/20
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9174 - loss: 0.2440 - val_accuracy: 0.9342 - val_loss: 0.1253
Epoch 3/20
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9329 - loss: 0.1930 - val_accuracy: 0.9351 - val_loss: 0.1236
Epoch 4/20
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.9377 - loss: 0.1712 - val_accuracy: 0.9375 - val_loss: 0.1182
Epoch 5/20
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.9429 - loss: 0.1534 - val_accuracy: 0.9342 - val_loss: 0.1164
Epoch 6/20
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - accuracy: 0.9458 - loss: 0.1380 - val_accuracy: 0.9340 - val_loss: 0.

In [23]:
# Test block - Predict all 3 missing numbers
import numpy as np

def test_predict_3_missing(model, sudoku_df, num_cases=10):
    """Test model by predicting all 3 missing numbers in L-shape"""

    print("🎯 Testing Model - Predict 3 Missing Numbers")
    print("=" * 60)

    for case_num in range(num_cases):
        print(f"\nTest Case {case_num + 1}:")
        print("-" * 30)

        # Get random sudoku solution
        solution = sudoku_df['solution'].iloc[case_num]

        # Convert to 9x9 grid
        grid = []
        for row_start in range(0, 81, 9):
            row = [int(d) for d in solution[row_start:row_start+9]]
            grid.append(row)

        # Pick random intersection
        row_idx = np.random.randint(0, 9)
        col_idx = np.random.randint(0, 9)

        # Get the 3 missing positions
        intersection_digit = grid[row_idx][col_idx]

        # Pick random missing from row (not intersection)
        row_missing_pos = np.random.choice([j for j in range(9) if j != col_idx])
        row_missing_digit = grid[row_idx][row_missing_pos]

        # Pick random missing from column (not intersection)
        col_missing_pos = np.random.choice([j for j in range(9) if j != row_idx])
        col_missing_digit = grid[col_missing_pos][col_idx]

        print(f"Position: Row {row_idx}, Col {col_idx}")
        print(f"Missing from row pos {row_missing_pos}")
        print(f"Missing from col pos {col_missing_pos}")

        # Create L-shape with 3 missing
        row_data = grid[row_idx].copy()
        row_data[col_idx] = 0  # Hide intersection
        row_data[row_missing_pos] = 0  # Hide random from row

        col_data = [grid[r][col_idx] for r in range(9)]
        col_data[row_idx] = 0  # Hide intersection
        col_data[col_missing_pos] = 0  # Hide random from column

        # Create L-shape (17 positions)
        l_shape = row_data + col_data[:row_idx] + col_data[row_idx+1:]

        print(f"Row with 2 missing:    {row_data}")
        print(f"Column with 2 missing: {col_data}")

        # Convert to model input format
        X_input = np.zeros((1, 17, 10))
        for j, num in enumerate(l_shape):
            X_input[0, j, num] = 1

        # Predict intersection
        prediction = model.predict(X_input, verbose=0)
        predicted_intersection = np.argmax(prediction) + 1
        confidence = np.max(prediction) * 100

        # Show results
        print("\n🎯 PREDICTIONS:")
        print(f"Intersection (True: {intersection_digit}) → Predicted: {predicted_intersection} ({confidence:.1f}%)")

        # For row missing - find what's missing from row
        row_known = [x for x in row_data if x != 0]
        row_candidates = [x for x in range(1, 10) if x not in row_known]
        print(f"Row missing (True: {row_missing_digit}) → Candidates: {row_candidates}")

        # For column missing - find what's missing from column
        col_known = [x for x in col_data if x != 0]
        col_candidates = [x for x in range(1, 10) if x not in col_known]
        print(f"Column missing (True: {col_missing_digit}) → Candidates: {col_candidates}")

        # Check intersection prediction
        if predicted_intersection == intersection_digit:
            print("✅ Intersection CORRECT")
        else:
            print("❌ Intersection WRONG")

        # Check if other candidates include true values
        row_correct = row_missing_digit in row_candidates
        col_correct = col_missing_digit in col_candidates

        print(f"Row candidate correct: {'✅' if row_correct else '❌'}")
        print(f"Col candidate correct: {'✅' if col_correct else '❌'}")

        print(f"Overall: {3 if (predicted_intersection == intersection_digit and row_correct and col_correct) else 'Some wrong'}/3 correct")

# Run the test
test_predict_3_missing(model, sudoku_games, num_cases=10)

🎯 Testing Model - Predict 3 Missing Numbers

Test Case 1:
------------------------------
Position: Row 2, Col 8
Missing from row pos 3
Missing from col pos 8
Row with 2 missing:    [8, 2, 1, 0, 3, 4, 9, 5, 0]
Column with 2 missing: [3, 8, 0, 6, 9, 4, 1, 5, 0]

🎯 PREDICTIONS:
Intersection (True: 7) → Predicted: 7 (100.0%)
Row missing (True: 6) → Candidates: [6, 7]
Column missing (True: 2) → Candidates: [2, 7]
✅ Intersection CORRECT
Row candidate correct: ✅
Col candidate correct: ✅
Overall: 3/3 correct

Test Case 2:
------------------------------
Position: Row 6, Col 1
Missing from row pos 0
Missing from col pos 0
Row with 2 missing:    [0, 0, 4, 1, 9, 5, 2, 8, 7]
Column with 2 missing: [0, 4, 9, 6, 8, 1, 0, 2, 5]

🎯 PREDICTIONS:
Intersection (True: 3) → Predicted: 3 (100.0%)
Row missing (True: 6) → Candidates: [3, 6]
Column missing (True: 7) → Candidates: [3, 7]
✅ Intersection CORRECT
Row candidate correct: ✅
Col candidate correct: ✅
Overall: 3/3 correct

Test Case 3:
------------------