# A Notebook for Solving All 1 Million Quizzes in the Dataset

## Imports and Global Variables

In [None]:
import pandas as pd
import time
import os
from tqdm import tqdm
import sudoku_solver as ss
from sudoku_solver import Solver
import multiprocessing as mp
from functools import partial
from google.colab import drive

# Quiz dataset, read once from a CSV in the current working directory.
# solve_quiz() passes it to ss.load_quiz_from_dataset to fetch a quiz and its expected solution.
QUIZ_DF = pd.read_csv(r'sudoku.csv')
# Location of the results CSV; the path lives under the Drive mount point used below.
RESULTS_FPATH = r'/content/drive/MyDrive/colab_notebooks/results/solve_them_all_results.csv'
# Results saved so far; populated by load_result_df() and extended by the solving loop.
RESULTS_DF = None
# Index of the first quiz the solving loop should process (used as the start of its range).
RESULT_INDEX = 0
# Running count of quizzes whose computed solution matched the expected one.
PASSED_N = 0

# Mount Google Drive at /content/drive so RESULTS_FPATH can be read and written.
drive.mount("/content/drive", force_remount=True)

## Helper Functions

In [2]:
def solve_quiz(quiz_num):
    """
    Solve one quiz from ``QUIZ_DF`` and check it against the dataset's answer.

    The quiz and its expected solution are fetched with
    ``ss.load_quiz_from_dataset``; the solver runs with ``try_hard`` enabled.
    The solver's output is flattened and its entries are concatenated into a
    single string before being compared with the expected solution.

    Returns:
        A tuple ``(quiz_num, expected_solution, actual_solution_str, correct)``
        where ``correct`` is True when the two solution strings are equal.
    """
    puzzle, expected = ss.load_quiz_from_dataset(QUIZ_DF, quiz_num)

    sudoku_solver = Solver(puzzle)
    sudoku_solver.try_hard = True
    solved_grid = sudoku_solver.solve()

    # Serialize the solved grid cell by cell so it can be compared as a string.
    cells_as_text = map(str, solved_grid.flatten())
    actual = "".join(cells_as_text)

    is_correct = expected == actual
    return quiz_num, expected, actual, is_correct

def load_result_df():
    """
    Load the results CSV (creating an empty one if needed) and sync progress globals.

    If ``RESULTS_FPATH`` exists it is read into ``RESULTS_DF``; otherwise an
    empty frame with the results columns is created and written to that path
    (the parent directory is created first when it is missing).

    Side effects:
        Sets the module-level ``RESULTS_DF``, ``RESULT_INDEX`` (number of rows
        already saved, i.e. where solving should resume) and ``PASSED_N``
        (sum of the ``correct`` column), then prints a short summary.
    """
    # RESULT_INDEX must be declared global here: without it the assignment
    # below only creates a local and the resume position is never updated.
    global RESULTS_DF, RESULTS_FPATH, RESULT_INDEX, PASSED_N

    if os.path.isfile(RESULTS_FPATH):
        RESULTS_DF = pd.read_csv(RESULTS_FPATH)
    else:
        # Make sure the target directory exists before the first write.
        results_dir = os.path.dirname(RESULTS_FPATH)
        if results_dir:
            os.makedirs(results_dir, exist_ok=True)
        RESULTS_DF = pd.DataFrame(columns=['id', 'expected_solution', 'actual_solution', 'correct'])
        RESULTS_DF.to_csv(RESULTS_FPATH, index=False)

    RESULT_INDEX = len(RESULTS_DF)
    # Cast to a plain int so the counter is not a numpy scalar.
    PASSED_N = int(RESULTS_DF['correct'].values.sum())
    print(f'Current results contain {RESULT_INDEX} solutions, {PASSED_N} are correct.')

## Start Solving

In [None]:
# Solve every quiz that has no saved result yet, checkpointing to RESULTS_FPATH
# in batches so an interrupted run can be resumed from the last saved batch.
load_result_df()
results = []  # rows produced since the last checkpoint
N = len(QUIZ_DF)
for i in tqdm(range(RESULT_INDEX, N)):
    quiz_num, expected_solution, actual_solution, quiz_result = solve_quiz(i)

    # Keys must match the columns of the results CSV created by
    # load_result_df() ('id', not 'quiz_num'); otherwise the concat below
    # leaves 'id' empty and adds a stray extra column.
    results.append({
        "id": quiz_num,
        "expected_solution": expected_solution,
        "actual_solution": actual_solution,
        "correct": quiz_result,
    })

    if quiz_result:
        PASSED_N += 1

    # Save in batches: every 1000 quizzes, and once more after the last quiz.
    if (i + 1) % 1000 == 0 or (i + 1) == N:
        batch_results_df = pd.DataFrame(results)
        combined_df = pd.concat([RESULTS_DF, batch_results_df], ignore_index=True)
        combined_df.to_csv(RESULTS_FPATH, index=False)
        RESULTS_DF = combined_df  # Update previous results
        results = []  # Clear list after saving
        print(f"\n[UPDATE] Progress saved at {i + 1}/{N} quizzes completed - {PASSED_N} correctly solved")

print(f"\n[FINAL] From {N} total quizzes, {PASSED_N} were correctly solved!")
print(f"[FINAL] Verify the results at {RESULTS_FPATH}\n")