In [1]:
# Install necessary library for handling PGN files
!pip install python-chess

import os
from google.colab import files
import chess.pgn

# Location of the input .pgn file. Nothing is uploaded by this cell: the path
# is hard-coded, so adapt it if the dataset lives elsewhere or has another name.
file_name = '/content/dataset.pgn'

# Alias of the same path, kept under its historical name.
uploaded = file_name

# Function to split PGN file into smaller parts
def split_pgn(file_path, output_dir, games_per_file):
    """
    Splits a PGN file into smaller files with a fixed number of games.

    Games are read one at a time with chess.pgn.read_game and written to
    part_1.pgn, part_2.pgn, ... inside output_dir. Each part holds
    games_per_file games, except possibly the last one, which holds the
    remainder. A part file is only created once there is a game to put in
    it, so no empty files are produced (neither for an empty input nor when
    the number of games is an exact multiple of games_per_file).

    Parameters:
        file_path (str): Path to the input PGN file.
        output_dir (str): Directory to save the output files. Created if it
            does not exist yet.
        games_per_file (int): Number of games per smaller file. Must be >= 1.

    Raises:
        ValueError: If games_per_file is less than 1.
    """
    # Validate before touching the filesystem so a bad call has no side effects.
    if games_per_file < 1:
        raise ValueError("games_per_file must be a positive integer")

    os.makedirs(output_dir, exist_ok=True)

    part_number = 0
    game_count = 0
    output_file = None  # Opened lazily, only when a game is ready to be written

    with open(file_path, "r") as file:
        try:
            while True:
                game = chess.pgn.read_game(file)
                if game is None:  # End of file
                    break

                if output_file is None:
                    part_number += 1
                    part_path = os.path.join(output_dir, f"part_{part_number}.pgn")
                    output_file = open(part_path, "w")

                output_file.write(str(game) + "\n\n")
                game_count += 1

                # Current part is full: close it; the next part is opened
                # only if another game is actually read.
                if game_count >= games_per_file:
                    output_file.close()
                    output_file = None
                    game_count = 0
        finally:
            # Make sure a partially written part is closed even if parsing
            # or writing raised.
            if output_file is not None:
                output_file.close()

    print(f"Splitting completed. Files saved in {output_dir}")

# Settings for the split
games_per_part = 1000  # Tune to the desired number of games in each output file
output_directory = "merge_files_pgn"  # Folder that receives the part_N.pgn files

# Run the split over the dataset
split_pgn(
    file_path=file_name,
    output_dir=output_directory,
    games_per_file=games_per_part,
)

# Zip the output directory for easy download (the directory name is hard-coded
# here, so keep it in sync with output_directory)
!zip -r merge_files_pgn.zip merge_files_pgn

# Download the zip file created above through the google.colab `files` helper
# (only available when running inside Colab)
files.download("merge_files_pgn.zip")


Collecting python-chess
  Downloading python_chess-1.999-py3-none-any.whl.metadata (776 bytes)
Collecting chess<2,>=1 (from python-chess)
  Downloading chess-1.11.1.tar.gz (156 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m156.5/156.5 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Downloading python_chess-1.999-py3-none-any.whl (1.4 kB)
Building wheels for collected packages: chess
  Building wheel for chess (setup.py) ... [?25l[?25hdone
  Created wheel for chess: filename=chess-1.11.1-py3-none-any.whl size=148497 sha256=68dda417159c789eae5215b13b3a4843767f11068cfd1f389f5f1e48aabc613e
  Stored in directory: /root/.cache/pip/wheels/2e/2d/23/1bfc95db984ed3ecbf6764167dc7526d0ab521cf9a9852544e
Successfully built chess
Installing collected packages: chess, python-chess
Successfully installed chess-1.11.1 python-chess-1.999
Splitting completed. Files saved in merge_files_pgn
  adding: merge_files_pgn/ (stored 0

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>