In [6]:
def print_integers_from_binary_file(file_path, start_line, end_line):
    """
    Reads integers from a binary file (assuming one integer per line) and prints the integers
    from start_line to end_line (inclusive).

    Parameters:
        file_path (str): Path to the binary file.
        start_line (int): The starting line number (1-based index).
        end_line (int): The ending line number (1-based index).
    """
    try:
        with open(file_path, 'rb') as binary_file:
            for current_line_number, line in enumerate(binary_file, start=1):
                if start_line <= current_line_number <= end_line:
                    try:
                        integer_value = int.from_bytes(line.strip(), byteorder='big', signed=True)
                        print(f"Line {current_line_number}: {integer_value}")
                    except ValueError:
                        print(f"Line {current_line_number}: Could not decode to integer")
                elif current_line_number > end_line:
                    break
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found.")
    except Exception as e:
        print(f"An error occurred: {e}")

In [7]:
import torch
import numpy as np
import random

file_path = 'data/trainrandom6gb.bin'  # Replace with the path to your binary file
train_data = np.memmap(file_path, dtype=np.uint8, mode='r')
block_size = 1023
batch_size = 1


seed = 44
##controling seeding
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)


def get_batch_random():
    data = train_data
    # ix = torch.randint(len(data) - block_size, (batch_size,))
    # Ensure the starting index is a multiple of block_size
    ix = torch.randint(0, len(data) // (block_size + 1), (batch_size,)) * (block_size + 1)
    x = torch.stack([torch.from_numpy((data[i:i+block_size]).astype(np.int64)) for i in ix])
    y = torch.stack([torch.from_numpy((data[i+1:i+1+block_size]).astype(np.int64)) for i in ix])
    return x, y

data = train_data
print(f"length of data is {len(data)}")
 
def get_batch_index(a,b):
   # ix = torch.randint(len(data) - block_size, (batch_size,))
    # Ensure the starting index is a multiple of block_size
    ix = torch.tensor(range(a,b)) * (block_size + 1)
    x = torch.stack([torch.from_numpy((data[i:i+block_size]).astype(np.int64)) for i in ix])
    y = torch.stack([torch.from_numpy((data[i+1:i+1+block_size]).astype(np.int64)) for i in ix])
    return x, y


def write_tensor_to_file(tensor, tensor2,output_path):
    """
    Writes a PyTorch tensor to a file in a human-readable format.

    Parameters:
        tensor (torch.Tensor): The tensor to write to the file.
        output_path (str): Path to the output text file.
    """
    try:
        with open(output_path, 'w') as output_file:
            output_file.write(str(tensor))
            output_file.write('\n')
    except Exception as e:
        print(f"An error occurred: {e}")


def write_tensor_to_file(tensor, output_path):
    """
    Writes a PyTorch tensor to a file in a single line.

    Parameters:
        tensor (torch.Tensor): The tensor to write to the file.
        output_path (str): Path to the output text file.
    """
    print(f"tensor shape {tensor.shape}")
    print(f"tensor first dim {tensor.shape[0]}")
    try:
        with open(output_path, 'w') as output_file:
            for i in range(tensor.shape[0]):
                tensor_str = torch.flatten(tensor[i]).tolist()  # Flatten the tensor to 1D and convert to a list
                output_file.write(str(tensor_str) + '\n')  # Write the tensor as a single-line string
    except Exception as e:
        print(f"An error occurred: {e}")
x,y = get_batch_index(8080944100,8080944128)
output_file = "output.txt"

torch.set_printoptions(threshold=100000)

write_tensor_to_file(x,output_file)

length of data is 8080944128
tensor shape torch.Size([28, 0])
tensor first dim 28


In [7]:
# Example usage
file_path = 'data/trainrandom6gb.bin'  # Replace with the path to your binary file
start_line = 10  # Replace with the starting line number
end_line = 20    # Replace with the ending line number

print_lines_from_binary_file(file_path, start_line, end_line)

 	    	    
    
  
 	    
  
 
    
   
  
	  
 
