In [1]:
import pandas as pd
import numpy as np
import os

def TestData(rows, columns, file_type, intake='', name=None, seed=None):
    """
    Generate a DataFrame of random integers and save it in the requested file format.

    Every cell holds an integer drawn uniformly from [0, 100). Columns are named
    'Column_1' ... 'Column_<columns>'; for XML output the underscore is dropped
    ('Column1', ...), which keeps the element names identical to earlier versions
    of this function.

    Parameters:
        rows (int): The number of rows in the DataFrame.
        columns (int): The number of columns in the DataFrame.
        file_type (str): The file type to save as ('csv', 'xlsx', 'parquet', 'json', 'xml').
            Matching is case-insensitive.
        intake (str): The directory to save the file in. Defaults to '' (the current
            working directory). Missing directories are created.
        name (str): The base name of the file (without extension). Defaults to 'test_' + file_type.lower().
        seed (int, optional): Seed for the random values. When given, the same seed
            always produces the same data. When None (default), NumPy's global
            random state is used, exactly as before.

    Returns:
        str: Path of the saved file if successful, None otherwise (unsupported file
        type, or any error while creating the directory or writing the file; a
        message is printed in both cases).

    Raises:
        ValueError: If rows or columns is negative (NumPy rejects negative dimensions).
    """
    # One writer per supported format; each takes (frame, path).
    writers = {
        'csv': lambda frame, path: frame.to_csv(path, index=False),
        'xlsx': lambda frame, path: frame.to_excel(path, index=False),
        'parquet': lambda frame, path: frame.to_parquet(path, index=False),
        'json': lambda frame, path: frame.to_json(path, orient='records'),
        # 'Column_1' is already a valid XML tag; the underscore is stripped only so
        # the generated XML keeps the element names this function has always used.
        'xml': lambda frame, path: frame.rename(
            columns=lambda label: label.replace('_', '')
        ).to_xml(path, index=False),
    }

    file_type = file_type.lower()

    # Validate before doing any work so an unsupported type never touches the disk.
    if file_type not in writers:
        print("Unsupported file type. Choose from: 'csv', 'xlsx', 'parquet', 'json', 'xml'.")
        return None

    # Set default name based on file type if name is not provided
    if name is None:
        name = f"test_{file_type}"

    # Determine the full file path
    file_path = os.path.join(intake, f"{name}.{file_type}")

    # Unseeded calls keep using the global generator (so np.random.seed() set by the
    # caller is still honoured); seeded calls get their own isolated generator.
    rng = np.random if seed is None else np.random.RandomState(seed)
    column_names = [f'Column_{i+1}' for i in range(columns)]
    data = pd.DataFrame(rng.randint(0, 100, size=(rows, columns)), columns=column_names)

    try:
        # Create the target directory on demand instead of failing on a missing path.
        parent_dir = os.path.dirname(file_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        writers[file_type](data, file_path)

        print(f"File saved successfully at {file_path}")
        return file_path

    except Exception as e:
        # Deliberately broad: optional engines (openpyxl, pyarrow, lxml, ...) surface
        # as different exception types, and the contract is "return None on failure".
        print("An error occurred while saving the file:", e)
        return None

# Examples

## CSV

In [2]:
# 10 rows x 5 columns written as CSV; the upper-case type is lower-cased by TestData
# and the default name/location yields test_csv.csv in the working directory.
TestData(10, 5, file_type='CSV')

File saved successfully at test_csv.csv


'test_csv.csv'

## XLSX

In [13]:
# Same 10 x 5 table shape, saved as an Excel workbook (test_xlsx.xlsx).
TestData(10, 5, file_type='XLSX')

File saved successfully at test_xlsx.xlsx


'test_xlsx.xlsx'

## PARQUET

In [14]:
# Parquet export needs pyarrow or fastparquet installed; without one of them
# TestData prints the pandas error and returns None instead of a path.
TestData(10, 5, file_type='PARQUET')

An error occurred while saving the file: Unable to find a usable engine; tried using: 'pyarrow', 'fastparquet'.
A suitable version of pyarrow or fastparquet is required for parquet support.
Trying to import the above resulted in these errors:
 - Missing optional dependency 'pyarrow'. pyarrow is required for parquet support. Use pip or conda to install pyarrow.
 - Missing optional dependency 'fastparquet'. fastparquet is required for parquet support. Use pip or conda to install fastparquet.


## JSON

In [15]:
# Same 10 x 5 table shape, saved as a JSON array of row records (test_json.json).
TestData(10, 5, file_type='JSON')

File saved successfully at test_json.json


'test_json.json'

## XML

In [16]:
# Same 10 x 5 table shape, saved as XML (test_xml.xml); columns are written as
# Column1..Column5 in this format.
TestData(10, 5, file_type='XML')

File saved successfully at test_xml.xml


'test_xml.xml'