# Function

In [12]:
import os

import numpy as np
import pandas as pd

def ReadDB(intake, sep=','):
    """
    Read a CSV, XLSX, XML, JSON, or Parquet file into a pandas DataFrame.

    The reader is selected from the file extension of ``intake``. The
    extension is compared case-insensitively, so ``data.CSV`` is handled the
    same way as ``data.csv``.

    Parameters:
        intake (str or os.PathLike): The URL or local file path of the file.
        sep (str): The separator used in CSV files (default is ','). Ignored
            for every other format.

    Returns:
        DataFrame: The loaded data as a pandas DataFrame, or None if the
        extension is not supported or an error occurs while reading. In both
        cases a message is printed instead of an exception being raised.
    """
    try:
        # os.fspath accepts both str and path objects (e.g. pathlib.Path);
        # lower-casing makes the extension check case-insensitive.
        # The original `intake` is still what gets handed to pandas.
        target = os.fspath(intake).lower()

        # Determine file type based on extension and read accordingly
        if target.endswith('.csv'):
            return pd.read_csv(intake, sep=sep)
        if target.endswith('.xlsx'):
            return pd.read_excel(intake)
        if target.endswith('.xml'):
            return pd.read_xml(intake)
        if target.endswith('.json'):
            return pd.read_json(intake)
        if target.endswith('.parquet'):
            return pd.read_parquet(intake)

        print("Unsupported file type. Only .csv, .xlsx, .xml, .json, and .parquet files are supported.")
        return None
    except Exception as e:
        # Deliberate best-effort: report the problem and signal failure with
        # None so an interactive session keeps running.
        print("An error occurred:", e)
        return None

# Examples

## CSV

In [15]:
# CSV sample: the delimiter is passed explicitly; the call's result is the cell output.
ReadDB(
    intake=r'../../../Datasets/Testing/test_csv.csv',
    sep=',',
)

Unnamed: 0,Column_1,Column_2,Column_3,Column_4,Column_5
0,2,27,41,5,83
1,85,69,67,73,22
2,41,28,58,75,28
3,93,18,74,81,27
4,10,50,14,28,75
5,51,87,18,77,24
6,32,46,17,65,18
7,68,42,94,12,6
8,59,76,7,16,10
9,26,29,8,40,39


## XLSX

In [16]:
# XLSX sample: no separator argument is needed for Excel workbooks.
ReadDB(
    intake=r'../../../Datasets/Testing/test_xlsx.xlsx',
)

Unnamed: 0,Column_1,Column_2,Column_3,Column_4,Column_5
0,21,90,66,47,18
1,78,18,60,69,99
2,65,66,95,92,39
3,16,9,75,29,97
4,81,9,76,93,16
5,40,63,75,54,94
6,28,76,24,64,68
7,67,16,10,33,48
8,33,26,72,95,47
9,72,63,83,14,31


## PARQUET

In [19]:
# Parquet sample: pandas needs an optional engine (pyarrow or fastparquet).
# Without one, ReadDB prints the error and returns None, as in the output below.
ReadDB(
    intake=r'../../../Datasets/Testing/test_parquet.parquet',
)

An error occurred: Unable to find a usable engine; tried using: 'pyarrow', 'fastparquet'.
A suitable version of pyarrow or fastparquet is required for parquet support.
Trying to import the above resulted in these errors:
 - Missing optional dependency 'pyarrow'. pyarrow is required for parquet support. Use pip or conda to install pyarrow.
 - Missing optional dependency 'fastparquet'. fastparquet is required for parquet support. Use pip or conda to install fastparquet.


## JSON

In [17]:
# JSON sample: the call's result is the cell output.
ReadDB(
    intake=r'../../../Datasets/Testing/test_json.json',
)

Unnamed: 0,Column_1,Column_2,Column_3,Column_4,Column_5
0,56,83,66,80,61
1,55,98,19,58,58
2,53,78,36,46,45
3,22,4,2,99,48
4,81,66,16,68,83
5,66,12,57,57,64
6,49,75,80,18,82
7,61,83,22,30,28
8,92,87,21,80,47
9,82,24,91,31,48


## XML

In [18]:
# XML sample: the call's result is the cell output.
ReadDB(
    intake=r'../../../Datasets/Testing/test_xml.xml',
)

Unnamed: 0,Column1,Column2,Column3,Column4,Column5
0,34,47,30,10,94
1,94,55,55,55,13
2,23,6,14,85,3
3,87,97,96,73,35
4,4,34,58,92,57
5,82,27,28,55,39
6,50,91,75,45,55
7,21,61,85,74,65
8,12,74,47,45,67
9,8,58,26,14,74
