In [0]:
# Print the signature and docstring of the format-specific Parquet reader
# before using it in the cells below.
help(spark.read.parquet)

Help on method parquet in module pyspark.sql.readwriter:

parquet(*paths: str, **options: 'OptionalPrimitiveType') -> 'DataFrame' method of pyspark.sql.readwriter.DataFrameReader instance
    Loads Parquet files, returning the result as a :class:`DataFrame`.
    
    .. versionadded:: 1.4.0
    
    .. versionchanged:: 3.4.0
        Supports Spark Connect.
    
    Parameters
    ----------
    paths : str
    
    Other Parameters
    ----------------
    **options
        For the extra options, refer to
        `Data Source Option <https://spark.apache.org/docs/latest/sql-data-sources-parquet.html#data-source-option>`_
        for the version you use.
    
        .. # noqa
    
    Examples
    --------
    Write a DataFrame into a Parquet file and read it back.
    
    >>> import tempfile
    >>> with tempfile.TemporaryDirectory() as d:
    ...     # Write a DataFrame into a Parquet file
    ...     spark.createDataFrame(
    ...         [{"age": 100, "name": "Hyukjin Kwon"}]
    

In [0]:
import getpass
# Login name of the user running the notebook; the cells below interpolate it
# into the /user/<username>/retail_db_parquet/orders path.
username = getpass.getuser()

In [0]:
# Inspect the orders directory before reading it. The listing is kept as the
# final expression so the notebook renders the returned FileInfo entries.
orders_dir = f'/user/{username}/retail_db_parquet/orders'
dbutils.fs.ls(orders_dir)

[FileInfo(path='dbfs:/user/root/retail_db_parquet/orders/_SUCCESS', name='_SUCCESS', size=0, modificationTime=1697981160000),
 FileInfo(path='dbfs:/user/root/retail_db_parquet/orders/_committed_8179953115895123963', name='_committed_8179953115895123963', size=123, modificationTime=1697981160000),
 FileInfo(path='dbfs:/user/root/retail_db_parquet/orders/_started_8179953115895123963', name='_started_8179953115895123963', size=0, modificationTime=1697981160000),
 FileInfo(path='dbfs:/user/root/retail_db_parquet/orders/part-00000-tid-8179953115895123963-c35ccadf-1cdc-40b7-a979-8e858c75fe85-17-1.c000.snappy.parquet', name='part-00000-tid-8179953115895123963-c35ccadf-1cdc-40b7-a979-8e858c75fe85-17-1.c000.snappy.parquet', size=489027, modificationTime=1697981160000)]

In [0]:
# Read the orders directory with the format-specific shortcut reader.
orders_dir = f'/user/{username}/retail_db_parquet/orders'
df = spark.read.parquet(orders_dir)

In [0]:
# Files that make up the DataFrame. In the recorded output only the part-*
# Parquet file appears, not the _SUCCESS/_committed/_started marker files
# that the directory listing above shows.
df.inputFiles()

['dbfs:/user/root/retail_db_parquet/orders/part-00000-tid-8179953115895123963-c35ccadf-1cdc-40b7-a979-8e858c75fe85-17-1.c000.snappy.parquet']

In [0]:
# (column name, type name) pairs for the DataFrame that was read.
# Note from the recorded output: order_date is a string column, not a
# date/timestamp.
df.dtypes

[('order_customer_id', 'bigint'),
 ('order_date', 'string'),
 ('order_id', 'bigint'),
 ('order_status', 'string')]

In [0]:
# Preview the data using show()'s defaults, spelled out: at most 20 rows, with
# string cells longer than 20 characters cut short and suffixed with "...".
df.show(n=20, truncate=True)

+-----------------+--------------------+--------+---------------+
|order_customer_id|          order_date|order_id|   order_status|
+-----------------+--------------------+--------+---------------+
|            11599|2013-07-25 00:00:...|       1|         CLOSED|
|              256|2013-07-25 00:00:...|       2|PENDING_PAYMENT|
|            12111|2013-07-25 00:00:...|       3|       COMPLETE|
|             8827|2013-07-25 00:00:...|       4|         CLOSED|
|            11318|2013-07-25 00:00:...|       5|       COMPLETE|
|             7130|2013-07-25 00:00:...|       6|       COMPLETE|
|             4530|2013-07-25 00:00:...|       7|       COMPLETE|
|             2911|2013-07-25 00:00:...|       8|     PROCESSING|
|             5657|2013-07-25 00:00:...|       9|PENDING_PAYMENT|
|             5648|2013-07-25 00:00:...|      10|PENDING_PAYMENT|
|              918|2013-07-25 00:00:...|      11| PAYMENT_REVIEW|
|             1837|2013-07-25 00:00:...|      12|         CLOSED|
|         

In [0]:
# Same read through the generic reader API: pick the format by name first,
# then load the directory.
orders_dir = f'/user/{username}/retail_db_parquet/orders'
parquet_reader = spark.read.format('parquet')
df = parquet_reader.load(orders_dir)

In [0]:
# Check which files back the DataFrame read via format('parquet').load(...).
# The recorded output lists the same single part-* file as the
# spark.read.parquet(...) read earlier in the notebook.
df.inputFiles()

['dbfs:/user/root/retail_db_parquet/orders/part-00000-tid-8179953115895123963-c35ccadf-1cdc-40b7-a979-8e858c75fe85-17-1.c000.snappy.parquet']