In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, IntegerType
from pyspark.sql.types import StructType
from pyspark.sql.utils import AnalysisException
from py4j.protocol import Py4JJavaError

class Reader:
    """
    Module responsible for reading data based on file format and other parameters.

    Holds a SparkSession and exposes one read method per supported format
    (CSV, Excel, JSON). Each method validates its boolean flag, builds a
    DataFrameReader, and translates "path does not exist" analysis errors
    into FileNotFoundError.
    """
    def __init__(self, spark: "SparkSession"):
        """
        Parameters:
        spark (SparkSession): Session used to build every DataFrameReader.
        """
        self.spark = spark

    def read_csv(self, path: str, header: bool, schema: "StructType | None" = None):
        """
        Reads CSV data with optional schema.

        When no schema is given, the 'inferSchema' option is enabled instead.

        Parameters:
        path (str): Path to CSV file or directory
        header (bool): Whether CSV has header row
        schema (StructType, optional): Schema to enforce while reading CSV

        Returns:
        DataFrame: PySpark DataFrame

        Raises:
        TypeError: If 'header' is not boolean or schema is wrong type
        FileNotFoundError: If path does not exist
        AnalysisException: For other analysis errors (re-raised unchanged)
        Py4JJavaError: For general Spark errors (re-raised unchanged)
        Exception: For other unexpected errors
        """
        if not isinstance(header, bool):
            raise TypeError("The 'header' parameter must be a boolean (True or False).")

        # Validate outside the try block: inside it, the generic handler below
        # would catch this TypeError and re-raise it as a plain Exception.
        if schema is not None and not isinstance(schema, StructType):
            raise TypeError("The 'schema' parameter must be a StructType when provided.")

        try:
            reader = self.spark.read.format("csv").option("header", header)

            if schema is not None:
                reader = reader.schema(schema)
            else:
                # Infer schema if not provided
                reader = reader.option("inferSchema", True)

            return reader.load(path)

        except AnalysisException as e:
            if "path does not exist" in str(e).lower():
                raise FileNotFoundError(f"File or directory not found at path: '{path}'") from e
            # Py4JJavaError cannot be constructed from a message alone (it also
            # requires the Java exception object), so propagate the original.
            raise

        except Py4JJavaError:
            # Re-raise as-is so it is not wrapped by the generic handler below
            # and keeps its Java-side details.
            raise

        except Exception as e:
            raise Exception(f"An unexpected error occurred while reading the CSV: {e}") from e

    def read_excel(self, path: str, header: bool, schema: "StructType | None" = None):
        """
        Reads Excel data into a PySpark DataFrame.

        Uses the "com.crealytics.spark.excel" data source format.

        Parameters:
        path (str): Path to the Excel file.
        header (bool): Whether the first row is a header.
        schema (StructType, optional): Schema for the DataFrame. Defaults to None.
            Passed to the reader unchanged; its type is not validated here.

        Returns:
        DataFrame: Loaded DataFrame.

        Raises:
        TypeError: If 'header' is not boolean.
        FileNotFoundError: If the file does not exist at the given path.
        Exception: For Spark errors and other unexpected errors; the original
            error is attached as the cause.
        """
        if not isinstance(header, bool):
            raise TypeError("The 'header' parameter must be a boolean (True or False).")

        try:
            reader = self.spark.read.format("com.crealytics.spark.excel") \
                                    .option("header", header)

            # Apply schema only if provided
            if schema is not None:
                reader = reader.schema(schema)

            return reader.load(path)

        except AnalysisException as e:
            if "path does not exist" in str(e).lower():
                raise FileNotFoundError(f"File or directory not found at path: '{path}'") from e
            raise Exception(f"A PySpark AnalysisException occurred while reading the Excel: {e}") from e

        except Py4JJavaError as e:
            raise Exception(f"A general PySpark error occurred while reading the Excel: {e}") from e

        except Exception as e:
            raise Exception(f"An unexpected error occurred while reading the Excel: {e}") from e

    def read_json(self, path: str, multiLine: bool, schema: "StructType | None" = None):
        """
        Reads JSON data into a PySpark DataFrame.

        Parameters:
        path (str): Path to the JSON file.
        multiLine (bool): Whether JSON is multi-line.
        schema (StructType, optional): Schema for the DataFrame. Defaults to None.
            Passed to the reader unchanged; its type is not validated here.

        Returns:
        DataFrame: Loaded DataFrame.

        Raises:
        TypeError: If 'multiLine' is not boolean.
        FileNotFoundError: If the file does not exist at the given path.
        Exception: For Spark errors and other unexpected errors; the original
            error is attached as the cause.
        """
        if not isinstance(multiLine, bool):
            raise TypeError("The 'multiLine' parameter must be a boolean (True or False).")

        try:
            reader = self.spark.read.format("json") \
                                    .option("multiLine", multiLine)

            # Apply schema only if provided
            if schema is not None:
                reader = reader.schema(schema)

            return reader.load(path)

        except AnalysisException as e:
            if "path does not exist" in str(e).lower():
                raise FileNotFoundError(f"File or directory not found at path: '{path}'") from e
            raise Exception(f"A PySpark AnalysisException occurred while reading the JSON: {e}") from e

        except Py4JJavaError as e:
            raise Exception(f"A general PySpark error occurred while reading the JSON: {e}") from e

        except Exception as e:
            raise Exception(f"An unexpected error occurred while reading the JSON: {e}") from e
