In [0]:
from pyspark.sql import SparkSession


# Obtain the session shared by every data source below; getOrCreate() hands
# back an already-running session when there is one instead of building anew.
spark = (
    SparkSession.builder
    .appName("DataSourceExample")
    .getOrCreate()
)

class DataSource:
    """Base class for readers that turn a stored dataset into a DataFrame.

    Subclasses override :meth:`get_dataframe`; the base class only stores
    the location to read from.
    """

    def __init__(self, path):
        """Remember ``path`` so that ``get_dataframe`` can read from it."""
        self.path = path

    def get_dataframe(self):
        """Placeholder that concrete data sources must override.

        Raises:
            ValueError: always, with the message ``"Not implemented"``.
        """
        message = "Not implemented"
        raise ValueError(message)

class CSVDataSource(DataSource):
    """Data source backed by Spark's ``csv`` reader."""

    def get_dataframe(self):
        """Read ``self.path`` as CSV, treating the first row as the header."""
        reader = spark.read.format("csv")
        reader = reader.option("header", True)
        return reader.load(self.path)

class ParquetDataSource(DataSource):
    """Data source backed by Spark's ``parquet`` reader."""

    def get_dataframe(self):
        """Read ``self.path`` with the ``parquet`` format and return the result."""
        parquet_reader = spark.read.format("parquet")
        return parquet_reader.load(self.path)

class DeltaDataSource(DataSource):
    """Data source backed by Spark's ``delta`` reader."""

    def get_dataframe(self):
        """Read ``self.path`` with the ``delta`` format and return the result."""
        delta_reader = spark.read.format("delta")
        return delta_reader.load(self.path)


def get_data_source(data_type, file_path):
    """Build the data source that matches ``data_type``.

    Args:
        data_type: One of ``"csv"``, ``"parquet"`` or ``"delta"``.
        file_path: Location passed to the chosen data source.

    Returns:
        A ``CSVDataSource``, ``ParquetDataSource`` or ``DeltaDataSource``
        constructed with ``file_path``.

    Raises:
        ValueError: if ``data_type`` is none of the supported names.
    """
    if data_type == "csv":
        return CSVDataSource(file_path)
    if data_type == "parquet":
        return ParquetDataSource(file_path)
    if data_type == "delta":
        return DeltaDataSource(file_path)

    # Nothing matched: report the offending type to the caller.
    raise ValueError(f"Not implemented for data_type: {data_type}")

if __name__ == "__main__":
    # Demo run: read the sample transactions CSV and display it.
    data_source = get_data_source(
        data_type="csv",
        file_path="dbfs:/FileStore/tables/Transaction_Updated.csv",
    )
    df = data_source.get_dataframe()
    df.show()


+--------------+-----------+------------+----------------+
|transaction_id|customer_id|product_name|transaction_date|
+--------------+-----------+------------+----------------+
|            11|        105|      iPhone|      2022-02-01|
|            12|        106|      iPhone|      2022-02-02|
|            13|        107|     AirPods|      2022-02-03|
|            14|        105|     AirPods|      2022-02-04|
|            15|        108|      iPhone|      2022-02-05|
|            16|        106|     MacBook|      2022-02-06|
|            17|        107|      iPhone|      2022-02-07|
|            18|        105|     MacBook|      2022-02-08|
|            19|        108|     AirPods|      2022-02-09|
|            20|        106|     AirPods|      2022-02-10|
+--------------+-----------+------------+----------------+



In [0]:
from pyspark.sql import SparkSession

# Shared SparkSession: getOrCreate() returns the existing session when one is
# already running, so every data source below reads through the same session.
spark = (
    SparkSession.builder
    .appName("DataSourceExample")
    .getOrCreate()
)

class DataSource:
    """Common parent of the format-specific data sources.

    It only records where the data lives; reading is left to subclasses.
    """

    def __init__(self, path):
        """Store ``path`` for later use by ``get_dataframe``."""
        self.path = path

    def get_dataframe(self):
        """Abstract hook that each subclass replaces with a real reader.

        Raises:
            ValueError: always, with the message ``"Not implemented"``.
        """
        reason = "Not implemented"
        raise ValueError(reason)

class CSVDataSource(DataSource):
    """Data source backed by Spark's ``csv`` reader, with best-effort loading."""

    def get_dataframe(self):
        """Read ``self.path`` as CSV with a header row and schema inference.

        Returns:
            The loaded DataFrame, or ``None`` when the read raises; in that
            case the error is printed instead of propagated.
        """
        try:
            # The whole read stays inside the try so that any failure while
            # configuring or running the reader is reported the same way.
            reader = spark.read.format("csv")
            reader = reader.option("header", True)
            reader = reader.option("inferSchema", True)
            frame = reader.load(self.path)
        except Exception as err:
            print(f"Error loading CSV file {self.path}: {err}")
            return None
        else:
            return frame

class ParquetDataSource(DataSource):
    """Data source backed by Spark's ``parquet`` reader, with best-effort loading."""

    def get_dataframe(self):
        """Read ``self.path`` with the ``parquet`` format.

        Returns:
            The loaded DataFrame, or ``None`` when the read raises; in that
            case the error is printed instead of propagated.
        """
        try:
            parquet_reader = spark.read.format("parquet")
            frame = parquet_reader.load(self.path)
        except Exception as err:
            print(f"Error loading Parquet file {self.path}: {err}")
            return None
        else:
            return frame

class DeltaDataSource(DataSource):
    """Data source backed by Spark's ``delta`` reader, with best-effort loading."""

    def get_dataframe(self):
        """Read ``self.path`` with the ``delta`` format.

        Returns:
            The loaded DataFrame, or ``None`` when the read raises; in that
            case the error is printed instead of propagated.
        """
        try:
            delta_reader = spark.read.format("delta")
            frame = delta_reader.load(self.path)
        except Exception as err:
            print(f"Error loading Delta table {self.path}: {err}")
            return None
        else:
            return frame

def get_data_source(data_type, file_path):
    """Return the data source object that handles ``data_type``.

    Args:
        data_type: One of ``"csv"``, ``"parquet"`` or ``"delta"``.
        file_path: Location passed to the chosen data source.

    Returns:
        A ``CSVDataSource``, ``ParquetDataSource`` or ``DeltaDataSource``
        constructed with ``file_path``.

    Raises:
        ValueError: if ``data_type`` is none of the supported names.
    """
    if data_type == "csv":
        return CSVDataSource(file_path)
    if data_type == "parquet":
        return ParquetDataSource(file_path)
    if data_type == "delta":
        return DeltaDataSource(file_path)

    # Falling through means the requested type has no data source class.
    raise ValueError(f"Not implemented for data_type: {data_type}")

if __name__ == "__main__":
    # Demo run: read the sample transactions CSV and display it.
    file_path = "dbfs:/FileStore/tables/Transaction_Updated.csv"
    data_source = get_data_source(data_type="csv", file_path=file_path)
    df = data_source.get_dataframe()

    # get_dataframe() reports a failed read by returning None.
    if df is None:
        print("Failed to load DataFrame.")
    else:
        df.show()


+--------------+-----------+------------+----------------+
|transaction_id|customer_id|product_name|transaction_date|
+--------------+-----------+------------+----------------+
|            11|        105|      iPhone|      2022-02-01|
|            12|        106|      iPhone|      2022-02-02|
|            13|        107|     AirPods|      2022-02-03|
|            14|        105|     AirPods|      2022-02-04|
|            15|        108|      iPhone|      2022-02-05|
|            16|        106|     MacBook|      2022-02-06|
|            17|        107|      iPhone|      2022-02-07|
|            18|        105|     MacBook|      2022-02-08|
|            19|        108|     AirPods|      2022-02-09|
|            20|        106|     AirPods|      2022-02-10|
+--------------+-----------+------------+----------------+

