##### **Loading a Dataset**

In [None]:
# importing required libraries
import pandas as pd
import logging
import os
import sys

# Logging setup: every record is appended, with a timestamp, to one log file.
log_file = 'LOG_loading_dataset.log'
logging.basicConfig(
    # What gets recorded: INFO and anything more severe.
    level=logging.INFO,
    # Where it goes: append mode keeps records from earlier runs.
    filename=log_file,
    filemode='a',
    encoding='utf-8',
    errors='ignore',  # drop characters that cannot be encoded instead of raising
    # How each record looks: "2024-01-31 12:00:00 - INFO - message".
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
)

def prompt_file_path() -> str:
    """
    Prompt the user to enter a file path or quit the program.

    Keeps asking until a non-empty answer is given. The answer is only
    checked for being non-empty; whether the path exists is not verified here.

    Returns:
        str: The absolute form of the path entered by the user.

    Raises:
        SystemExit: With status 1 if the user enters "q" (any letter case),
            or if standard input is closed so that no answer can be read.
    """

    while True:
        try:
            file_path = input('Please enter a valid file path or "q" to exit.').strip()
        except EOFError:
            # stdin is closed (e.g. a non-interactive run): no answer can ever
            # arrive, so stop with a logged exit instead of a raw traceback.
            logging.error('No input available to read a file path from. Exiting.')
            sys.exit(1)

        if file_path.lower() == 'q':
            logging.info('User chosen to exit.')
            sys.exit(1)

        if file_path:
            logging.info('A valid file path has been entered.')
            return os.path.abspath(file_path)

        # Blank answer: record it and ask again.
        logging.warning('The file path is empty. Please provide a valid file path.')

def validate_file_path(file_path: str) -> str:
    """
    Validate that the provided path points to an existing file.

    While the path is not an existing regular file (it is missing, or it is
    a directory), the problem is logged and the user is prompted for another
    path via ``prompt_file_path``.

    Args:
        file_path (str): The initial file path.

    Returns:
        str: A path that refers to an existing file.
    """

    # isfile rather than exists: a directory "exists" too, but it cannot be
    # read as a dataset, so it must be rejected here and asked for again.
    while not os.path.isfile(file_path):
        logging.error(f'File not found at: "{file_path}". Please try again.')
        file_path = prompt_file_path()

    logging.info(f'File found at: {file_path}.')
    return file_path
    
def load_dataset(file_path: str) -> pd.DataFrame:
    """
    Read a CSV dataset into a Pandas DataFrame, re-prompting on bad files.

    Each attempt validates the candidate path and then parses it as UTF-8
    CSV. An empty or unparsable file is logged and the user is asked for a
    different path; any other error is logged and ends the program.

    Args:
        file_path (str): Path to the dataset file to try first.

    Returns:
        pd.DataFrame: The parsed dataset.

    Raises:
        SystemExit: With status 1 when an unexpected error occurs while
            validating or reading the file.
    """

    candidate = file_path
    while True:
        try:
            checked_path = validate_file_path(candidate)
            frame = pd.read_csv(filepath_or_buffer=checked_path, encoding='utf-8')
        except pd.errors.EmptyDataError:
            # Recoverable: fall through and ask for another file.
            logging.error('The file is empty. Please provide a valid dataset file.')
        except pd.errors.ParserError:
            # Recoverable: fall through and ask for another file.
            logging.error('Error occured while parsing the file. Please check the file format.')
        except Exception as exc:
            # Anything else is treated as fatal.
            logging.error(f'Error occurred while reading the dataset: {exc}.')
            sys.exit(1)
        else:
            logging.info(f'Dataset successfully loaded from: {checked_path}.')
            return frame

        candidate = prompt_file_path()

In [2]:
# Example Usage
# Loads the CSV at ./data/titanic.csv through load_dataset and previews it.

if __name__ == '__main__':
    logging.info('Starting dataset loading process...')
    # First path handed to load_dataset.
    initial_path = './data/titanic.csv'
    dataset = load_dataset(initial_path)
    logging.info('Dataset loaded successfully.')
    
# NOTE: this line sits outside the guard above, so it relies on `dataset`
# having been bound there; it evaluates to the first rows of the DataFrame.
dataset.head()




Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,0,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,1,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S
2,894,0,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
3,895,0,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,1,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S
