### Loading data from a GitHub repo and saving it as a local file

In [1]:
import pandas as pd
import os
import logging

In [None]:
# Notebook-wide logger named 'mycode'; records at DEBUG level and above are accepted.
logger = logging.getLogger(name='mycode')
logger.setLevel(level=logging.DEBUG)

In [None]:
# Console (stream) handler that emits records at DEBUG level and above.
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.DEBUG)
# Line layout: timestamp - logger name - level - message.
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
console_handler.setFormatter(formatter)
# logging.getLogger() hands back the same logger object on every call, so
# re-running this cell would otherwise stack one more console handler each
# time and print every message repeatedly. Drop the plain StreamHandlers
# attached earlier (subclasses such as FileHandler are left alone) before
# attaching the fresh one.
for stale_handler in [h for h in logger.handlers if type(h) is logging.StreamHandler]:
    logger.removeHandler(stale_handler)
logger.addHandler(console_handler)

In [4]:
def load_data(data_url: str) -> pd.DataFrame:
    """Load a CSV file into a DataFrame.

    Args:
        data_url: Location of the CSV, passed unchanged to ``pd.read_csv``
            (the notebook calls this with an HTTPS URL).

    Returns:
        The DataFrame produced by ``pd.read_csv``.

    Raises:
        pd.errors.ParserError: If pandas cannot parse the file; logged and
            re-raised.
        Exception: Any other failure raised by ``pd.read_csv``; logged and
            re-raised unchanged.
    """
    try:
        df = pd.read_csv(data_url)
    except pd.errors.ParserError as e:
        logger.error('Failed to parse the CSV file: %s', e)
        raise
    except Exception as e:
        logger.error('Unexpected error occurred while loading the data: %s', e)
        raise
    else:
        # Only reached when read_csv succeeded.
        logger.debug('Data loaded successfully from %s', data_url)
        return df

In [5]:
def save_data(df: pd.DataFrame, data_path: str, file_name: str = "data.csv") -> None:
    """Write a DataFrame as CSV into the ``raw`` sub-folder of ``data_path``.

    Args:
        df: DataFrame to write; the index is not written (``index=False``).
        data_path: Base folder. ``<data_path>/raw`` is created if missing.
        file_name: Name of the CSV file inside the ``raw`` folder. Defaults
            to ``"data.csv"``, the name this function has always used.

    Raises:
        Exception: Any failure while creating the folder or writing the
            file is logged and re-raised unchanged.
    """
    try:
        raw_data_path = os.path.join(data_path, 'raw')
        # exist_ok=True keeps repeated runs from failing on an existing folder.
        os.makedirs(raw_data_path, exist_ok=True)
        file_path = os.path.join(raw_data_path, file_name)
        df.to_csv(file_path, index=False)
        logger.debug('Data successfully saved to %s', file_path)
    except Exception as e:
        logger.error('Unexpected error occurred while saving the data: %s', e)
        raise


In [15]:
# Defaults used when main() is called without arguments.
DEFAULT_DATA_URL = "https://raw.githubusercontent.com/JishnudipSaha/Datasets/refs/heads/main/Titanic-Dataset.csv"
DEFAULT_DATA_DIR = './data'


def main(data_url: str = DEFAULT_DATA_URL, data_path: str = DEFAULT_DATA_DIR) -> None:
    """Download a CSV and store a local copy.

    Args:
        data_url: Source CSV location handed to ``load_data``. Defaults to
            the Titanic dataset URL.
        data_path: Base output folder handed to ``save_data``. Defaults to
            ``'./data'``, resolved against the current working directory.

    Raises:
        Exception: Any failure from ``load_data`` or ``save_data`` is
            logged, echoed to stdout and re-raised.
    """
    try:
        df = load_data(data_url=data_url)
        save_data(df=df, data_path=data_path)
        logger.debug('Data loading and saving successfully completed')
    except Exception as e:
        logger.error('Failed to complete data loading and saving: %s', e)
        # Also echo the failure on stdout, in addition to the log record.
        print(f"Error: {e}")
        raise


if __name__ == '__main__':
    main()

2026-01-26 13:14:27,903 - mycode - DEBUG - Data loaded successfully from https://raw.githubusercontent.com/JishnudipSaha/Datasets/refs/heads/main/Titanic-Dataset.csv
2026-01-26 13:14:27,907 - mycode - DEBUG - Data successfully saved to ./data\raw
2026-01-26 13:14:27,908 - mycode - DEBUG - Data loading and saving successfully completed


In [13]:
# Read back the CSV stored under ./data/raw to check the saved copy.
data_path = './data/raw/data.csv'
dataFrame = pd.read_csv(filepath_or_buffer=data_path)

In [14]:
# Preview the first five rows of the re-loaded data.
dataFrame.head(n=5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S
