### Check AWS: local connection and data-pulling capability

In [1]:
import boto3
import pandas as pd
import os

In [42]:
#os.environ["AWS_ACCESS_KEY_ID"]=""
#os.environ["AWS_SECRET_ACCESS_KEY"]=""


In [46]:
# Open an S3 resource handle for the ap-southeast-2 region. The credentials
# are looked up in the process environment; an unset variable is passed
# through to boto3 as None.
s3 = boto3.resource(
    service_name="s3",
    region_name="ap-southeast-2",
    aws_access_key_id=os.environ.get("AWS_ACCESS_KEY_ID"),
    aws_secret_access_key=os.environ.get("AWS_SECRET_ACCESS_KEY"),
)

In [47]:
# Smoke test: copy a single object from the bucket to a local file.
s3.Bucket("blackflame11").download_file(
    Key="Transaction.csv",
    Filename="D:/Silent_Night/mlops/AML_Classification/research/test_data.csv",
)
# Conclusion: pulling data from AWS is successful.

#### Modular Coding Approach

In [5]:
import os

In [2]:
%pwd
#Conclusion: Move back to the AML_Classification folder to access the configuration file more easily

'd:\\Silent_Night\\mlops\\AML_Classification\\research'

In [3]:
# Move up one directory level. The %pwd outputs before and after this cell
# show the working directory changing from ...\research to
# ...\AML_Classification, so later relative paths resolve from there.
os.chdir("../")

In [4]:
%pwd

'd:\\Silent_Night\\mlops\\AML_Classification'

In [11]:
#Entity
from dataclasses import dataclass 
from pathlib import Path

# frozen=True makes instances immutable: assigning to a field after
# construction raises dataclasses.FrozenInstanceError. It does NOT validate
# the types of the values passed in; the annotations below are not enforced
# at runtime.
@dataclass(frozen=True)
class DataIngestionConfig:
    """Settings consumed by the data-ingestion step (download from S3)."""

    root_dir: Path  # directory created before the download starts
    service: str  # passed to boto3.resource as service_name
    region: str  # passed to boto3.resource as region_name
    bucket_name: str  # name of the S3 bucket to read from
    aws_file: str  # object key inside the bucket
    download_path: Path  # local file path the object is written to
    aws_access_key_id: str  # credential passed to boto3.resource
    aws_secret_access_key: str  # credential passed to boto3.resource

In [12]:
from AML_Classifier.constants import CONFIG_FILE_PATH,PARAMS_FILE_PATH
from AML_Classifier.utils.common import read_yaml, create_directories

In [7]:
import yaml

In [76]:
# Parse the local secrets file (relative to the current working directory)
# with the safe YAML loader and keep the result for inspection.
with open('secrets.yaml', mode='r') as secrets_file:
    prime_service = yaml.safe_load(secrets_file)

In [16]:
#Configuration Manager

from AML_Classifier.constants.__init__ import CONFIG_FILE_PATH,PARAMS_FILE_PATH
from AML_Classifier.utils.common import read_yaml, create_directories

class ConfigurationManager:
    """Read the project's YAML files and assemble per-stage config objects.

    Args:
        config_filepath: Path of the main configuration YAML, handed to
            ``read_yaml``.
        params_filepath: Path of the parameters YAML, handed to ``read_yaml``.
        secrets_filepath: Path of the YAML file holding the AWS credentials.
            Defaults to ``secrets.yaml`` resolved against the current working
            directory, which is the location that used to be hard-coded.
    """

    # Keys that must be present in the secrets file.
    _AWS_SECRET_KEYS = ("aws_access_key_id", "aws_secret_access_key")

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
        secrets_filepath="secrets.yaml",
    ):
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.secrets_filepath = secrets_filepath

        # Make sure the top-level artifacts directory exists up front.
        create_directories([self.config.artifacts_root])

    def _load_aws_credentials(self) -> dict:
        """Load and validate the AWS credentials from the secrets file.

        Returns:
            The parsed mapping; it contains at least ``aws_access_key_id``
            and ``aws_secret_access_key``.

        Raises:
            FileNotFoundError: If the secrets file does not exist.
            ValueError: If the file does not contain a YAML mapping (for
                example, it is empty).
            KeyError: If a required credential key is missing.
        """
        with open(self.secrets_filepath, "r") as file:
            aws = yaml.safe_load(file)

        # safe_load yields None for an empty document; fail with a clear
        # message instead of an opaque "'NoneType' is not subscriptable".
        if not isinstance(aws, dict):
            raise ValueError(
                f"{self.secrets_filepath} must contain a YAML mapping with the keys "
                f"{', '.join(self._AWS_SECRET_KEYS)}"
            )

        missing = [key for key in self._AWS_SECRET_KEYS if key not in aws]
        if missing:
            raise KeyError(
                f"{self.secrets_filepath} is missing required key(s): {', '.join(missing)}"
            )
        return aws

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Build the configuration object for the data-ingestion stage.

        Creates the stage's ``root_dir`` as a side effect and combines the
        ``data_ingestion`` section of the main config with the AWS
        credentials read from the secrets file.

        Returns:
            A populated ``DataIngestionConfig``.

        Raises:
            FileNotFoundError, ValueError, KeyError: See
                ``_load_aws_credentials``.
        """
        config = self.config.data_ingestion

        # Create the directory in which the dataset will be stored.
        create_directories([config.root_dir])

        # Load the AWS connection info.
        aws = self._load_aws_credentials()

        return DataIngestionConfig(
            root_dir=config.root_dir,
            service=config.service,
            region=config.region,
            bucket_name=config.bucket_name,
            aws_file=config.aws_file,
            download_path=config.download_path,
            aws_access_key_id=aws["aws_access_key_id"],
            aws_secret_access_key=aws["aws_secret_access_key"],
        )

In [18]:
import boto3
import os
from AML_Classifier import logger
import pandas as pd

In [14]:
#Component

class DataIngestion:
    """Download the raw dataset from an S3 bucket to local disk.

    Args:
        config: Settings describing the S3 connection, the object to fetch
            and the local destination.
    """

    def __init__(self, config: DataIngestionConfig):
        self.config = config

    def download_file(self) -> str:
        """Fetch the configured S3 object and store it at ``download_path``.

        The parent directory of the destination is created if it does not
        exist yet.

        Returns:
            The local path the object was written to, as a string.

        Raises:
            OSError: If the destination directory cannot be created.
            Exception: Any error raised by boto3 while connecting or
                downloading propagates unchanged.
        """
        # download_path is declared as a Path on the config; convert it so a
        # plain string is always handed to boto3 (some boto3 releases reject
        # non-string Filename values).
        local_path = str(self.config.download_path)

        # A bare file name has no directory component; nothing to create then.
        target_dir = os.path.dirname(local_path)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

        # Create the S3 resource handle.
        s3 = boto3.resource(
            service_name=self.config.service,
            region_name=self.config.region,
            aws_access_key_id=self.config.aws_access_key_id,
            aws_secret_access_key=self.config.aws_secret_access_key,
        )

        # Download the data.
        s3.Bucket(self.config.bucket_name).download_file(
            Key=self.config.aws_file,
            Filename=local_path,
        )

        logger.info(f"Downloaded data from {self.config.bucket_name} AWS bucket into the path {self.config.download_path}")
        return local_path
            

In [19]:
#pipeline
# Run the data-ingestion stage end to end: load the configuration, build the
# component and download the dataset. Errors propagate unchanged; the former
# `try/except Exception as e: raise e` wrapper only re-raised the same
# exception and was removed.
config = ConfigurationManager()
data_ingestion_config = config.get_data_ingestion_config()
data_ingestion = DataIngestion(config=data_ingestion_config)
data_ingestion.download_file()

[2024-06-30 09:33:21,567: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-06-30 09:33:21,575: INFO: common: yaml file: params.yaml loaded successfully]
[2024-06-30 09:33:21,579: INFO: common: created directory at: artifacts]
[2024-06-30 09:33:21,582: INFO: common: created directory at: artifacts/data_ingestion]


[2024-06-30 09:33:25,319: INFO: 3980503791: Downloaded data from blackflame11 AWS bucket into the path artifacts/data_ingestion/data.csv]
