<a href="https://colab.research.google.com/github/SravanGatla/AWS-Snowflake-DataPipeline/blob/main/testing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import io

import boto3
import pandas as pd

class S3Fetcher:
    """Download objects from Amazon S3 and parse them into pandas DataFrames."""

    @staticmethod
    def _read_text(bucket_name, file_key, s3_client=None):
        """Return the body of one S3 object decoded as UTF-8 text.

        Args:
            bucket_name: Name of the S3 bucket holding the object.
            file_key: Key of the object inside the bucket.
            s3_client: Optional object exposing ``get_object(Bucket=..., Key=...)``.
                When omitted, a default ``boto3.client('s3')`` is created.
        """
        # Creating the client lazily keeps the original two-argument call
        # style working while letting callers reuse or inject a client.
        client = boto3.client('s3') if s3_client is None else s3_client
        response = client.get_object(Bucket=bucket_name, Key=file_key)
        return response['Body'].read().decode('utf-8')

    @staticmethod
    def fetch_csv_data(bucket_name, file_key, s3_client=None):
        """Fetch a CSV object from S3 and parse it with ``pandas.read_csv``.

        Args:
            bucket_name: Name of the S3 bucket holding the object.
            file_key: Key of the CSV object inside the bucket.
            s3_client: Optional pre-built S3 client; defaults to a new
                ``boto3.client('s3')``.

        Returns:
            The DataFrame produced by ``pandas.read_csv``.

        Raises:
            Nothing is caught here: errors from the S3 call, UTF-8 decoding,
            or pandas parsing propagate to the caller unchanged.
        """
        text = S3Fetcher._read_text(bucket_name, file_key, s3_client)
        # io.StringIO replaces pd.compat.StringIO, which is not available in
        # current pandas releases.
        return pd.read_csv(io.StringIO(text))

    @staticmethod
    def fetch_json_data(bucket_name, file_key, s3_client=None):
        """Fetch a JSON object from S3 and parse it with ``pandas.read_json``.

        Args:
            bucket_name: Name of the S3 bucket holding the object.
            file_key: Key of the JSON object inside the bucket.
            s3_client: Optional pre-built S3 client; defaults to a new
                ``boto3.client('s3')``.

        Returns:
            The DataFrame produced by ``pandas.read_json``.

        Raises:
            Nothing is caught here: errors from the S3 call, UTF-8 decoding,
            or pandas parsing propagate to the caller unchanged.
        """
        text = S3Fetcher._read_text(bucket_name, file_key, s3_client)
        # Wrap the text in a buffer rather than passing the raw string so
        # pandas treats it as file content.
        return pd.read_json(io.StringIO(text))

In [None]:
import pytest
from your_module import CSVDataExtractor
from S3Fetcher import S3Fetcher

class TestCSVDataExtractor:
    """Compare CSVDataExtractor output with the same file read via S3Fetcher."""

    @pytest.fixture
    def example_csv_data(self):
        """Provide the reference frame loaded from 'test-file.csv'."""
        # NOTE(review): this bucket name differs from the one handed to the
        # extractor in the test below; both look like placeholders -- confirm.
        reference_frame = S3Fetcher.fetch_csv_data(
            'your-test-bucket-name', 'test-file.csv'
        )
        return reference_frame

    def test_extract_data(self, example_csv_data):
        """extract_data() must return one frame equal to the reference."""
        frames = CSVDataExtractor('test-bucket', 'test-file.csv').extract_data()

        assert len(frames) == 1
        assert frames[0].equals(example_csv_data)


# Invoke pytest when this cell/script is executed directly.
if __name__ == "__main__":
    pytest.main()


In [None]:
import pytest
from your_module import JSONDataExtractor
from S3Fetcher import S3Fetcher

class TestJSONDataExtractor:
    """Compare JSONDataExtractor output with the same file read via S3Fetcher."""

    @pytest.fixture
    def example_json_data(self):
        """Provide the reference frame loaded from 'test-file.json'."""
        # NOTE(review): this bucket name differs from the one handed to the
        # extractor in the test below; both look like placeholders -- confirm.
        reference_frame = S3Fetcher.fetch_json_data(
            'your-test-bucket-name', 'test-file.json'
        )
        return reference_frame

    def test_extract_data(self, example_json_data):
        """extract_data() must return one frame equal to the reference."""
        frames = JSONDataExtractor('test-bucket', 'test-file.json').extract_data()

        assert len(frames) == 1
        assert frames[0].equals(example_json_data)


# Invoke pytest when this cell/script is executed directly.
if __name__ == "__main__":
    pytest.main()


In [None]:
import pytest
from your_module import DataProcessor
from S3Fetcher import S3Fetcher

class TestDataProcessor:
    """Check the column dtypes produced by DataProcessor."""

    @pytest.fixture
    def example_data(self):
        """Provide the input frame loaded from 'test-file.csv' via S3Fetcher."""
        source_frame = S3Fetcher.fetch_csv_data(
            'your-test-bucket-name', 'test-file.csv'
        )
        return source_frame

    def test_process_and_analyze_data(self, example_data):
        """Processed output must expose integer 'age' and datetime 'date'."""
        processed = DataProcessor(example_data).process_and_analyze_data()

        # NOTE(review): comparing a dtype with the builtin ``int`` depends on
        # the platform's default integer width -- confirm this is intended.
        assert processed['age'].dtype == int
        assert processed['date'].dtype == 'datetime64[ns]'


# Invoke pytest when this cell/script is executed directly.
if __name__ == "__main__":
    pytest.main()


In [None]:
import pytest
from your_module import DataMasker
from S3Fetcher import S3Fetcher

class TestDataMasker:
    """Check the result of DataMasker.mask_sensitive_data()."""

    @pytest.fixture
    def example_data(self):
        """Provide the input frame loaded from 'test-file.csv' via S3Fetcher."""
        source_frame = S3Fetcher.fetch_csv_data(
            'your-test-bucket-name', 'test-file.csv'
        )
        return source_frame

    def test_mask_sensitive_data(self, example_data):
        """Masking a one-frame list must return a one-frame list."""
        masked_frames = DataMasker([example_data]).mask_sensitive_data()

        assert len(masked_frames) == 1
        # NOTE(review): this expects the masked frame to equal the fixture
        # frame; presumably the masker mutates its input in place or the
        # sample has nothing to mask -- verify against DataMasker.
        assert masked_frames[0].equals(example_data)


# Invoke pytest when this cell/script is executed directly.
if __name__ == "__main__":
    pytest.main()


In [None]:
import pytest
from your_module import SnowflakeLoader
from S3Fetcher import S3Fetcher
from unittest.mock import patch, MagicMock

class TestSnowflakeLoader:
    """Exercise SnowflakeLoader against a patched Snowflake connection."""

    @pytest.fixture
    def example_data(self):
        """Provide the frame to load, read from 'test-file.csv' via S3Fetcher."""
        source_frame = S3Fetcher.fetch_csv_data(
            'your-test-bucket-name', 'test-file.csv'
        )
        return source_frame

    @patch('snowflake.connector.connect')
    def test_load_data_to_snowflake(self, mock_connect, example_data):
        """Loading must execute on, and commit through, the patched cursor."""
        # Route every cursor() call on the fake connection to one known mock.
        fake_connection = mock_connect.return_value
        cursor_double = MagicMock()
        fake_connection.cursor.return_value = cursor_double

        SnowflakeLoader([example_data], {}, 'test_table').load_data_to_snowflake()

        assert cursor_double.execute.called
        # NOTE(review): commit is checked on the cursor, not the connection;
        # confirm this matches how SnowflakeLoader commits.
        assert cursor_double.commit.called


# Invoke pytest when this cell/script is executed directly.
if __name__ == "__main__":
    pytest.main()


In [None]:
import pytest
from your_module import DataProcessor, DataMasker, SnowflakeLoader
from S3Fetcher import S3Fetcher
from unittest.mock import patch

class TestParallelProcessing:
    """Exercise the ``*_in_parallel`` variants of the pipeline stages."""

    @pytest.fixture
    def example_data(self):
        """Provide the input frame loaded from 'test-file.csv' via S3Fetcher."""
        return S3Fetcher.fetch_csv_data('your-test-bucket-name', 'test-file.csv')

    def test_process_data_in_parallel(self, example_data):
        """Parallel processing must yield integer 'age' and datetime 'date'."""
        processor = DataProcessor(example_data)
        result = processor.process_data_in_parallel()
        assert result['age'].dtype == int
        assert result['date'].dtype == 'datetime64[ns]'

    def test_mask_data_in_parallel(self, example_data):
        """Parallel masking of a one-frame list must return a one-frame list."""
        masker = DataMasker([example_data])
        result = masker.mask_data_in_parallel()
        assert len(result) == 1
        assert result[0].equals(example_data)

    @patch('snowflake.connector.connect')
    def test_load_data_in_parallel(self, mock_connect, example_data):
        """Parallel loading must execute on, and commit through, the cursor."""
        # Only ``patch`` is imported for this cell, so reuse the child mock
        # that patch() auto-creates instead of constructing MagicMock()
        # (which was an undefined name here).
        mock_cursor = mock_connect.return_value.cursor.return_value

        loader = SnowflakeLoader([example_data], {}, 'test_table')
        loader.load_data_in_parallel()

        assert mock_cursor.execute.called
        # NOTE(review): commit is checked on the cursor, not the connection;
        # confirm this matches how SnowflakeLoader commits.
        assert mock_cursor.commit.called


# Invoke pytest when this cell/script is executed directly.
if __name__ == "__main__":
    pytest.main()
