In [None]:
import os
import tempfile
import unittest

import pandas as pd

def data_extraction(file_path):
    """Read the CSV at ``file_path`` and return its contents as a DataFrame.

    Args:
        file_path: Path (or file-like object) handed straight to
            ``pandas.read_csv``.

    Returns:
        The parsed ``pandas.DataFrame``.
    """
    return pd.read_csv(file_path)


def data_transformation(data):
    """Deduplicate billing rows and derive numeric charge columns.

    Args:
        data: DataFrame with a ``billing_amount`` column (strings such as
            ``'$100'``, or already-numeric values) and a ``tax_amount``
            column convertible to float.

    Returns:
        A new DataFrame with exact duplicate rows removed, ``billing_amount``
        and ``tax_amount`` converted to float, and a ``total_charges`` column
        holding their sum. The caller's frame is left untouched.

    Raises:
        ValueError: If a billing or tax value cannot be parsed as a float.
    """
    # Take an explicit copy so the column assignments below never target a
    # view of the caller's frame (avoids SettingWithCopyWarning).
    data = data.drop_duplicates().copy()

    billing = data['billing_amount']
    if not pd.api.types.is_numeric_dtype(billing):
        # '$' is the end-of-string anchor in a regular expression, so request
        # a literal replacement instead of relying on the pandas-version-
        # dependent default of ``regex``.
        billing = billing.str.replace('$', '', regex=False)
    data['billing_amount'] = billing.astype(float)

    data['tax_amount'] = data['tax_amount'].astype(float)
    data['total_charges'] = data['billing_amount'] + data['tax_amount']
    return data


def data_loading(data, output_file):
    """Write ``data`` to ``output_file`` as CSV, omitting the index column.

    Args:
        data: DataFrame to persist.
        output_file: Destination path passed to ``DataFrame.to_csv``.
    """
    data.to_csv(path_or_buf=output_file, index=False)

class TestDataPipeline(unittest.TestCase):
    """Unit tests for the extract / transform / load helper functions.

    Each test runs against files inside a throw-away temporary directory, so
    the suite neither depends on nor modifies files in the current working
    directory.
    """

    def setUp(self):
        """Create an isolated workspace and write the known input fixture."""
        self._tmpdir = tempfile.TemporaryDirectory()
        # Registered immediately so the directory is removed even if the
        # remainder of setUp raises.
        self.addCleanup(self._tmpdir.cleanup)

        self.input_file = os.path.join(self._tmpdir.name, 'billing_data.csv')
        self.output_file = os.path.join(self._tmpdir.name, 'output.csv')
        self.input_data = pd.DataFrame({
            'customer_id': [1, 2, 3],
            'billing_amount': ['$100', '$200', '$150'],
            'tax_amount': [10, 20, 15],
        })
        # The extraction test must read a file whose contents the test
        # controls; reading an arbitrary billing_data.csv from the working
        # directory makes the expected result unpredictable.
        self.input_data.to_csv(self.input_file, index=False)

    def test_data_extraction(self):
        """Data read back from the CSV fixture matches what was written."""
        result = data_extraction(self.input_file)
        # assert_frame_equal reports the differing cells/dtypes on failure.
        pd.testing.assert_frame_equal(result, self.input_data)

    def test_data_transformation(self):
        """Amounts become floats and total_charges is their sum."""
        expected_result = pd.DataFrame({
            'customer_id': [1, 2, 3],
            'billing_amount': [100.0, 200.0, 150.0],
            'tax_amount': [10.0, 20.0, 15.0],
            'total_charges': [110.0, 220.0, 165.0],
        })
        # Pass a copy so this test cannot leak changes into the fixture.
        result = data_transformation(self.input_data.copy())
        pd.testing.assert_frame_equal(result, expected_result)

    def test_data_loading(self):
        """A frame written by data_loading round-trips through read_csv."""
        input_data = pd.DataFrame({
            'customer_id': [1, 2, 3],
            'total_charges': [110.0, 220.0, 165.0],
        })
        data_loading(input_data, self.output_file)
        self.assertTrue(os.path.exists(self.output_file), "Data loading failed")
        result = pd.read_csv(self.output_file)
        pd.testing.assert_frame_equal(result, input_data)

# Run the test suite only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    unittest.main()


**I ran the commands below in cmd after downloading the above code and saving it as "unit_test_project.py"**

pip install pandas --trusted-host pypi.org --trusted-host files.pythonhosted.org

cd C:\Users\YourUsername\Downloads

python unit_test_project.py


**CMD SAMPLE ERROR AFTER TEST:**

C:\Users\SNYABUTO\Downloads>python unit_test_project.py
Expected Result:
   customer_id billing_amount  tax_amount
0            1           $100          10
1            2           $200          20
2            3           $150          15
Actual Result:
   customer_id billing_amount  tax_amount
0            1           $100          10
1            2           $200          20
2            3           $300          30
3            4           $400          40
4            5           $500          50
F..
======================================================================
FAIL: test_data_extraction (__main__.TestDataPipeline)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "C:\Users\SNYABUTO\Downloads\unit_test_project.py", line 55, in test_data_extraction
    self.assertTrue(result.equals(expected_result), "Data extraction failed")
AssertionError: False is not true : Data extraction failed

----------------------------------------------------------------------
Ran 3 tests in 0.022s

FAILED (failures=1)

C:\Users\SNYABUTO\Downloads>