In [1]:
# mount google drive and export your work
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import unittest
from unittest.mock import patch
from unittest import mock
import requests
import pandas as pd
import numpy as np

class CustomTestResult(unittest.TestResult):
    def addSuccess(self, test):
        super().addSuccess(test)
        print(f"Test Passed: {test.id()}")

    def addFailure(self, test, err):
        super().addFailure(test, err)
        print(f"Test Failed: {test.id()}")

# Task I - Multiply Numbers Testing

In [5]:
def multiply_numbers(a, b):
    if (isinstance(a, int) or isinstance(a,float)) and (isinstance(b,int) or isinstance(b,float)):
        return a * b
    else:
      raise TypeError(" A and B need to be int or float")

class TestMultiplyNumbers(unittest.TestCase):

    def test_positive_numbers(self):
        self.assertEqual(multiply_numbers(3, 4), 12)
        print("two positive numbers and checks if the result is correct")

    def test_positive_with_zero(self):
        self.assertEqual(multiply_numbers(5, 0), 0)
        print("a positive number with zero and verifies if the result is zero")

    def test_positive_with_negative(self):
        self.assertEqual(multiply_numbers(-2, 3), -6)
        print("Sign is Correct")

    def test_negative_numbers(self):
        self.assertEqual(multiply_numbers(-4, -2), 8)
        print("Two Negatives make a Positive")

    def test_type_error(self):
        with self.assertRaises(TypeError):
            multiply_numbers("a", 5)

unittest.main(testRunner=unittest.TextTestRunner(resultclass=CustomTestResult),
              argv=['first-arg-is-ignored'],
              exit=False)

Ran 12 tests in 0.010s

OK


Duplicates correct
Test Passed: __main__.TestFindMax.test_duplicate_numbers
None is returned
Test Passed: __main__.TestFindMax.test_empty_list
max positive negative
Test Passed: __main__.TestFindMax.test_mixed_numbers
Negative Maximim
Test Passed: __main__.TestFindMax.test_negative_numbers
positive Maximum
Test Passed: __main__.TestFindMax.test_positive_numbers
Correct Result
Test Passed: __main__.TestFindMax.test_single_number
Test Passed: __main__.TestFindMax.test_type_error
Two Negatives make a Positive
Test Passed: __main__.TestMultiplyNumbers.test_negative_numbers
two positive numbers and checks if the result is correct
Test Passed: __main__.TestMultiplyNumbers.test_positive_numbers
Sign is Correct
Test Passed: __main__.TestMultiplyNumbers.test_positive_with_negative
a positive number with zero and verifies if the result is zero
Test Passed: __main__.TestMultiplyNumbers.test_positive_with_zero
Test Passed: __main__.TestMultiplyNumbers.test_type_error


<unittest.main.TestProgram at 0x7d09c180ff70>

# Task II - Find Max Testing

In [4]:
def find_max(numbers):
    if not isinstance(numbers, list):
        raise TypeError("Input must be a list.")
    if not numbers:
        return None
    max_num = numbers[0]
    for num in numbers:
        if num > max_num:
            max_num = num
    return max_num

class TestFindMax(unittest.TestCase):

    def test_empty_list(self):
        self.assertIsNone(find_max([]))
        print("None is returned")

    def test_single_number(self):
        self.assertEqual(find_max([5]), 5)
        print("Correct Result")

    def test_positive_numbers(self):
        self.assertEqual(find_max([1, 2, 3, 4, 5]), 5)
        print("positive Maximum")

    def test_negative_numbers(self):
        self.assertEqual(find_max([-1, -2, -3, -4, -5]), -1)
        print("Negative Maximim")

    def test_mixed_numbers(self):
        self.assertEqual(find_max([-5, 2, -3, 8, -1]), 8)
        print("max positive negative")

    def test_duplicate_numbers(self):
        self.assertEqual(find_max([3, 3, 3, 3, 3]), 3)
        print("Duplicates correct")

    def test_type_error(self):
        with self.assertRaises(TypeError):
            find_max("not_a_list")

if __name__ == '__main__':
    unittest.main(testRunner=unittest.TextTestRunner(resultclass=CustomTestResult),
              argv=['first-arg-is-ignored'],
              exit=False)

Ran 12 tests in 0.005s

OK


Duplicates correct
Test Passed: __main__.TestFindMax.test_duplicate_numbers
None is returned
Test Passed: __main__.TestFindMax.test_empty_list
max positive negative
Test Passed: __main__.TestFindMax.test_mixed_numbers
Negative Maximim
Test Passed: __main__.TestFindMax.test_negative_numbers
positive Maximum
Test Passed: __main__.TestFindMax.test_positive_numbers
Correct Result
Test Passed: __main__.TestFindMax.test_single_number
Test Passed: __main__.TestFindMax.test_type_error
Two Negatives make a Positive
Test Passed: __main__.TestMultiplyNumbers.test_negative_numbers
Result is Correct
Test Passed: __main__.TestMultiplyNumbers.test_positive_numbers
Sign is Correct
Test Passed: __main__.TestMultiplyNumbers.test_positive_with_negative
Result is zero
Test Passed: __main__.TestMultiplyNumbers.test_positive_with_zero
Test Passed: __main__.TestMultiplyNumbers.test_type_error


# Task III - Code Refactoring

In [6]:
import pandas as pd

def check_is_dataframe(df):
    if not isinstance(df, pd.DataFrame):
        raise TypeError("Input must be a Pandas DataFrame.")
    return df

def check_has_price_column(df):
    if 'price' not in df.columns:
        raise ValueError("DataFrame must contain a column named 'price'.")
    return df

def check_price_type(df):
    if df['price'].dtype != float:
        raise TypeError("Column 'price' must be of float type.")
    return df

def remove_negative_prices(df):
    df = df[df['price'] >= 0].copy()
    return df

def check_mean_and_std_deviation(df):
    mean_price = df['price'].mean()
    std_price = df['price'].std()
    if not (4 <= round(mean_price, 2) <= 6):
        raise ValueError("Mean price should be approximately 5.")
    if std_price > 1:
        raise ValueError("Standard deviation should not exceed +-1.")
    return df

def mean_center_data(df):
    df['price'] = df['price'] - df['price'].mean()
    return df

def check_dataframe(df):
    df = check_is_dataframe(df)
    df = check_has_price_column(df)
    df = check_price_type(df)
    df = remove_negative_prices(df)
    df = check_mean_and_std_deviation(df)
    df = mean_center_data(df)
    return df

class TestDataChecks(unittest.TestCase):

    def setUp(self):
        self.passing_df = pd.DataFrame({'price': [4.5, 5.2, 4.8, 5.5, 5.1]})
        self.failing_df_type = [4.5, 5.2, 4.8, 5.5, 5.1]

    def test_is_dataframe(self):
        with self.assertRaises(TypeError):
            check_is_dataframe(self.failing_df_type)

    def test_has_price_column(self):
        with self.assertRaises(ValueError):
            check_has_price_column(pd.DataFrame({'no_price': [1, 2, 3]}))

    def test_price_type(self):
        with self.assertRaises(TypeError):
            check_price_type(pd.DataFrame({'price': ['a', 'b', 'c']}))

    def test_remove_negative_prices(self):
        cleaned_df = remove_negative_prices(self.passing_df)
        self.assertTrue(cleaned_df.equals(self.passing_df))

    def test_mean_and_std_deviation(self):
        with self.assertRaises(ValueError):
            check_mean_and_std_deviation(pd.DataFrame({'price': [1, 2, 3]}))

    def test_mean_center_data(self):
        centered_df = mean_center_data(self.passing_df)
        self.assertAlmostEqual(centered_df['price'].mean(), 0)
        print("Centering of Data was success")

if __name__ == '__main__':
    unittest.main(testRunner=unittest.TextTestRunner(resultclass=CustomTestResult),
              argv=['first-arg-is-ignored'],
              exit=False)

Ran 18 tests in 0.012s

OK


Test Passed: __main__.TestDataChecks.test_has_price_column
Test Passed: __main__.TestDataChecks.test_is_dataframe
Test Passed: __main__.TestDataChecks.test_mean_and_std_deviation
Centering of Data was success
Test Passed: __main__.TestDataChecks.test_mean_center_data
Test Passed: __main__.TestDataChecks.test_price_type
Test Passed: __main__.TestDataChecks.test_remove_negative_prices
Duplicates correct
Test Passed: __main__.TestFindMax.test_duplicate_numbers
None is returned
Test Passed: __main__.TestFindMax.test_empty_list
max positive negative
Test Passed: __main__.TestFindMax.test_mixed_numbers
Negative Maximim
Test Passed: __main__.TestFindMax.test_negative_numbers
positive Maximum
Test Passed: __main__.TestFindMax.test_positive_numbers
Correct Result
Test Passed: __main__.TestFindMax.test_single_number
Test Passed: __main__.TestFindMax.test_type_error
Two Negatives make a Positive
Test Passed: __main__.TestMultiplyNumbers.test_negative_numbers
two positive numbers and checks if the

# Task IV - Data Transformation Pipeline

In [7]:


def clean_dataframe(df):
    if not isinstance(df, pd.DataFrame):
        raise TypeError("Input must be a Pandas DataFrame.")

    df['price'] = df['price'].round(2)
    df = df.dropna(subset=['price'])
    # Drop rows with negative values
    df = df[df['price'] >= 0]

    return df

import unittest

class TestDataTransformationPipeline(unittest.TestCase):

    def setUp(self):
        self.df = pd.DataFrame({"price": {"0": 1983.578, "1": 3838, "2": 1759, "3": np.nan, "4": 2626.73, "5": -7325.10},
                                "product name": {"0": "Cloud Trace", "1": "Cloud DNS", "2": "Cloud SQL",
                                                 "3": "Cloud Dataflow", "4": "Cloud Dataflow", "5": "Cloud Security Scanner"}})
        self.expected_result = pd.DataFrame({"price": {"0": 1983.58, "1": 3838.00, "2": 1759.00, "4": 2626.73},
                                             "product name": {"0": "Cloud Trace", "1": "Cloud DNS", "2": "Cloud SQL",
                                                              "4": "Cloud Dataflow"}})

    def test_clean_dataframe(self):
        cleaned_df = clean_dataframe(self.df)
        self.assertTrue(cleaned_df.equals(self.expected_result))

if __name__ == '__main__':
    unittest.main(argv=['first-arg-is-ignored'], exit=False)

...................
----------------------------------------------------------------------
Ran 19 tests in 0.059s

OK


Centering of Data was success
Duplicates correct
None is returned
max positive negative
Negative Maximim
positive Maximum
Correct Result
Two Negatives make a Positive
two positive numbers and checks if the result is correct
Sign is Correct
a positive number with zero and verifies if the result is zero


In [8]:
def round_prices(df):
    df['price'] = df['price'].round(2)
    return df

def drop_nan_values(df):
    df = df.dropna(subset=['price'])
    return df

def drop_negative_values(df):
    df = df[df['price'] >= 0]
    return df

def clean_dataframe(df):
  try:
      df = round_prices(df)
      df = drop_nan_values(df)
      df = drop_negative_values(df)
      df

      return df

  except Exception as e:
    print(f"Pipeline failed with error: {e}")
    return -5

class TestDataTransformationPipeline(unittest.TestCase):

      def setUp(self):
        self.df = pd.DataFrame({"price": {"0": 1983.578, "1": 3838, "2": 1759, "3": np.nan, "4": 2626.73, "5": -7325.10},
                                "product name": {"0": "Cloud Trace", "1": "Cloud DNS", "2": "Cloud SQL",
                                                 "3": "Cloud Dataflow", "4": "Cloud Dataflow", "5": "Cloud Security Scanner"}})
        self.expected_result = pd.DataFrame({"price": {"0": 1983.58, "1": 3838.00, "2": 1759.00, "4": 2626.73},
                                             "product name": {"0": "Cloud Trace", "1": "Cloud DNS", "2": "Cloud SQL",
                                                              "4": "Cloud Dataflow"}})

      def test_clean_dataframe(self):
        cleaned_df = clean_dataframe(self.df)
        self.assertTrue(cleaned_df.equals(self.expected_result))


if __name__ == '__main__':
    unittest.main(testRunner=unittest.TextTestRunner(resultclass=CustomTestResult),
              argv=['first-arg-is-ignored'],
              exit=False)

Ran 19 tests in 0.064s

OK


Test Passed: __main__.TestDataChecks.test_has_price_column
Test Passed: __main__.TestDataChecks.test_is_dataframe
Test Passed: __main__.TestDataChecks.test_mean_and_std_deviation
Centering of Data was success
Test Passed: __main__.TestDataChecks.test_mean_center_data
Test Passed: __main__.TestDataChecks.test_price_type
Test Passed: __main__.TestDataChecks.test_remove_negative_prices
Test Passed: __main__.TestDataTransformationPipeline.test_clean_dataframe
Duplicates correct
Test Passed: __main__.TestFindMax.test_duplicate_numbers
None is returned
Test Passed: __main__.TestFindMax.test_empty_list
max positive negative
Test Passed: __main__.TestFindMax.test_mixed_numbers
Negative Maximim
Test Passed: __main__.TestFindMax.test_negative_numbers
positive Maximum
Test Passed: __main__.TestFindMax.test_positive_numbers
Correct Result
Test Passed: __main__.TestFindMax.test_single_number
Test Passed: __main__.TestFindMax.test_type_error
Two Negatives make a Positive
Test Passed: __main__.TestMu

In [10]:
# replace ###### with your file name
# make sure you have your google drive mounted.

!cp "/content/drive/MyDrive/Colab Notebooks/A6_Vu_Nguyen.ipynb" ./
!jupyter nbconvert --to html "A6_Vu_Nguyen.ipynb"

[NbConvertApp] Converting notebook A6_Vu_Nguyen.ipynb to html
[NbConvertApp] Writing 630432 bytes to A6_Vu_Nguyen.html
