In [11]:
import pandas as pd
import numpy as np
from datetime import datetime
from sklearn.datasets import load_iris
import pythonDataProcessingLibraryCopy as pdpl


In [13]:

class Main:
    """Print-based demo harness for the ``pdpl`` helper classes.

    Each ``test_*`` method follows the same sequence: build a small input
    frame, wrap it in one ``pdpl`` class, print the input frame, print an
    "After ..." header, optionally invoke a single method on the wrapper, and
    finally print the wrapper's ``data`` attribute. Nothing is asserted and
    nothing is returned; the printed output is meant to be inspected by eye.
    """

    def __init__(self):
        """Create the harness; no instance state is stored."""

    @staticmethod
    def _nan_frame():
        """Return a fresh two-column frame with one NaN in each column."""
        columns = {
            'A': [1, 2, np.nan, 4],
            'B': [5, np.nan, 7, 8]
        }
        return pd.DataFrame(columns)

    @staticmethod
    def _print_original(frame):
        """Print the ``Original Data:`` banner followed by ``frame``."""
        print("Original Data:", frame, sep="\n")

    def test_data_preprocessor(self):
        """Wrap the NaN frame in ``DataPreprocessor`` and print both views."""
        sample = self._nan_frame()

        # The wrapper is constructed before anything is printed.
        wrapped = pdpl.DataPreprocessor(sample)
        self._print_original(sample)
        print("\nAfter DataPreprocessor:")
        print(wrapped.data)

    def test_missing_value_handler(self):
        """Call ``fill_missing_values_with_mean`` on the NaN frame and print the result."""
        sample = self._nan_frame()

        filler = pdpl.MissingValueHandler(sample)
        self._print_original(sample)
        print("\nAfter Filling Missing Values with Mean:")
        # The header is printed first so any message emitted by the call
        # appears underneath it.
        filler.fill_missing_values_with_mean()
        print(filler.data)

    def test_outlier_handler(self):
        """Call ``handle_outliers_iqr`` on the iris sepal-length column.

        Only the first rows (``head()``) of each frame are printed.
        """
        iris = load_iris()
        # Feature matrix with the target appended as one extra column.
        values = np.c_[iris['data'], iris['target']]
        column_names = iris['feature_names'] + ['target']
        sample = pd.DataFrame(data=values, columns=column_names)

        trimmer = pdpl.OutlierHandler(sample)
        self._print_original(sample.head())
        print("\nAfter Handling Outliers:")
        trimmer.handle_outliers_iqr('sepal length (cm)')
        print(trimmer.data.head())

    def test_text_cleaner(self):
        """Call ``remove_punctuation`` on a three-sentence ``text`` column."""
        sentences = ['Hello, World!', 'How are you?', 'This is a test.']
        sample = pd.DataFrame({'text': sentences})

        scrubber = pdpl.TextCleaner(sample)
        self._print_original(sample)
        print("\nAfter Text Cleaning:")
        scrubber.remove_punctuation('text')
        print(scrubber.data)

    def test_feature_engineer(self):
        """Call ``create_new_features`` on a small numeric sales frame."""
        columns = {
            'product_price': [10, 20, 30],
            'quantity': [1, 2, 3],
            'age': [25, 35, 45],
            'individual_sales': [100, 200, 300]
        }
        sample = pd.DataFrame(columns)

        builder = pdpl.FeatureEngineer(sample)
        self._print_original(sample)
        print("\nAfter Feature Engineering:")
        builder.create_new_features()
        print(builder.data)

    def test_categorical_encoder(self):
        """Call ``one_hot_encode`` on a single ``category`` column."""
        labels = ['A', 'B', 'A', 'C', 'B']
        sample = pd.DataFrame({'category': labels})

        coder = pdpl.CategoricalEncoder(sample)
        self._print_original(sample)
        print("\nAfter One-Hot Encoding:")
        coder.one_hot_encode(['category'])
        print(coder.data)

    def test_date_time_handler(self):
        """Call ``extract_date_features`` on a column of timestamp strings."""
        stamps = ['2022-01-01 08:00:00', '2022-01-02 09:30:00', '2022-01-03 10:45:00']
        sample = pd.DataFrame({'date_time': stamps})

        extractor = pdpl.DateTimeHandler(sample)
        self._print_original(sample)
        print("\nAfter Extracting Date Features:")
        extractor.extract_date_features('date_time')
        print(extractor.data)

if __name__ == "__main__":
    # Run every demo once, in a fixed order, on a single shared harness.
    main = Main()
    for _demo in (
        main.test_data_preprocessor,
        main.test_missing_value_handler,
        main.test_outlier_handler,
        main.test_text_cleaner,
        main.test_feature_engineer,
        main.test_categorical_encoder,
        main.test_date_time_handler,
    ):
        _demo()


Original Data:
     A    B
0  1.0  5.0
1  2.0  NaN
2  NaN  7.0
3  4.0  8.0

After DataPreprocessor:
     A    B
0  1.0  5.0
1  2.0  NaN
2  NaN  7.0
3  4.0  8.0
Original Data:
     A    B
0  1.0  5.0
1  2.0  NaN
2  NaN  7.0
3  4.0  8.0

After Filling Missing Values with Mean:
An error occurred while filling missing values with mean: NDFrame.fillna() got an unexpected keyword argument 'subset'
     A    B
0  1.0  5.0
1  2.0  NaN
2  NaN  7.0
3  4.0  8.0
Original Data:
   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \
0                5.1               3.5                1.4               0.2   
1                4.9               3.0                1.4               0.2   
2                4.7               3.2                1.3               0.2   
3                4.6               3.1                1.5               0.2   
4                5.0               3.6                1.4               0.2   

   target  
0     0.0  
1     0.0  
2     0.0  
3     0