# In [None]:
import pandas as pd
import numpy as np





class DataFrameTransform:
    '''
    Collection of static helper methods used to perform the EDA
    transformation of a dataframe.

    Unless stated otherwise, every method modifies the dataframe it is given
    in place and also returns that same dataframe.

    methods:
    ---------
    imput_null()

    tw_min()

    tran_col()

    map_type()

    map_pro_id()

    drop_col()

    rm_rpm_outl()

    rm_tq_outl()

    save_df()

    range_seldfcol()

    range_sel_df_col_H()

    range_sel_df_col_M()

    range_sel_df_col_L()
    '''

    # Numeric columns summarised by the range_* methods.
    _RANGE_COLS = ['Air temperature [K]', 'Process temperature [K]',
                   'Rotational speed [rpm]', 'Torque [Nm]', 'Tool wear [min]']

    @staticmethod
    def imput_null(df):
        '''
        Impute the null values of the columns 'Air temperature [K]',
        'Process temperature [K]' and 'Tool wear [min]' with the mean of the
        respective column.
        (task3, step 3)

        parameters:
        ---------
        df:dataframe, modified in place

        returns:
        -------
        the same dataframe object
        '''
        mean_cols = ['Air temperature [K]', 'Process temperature [K]',
                     'Tool wear [min]']
        df.fillna({name: df[name].mean() for name in mean_cols}, inplace=True)
        return df

    @staticmethod
    def tw_min(df):
        '''
        Divide the column 'Tool wear [min]' by 60 (intended to convert the
        values from seconds to minutes).

        parameters:
        ----------
        df:dataframe, modified in place

        returns:
        -------
        the same dataframe object
        '''
        df['Tool wear [min]'] = df['Tool wear [min]'] / 60
        return df

    @staticmethod
    def tran_col(df):
        '''
        Log-transform the column 'Rotational speed [rpm]' to reduce its
        skewness.

        Values that are not greater than 0 (this includes missing values,
        for which the comparison is False) are replaced by 0 instead of
        being passed to the logarithm.

        parameters:
        ---------
        df:dataframe, modified in place

        returns:
        -------
        the same dataframe object
        '''
        df['Rotational speed [rpm]'] = df['Rotational speed [rpm]'].map(
            lambda rpm: np.log(rpm) if rpm > 0 else 0)
        return df

    @staticmethod
    def map_type(df):
        '''
        Overwrite the column 'Type' with the product quality label derived
        from 'Tool wear [min]'.

        The column 'Tool wear [min]' was found not to correlate with the
        product quality type H:M:L, which should correspond to the
        'Tool wear [min]' values 5:3:2 respectively. This method corrects
        that by mapping map_tl() over the 'Tool wear [min]' column.

        parameters:
        ---------
        df:dataframe, modified in place
        'Type': Quality of the product being created
                (L, M, or H, for low, medium and high quality products)
        'Tool wear [min]':The current minutes of wear on the tool. H, M and L
                          product manufacturing cause 5/3/2 minutes of tool
                          wear.

        returns:
        -------
        the same dataframe object

        methods:
        -------
        map_tl()
        '''
        def map_tl(x):
            '''
            Return the quality label for a single 'Tool wear [min]' value:
            'H' for 3 <= x <= 5, 'M' for 2 <= x < 3, 'L' for 0 <= x < 2 and
            None for anything else (negative, above 5 or missing).

            parameters:
            ---------
            x:dataframe column 'Tool wear [min]' value
            '''
            if 3 <= x <= 5:
                return 'H'
            if 2 <= x < 3:
                return 'M'
            if 0 <= x < 2:
                return 'L'
            # Out-of-range and NaN values get no label.
            return None

        df['Type'] = df['Tool wear [min]'].map(map_tl)
        return df

    @staticmethod
    def map_pro_id(df):
        '''
        Correct the leading (non numeric) character of the 'Product ID'
        column, which should equal the column 'Type', by replacing it with
        the current value of df['Type'].

        parameters
        --------
        df:dataframe, modified in place
        'Product ID':Product specific serial number column
        'Type':Product quality type

        returns:
        -------
        the same dataframe object
        '''
        # Everything after the first character of the ID is kept unchanged.
        id_tail = df['Product ID'].astype("str").str.slice(1)
        df['Product ID'] = df['Type'].astype("str") + id_tail
        return df

    @staticmethod
    def drop_col(df, col):
        '''
        Drop an unwanted column of the dataframe.

        parameters:
        ---------
        df:dataframe, modified in place
        col:label of the dataframe column to be dropped

        returns:
        -------
        the same dataframe object
        '''
        df.drop(columns=[col], inplace=True)
        return df

    @staticmethod
    def _drop_above(df, col, upper):
        '''
        Remove, in place, every row whose value in `col` is not <= `upper`.

        Rows with a missing value in `col` are removed as well because the
        comparison is False for them. Rows are dropped by index label, so
        the index labels are assumed to be unique.
        '''
        keep = (df[col] <= upper).to_numpy()
        df.drop(index=df.index[~keep], inplace=True)
        return df

    @staticmethod
    def rm_rpm_outl(df, upper=1830):
        '''
        Remove the outliers of the 'Rotational speed [rpm]' column: every
        row whose value is not <= `upper` is dropped.

        The rows are dropped from `df` itself; the previous implementation
        built a filtered copy and discarded it, so nothing was removed.

        parameters:
        ----------
        df:dataframe, modified in place
        upper:largest value that is kept (default 1830)

        returns:
        -------
        the same dataframe object
        '''
        return DataFrameTransform._drop_above(
            df, 'Rotational speed [rpm]', upper)

    @staticmethod
    def rm_tq_outl(df, upper=66):
        '''
        Remove the outliers of the 'Torque [Nm]' column: every row whose
        value is not <= `upper` is dropped.

        The rows are dropped from `df` itself; the previous implementation
        built a filtered copy and discarded it, so nothing was removed.

        parameters:
        ----------
        df:dataframe, modified in place
        upper:largest value that is kept (default 66)

        returns:
        -------
        the same dataframe object
        '''
        return DataFrameTransform._drop_above(df, 'Torque [Nm]', upper)

    @staticmethod
    def save_df(df, path='new_df.csv'):
        '''
        Save the transformed dataframe as a CSV file (without the index) on
        the local machine.

        parameters:
        ----------
        df:dataframe to be saved
        path:destination file, defaults to 'new_df.csv'
        '''
        df.to_csv(path, index=False)

    @staticmethod
    def range_seldfcol(df):
        '''
        Return a dataframe with the min and max values of the selected
        numeric dataframe columns.

        parameters:
        ----------
        df: dataframe
        '''
        return df[DataFrameTransform._RANGE_COLS].agg(['min', 'max'])

    @staticmethod
    def _range_for_type(df, type_label):
        '''
        Return the min and max of the selected numeric columns, restricted
        to the rows whose 'Type' equals `type_label`.
        '''
        rows = df.loc[df['Type'] == type_label, DataFrameTransform._RANGE_COLS]
        return rows.agg(['min', 'max'])

    @staticmethod
    def range_sel_df_col_H(df):
        '''
        Return a dataframe with the min and max values of the selected
        dataframe columns for the rows of Type 'H'.

        parameters:
        ----------
        df: dataframe
        '''
        return DataFrameTransform._range_for_type(df, 'H')

    @staticmethod
    def range_sel_df_col_M(df):
        '''
        Return a dataframe with the min and max values of the selected
        dataframe columns for the rows of Type 'M'.

        parameters:
        ----------
        df: dataframe
        '''
        return DataFrameTransform._range_for_type(df, 'M')

    @staticmethod
    def range_sel_df_col_L(df):
        '''
        Return a dataframe with the min and max values of the selected
        dataframe columns for the rows of Type 'L'.

        parameters:
        ----------
        df: dataframe
        '''
        return DataFrameTransform._range_for_type(df, 'L')

# In [None]:

    
if __name__ == "__main__":
    # NOTE(review): `df` is not defined in this part of the file; presumably
    # it is loaded in an earlier notebook cell -- confirm before running this
    # as a standalone script.
    transfm = DataFrameTransform()

    # Cleaning / transformation steps, applied to df in this exact order.
    pipeline = (
        transfm.imput_null,
        transfm.tw_min,
        transfm.map_type,
        transfm.map_pro_id,
        transfm.rm_rpm_outl,
        transfm.rm_tq_outl,
        transfm.save_df,
    )
    for step in pipeline:
        step(df)

    # Min/max summaries: all rows first, then one table per quality type.
    reports = (
        transfm.range_seldfcol,
        transfm.range_sel_df_col_H,
        transfm.range_sel_df_col_M,
        transfm.range_sel_df_col_L,
    )
    for report in reports:
        print(report(df))
