# Create a Pipeline in Pandas


In [2]:
# importing pandas library
import pandas as pd
 
# Build the sample dataframe in a single step from a
# column-name -> values mapping
df = pd.DataFrame({
    'name': ['Ali', 'Shyam', 'Noor', 'Asma', 'Fatema', 'Ahmed'],
    'major': ['IT', 'Math', 'IT', 'IT', 'CS', 'CS'],
    'age': [31, 32, 19, 23, 28, 33],
})

# Display the dataframe
df

Unnamed: 0,name,major,age
0,Ali,IT,31
1,Shyam,Math,32
2,Noor,IT,19
3,Asma,IT,23
4,Fatema,CS,28
5,Ahmed,CS,33


### Now, creating functions for data processing.

In [4]:
# function to find mean
def mean_age_by_group(dataframe, col):
    """Group ``dataframe`` by ``col`` and return the mean of its numeric columns.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame to aggregate.
    col : str or list of str
        Column name(s) to group by.

    Returns
    -------
    pandas.DataFrame
        One row per group, indexed by the grouping key(s), holding the
        mean of every numeric column.
    """
    # numeric_only=True keeps string columns (e.g. 'name') out of the
    # aggregation; without it pandas emits a warning on older versions
    # and raises TypeError on 2.x when it meets non-numeric data.
    return dataframe.groupby(col).mean(numeric_only=True)

# function to convert to uppercase
def uppercase_column_name(dataframe):
    """Return a copy of ``dataframe`` whose column names are upper-cased.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame whose column labels are strings.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same data and upper-cased column names.
        The frame passed in is left unchanged.
    """
    # Work on a copy so the caller's frame keeps its original column
    # names and re-running a cell always starts from the same state.
    renamed = dataframe.copy()
    renamed.columns = renamed.columns.str.upper()
    return renamed


### Now, create a pipeline using the `.pipe()` method.


In [5]:
# Chain both helpers with .pipe(): first the per-major mean,
# then the upper-casing of the resulting column names
pipeline = (
    df
    .pipe(mean_age_by_group, col='major')
    .pipe(uppercase_column_name)
)

# Display the result of the chained calls
pipeline


  return dataframe.groupby(col).mean()


Unnamed: 0_level_0,AGE
major,Unnamed: 1_level_1
CS,30.5
IT,24.333333
Math,32.0


In [6]:
pip install pdpipe

Collecting pdpipe
  Downloading pdpipe-0.3.2-py3-none-any.whl (119 kB)
     -------------------------------------- 120.0/120.0 kB 3.5 MB/s eta 0:00:00
Collecting birch>=0.0.34
  Downloading birch-0.0.35-py3-none-any.whl (15 kB)
Collecting strct
  Downloading strct-0.0.32-py2.py3-none-any.whl (16 kB)
Collecting skutil>=0.0.15
  Downloading skutil-0.0.18-py2.py3-none-any.whl (21 kB)
Collecting decore
  Downloading decore-0.0.1.tar.gz (19 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: decore
  Building wheel for decore (setup.py): started
  Building wheel for decore (setup.py): finished with status 'done'
  Created wheel for decore: filename=decore-0.0.1-py2.py3-none-any.whl size=4222 sha256=730423e1a9184dfb6ad07173942572003b753842f81c6852721158bf1a43dfd7
  Stored in directory: c:\users\hp\appdata\local\pip\cache\wheels\dc\10\28\e89d09a0b2d721ffd8ca99c3e642a9c74f8fed0cdbcb9f5ef0
Successfully

In [7]:
# importing the package
import pdpipe as pdp
import pandas as pd

# Build the sample dataset in one step from a
# column-name -> values mapping
dataset = pd.DataFrame({
    'name': ['Ali', 'Shyam', 'Noor', 'Asma', 'Fatema', 'Ahmed'],
    'major': ['IT', 'Math', 'IT', 'IT', 'CS', 'CS'],
    'age': [31, 32, 19, 23, 28, 33],
    'department': ['IT', 'Account', 'IT', 'IT', 'Management', 'Management'],
    'index': [1, 2, 3, 4, 5, 6],
})

# Display the dataframe
dataset


  from tqdm.autonotebook import tqdm


Unnamed: 0,name,major,age,department,index
0,Ali,IT,31,IT,1
1,Shyam,Math,32,Account,2
2,Noor,IT,19,IT,3
3,Asma,IT,23,IT,4
4,Fatema,CS,28,Management,5
5,Ahmed,CS,33,Management,6


### Removing a column from dataframe using pdpipe.



In [8]:
# Build a ColDrop stage for the "index" column and run it
# on the dataset in the same expression via .apply()
dropCol = (
    pdp.ColDrop("index")
    .apply(dataset)
)

# Show the dataframe returned by the stage
dropCol


Unnamed: 0,name,major,age,department
0,Ali,IT,31,IT
1,Shyam,Math,32,Account
2,Noor,IT,19,IT
3,Asma,IT,23,IT
4,Fatema,CS,28,Management
5,Ahmed,CS,33,Management


In [9]:
# Create a ColDrop stage for the "index" column and keep it in a
# variable; nothing is applied to the data at this point
dropCol2 = pdp.ColDrop("index")

# Apply the stage by calling it directly on the dataframe
df2 = dropCol2(dataset)

# Display the resulting dataframe
df2


Unnamed: 0,name,major,age,department
0,Ali,IT,31,IT
1,Shyam,Math,32,Account
2,Noor,IT,19,IT
3,Asma,IT,23,IT
4,Fatema,CS,28,Management
5,Ahmed,CS,33,Management


### Now, recreating the dataframe and dropping rows by value using pdpipe.

In [10]:
# importing the package
import pdpipe as pdp
import pandas as pd

# Build the sample dataset in one step from a
# column-name -> values mapping
dataset = pd.DataFrame({
    'name': ['Ali', 'Shyam', 'Noor', 'Asma', 'Fatema', 'Ahmed'],
    'major': ['IT', 'Math', 'IT', 'IT', 'CS', 'CS'],
    'age': [31, 32, 19, 23, 28, 33],
    'department': ['IT', 'Account', 'IT', 'IT', 'Management', 'Management'],
    'index': [1, 2, 3, 4, 5, 6],
})


# Display the dataframe
dataset


Unnamed: 0,name,major,age,department,index
0,Ali,IT,31,IT,1
1,Shyam,Math,32,Account,2
2,Noor,IT,19,IT,3
3,Asma,IT,23,IT,4
4,Fatema,CS,28,Management,5
5,Ahmed,CS,33,Management,6


In [11]:
# Use ValDrop to remove the rows whose 'department' value is 'IT'
df3 = (
    pdp.ValDrop(['IT'], 'department')
    .apply(dataset)
)

# Show the remaining rows
df3


Unnamed: 0,name,major,age,department,index
1,Shyam,Math,32,Account,2
4,Fatema,CS,28,Management,5
5,Ahmed,CS,33,Management,6


In [12]:
# function to find mean
def mean_age_by_group(dataframe, col):
    """Group ``dataframe`` by ``col`` and return the mean of its numeric columns.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame to aggregate.
    col : str or list of str
        Column name(s) to group by.

    Returns
    -------
    pandas.DataFrame
        One row per group, indexed by the grouping key(s), holding the
        mean of every numeric column.
    """
    # numeric_only=True keeps string columns (e.g. 'name') out of the
    # aggregation; without it pandas emits a warning on older versions
    # and raises TypeError on 2.x when it meets non-numeric data.
    return dataframe.groupby(col).mean(numeric_only=True)

# function to convert to uppercase
def uppercase_column_name(dataframe):
    """Return a copy of ``dataframe`` whose column names are upper-cased.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame whose column labels are strings.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same data and upper-cased column names.
        The frame passed in is left unchanged.
    """
    # Work on a copy so the caller's frame keeps its original column
    # names and re-running a cell always starts from the same state.
    renamed = dataframe.copy()
    renamed.columns = renamed.columns.str.upper()
    return renamed


In [13]:
# Use ValDrop to remove the rows whose 'department' value is 'IT'
df3 = (
    pdp.ValDrop(['IT'], 'department')
    .apply(dataset)
)

# Show the remaining rows
df3


Unnamed: 0,name,major,age,department,index
1,Shyam,Math,32,Account,2
4,Fatema,CS,28,Management,5
5,Ahmed,CS,33,Management,6
