[Reference](https://towardsdatascience.com/data-preprocessing-using-pipeline-in-pandas-6170f4456796)

In [1]:
import pandas as pd

# Raw applicant records: each list holds one value per candidate, in the same
# candidate order across all keys.
candidates = {
    "Full_Name": [
        "Aida, Kone", "Mamadou, Diop", "Ismael, Camara",
        "Aicha, Konate", "Fanta, Koumare", "Khalil, Cisse",
    ],
    "Degree": ["Master", "Master", "Bachelor", "PhD", "Master", "PhD"],
    "From": ["Abidjan", "Dakar", "Bamako", "Abidjan", "Konakry", "Lomé"],
    "Application_date": [
        "11/17/2022", "09/23/2022", "12/2/2021",
        "08/25/2022", "01/07/2022", "12/26/2022",
    ],
    "From_office (min)": [120, 95, 75, 80, 100, 34],
}
candidates_df = pd.DataFrame(candidates)

# The dates are written month/day/year. Spelling the format out keeps values
# such as "01/07/2022" from depending on pandas' format inference, and makes
# any entry that does not follow the pattern fail loudly instead of being
# guessed at.
candidates_df["Application_date"] = pd.to_datetime(
    candidates_df["Application_date"], format="%m/%d/%Y"
)

candidates_df

Unnamed: 0,Full_Name,Degree,From,Application_date,From_office (min)
0,"Aida, Kone",Master,Abidjan,2022-11-17,120
1,"Mamadou, Diop",Master,Dakar,2022-09-23,95
2,"Ismael, Camara",Bachelor,Bamako,2021-12-02,75
3,"Aicha, Konate",PhD,Abidjan,2022-08-25,80
4,"Fanta, Koumare",Master,Konakry,2022-01-07,100
5,"Khalil, Cisse",PhD,Lomé,2022-12-26,34


In [2]:
def get_first_last_name(df, col_name):
  """Return a copy of ``df`` with ``First_Name`` and ``Last_Name`` columns added.

  The values of ``col_name`` are split on commas; the first piece becomes
  ``First_Name`` and the second piece becomes ``Last_Name``. Surrounding
  whitespace is removed from both pieces, so "Aida, Kone" yields "Aida" and
  "Kone" (not " Kone").

  Parameters
  ----------
  df : pandas.DataFrame
      Input frame; it is not modified.
  col_name : str
      Name of the string column holding the comma-separated name.

  Returns
  -------
  pandas.DataFrame
      A copy of ``df`` with the two extra columns.
  """
  final_df = df.copy()

  name_parts = final_df[col_name].str.split(",", expand=True)

  for position, new_column in enumerate(("First_Name", "Last_Name")):
    part = name_parts.get(position)
    # .get() returns None when no value produced that many pieces (for
    # example, no comma anywhere in the column); keep the column empty then.
    final_df[new_column] = None if part is None else part.str.strip()

  return final_df

# Task 1: call the helper on the "Full_Name" column and keep the returned frame.
result_task1 = get_first_last_name(candidates_df, "Full_Name")
# Bare expression on the last line so the notebook renders the frame.
result_task1

Unnamed: 0,Full_Name,Degree,From,Application_date,From_office (min),First_Name,Last_Name
0,"Aida, Kone",Master,Abidjan,2022-11-17,120,Aida,Kone
1,"Mamadou, Diop",Master,Dakar,2022-09-23,95,Mamadou,Diop
2,"Ismael, Camara",Bachelor,Bamako,2021-12-02,75,Ismael,Camara
3,"Aicha, Konate",PhD,Abidjan,2022-08-25,80,Aicha,Konate
4,"Fanta, Koumare",Master,Konakry,2022-01-07,100,Fanta,Koumare
5,"Khalil, Cisse",PhD,Lomé,2022-12-26,34,Khalil,Cisse


In [3]:
def get_application_date_info(df, column_name):
  """Return a copy of ``df`` extended with calendar fields of a date column.

  Five columns are added, all derived from ``df[column_name]`` through the
  ``.dt`` accessor: ``Day``, ``Month`` and ``Year`` (numeric components) plus
  ``Day_of_week`` and ``Month_of_year`` (the day and month names).

  Parameters
  ----------
  df : pandas.DataFrame
      Input frame; it is not modified.
  column_name : str
      Name of the column the ``.dt`` accessor is applied to.

  Returns
  -------
  pandas.DataFrame
      A copy of ``df`` with the five extra columns appended in the order
      listed above.
  """
  dates = df[column_name].dt

  # New column name -> values, in the order the columns are appended.
  derived = {
      "Day": dates.day,
      "Month": dates.month,
      "Year": dates.year,
      "Day_of_week": dates.day_name(),
      "Month_of_year": dates.month_name(),
  }

  enriched = df.copy()
  for new_column, values in derived.items():
    enriched[new_column] = values

  return enriched

# Task 2: call the helper on the "Application_date" column and keep the returned frame.
result_task2 = get_application_date_info(candidates_df, "Application_date")
# Bare expression on the last line so the notebook renders the frame.
result_task2

Unnamed: 0,Full_Name,Degree,From,Application_date,From_office (min),Day,Month,Year,Day_of_week,Month_of_year
0,"Aida, Kone",Master,Abidjan,2022-11-17,120,17,11,2022,Thursday,November
1,"Mamadou, Diop",Master,Dakar,2022-09-23,95,23,9,2022,Friday,September
2,"Ismael, Camara",Bachelor,Bamako,2021-12-02,75,2,12,2021,Thursday,December
3,"Aicha, Konate",PhD,Abidjan,2022-08-25,80,25,8,2022,Thursday,August
4,"Fanta, Koumare",Master,Konakry,2022-01-07,100,7,1,2022,Friday,January
5,"Khalil, Cisse",PhD,Lomé,2022-12-26,34,26,12,2022,Monday,December


In [4]:
def info_by_row(row):
  """Build a one-sentence description of a single candidate.

  Parameters
  ----------
  row : pandas.Series
      One row of the candidates frame. It must expose ``Full_Name``, ``From``
      and ``Degree`` as attributes and ``"From_office (min)"`` as a key.

  Returns
  -------
  str
      A single-line sentence such as
      "Aida Kone from Abidjan holds a Master degree and lives 120 minutes
      from the office".
  """
  # Replace the comma separator with a space, then collapse whitespace runs so
  # "Aida, Kone" becomes "Aida Kone" rather than "Aida  Kone".
  full_name = " ".join(row.Full_Name.replace(",", " ").split())
  is_from = row.From
  degree = row.Degree
  from_office = row["From_office (min)"]

  # Adjacent f-strings keep the sentence on one line; a triple-quoted literal
  # would embed the line break and the source indentation in the text.
  # The unit comes from the column name, "From_office (min)".
  return (
      f"{full_name} from {is_from} holds a {degree} degree "
      f"and lives {from_office} minutes from the office"
  )

def candidate_info(df):
  """Return a copy of ``df`` with an ``Info`` column added.

  ``Info`` holds, for every row, the text returned by ``info_by_row`` for
  that row. The input frame is not modified.
  """
  with_info = df.copy()

  # axis=1 hands each row to info_by_row as a Series.
  with_info["Info"] = with_info.apply(info_by_row, axis=1)

  return with_info

# Task 3: run candidate_info on the candidates frame and keep the returned frame.
result_task3 = candidate_info(candidates_df)
# Bare expression on the last line so the notebook renders the frame.
result_task3

Unnamed: 0,Full_Name,Degree,From,Application_date,From_office (min),Info
0,"Aida, Kone",Master,Abidjan,2022-11-17,120,Aida Kone from Abidjan holds a Master degree ...
1,"Mamadou, Diop",Master,Dakar,2022-09-23,95,Mamadou Diop from Dakar holds a Master degree...
2,"Ismael, Camara",Bachelor,Bamako,2021-12-02,75,Ismael Camara from Bamako holds a Bachelor de...
3,"Aicha, Konate",PhD,Abidjan,2022-08-25,80,Aicha Konate from Abidjan holds a PhD degree ...
4,"Fanta, Koumare",Master,Konakry,2022-01-07,100,Fanta Koumare from Konakry holds a Master deg...
5,"Khalil, Cisse",PhD,Lomé,2022-12-26,34,Khalil Cisse from Lomé holds a PhD degree \n ...


In [5]:
# Build the pipeline by chaining the three functions with DataFrame.pipe:
# each step receives the frame returned by the previous one.
preprocessed_candidates = (
    candidates_df
    .pipe(get_first_last_name, "Full_Name")
    .pipe(get_application_date_info, "Application_date")
    .pipe(candidate_info)
)

# Display the result of the full pipeline
preprocessed_candidates

Unnamed: 0,Full_Name,Degree,From,Application_date,From_office (min),First_Name,Last_Name,Day,Month,Year,Day_of_week,Month_of_year,Info
0,"Aida, Kone",Master,Abidjan,2022-11-17,120,Aida,Kone,17,11,2022,Thursday,November,Aida Kone from Abidjan holds a Master degree ...
1,"Mamadou, Diop",Master,Dakar,2022-09-23,95,Mamadou,Diop,23,9,2022,Friday,September,Mamadou Diop from Dakar holds a Master degree...
2,"Ismael, Camara",Bachelor,Bamako,2021-12-02,75,Ismael,Camara,2,12,2021,Thursday,December,Ismael Camara from Bamako holds a Bachelor de...
3,"Aicha, Konate",PhD,Abidjan,2022-08-25,80,Aicha,Konate,25,8,2022,Thursday,August,Aicha Konate from Abidjan holds a PhD degree ...
4,"Fanta, Koumare",Master,Konakry,2022-01-07,100,Fanta,Koumare,7,1,2022,Friday,January,Fanta Koumare from Konakry holds a Master deg...
5,"Khalil, Cisse",PhD,Lomé,2022-12-26,34,Khalil,Cisse,26,12,2022,Monday,December,Khalil Cisse from Lomé holds a PhD degree \n ...
