In [1]:
import numpy as np 
import pandas as pd

In [2]:
# Load the employee records; read_csv splits on commas by default and takes
# the column names from the first line of the file.
empWithHeaderDF = pd.read_csv("employeeWithHeaders.csv")
empWithHeaderDF

Unnamed: 0,eid,ename,esal
0,1,Prashant,1000
1,2,Amar,2000
2,3,Chitra,2000
3,3,Chitra,2000
4,4,Utkarsha,9878
5,5,Ajit,9999


In [8]:
# Use-case: derive each employee's yearly salary (12 x esal) so that it can be
# added to the frame as a new 'yearlySalary' column.

yearlySalary = empWithHeaderDF['esal'] * 12
yearlySalary

0     12000
1     24000
2     24000
3     24000
4    118536
5    119988
Name: esal, dtype: int64

In [9]:
# Arithmetic on a single DataFrame column yields a pandas Series, not a DataFrame
type(yearlySalary)

pandas.core.series.Series

In [10]:
# pandas DataFrames are mutable: assigning to a column label that does not
# exist yet adds that column to the existing frame.

empWithHeaderDF['yearlySalary'] = yearlySalary
empWithHeaderDF

Unnamed: 0,eid,ename,esal,yearlySalary
0,1,Prashant,1000,12000
1,2,Amar,2000,24000
2,3,Chitra,2000,24000
3,3,Chitra,2000,24000
4,4,Utkarsha,9878,118536
5,5,Ajit,9999,119988


In [17]:
# Add a new column from a plain Python collection (one entry per row;
# two of the entries are empty strings).

deptList = ['HR','Ops','','','Ops','HR']

# Convert the collection into a Series before attaching it to the frame.
# NOTE(review): a Series is matched to rows by index label, not by position —
# this relies on the frame having the default 0..5 index; confirm if that changes.

deptSeries = pd.Series(deptList)

# Attach the Series to the DataFrame as the 'department' column
empWithHeaderDF['department'] = deptSeries
empWithHeaderDF

Unnamed: 0,eid,ename,esal,yearlySalary,department
0,1,Prashant,1000,12000,HR
1,2,Amar,2000,24000,Ops
2,3,Chitra,2000,24000,
3,3,Chitra,2000,24000,
4,4,Utkarsha,9878,118536,Ops
5,5,Ajit,9999,119988,HR


In [18]:
# Use-case: create a new column called UpdatedYearlySalary based on the following increment/hike conditions
# If the salary is less than or equal to 1500, increase the salary by 10%
# If the salary is between 1501 and 10000, increase the salary by 5%
# If the salary is between 10001 and 20000, increase the salary by 2.5%
# If the salary is greater than 20000, no increase

In [19]:
# The recommended pandas approach is to write a function that implements the logic
# and use the apply method to run that function on each value of a Series

In [21]:
def incrementSalary(salary):
    """Return ``salary`` after applying the tiered hike.

    Tiers (upper bounds are inclusive):
      * up to 1500              -> multiplied by 1.1   (10% hike)
      * above 1500 up to 10000  -> multiplied by 1.05  (5% hike)
      * above 10000 up to 20000 -> multiplied by 1.025 (2.5% hike)
      * above 20000             -> returned unchanged
    """
    # Guard clauses are checked from the lowest tier upwards, so each test
    # only needs the upper bound of its tier.
    if salary <= 1500:
        return salary * 1.1
    if salary <= 10000:
        return salary * 1.05
    if salary <= 20000:
        return salary * 1.025
    # Above the top tier: no hike.
    return salary

In [26]:
# Apply the hike rules to every esal value and keep the result as its own
# 'UpdatedSalary' column, then multiply by 12 for the yearly figure.
# 'UpdatedSalary' is created here explicitly: the frame displayed below this cell
# contains that column, but no other cell in the notebook creates it, so without
# this line a Restart & Run All would produce a different frame (and CSV export).
empWithHeaderDF['UpdatedSalary'] = empWithHeaderDF.esal.apply(incrementSalary)
empWithHeaderDF['UpdatedYearlySalary'] = empWithHeaderDF['UpdatedSalary'] * 12
empWithHeaderDF

Unnamed: 0,eid,ename,esal,yearlySalary,department,UpdatedSalary,UpdatedYearlySalary
0,1,Prashant,1000,12000,HR,1100.0,13200.0
1,2,Amar,2000,24000,Ops,2100.0,25200.0
2,3,Chitra,2000,24000,,2100.0,25200.0
3,3,Chitra,2000,24000,,2100.0,25200.0
4,4,Utkarsha,9878,118536,Ops,10371.9,124462.8
5,5,Ajit,9999,119988,HR,10498.95,125987.4


In [27]:
# Overwrite the 'department' column with the complete list of departments.
# Bracket indexing is used instead of attribute assignment (df.department = ...):
# attribute assignment only updates a column that already exists, and otherwise
# just sets a plain Python attribute on the object without touching the data.
empWithHeaderDF['department'] = pd.Series(['HR','Ops','Admin','Admin','Ops','HR'])
empWithHeaderDF

Unnamed: 0,eid,ename,esal,yearlySalary,department,UpdatedSalary,UpdatedYearlySalary
0,1,Prashant,1000,12000,HR,1100.0,13200.0
1,2,Amar,2000,24000,Ops,2100.0,25200.0
2,3,Chitra,2000,24000,Admin,2100.0,25200.0
3,3,Chitra,2000,24000,Admin,2100.0,25200.0
4,4,Utkarsha,9878,118536,Ops,10371.9,124462.8
5,5,Ajit,9999,119988,HR,10498.95,125987.4


In [29]:
# Write the final DataFrame to disk; index=False leaves the row index out of the file
empWithHeaderDF.to_csv(path_or_buf='FinalOutput.csv', index=False)