In [1]:
import pandas as pd

### Operation on a new column

In [2]:
# Column-oriented sample data: each keyword is a column name and each list
# holds that column's values for the three employees.
data = dict(
    EmpName=["A", "B", "C"],
    Salary=[10000, 25000, 30000],
    Location=["Pune", "Mumbai", "Delhi"],
)

In [3]:
# Turn the column dict into a DataFrame; the dict keys become the column headers.
df = pd.DataFrame(data=data)

In [4]:
# Show the frame so its three rows can be checked against `data`.
df

Unnamed: 0,EmpName,Salary,Location
0,A,10000,Pune
1,B,25000,Mumbai
2,C,30000,Delhi


In [5]:
# Add a column holding each salary plus a 10% increment.
# Plain column arithmetic is vectorised: it evaluates Salary + 0.1 * Salary
# for every row at once, so no row-by-row apply/lambda is needed.
df['Flag'] = df['Salary'] + 0.1 * df['Salary']
df

Unnamed: 0,EmpName,Salary,Location,Flag
0,A,10000,Pune,11000.0
1,B,25000,Mumbai,27500.0
2,C,30000,Delhi,33000.0


### Concat() function

In [6]:
# First single-column frame; rows are labelled r1..r3 instead of 0..2.
df1 = pd.DataFrame(
    data={'id': [1, 2, 3]},
    index=['r1', 'r2', 'r3'],
)
df1

Unnamed: 0,id
r1,1
r2,2
r3,3


In [7]:
# Second single-column frame, using the same r1..r3 row labels as df1.
df2 = pd.DataFrame(
    data={'id': [10, 20, 30]},
    index=['r1', 'r2', 'r3'],
)
df2

Unnamed: 0,id
r1,10
r2,20
r3,30


In [8]:
# Stack df2's rows underneath df1's rows (concatenation along the row axis).
df3 = pd.concat(objs=[df1, df2], axis='index')

In [9]:
# Inspect the stacked result: df1's rows followed by df2's, so r1..r3 repeat.
df3

Unnamed: 0,id
r1,1
r2,2
r3,3
r1,10
r2,20
r3,30


### Simple sum stored in another column

In [10]:
# Element-wise sum of the two 'id' columns, matched on the r1..r3 labels.
# df3 carries every label twice, so the assignment aligns on the index and
# each sum lands in both rows that share a label.
df3['Sum'] = df1['id'] + df2['id']

In [11]:
# Each r-label appears twice, and both copies carry the same Sum value.
df3

Unnamed: 0,id,Sum
r1,1,11
r2,2,22
r3,3,33
r1,10,11
r2,20,22
r3,30,33


### Append() function

In [12]:
# It will simply append the rows in a dataFrame one below the other.

In [13]:
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0.
# pd.concat stacks the rows of df2 below those of df1 in exactly the same
# way and is available on every pandas version.
df3 = pd.concat([df1, df2])

In [14]:
# Rows of df1 followed by rows of df2; df3 was rebuilt, so only 'id' remains.
df3

Unnamed: 0,id
r1,1
r2,2
r3,3
r1,10
r2,20
r3,30


#### Basic df structure

In [15]:
# Minimal frame: one column called "Num" and an explicit 0..4 index.
df = pd.DataFrame({"Num": [100, 200, 300, 400, 500]}, index=range(5))

In [16]:
# Five rows, a single column named Num, index 0..4.
df

Unnamed: 0,Num
0,100
1,200
2,300
3,400
4,500


### More on the concat() function

In [17]:
import pandas as pd

# Two frames that share the default 0..3 index but have no column in common.
df1 = pd.DataFrame({
    'id': ['A01', 'A02', 'A03', 'A04'],
    'Name': ['ABC', 'PQR', 'DEF', 'GHI'],
})

df3 = pd.DataFrame({
    'City': ['MUMBAI', 'PUNE', 'MUMBAI', 'DELHI'],
    'Age': ['12', '13', '14', '12'],
})

# Place the frames side by side (column axis), keeping only the index labels
# present in both; leaving `join` out would fall back to join='outer'.
result = pd.concat(objs=[df1, df3], join='inner', axis=1)
display(result)
# Both frames carry the same index, so 'inner' and 'outer' give the same rows here.

Unnamed: 0,id,Name,City,Age
0,A01,ABC,MUMBAI,12
1,A02,PQR,PUNE,13
2,A03,DEF,MUMBAI,14
3,A04,GHI,DELHI,12


In [18]:
# Along the column axis (axis=1) with the default outer join spelled out explicitly
result = pd.concat([df1, df3], axis=1, join='outer')
display(result)

Unnamed: 0,id,Name,City,Age
0,A01,ABC,MUMBAI,12
1,A02,PQR,PUNE,13
2,A03,DEF,MUMBAI,14
3,A04,GHI,DELHI,12


### Concatenating using append

In [19]:
import pandas as pd
# First DataFrame
df1 = pd.DataFrame({'id': ['A01', 'A02', 'A03', 'A04'],
                    'Name': ['ABC', 'PQR', 'DEF', 'GHI']})

# Second DataFrame (same columns as the first)
df2 = pd.DataFrame({'id': ['B05', 'B06', 'B07', 'B08'],
                    'Name': ['XYZ', 'TUV', 'MNO', 'JKL']})

# Third DataFrame (shares no columns with the first two)
df3 = pd.DataFrame({'City': ['MUMBAI', 'PUNE', 'MUMBAI', 'DELHI'],
                    'Age': ['12', '13', '14', '12']})


# Stack the three frames row-wise.
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0;
# pd.concat is its replacement. Each frame keeps its own 0..3 index, and
# columns a frame does not have are filled with NaN.
result = pd.concat([df1, df2, df3])
result


Unnamed: 0,id,Name,City,Age
0,A01,ABC,,
1,A02,PQR,,
2,A03,DEF,,
3,A04,GHI,,
0,B05,XYZ,,
1,B06,TUV,,
2,B07,MNO,,
3,B08,JKL,,
0,,,MUMBAI,12.0
1,,,PUNE,13.0


### Join() function

In [20]:
# Left-hand frame for the join examples: columns A and B, index labels 0, 1, 2.
left = pd.DataFrame(
    data={
        "A": ["A0", "A1", "A2"],
        "B": ["B0", "B1", "B2"],
    },
    index=[0, 1, 2],
)

In [21]:
# Right-hand frame for the join examples: columns C and D, index labels 0, 2, 3
# (so only labels 0 and 2 overlap with `left`).
right = pd.DataFrame(
    data={
        "C": ["C0", "C2", "C3"],
        "D": ["D0", "D2", "D3"],
    },
    index=[0, 2, 3],
)

In [22]:
# Default join: rows are matched on the index (the frames share no columns)
# and every row of `left` is kept, i.e. the same result as how='left'.
# Index 1 has no partner in `right`, so C and D are missing there.
result = left.join(other=right)
result

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,,
2,A2,B2,C2,D2


In [23]:
# Inner join: keep only the index labels present in both frames (0 and 2).
result = left.join(other=right, how='inner')
result

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
2,A2,B2,C2,D2


In [24]:
# Outer join: keep the union of both indexes (0..3); cells without a
# matching row on the other side stay empty.
result = left.join(other=right, how='outer')
result

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,,
2,A2,B2,C2,D2
3,,,C3,D3


In [25]:
# Left join: keep every index label of `left` (0, 1, 2) and attach C/D
# where `right` has the same label.
result = left.join(other=right, how='left')
result

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,,
2,A2,B2,C2,D2


In [26]:
# Right join: keep every index label of `right` (0, 2, 3) and attach A/B
# where `left` has the same label.
result = left.join(other=right, how='right')
result

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
2,A2,B2,C2,D2
3,,,C3,D3


### Pivot Tables

In [27]:
# Long-format input for the pivot: every (foo, bar) pair occurs exactly once
# and carries one baz value.
df = pd.DataFrame(
    data={
        'foo': ['one'] * 3 + ['two'] * 3,
        'bar': ['A', 'B', 'C'] * 2,
        'baz': list(range(1, 7)),
    }
)

In [28]:
# Long-format view: one row per (foo, bar) pair with its baz value.
df

Unnamed: 0,foo,bar,baz
0,one,A,1
1,one,B,2
2,one,C,3
3,two,A,4
4,two,B,5
5,two,C,6


In [29]:
# Reshape long -> wide: one row per 'foo' value, one column per 'bar' value,
# with the cells taken from 'baz'.
df.pivot(values='baz', index='foo', columns='bar')

bar,A,B,C
foo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,1,2,3
two,4,5,6


### Pipe Method

In [30]:
# importing pandas library
import pandas as pd

# Build the people table in a single constructor call: each key is a column
# and the three lists line up by position.
df = pd.DataFrame({
    'name': ['Reema', 'Shyam', 'Jai', 'Nimisha', 'Rohit', 'Riya'],
    'gender': ['Female', 'Male', 'Male', 'Female', 'Male', 'Female'],
    'age': [31, 32, 19, 23, 28, 33],
})

# View dataframe
df


Unnamed: 0,name,gender,age
0,Reema,Female,31
1,Shyam,Male,32
2,Jai,Male,19
3,Nimisha,Female,23
4,Rohit,Male,28
5,Riya,Female,33


In [31]:
# For using pipe, follow link-
# LINK: https://www.geeksforgeeks.org/create-a-pipeline-in-pandas/

### Apply() | ApplyMap() | Map() Functions

    DataFrame.apply
    Apply a function along input axis of DataFrame.

    DataFrame.applymap
    Apply a function elementwise on a whole DataFrame.

    Series.map
    Apply a mapping correspondence on a Series.
    
https://towardsdatascience.com/how-to-use-apply-applymap-and-map-in-3-minutes-36a08c09257

## APPLY() 

#### Example 1 — apply
Imagine you are the data scientist on the Simpsons’ team. The stars of the series are sponsored and each star gets a budget every season to buy stuff. You have the costs for seasons 1 to 3:

In [32]:
import numpy as np

# Six customers with their costs for seasons 1-3; season k's costs are the
# values 0..5 multiplied by 2*k.
df = pd.DataFrame({
    'Customer': ['Mr.Burns', 'Lisa', 'Marge', 'Milhouse', 'Bart', 'Homer'],
    **{f'Costs_{season}': np.arange(6) * (2 * season) for season in (1, 2, 3)},
})

In [33]:
# One row per customer with the costs for seasons 1 to 3.
df

Unnamed: 0,Customer,Costs_1,Costs_2,Costs_3
0,Mr.Burns,0,0,0
1,Lisa,2,4,6
2,Marge,4,8,12
3,Milhouse,6,12,18
4,Bart,8,16,24
5,Homer,10,20,30


In [34]:
def diff_calc(x):
    """Return the spread of ``x``: its largest value minus its smallest.

    ``x`` only needs to provide ``.max()`` and ``.min()``.
    """
    return x.max() - x.min()

# Used with DataFrame.apply below, diff_calc is handed the cost data
# (Costs_1, Costs_2, Costs_3) one column or one row at a time.

In [35]:
# axis=0 (the default): diff_calc receives each cost column in turn,
# giving one max-min spread per column.
df.loc[:, ['Costs_1', 'Costs_2', 'Costs_3']].apply(diff_calc, axis=0)

Costs_1    10
Costs_2    20
Costs_3    30
dtype: int64

In [36]:
# axis=1: diff_calc receives each row in turn (across the three cost
# columns), giving one max-min spread per customer.
df.loc[:, ['Costs_1', 'Costs_2', 'Costs_3']].apply(diff_calc, axis=1)

0     0
1     4
2     8
3    12
4    16
5    20
dtype: int64

## APPLYMAP()

#### Example 2 — applymap
In the first example we wanted to apply a specific function to each numeric column of our dataframe. What if we want to change each element individually? Imagine a colleague from marketing asks you to format each element as a string and to add the prefix “S_” as a label for a Simpson value to each element. In this case, you need a function that can be applied to each individual element. Let’s create the function first:

In [37]:
def simpsons_string(x):
    """Return ``x`` converted to text with the Simpsons marker ``S_`` in front."""
    # The one-element tuple keeps %-formatting correct when x is itself a tuple.
    return "S_%s" % (x,)

In [38]:
# Run simpsons_string on every single cell of the frame, text and numbers alike.
# NOTE: pandas 2.1 deprecates DataFrame.applymap in favour of DataFrame.map,
# which has the same element-wise behaviour.
df.applymap(func=simpsons_string)

Unnamed: 0,Customer,Costs_1,Costs_2,Costs_3
0,S_Mr.Burns,S_0,S_0,S_0
1,S_Lisa,S_2,S_4,S_6
2,S_Marge,S_4,S_8,S_12
3,S_Milhouse,S_6,S_12,S_18
4,S_Bart,S_8,S_16,S_24
5,S_Homer,S_10,S_20,S_30


## MAP()

####  Example 3 — map()
After we prepared the data above and added a prefix to each element, we got the feedback that the colleague had made a mistake. He only wanted all string elements to have a prefix. So now we have to change every single element for a column:

In [39]:
# Prefix only the Customer column: Series.map calls simpsons_string on each
# of its elements and leaves the cost columns untouched.
df.loc[:, 'Customer'].map(simpsons_string)

0    S_Mr.Burns
1        S_Lisa
2       S_Marge
3    S_Milhouse
4        S_Bart
5       S_Homer
Name: Customer, dtype: object

#### Perfect! 
With map() we can apply a function to every single element in a column, while with applymap() we can apply the function to the whole dataframe.

#### Conclusion
With the three methods apply(), applymap() and map(), functions can be applied to the whole dataframe or to parts of it.

- apply(): a function is applied to columns or rows,
- applymap(): works element-wise for the whole dataframe, and
- map(): works element-wise for a single series.