[Reference](https://moez-62905.medium.com/take-your-data-analysis-to-the-next-level-with-these-5-advanced-pandas-functions-98cef90512ce)

# 1. pd.merge_ordered

In [1]:
import pandas as pd

# Two small monthly tables that share a 'Date' key but cover different months:
# revenue has March and no April, cost has April and no March.
revenue_columns = {
    'Date': ['2021-01-01', '2021-02-01', '2021-03-01'],
    'Revenue': [100, 200, 300],
}
cost_columns = {
    'Date': ['2021-01-01', '2021-02-01', '2021-04-01'],
    'Cost': [50, 100, 150],
}
df1 = pd.DataFrame(revenue_columns)
df2 = pd.DataFrame(cost_columns)

# Ordered merge on 'Date'; a value missing on either side is forward-filled
# from the previous row of the merged result.
merged_df = pd.merge_ordered(left=df1, right=df2, on='Date', fill_method='ffill')
print(merged_df)

         Date  Revenue  Cost
0  2021-01-01      100    50
1  2021-02-01      200   100
2  2021-03-01      300   100
3  2021-04-01      300   150


# 2. MultiIndexing in Pandas

In [2]:
import pandas as pd

# Column-oriented sales records: one entry per (year, month) pair.
data = {
    'year': [2010, 2010, 2011, 2011],
    'month': [1, 2, 1, 2],
    'sales': [100, 150, 200, 250],
}

# Promote 'year' and 'month' to a two-level MultiIndex, leaving 'sales' as the
# only data column.
index_levels = ['year', 'month']
df = pd.DataFrame(data).set_index(index_levels)

# 3. pd.Grouper

In [3]:
import pandas as pd
import numpy as np

# One row per calendar day from 2020-01-01 through 2022-01-01 (inclusive),
# each carrying a random integer in [0, 100).
date_rng = pd.date_range(start='1/1/2020', end='1/01/2022', freq='D')
df = pd.DataFrame(date_rng, columns=['date'])
df['data'] = np.random.randint(0, 100, size=len(date_rng))
df = df.set_index('date')

# Sum the daily values per calendar year. The frequency is given as an offset
# object instead of the string alias 'Y': that alias is deprecated for offsets
# in pandas 2.2+ (replaced by 'YE'), while 'YE' is unknown to older releases.
# YearEnd() names the same December year-end frequency on every version, so
# each group is still labelled with its year-end date.
# The range ends on 2022-01-01, so the last group holds a single day.
grouped_year = df.groupby(pd.Grouper(freq=pd.offsets.YearEnd())).sum()
grouped_year

Unnamed: 0_level_0,data
date,Unnamed: 1_level_1
2020-12-31,18622
2021-12-31,17872
2022-12-31,42


In [4]:
import pandas as pd
import numpy as np

# One row per calendar day from 2020-01-01 through 2022-01-01 (inclusive),
# each carrying a random integer in [0, 100).
date_rng = pd.date_range(start='1/1/2020', end='1/01/2022', freq='D')
df = pd.DataFrame(date_rng, columns=['date'])
df['data'] = np.random.randint(0, 100, size=len(date_rng))
df = df.set_index('date')

# Sum the daily values per calendar month. The frequency is given as an offset
# object instead of the string alias 'M': that alias is deprecated for offsets
# in pandas 2.2+ (replaced by 'ME'), while 'ME' is unknown to older releases.
# MonthEnd() names the same month-end frequency on every version, so each
# group is still labelled with its month-end date.
grouped_month = df.groupby(pd.Grouper(freq=pd.offsets.MonthEnd())).sum()
grouped_month

Unnamed: 0_level_0,data
date,Unnamed: 1_level_1
2020-01-31,1369
2020-02-29,1589
2020-03-31,1409
2020-04-30,1639
2020-05-31,1550
2020-06-30,1542
2020-07-31,1629
2020-08-31,1472
2020-09-30,1751
2020-10-31,1487


# 4. pd.eval

In [5]:
import pandas as pd
import numpy as np

# One million rows of random integers in [0, 100) across columns A-D.
random_values = np.random.randint(0, 100, size=(1000000, 4))
df = pd.DataFrame(random_values, columns=list('ABCD'))

# Evaluate the column arithmetic through pd.eval, which parses the expression
# strings and resolves `df` from the surrounding namespace.
column_sum = pd.eval('df.A + df.B')
column_diff = pd.eval('df.C - df.D')
df['E'] = column_sum
df['F'] = column_diff
df.head()

Unnamed: 0,A,B,C,D,E,F
0,64,69,3,35,133,-32
1,76,10,71,49,86,22
2,77,87,10,82,164,-72
3,65,18,81,32,83,49
4,30,33,42,54,63,-12


In [6]:
import pandas as pd
import numpy as np

# One million rows of random integers in [0, 100) across columns A-D.
random_values = np.random.randint(0, 100, size=(1000000, 4))
df = pd.DataFrame(random_values, columns=list('ABCD'))

# Row-wise boolean flag computed by pd.eval: True where A exceeds 50 while B is
# below 30.
row_flags = pd.eval('df.A > 50 and df.B < 30')
df['E'] = row_flags
df.head()

Unnamed: 0,A,B,C,D,E
0,58,66,65,47,False
1,75,43,13,83,False
2,17,77,93,52,False
3,83,85,44,39,False
4,86,34,57,2,False


# 5. pd.DataFrame.style

In [7]:
import pandas as pd
import numpy as np
df = pd.DataFrame(np.random.randn(5, 5), columns=list('ABCDE'))
styled_df = df.style.background_gradient(cmap='coolwarm', axis=0)
styled_df

Unnamed: 0,A,B,C,D,E
0,-0.355131,-0.347968,0.865194,0.472778,0.925201
1,-1.669599,1.19468,-0.223249,1.50265,-0.110354
2,-1.156169,0.7721,-0.894298,0.497568,-0.260167
3,1.617421,0.270704,-1.602151,1.797463,-0.078411
4,1.853081,0.797295,1.682386,0.360912,0.736553


In [8]:
import pandas as pd
df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': ['a', 'b', 'c', 'd']})
styled_df = df.style.set_properties(**{'background-color': 'pink', 'color': 'white'}, subset=pd.IndexSlice[1:3, 'B'])
styled_df

Unnamed: 0,A,B
0,1,a
1,2,b
2,3,c
3,4,d


In [9]:
import pandas as pd
import numpy as np
df = pd.DataFrame(np.random.randn(5, 5), columns=list('ABCDE'))
styled_df = df.style.set_caption('My Custom Caption').set_table_styles([{'selector': 'caption', 'props': [('color', 'red'), ('font-size', '20px')]}])
styled_df

Unnamed: 0,A,B,C,D,E
0,-1.499051,-0.69448,-0.173657,-0.335961,0.528174
1,0.122313,1.789473,1.404269,-3.099198,1.273354
2,0.473963,0.373504,-1.004536,0.864541,-1.766586
3,-0.829341,-0.914608,0.436055,0.380552,0.684919
4,0.741998,-2.000769,0.450342,1.397902,-0.463563
