# Selection

In [1]:
import pandas as pd
df = pd.DataFrame({'name' : ['Ankit', 'Vinay', 'Nigam'],
                  'growth' : [0.7, 0.2, 0.9]})

In [2]:
df

Unnamed: 0,growth,name
0,0.7,Ankit
1,0.2,Vinay
2,0.9,Nigam


In [3]:
df[df.name == 'Ankit']

Unnamed: 0,growth,name
0,0.7,Ankit


In [4]:
df.name == 'Ankit'

0     True
1    False
2    False
Name: name, dtype: bool

In [5]:
df[df.name.str.contains('o')]

Unnamed: 0,growth,name


In [7]:
df.dtypes

growth    float64
name       object
dtype: object

In [8]:
df[df.name.str.contains('i')]

Unnamed: 0,growth,name
0,0.7,Ankit
1,0.2,Vinay
2,0.9,Nigam


In [10]:
df[(df.name == 'Ankit') & (df.growth > 0.4)]

Unnamed: 0,growth,name
0,0.7,Ankit


In [12]:
df.query('name == "Ankit" and growth > .4')  #pip install numexpr

Unnamed: 0,growth,name
0,0.7,Ankit


In [13]:
df[(df.name == 'Ankit') | (df.name == "Vinay")]

Unnamed: 0,growth,name
0,0.7,Ankit
1,0.2,Vinay


In [14]:
df.query('name == "Ankit" or name == "Nigam"')

Unnamed: 0,growth,name
0,0.7,Ankit
2,0.9,Nigam


In [15]:
df[~(df.name == 'John')]

Unnamed: 0,growth,name
0,0.7,Ankit
1,0.2,Vinay
2,0.9,Nigam


In [16]:
df.query('not name == "John"')

Unnamed: 0,growth,name
0,0.7,Ankit
1,0.2,Vinay
2,0.9,Nigam


In [17]:
df.head(2)

Unnamed: 0,growth,name
0,0.7,Ankit
1,0.2,Vinay


In [18]:
df.tail(2)

Unnamed: 0,growth,name
1,0.2,Vinay
2,0.9,Nigam


In [20]:
df.iloc[[2,1]]

Unnamed: 0,growth,name
2,0.9,Nigam
1,0.2,Vinay


In [21]:
df[:2]

Unnamed: 0,growth,name
0,0.7,Ankit
1,0.2,Vinay


# Projection

In [22]:
df

Unnamed: 0,growth,name
0,0.7,Ankit
1,0.2,Vinay
2,0.9,Nigam


In [23]:
df.dtypes

growth    float64
name       object
dtype: object

In [24]:
df.name

0    Ankit
1    Vinay
2    Nigam
Name: name, dtype: object

In [25]:
df['name']

0    Ankit
1    Vinay
2    Nigam
Name: name, dtype: object

In [26]:
df[['name','growth']]

Unnamed: 0,name,growth
0,Ankit,0.7
1,Vinay,0.2
2,Nigam,0.9


In [27]:
df.iloc[:1]

Unnamed: 0,growth,name
0,0.7,Ankit


In [28]:
df.iloc[:, :1]

Unnamed: 0,growth
0,0.7
1,0.2
2,0.9


In [29]:
df.loc[:, ['name']]

Unnamed: 0,name
0,Ankit
1,Vinay
2,Nigam


# Product

In [30]:
df

Unnamed: 0,growth,name
0,0.7,Ankit
1,0.2,Vinay
2,0.9,Nigam


In [36]:
inst_df = pd.DataFrame([{'name' : 'Ankit', 'inst' : 'Guitar'},
                        {'inst' : 'Drum', 'name' : 'Bob'}])

In [37]:
inst_df

Unnamed: 0,inst,name
0,Guitar,Ankit
1,Drum,Bob


In [38]:
pd.merge(df, inst_df)

Unnamed: 0,growth,name,inst
0,0.7,Ankit,Guitar


In [40]:
pd.merge(df, inst_df, how="inner", on="name")

Unnamed: 0,growth,name,inst
0,0.7,Ankit,Guitar


In [41]:
pd.merge(df, inst_df, how="outer", on="name")

Unnamed: 0,growth,name,inst
0,0.7,Ankit,Guitar
1,0.2,Vinay,
2,0.9,Nigam,
3,,Bob,Drum


In [42]:
pd.merge(df, inst_df, how="left", on="name")

Unnamed: 0,growth,name,inst
0,0.7,Ankit,Guitar
1,0.2,Vinay,
2,0.9,Nigam,


In [43]:
pd.merge(df, inst_df, how="right", on="name")

Unnamed: 0,growth,name,inst
0,0.7,Ankit,Guitar
1,,Bob,Drum


In [48]:
inst2 = inst_df.copy()
inst2 = inst2.rename(columns={'name' : 'first'})

In [49]:
inst2

Unnamed: 0,inst,first
0,Guitar,Ankit
1,Drum,Bob


In [51]:
pd.merge(df, inst2, left_on="name", right_on="first")

Unnamed: 0,growth,name,inst,first
0,0.7,Ankit,Guitar,Ankit


In [52]:
pd.merge(df, inst_df, left_index=True, right_index=True)

Unnamed: 0,growth,name_x,inst,name_y
0,0.7,Ankit,Guitar,Ankit
1,0.2,Vinay,Drum,Bob


In [53]:
df.set_index('name')

Unnamed: 0_level_0,growth
name,Unnamed: 1_level_1
Ankit,0.7
Vinay,0.2
Nigam,0.9


In [54]:
pd.merge(df.set_index('name'), inst_df.set_index('name'), left_index=True, right_index=True)

Unnamed: 0_level_0,growth,inst
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Ankit,0.7,Guitar


In [55]:
pd.merge(df.set_index('name'), inst_df.set_index('name'), left_index=True, right_index=True, how="outer")

Unnamed: 0_level_0,growth,inst
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Ankit,0.7,Guitar
Bob,,Drum
Nigam,0.9,
Vinay,0.2,


# Union

In [56]:
df

Unnamed: 0,growth,name
0,0.7,Ankit
1,0.2,Vinay
2,0.9,Nigam


In [57]:
inst_df

Unnamed: 0,inst,name
0,Guitar,Ankit
1,Drum,Bob


In [58]:
inst2

Unnamed: 0,inst,first
0,Guitar,Ankit
1,Drum,Bob


In [60]:
pd.concat([df, df])

Unnamed: 0,growth,name
0,0.7,Ankit
1,0.2,Vinay
2,0.9,Nigam
0,0.7,Ankit
1,0.2,Vinay
2,0.9,Nigam


In [61]:
pd.concat([df, df]).reset_index()

Unnamed: 0,index,growth,name
0,0,0.7,Ankit
1,1,0.2,Vinay
2,2,0.9,Nigam
3,0,0.7,Ankit
4,1,0.2,Vinay
5,2,0.9,Nigam


In [63]:
df.append(df)

Unnamed: 0,growth,name
0,0.7,Ankit
1,0.2,Vinay
2,0.9,Nigam
0,0.7,Ankit
1,0.2,Vinay
2,0.9,Nigam


In [64]:
df.append(df).reset_index()

Unnamed: 0,index,growth,name
0,0,0.7,Ankit
1,1,0.2,Vinay
2,2,0.9,Nigam
3,0,0.7,Ankit
4,1,0.2,Vinay
5,2,0.9,Nigam


In [65]:
df.append(df, ignore_index=True)

Unnamed: 0,growth,name
0,0.7,Ankit
1,0.2,Vinay
2,0.9,Nigam
3,0.7,Ankit
4,0.2,Vinay
5,0.9,Nigam


In [66]:
df.append(df).reset_index(drop=True)

Unnamed: 0,growth,name
0,0.7,Ankit
1,0.2,Vinay
2,0.9,Nigam
3,0.7,Ankit
4,0.2,Vinay
5,0.9,Nigam


In [67]:
df2 = df.append(df)

In [68]:
df

Unnamed: 0,growth,name
0,0.7,Ankit
1,0.2,Vinay
2,0.9,Nigam


In [69]:
df2

Unnamed: 0,growth,name
0,0.7,Ankit
1,0.2,Vinay
2,0.9,Nigam
0,0.7,Ankit
1,0.2,Vinay
2,0.9,Nigam


In [70]:
df2.index.is_unique

False

In [71]:
df2.reset_index().index.is_unique

True

In [72]:
df2.iloc[0]

growth      0.7
name      Ankit
Name: 0, dtype: object

In [73]:
df2.loc[0]

Unnamed: 0,growth,name
0,0.7,Ankit
0,0.7,Ankit


In [74]:
df.loc[0]

growth      0.7
name      Ankit
Name: 0, dtype: object

# Differences

In [75]:
df

Unnamed: 0,growth,name
0,0.7,Ankit
1,0.2,Vinay
2,0.9,Nigam


In [76]:
inst_df

Unnamed: 0,inst,name
0,Guitar,Ankit
1,Drum,Bob


In [77]:
inst2

Unnamed: 0,inst,first
0,Guitar,Ankit
1,Drum,Bob


In [79]:
~df.name.isin(inst_df.name)

0    False
1     True
2     True
Name: name, dtype: bool

In [80]:
mask = ~df.name.isin(inst_df.name)
df[mask]

Unnamed: 0,growth,name
1,0.2,Vinay
2,0.9,Nigam


In [81]:
df.query('name not in inst_df.name')

UndefinedVariableError: name 'inst_df' is not defined