# Pandas Operations

In [1]:
import pandas as pd
import numpy as np

In [4]:
# Demo frame used throughout the notebook: a key column, a numeric column
# and a two-letter code column. The rows at index 2 and 3 are exact duplicates.
df_one = pd.DataFrame(
    {
        "k1": list("AABBCC"),
        "col1": [100, 200, 300, 300, 400, 500],
        "col2": ["NY", "CA", "WA", "WA", "AK", "NV"],
    }
)

In [5]:
df_one

Unnamed: 0,k1,col1,col2
0,A,100,NY
1,A,200,CA
2,B,300,WA
3,B,300,WA
4,C,400,AK
5,C,500,NV


## Unique Values

In [6]:
df_one['col2'].unique()

array(['NY', 'CA', 'WA', 'AK', 'NV'], dtype=object)

In [7]:
df_one['k1'].unique()

array(['A', 'B', 'C'], dtype=object)

In [8]:
df_one['col2'].nunique()

5

## Count

In [9]:
df_one['col2'].value_counts()

WA    2
AK    1
NY    1
NV    1
CA    1
Name: col2, dtype: int64

## Remove Duplicates

In [10]:
df_one.drop_duplicates()

Unnamed: 0,k1,col1,col2
0,A,100,NY
1,A,200,CA
2,B,300,WA
4,C,400,AK
5,C,500,NV


## Create New Columns (arithmetic and the 'apply' method)

In [11]:
df_one['NEW'] = df_one['col1'] * 10

In [12]:
df_one

Unnamed: 0,k1,col1,col2,NEW
0,A,100,NY,1000
1,A,200,CA,2000
2,B,300,WA,3000
3,B,300,WA,3000
4,C,400,AK,4000
5,C,500,NV,5000


In [13]:
def grab_fletter(state):
    """Return the first character of ``state``.

    Args:
        state: A string (or other sliceable sequence), e.g. "NY".

    Returns:
        The leading character of ``state``, or an empty value of the same
        type when ``state`` is empty.
    """
    # Slice instead of index: "" yields "" rather than raising IndexError,
    # so a single blank value cannot abort a whole Series.apply() call.
    return state[:1]

In [14]:
grab_fletter("NA")

'N'

In [16]:
df_one['First Letter'] = df_one['col2'].apply(grab_fletter)

In [17]:
df_one

Unnamed: 0,k1,col1,col2,NEW,First Letter
0,A,100,NY,1000,N
1,A,200,CA,2000,C
2,B,300,WA,3000,W
3,B,300,WA,3000,W
4,C,400,AK,4000,A
5,C,500,NV,5000,N


In [18]:
def complex_letter(state):
    """Label a value by its first letter.

    Args:
        state: A string, e.g. "WA".

    Returns:
        "Washington" when ``state`` starts with 'W'; "Error" for anything
        else, including an empty string.
    """
    # state[:1] is "" for empty input, so it falls through to "Error"
    # instead of raising IndexError the way state[0] would.
    if state[:1] == 'W':
        return "Washington"
    return "Error"

In [19]:
df_one['col2'].apply(complex_letter)

0         Error
1         Error
2    Washington
3    Washington
4         Error
5         Error
Name: col2, dtype: object

## Map method

In [20]:
df_one['k1']

0    A
1    A
2    B
3    B
4    C
5    C
Name: k1, dtype: object

In [21]:
my_map = {'A': 1, 'B': 2, 'C': 3}

In [23]:
df_one['num'] = df_one['k1'].map(my_map)

In [24]:
df_one

Unnamed: 0,k1,col1,col2,NEW,First Letter,num
0,A,100,NY,1000,N,1
1,A,200,CA,2000,C,1
2,B,300,WA,3000,W,2
3,B,300,WA,3000,W,2
4,C,400,AK,4000,A,3
5,C,500,NV,5000,N,3


## Get the Max Value and Its Index

In [28]:
df_one['col1'].max()

500

In [27]:
df_one['col1'].idxmax()

5

## Other Operations: Renaming Columns, Sorting, Concatenating, and Dummy Variables

In [29]:
df_one.columns

Index(['k1', 'col1', 'col2', 'NEW', 'First Letter', 'num'], dtype='object')

In [30]:
# Rename by label rather than by position: assigning a full list to
# `.columns` mislabels data if the column order differs and raises if the
# column count differs (e.g. when cells are run out of order). A mapping only
# touches the named columns and is safe to re-run.
df_one = df_one.rename(columns={'col1': 'C1', 'col2': 'C2', 'NEW': 'New'})

In [31]:
df_one

Unnamed: 0,k1,C1,C2,New,First Letter,num
0,A,100,NY,1000,N,1
1,A,200,CA,2000,C,1
2,B,300,WA,3000,W,2
3,B,300,WA,3000,W,2
4,C,400,AK,4000,A,3
5,C,500,NV,5000,N,3


In [32]:
df_one.sort_values(by = 'C1')

Unnamed: 0,k1,C1,C2,New,First Letter,num
0,A,100,NY,1000,N,1
1,A,200,CA,2000,C,1
2,B,300,WA,3000,W,2
3,B,300,WA,3000,W,2
4,C,400,AK,4000,A,3
5,C,500,NV,5000,N,3


In [33]:
# Two small frames that share the same default 0-4 index: a two-column
# frame and a single-column frame, used below to demonstrate pd.concat.
features = pd.DataFrame(
    {
        "A": list(range(100, 600, 100)),
        "B": list(range(12, 17)),
    }
)
predictions = pd.DataFrame({"pred": [0, 1, 1, 0, 1]})

In [35]:
features

Unnamed: 0,A,B
0,100,12
1,200,13
2,300,14
3,400,15
4,500,16


In [36]:
predictions

Unnamed: 0,pred
0,0
1,1
2,1
3,0
4,1


In [37]:
pd.concat([features, predictions])

Unnamed: 0,A,B,pred
0,100.0,12.0,
1,200.0,13.0,
2,300.0,14.0,
3,400.0,15.0,
4,500.0,16.0,
0,,,0.0
1,,,1.0
2,,,1.0
3,,,0.0
4,,,1.0


In [38]:
pd.concat([features, predictions], axis = 1)

Unnamed: 0,A,B,pred
0,100,12,0
1,200,13,1
2,300,14,1
3,400,15,0
4,500,16,1


In [39]:
df_one['k1']

0    A
1    A
2    B
3    B
4    C
5    C
Name: k1, dtype: object

In [40]:
# dtype=int keeps the indicator columns as 0/1 integers on every pandas
# version; since pandas 2.0 the default dtype is bool (True/False).
pd.get_dummies(df_one['k1'], dtype=int)

Unnamed: 0,A,B,C
0,1,0,0
1,1,0,0
2,0,1,0
3,0,1,0
4,0,0,1
5,0,0,1
