In [1]:
import pandas as pd

In [2]:
# Sample records keyed by column name; each list holds one value per person.
people = {
    'first': ['Corey', 'Jane', 'John'],
    'last': ['Schafer', 'Doe', 'Doe'],
    'email': [
        'CoreyMSchafer@gmail.com',
        'JaneDoe@email.com',
        'JohnDoe@email.com',
    ],
}

In [3]:
# Build a DataFrame from the dict: each key becomes a column and each list
# supplies that column's values, one per row.
df = pd.DataFrame(people)

In [4]:
# df


In [5]:
# pd.set_option('display.max_rows', 1)

In [6]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [7]:
# Positional selection: rows at positions 0 and 1, columns at positions 2 and 1
# in that order, i.e. 'email' followed by 'last'.
df.iloc[[0, 1], [2, 1]]

Unnamed: 0,email,last
0,CoreyMSchafer@gmail.com,Schafer
1,JaneDoe@email.com,Doe


In [8]:
df.columns

Index(['first', 'last', 'email'], dtype='object')

## UPDATING ROWS AND COLUMNS

In [9]:
# Replace all column labels at once by assigning a list with one name per
# existing column.
df.columns = ['first_name', 'last_name', 'email']

In [10]:
# Normalise every column label to lower case.
df.columns = [label.lower() for label in df.columns]

In [11]:
df

Unnamed: 0,first_name,last_name,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [12]:
# Swap spaces for underscores in the column labels via the vectorised .str
# accessor. The current labels contain no spaces, so nothing changes here.
df.columns = df.columns.str.replace(' ', '_')

In [13]:
# Assigning a list to a column overwrites every value in it (one item per row).
# These values match the existing ones, so the frame looks the same afterwards.
df['first_name'] = ['Corey', 'Jane', 'John']
df

Unnamed: 0,first_name,last_name,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [14]:
# Rename a single column through a {old: new} mapping. rename() returns a new
# frame, so the result is assigned back to df instead of using inplace=True,
# which pandas discourages and which cannot be used in a method chain.
df = df.rename(columns={'first_name': 'first'})

In [15]:
# df.replace('_', ' ')

In [16]:
# Update a single cell: the row labelled 2, column 'first'.
df.loc[2, 'first'] = 'Smith'
df

Unnamed: 0,first,last_name,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,Smith,Doe,JohnDoe@email.com


In [17]:
# Overwrite the whole row labelled 2; the list supplies one value per column,
# in column order.
df.loc[2] = ['John', 'Doe', 'JohnDoe@email.com']

In [18]:
# df 

In [19]:
# Lower-case every email with the vectorised string method and store the
# result back in the column.
df['email'] = df['email'].str.lower()

In [20]:
df

Unnamed: 0,first,last_name,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Doe,johndoe@email.com


In [21]:
# Series.apply calls the function once per value: here, the character length
# of each email.
df['email'].apply(len)

0    23
1    17
2    17
Name: email, dtype: int64

In [22]:
def update_email(e):
    """Return ``e`` converted to upper case (the result of ``e.upper()``)."""
    shouted = e.upper()
    return shouted

In [23]:
# Run update_email on each value and write the upper-cased result back to the
# column.
df['email'] = df['email'].apply(update_email)

In [24]:
df

Unnamed: 0,first,last_name,email
0,Corey,Schafer,COREYMSCHAFER@GMAIL.COM
1,Jane,Doe,JANEDOE@EMAIL.COM
2,John,Doe,JOHNDOE@EMAIL.COM


In [25]:
# Same pattern with an inline lambda: convert each email back to lower case.
df['email'] = df['email'].apply(lambda x: x.lower())

In [26]:
df

Unnamed: 0,first,last_name,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Doe,johndoe@email.com


In [27]:
# DataFrame.apply hands each column (a Series) to the function, so len returns
# the number of rows in each column, not the length of the strings inside it.
df.apply(len)

first        3
last_name    3
email        3
dtype: int64

In [28]:
# len() of a Series is its number of rows.
len(df['email'])

3

In [29]:
 df.apply(pd.Series.min)

first                          Corey
last_name                        Doe
email        coreymschafer@gmail.com
dtype: object

In [30]:
# applymap calls the function on every individual cell; here, the length of
# each string.
# NOTE: DataFrame.applymap is deprecated since pandas 2.1 in favour of
# DataFrame.map; switch when upgrading pandas.
df.applymap(len)

Unnamed: 0,first,last_name,email
0,5,7,23
1,4,3,17
2,4,3,17


In [31]:
# Lower-case every cell. The result is a new frame; it is not assigned, so df
# itself is not modified.
# NOTE: DataFrame.applymap is deprecated since pandas 2.1 in favour of
# DataFrame.map; switch when upgrading pandas.
df.applymap(str.lower)

Unnamed: 0,first,last_name,email
0,corey,schafer,coreymschafer@gmail.com
1,jane,doe,janedoe@email.com
2,john,doe,johndoe@email.com


In [32]:
df

Unnamed: 0,first,last_name,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Doe,johndoe@email.com


In [33]:
# Series.map substitutes values via the dict; per the pandas docs, values that
# are missing from the dict (here 'John') become NaN. Only the last expression
# of a cell is displayed, so this result is computed and then discarded.
df['first'].map({'Corey': 'ccc', 'Jane': 'jjj'})
# Series.replace swaps only the listed values and leaves the rest untouched,
# which is what the displayed output shows ('John' is kept).
# Neither result is assigned, so df is unchanged.
df['first'].replace({'Corey': 'ccc', 'Jane': 'jjj'})

0     ccc
1     jjj
2    John
Name: first, dtype: object

In [34]:
df

Unnamed: 0,first,last_name,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Doe,johndoe@email.com


## ADDING AND REMOVING ROWS AND COLUMNS

In [35]:
# Add a new column by assigning to a label that does not exist yet; here it
# joins the first and last name with a space.
df['full_name'] = df['first'] + ' ' + df['last_name']
# df

In [36]:
# Drop columns by label. drop() returns a new frame and leaves df untouched;
# assign the result back (df = df.drop(...)) to keep the change.
df.drop(columns=['first', 'last_name'])

Unnamed: 0,email,full_name
0,coreymschafer@gmail.com,Corey Schafer
1,janedoe@email.com,Jane Doe
2,johndoe@email.com,John Doe


In [37]:
# Drop a single row by its index label. drop() returns a new frame; this
# result is neither displayed nor assigned, so df is unchanged.
df.drop(index=2)

# Several rows can be dropped at once by first selecting their index labels.
ix = df[df['full_name'] == 'Jane Doe'].index
# df.drop(index=ix)

# A cell that ends in an assignment displays nothing, so echo the selected
# labels explicitly to reproduce the recorded output.
ix

Int64Index([1], dtype='int64')


In [38]:
# Split full_name back into separate first and last name columns.
# expand=True is required: it makes str.split return one column per piece
# instead of a single column of lists, which the two-column assignment needs.
df[['first', 'last_name']] = df['full_name'].str.split(' ', expand=True)

In [39]:
df

Unnamed: 0,first,last_name,email,full_name
0,Corey,Schafer,coreymschafer@gmail.com,Corey Schafer
1,Jane,Doe,janedoe@email.com,Jane Doe
2,John,Doe,johndoe@email.com,John Doe


In [40]:
# Add a single row. DataFrame.append was deprecated in pandas 1.4 and removed
# in 2.0, so wrap the record in a one-row frame and use pd.concat instead.
# Columns missing from the record ('last_name', 'full_name') are filled with NaN.
# The result is only displayed, not assigned, so df itself is unchanged.
pd.concat(
    [df, pd.DataFrame([{'first': 'Baba', 'email': 'b@gmail.com'}])],
    ignore_index=True,
)

Unnamed: 0,first,last_name,email,full_name
0,Corey,Schafer,coreymschafer@gmail.com,Corey Schafer
1,Jane,Doe,janedoe@email.com,Jane Doe
2,John,Doe,johndoe@email.com,John Doe
3,Baba,,b@gmail.com,


In [41]:
# Two extra records to add to the frame; note there is no full-name entry.
people = {
    'first': ['AAA', 'BBB'],
    'last_name': ['aaa', 'zzz'],
    'email': ['aaa@gmail.com', 'bbb@email.com'],
}

In [42]:
# Second frame holding the extra rows to add; it has no 'full_name' column.
df2 = pd.DataFrame(people)

In [43]:
# Append the rows of df2 to df. pd.concat returns a new frame, so the result
# must be assigned back to df to keep the added rows. (DataFrame.append was
# deprecated in pandas 1.4 and removed in 2.0.)
# df2 has no 'full_name' column, so that column is NaN for the new rows.
df = pd.concat([df, df2], ignore_index=True)

In [44]:
df

Unnamed: 0,first,last_name,email,full_name
0,Corey,Schafer,coreymschafer@gmail.com,Corey Schafer
1,Jane,Doe,janedoe@email.com,Jane Doe
2,John,Doe,johndoe@email.com,John Doe
3,AAA,aaa,aaa@gmail.com,
4,BBB,zzz,bbb@email.com,


## SORTING

In [51]:
# Sort by 'first' descending, breaking ties by 'full_name' ascending.
# sort_values returns a new frame (rows keep their original index labels);
# df itself is not reordered because the result is not assigned.
df.sort_values(by=['first', 'full_name'], ascending=[False, True])

Unnamed: 0,first,last_name,email,full_name
2,John,Doe,johndoe@email.com,John Doe
1,Jane,Doe,janedoe@email.com,Jane Doe
0,Corey,Schafer,coreymschafer@gmail.com,Corey Schafer
4,BBB,zzz,bbb@email.com,
3,AAA,aaa,aaa@gmail.com,
