In [3]:
import pandas as pd

In [4]:
# Sample records for the tutorial, stored as one list per column.
# NOTE(review): the emails for 'jane' and 'John' look swapped — confirm before reusing this data.
people = dict(
    first=["corey", "jane", "John"],
    last=["schafer", "Doe", "Doe"],
    email=["corey@gmail", "john@gmail", "jane@gmail"],
)

In [6]:
# Build the working frame: each key of `people` becomes a column.
df = pd.DataFrame(data=people)

In [7]:
df

Unnamed: 0,first,last,email
0,corey,schafer,corey@gmail
1,jane,Doe,john@gmail
2,John,Doe,jane@gmail


In [8]:
df.columns

Index(['first', 'last', 'email'], dtype='object')

In [16]:
# Replace all three column labels at once by assigning a full list to df.columns.
df.columns = ['first_name', 'last_name', 'email']

In [10]:
df

Unnamed: 0,first_name,last_name,email
0,corey,schafer,corey@gmail
1,jane,Doe,john@gmail
2,John,Doe,jane@gmail


In [17]:
df.columns

Index(['first_name', 'last_name', 'email'], dtype='object')

In [20]:
# The labels are already lower case at this point, so the output below is unchanged.
df.columns = [x.lower() for x in df.columns] # list comprehension to change them to lower case

In [21]:
df.columns

Index(['first_name', 'last_name', 'email'], dtype='object')

In [22]:
# Use rename() when only some of the columns need new names

In [24]:
# Rename only the listed columns; the unlisted 'email' column keeps its label.
# The result is assigned back to df instead of using inplace=True, so the rebinding
# is explicit and the call can be chained with other DataFrame methods.
df = df.rename(columns={'first_name': 'first', 'last_name': 'last'})

In [25]:
df

Unnamed: 0,first,last,email
0,corey,schafer,corey@gmail
1,jane,Doe,john@gmail
2,John,Doe,jane@gmail


In [27]:
# Overwrite the whole row labelled 2; the list supplies one value per column (first, last, email).
df.loc[2] = ["john", "Smith", "johnsmith@gmail.com"]

In [28]:
df

Unnamed: 0,first,last,email
0,corey,schafer,corey@gmail
1,jane,Doe,john@gmail
2,john,Smith,johnsmith@gmail.com


In [29]:
# Select only the 'last' and 'email' values of the row labelled 2.
df.loc[2, ['last', 'email']]

last                   Smith
email    johnsmith@gmail.com
Name: 2, dtype: object

In [30]:
# Update just those two cells of row 2; the values are matched to the columns in list order.
df.loc[2, ['last', 'email']] = [ 'Doe', 'JohnDoe@gmail.com']

In [31]:
df

Unnamed: 0,first,last,email
0,corey,schafer,corey@gmail
1,jane,Doe,john@gmail
2,john,Doe,JohnDoe@gmail.com


In [32]:
# .at addresses a single cell by row label and column label.
# The cell already holds 'Doe' from the previous assignment, so the frame shown next is unchanged.
df.at[2,'last'] = 'Doe'

In [33]:
df

Unnamed: 0,first,last,email
0,corey,schafer,corey@gmail
1,jane,Doe,john@gmail
2,john,Doe,JohnDoe@gmail.com


In [35]:
# Boolean mask: True for the rows whose email equals 'JohnDoe@gmail.com'.
filt = (df['email']=='JohnDoe@gmail.com')
# Indexing with the mask keeps only the matching rows.
df[filt]

Unnamed: 0,first,last,email
2,john,Doe,JohnDoe@gmail.com


In [36]:
# Intentional anti-pattern (chained indexing): the assignment targets the intermediate
# object returned by df[filt], which triggers the SettingWithCopyWarning shown below.
# Use df.loc[filt, 'last'] = ... instead.
df[filt]['last'] = 'Smith'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[filt]['last'] = 'Smith'


In [37]:
# The form the warning recommends: select rows (mask) and column in a single .loc call,
# so the value is written to df itself.
df.loc[filt, 'last'] = 'Smith'

In [38]:
df

Unnamed: 0,first,last,email
0,corey,schafer,corey@gmail
1,jane,Doe,john@gmail
2,john,Smith,JohnDoe@gmail.com


In [39]:
# Apply, Map, Applymap, Replace

In [40]:
#Apply

In [41]:
# apply on a Series calls the function on every value: here, the length of each email string.
df['email'].apply(len)

0    11
1    10
2    17
Name: email, dtype: int64

In [42]:
def updateemail(email):
    """Return the given email string converted to upper case."""
    uppercased = email.upper()
    return uppercased

In [43]:
# Pass the function itself (no parentheses); apply calls it once per email value.
df['email'].apply(updateemail)

0          COREY@GMAIL
1           JOHN@GMAIL
2    JOHNDOE@GMAIL.COM
Name: email, dtype: object

In [44]:
# Assign the upper-cased Series back to the 'email' column so the change is stored in df.
df['email'] = df['email'].apply(updateemail)

In [45]:
df

Unnamed: 0,first,last,email
0,corey,schafer,COREY@GMAIL
1,jane,Doe,JOHN@GMAIL
2,john,Smith,JOHNDOE@GMAIL.COM


In [46]:
# Same pattern with an inline lambda: convert every email back to lower case.
df['email'] = df['email'].apply(lambda x:x.lower())

In [47]:
df

Unnamed: 0,first,last,email
0,corey,schafer,corey@gmail
1,jane,Doe,john@gmail
2,john,Smith,johndoe@gmail.com


In [49]:
# A single column selected from the frame is a pandas Series.
type(df['email'])

pandas.core.series.Series

In [50]:
# On a Series, len is applied to each value (the length of each email string).
df['email'].apply(len)

0    11
1    10
2    17
Name: email, dtype: int64

In [51]:
# On a DataFrame, apply passes each whole column to the function,
# so len returns the number of rows in every column (3), not string lengths.
df.apply(len)

first    3
last     3
email    3
dtype: int64

In [54]:
# With axis='columns' each row is passed to the function instead,
# so len returns the number of values in every row (3).
df.apply(len, axis= 'columns')

0    3
1    3
2    3
dtype: int64

In [55]:
# len of one column is its row count: the same number df.apply(len) reported per column.
len(df['email'])

3

In [56]:
# Apply Series.min to every column to get each column's minimum value.
# This is more meaningful on numeric columns; on these string columns it returns the smallest string.
df.apply(pd.Series.min)

first          corey
last             Doe
email    corey@gmail
dtype: object

In [57]:
# Here x is each column (a Series) in turn, so x.min() is that column's minimum.
df.apply(lambda x:x.min()) # same result as df.apply(pd.Series.min)

first          corey
last             Doe
email    corey@gmail
dtype: object

In [59]:
# applymap only works on DataFrames, whereas apply works on both: value by value on a Series, and column by column (or row by row) on a DataFrame
# Use applymap to apply a function to each and every element of the DataFrame

In [60]:
# Apply len to every individual cell, giving the string length of each value.
# NOTE(review): DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map — confirm the target pandas version before switching.
df.applymap(len)

Unnamed: 0,first,last,email
0,5,7,11
1,4,3,10
2,4,5,17


In [64]:
# map, as used here, is a Series method: each value is looked up in the dict,
# and values without a matching key (here 'john') become NaN.
df['first'].map({'corey':'Chris', 'jane':'Mary'})

0    Chris
1     Mary
2      NaN
Name: first, dtype: object

In [67]:
# map returned NaN for the value that had no replacement; to change only the listed
# values and keep every other value as it is, use the replace method instead.
df['first'].replace({'corey':'Chris', 'jane':'Mary'})

0    Chris
1     Mary
2     john
Name: first, dtype: object

In [69]:
# Element-wise string concatenation of two columns, with a space in between.
df['first'] + ' '+ df['last']

0    corey schafer
1         jane Doe
2       john Smith
dtype: object

In [70]:
# Assigning to a label that does not exist yet adds 'full_name' as a new column.
df['full_name']= df['first'] + ' '+ df['last']

In [71]:
df

Unnamed: 0,first,last,email,full_name
0,corey,schafer,corey@gmail,corey schafer
1,jane,Doe,john@gmail,jane Doe
2,john,Smith,johndoe@gmail.com,john Smith


In [72]:
# Preview the frame without the 'first' and 'last' columns; the result is only displayed, not assigned.
df.drop(columns = ['first', 'last'])

Unnamed: 0,email,full_name
0,corey@gmail,corey schafer
1,john@gmail,jane Doe
2,johndoe@gmail.com,john Smith


In [73]:
# Make the removal of 'first' and 'last' permanent by assigning the result back to df.
# An explicit reassignment is used instead of inplace=True, matching how the notebook
# later makes the appended rows permanent.
df = df.drop(columns=['first', 'last'])

In [74]:
df

Unnamed: 0,email,full_name
0,corey@gmail,corey schafer
1,john@gmail,jane Doe
2,johndoe@gmail.com,john Smith


In [76]:
# Split each full name on the space; expand=True returns the pieces as separate columns (0 and 1).
df['full_name'].str.split(' ', expand=True)

Unnamed: 0,0,1
0,corey,schafer
1,jane,Doe
2,john,Smith


In [77]:
# Assign the two split columns to 'first' and 'last', re-creating the columns dropped earlier.
df[['first', 'last']] = df['full_name'].str.split(' ', expand=True)

In [78]:
df

Unnamed: 0,email,full_name,first,last
0,corey@gmail,corey schafer,corey,schafer
1,john@gmail,jane Doe,jane,Doe
2,johndoe@gmail.com,john Smith,john,Smith


In [79]:
# Adding a row: appending a bare dict without ignore_index=True fails with the TypeError shown below.
# NOTE(review): DataFrame.append was removed in pandas 2.0, where this cell would raise AttributeError instead — confirm the target pandas version.
# NOTE(review): the key 'First' (capital F) does not match the existing 'first' column — probably a typo; confirm.
df.append({'First':'Tony'})

TypeError: Can only append a Series if ignore_index=True or if the Series has a name

In [80]:
# Add a single row built from a dict. DataFrame.append was deprecated in pandas 1.4 and
# removed in 2.0, so the dict is wrapped in a one-row DataFrame and joined with pd.concat;
# ignore_index=True renumbers the rows 0..n-1. The result is only displayed, not assigned.
# NOTE(review): the key 'First' (capital F) does not match the existing 'first' column,
# so a separate 'First' column is created — probably a typo; confirm.
pd.concat([df, pd.DataFrame([{'First': 'Tony'}])], ignore_index=True)

Unnamed: 0,email,full_name,first,last,First
0,corey@gmail,corey schafer,corey,schafer,
1,john@gmail,jane Doe,jane,Doe,
2,johndoe@gmail.com,john Smith,john,Smith,
3,,,,,Tony


In [84]:
# Second batch of records (same three fields as before), turned into its own frame
# so it can be appended to df.
people = dict(
    first=["Tony", "Steve"],
    last=["Stark", "Rogers"],
    email=["IronMan@gmail", "Cap@gmail"],
)
df2 = pd.DataFrame(data=people)

In [85]:
df2

Unnamed: 0,first,last,email
0,Tony,Stark,IronMan@gmail
1,Steve,Rogers,Cap@gmail


In [86]:
# Preview df with the rows of df2 added underneath. pd.concat replaces DataFrame.append
# (deprecated in pandas 1.4, removed in 2.0) and yields the same frame: columns are aligned
# by name, df2 has no 'full_name' so those cells are NaN, and ignore_index=True renumbers the rows.
pd.concat([df, df2], ignore_index=True)

Unnamed: 0,email,full_name,first,last
0,corey@gmail,corey schafer,corey,schafer
1,john@gmail,jane Doe,jane,Doe
2,johndoe@gmail.com,john Smith,john,Smith
3,IronMan@gmail,,Tony,Stark
4,Cap@gmail,,Steve,Rogers


In [87]:
# To make it permanent, assign the combined frame back to df.
# pd.concat is used because DataFrame.append was deprecated in pandas 1.4 and removed in 2.0.
df = pd.concat([df, df2], ignore_index=True)

In [88]:
df

Unnamed: 0,email,full_name,first,last
0,corey@gmail,corey schafer,corey,schafer
1,john@gmail,jane Doe,jane,Doe
2,johndoe@gmail.com,john Smith,john,Smith
3,IronMan@gmail,,Tony,Stark
4,Cap@gmail,,Steve,Rogers


In [91]:
# Preview the frame without the row labelled 4; the result is not assigned, so df keeps that row.
df.drop(index = 4)

Unnamed: 0,email,full_name,first,last
0,corey@gmail,corey schafer,corey,schafer
1,john@gmail,jane Doe,jane,Doe
2,johndoe@gmail.com,john Smith,john,Smith
3,IronMan@gmail,,Tony,Stark


In [92]:
# Drop rows by condition: filter the rows whose last name is 'Doe', take their index labels,
# and pass those labels to drop. The result is only displayed, not assigned.
df.drop(index = df[df['last'] == 'Doe'].index)

Unnamed: 0,email,full_name,first,last
0,corey@gmail,corey schafer,corey,schafer
2,johndoe@gmail.com,john Smith,john,Smith
3,IronMan@gmail,,Tony,Stark
4,Cap@gmail,,Steve,Rogers


In [96]:
# Alternatively, name the boolean mask first to keep the drop call readable.
filt = df['last'] == 'Doe'
# Same result as the one-line version: rows matching the mask are removed from the displayed frame.
df.drop(index = df[filt].index)

Unnamed: 0,email,full_name,first,last
0,corey@gmail,corey schafer,corey,schafer
2,johndoe@gmail.com,john Smith,john,Smith
3,IronMan@gmail,,Tony,Stark
4,Cap@gmail,,Steve,Rogers
