# Creating DataFrames

In [1]:
people = {
"first": ["Corey", 'Jane', 'John'],
"last": ["Schafer", 'Doe', 'Doe'],
"email": ["CoreyMSchafer@gmail.com", 'Jane Doe@email.com', 'JohnDoe@email.com']
}



In [2]:
import pandas as pd

In [3]:
df=pd.DataFrame(people)

In [4]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,Jane Doe@email.com
2,John,Doe,JohnDoe@email.com


In [5]:
df['email']

0    CoreyMSchafer@gmail.com
1         Jane Doe@email.com
2          JohnDoe@email.com
Name: email, dtype: object

In [6]:
type(df["email"])

pandas.core.series.Series

In [7]:
df[['email','last']]

Unnamed: 0,email,last
0,CoreyMSchafer@gmail.com,Schafer
1,Jane Doe@email.com,Doe
2,JohnDoe@email.com,Doe


In [8]:
df.columns

Index(['first', 'last', 'email'], dtype='object')

In [9]:
df.iloc[[0,1]]

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,Jane Doe@email.com


In [10]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,Jane Doe@email.com
2,John,Doe,JohnDoe@email.com


In [11]:
df.loc[0]

first                      Corey
last                     Schafer
email    CoreyMSchafer@gmail.com
Name: 0, dtype: object

In [12]:
df.loc[[0,1]]

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,Jane Doe@email.com


In [13]:
df.loc[[0,1],['first','email','last']]

Unnamed: 0,first,email,last
0,Corey,CoreyMSchafer@gmail.com,Schafer
1,Jane,Jane Doe@email.com,Doe


In [14]:
df['email']

0    CoreyMSchafer@gmail.com
1         Jane Doe@email.com
2          JohnDoe@email.com
Name: email, dtype: object

In [15]:
df.set_index('email')

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
CoreyMSchafer@gmail.com,Corey,Schafer
Jane Doe@email.com,Jane,Doe
JohnDoe@email.com,John,Doe


In [16]:
df.set_index('first', inplace=True)

In [17]:
df

Unnamed: 0_level_0,last,email
first,Unnamed: 1_level_1,Unnamed: 2_level_1
Corey,Schafer,CoreyMSchafer@gmail.com
Jane,Doe,Jane Doe@email.com
John,Doe,JohnDoe@email.com


In [18]:
df.reset_index(inplace=True)
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,Jane Doe@email.com
2,John,Doe,JohnDoe@email.com


# Filtering in Data


In [19]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,Jane Doe@email.com
2,John,Doe,JohnDoe@email.com


In [20]:
 filt = (df['last'] == 'Schafer') | (df['first']=='John')
    

In [21]:
df.loc[-filt]

Unnamed: 0,first,last,email
1,Jane,Doe,Jane Doe@email.com


# Updating Rows AND Columns

In [22]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,Jane Doe@email.com
2,John,Doe,JohnDoe@email.com


In [23]:
df.loc[[0,1,2],['first','last','email']]

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,Jane Doe@email.com
2,John,Doe,JohnDoe@email.com


In [24]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,Jane Doe@email.com
2,John,Doe,JohnDoe@email.com


In [25]:
df.columns

Index(['first', 'last', 'email'], dtype='object')

In [26]:
df.columns=['first_name','last_name','email']

In [27]:
df

Unnamed: 0,first_name,last_name,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,Jane Doe@email.com
2,John,Doe,JohnDoe@email.com


In [28]:
df.columns=(x.lower() for x in df.columns)
df


Unnamed: 0,first_name,last_name,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,Jane Doe@email.com
2,John,Doe,JohnDoe@email.com


In [29]:
df.columns = df.columns.str.replace(' ', '')
df

Unnamed: 0,first_name,last_name,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,Jane Doe@email.com
2,John,Doe,JohnDoe@email.com


In [30]:
df.rename(columns={'firstname':'first', 'lastname':'last'},inplace=True)

In [31]:
df.loc[1]=['Jane','Doe','JaneDoe@email.com']

In [32]:
df.loc[2,['last','email']]=['Doe','JohnDoe@email.com']

In [33]:
df

Unnamed: 0,first_name,last_name,email,last
0,Corey,Schafer,CoreyMSchafer@gmail.com,
1,Jane,Doe,JaneDoe@email.com,
2,John,Doe,JohnDoe@email.com,Doe


In [34]:
df.loc[2,'last']='Smith'

In [35]:
df

Unnamed: 0,first_name,last_name,email,last
0,Corey,Schafer,CoreyMSchafer@gmail.com,
1,Jane,Doe,JaneDoe@email.com,
2,John,Doe,JohnDoe@email.com,Smith


In [36]:
df.at[2,'last']='Doe'
df

Unnamed: 0,first_name,last_name,email,last
0,Corey,Schafer,CoreyMSchafer@gmail.com,
1,Jane,Doe,JaneDoe@email.com,
2,John,Doe,JohnDoe@email.com,Doe


In [37]:
filt=(df['email']=='JohnDoe@email.com')
df[filt]['last']

2    Doe
Name: last, dtype: object

In [38]:
filt=(df['email']=='JohnDoe@email.com')
df.loc[filt,'last'] ='Smith'

In [39]:
df

Unnamed: 0,first_name,last_name,email,last
0,Corey,Schafer,CoreyMSchafer@gmail.com,
1,Jane,Doe,JaneDoe@email.com,
2,John,Doe,JohnDoe@email.com,Smith


In [40]:
df['email']=df['email'].str.lower()
df

Unnamed: 0,first_name,last_name,email,last
0,Corey,Schafer,coreymschafer@gmail.com,
1,Jane,Doe,janedoe@email.com,
2,John,Doe,johndoe@email.com,Smith


# Apply,Map,ApplyMap,replace

In [41]:
df['email'].apply(len) #How Apply works with series object and Map it only works for series Objects

0    23
1    17
2    17
Name: email, dtype: int64

In [42]:
def update_email(email):
    return email.upper()

In [43]:
df['email'].apply(update_email)

0    COREYMSCHAFER@GMAIL.COM
1          JANEDOE@EMAIL.COM
2          JOHNDOE@EMAIL.COM
Name: email, dtype: object

In [44]:
df['email']=df['email'].apply(update_email)
df

Unnamed: 0,first_name,last_name,email,last
0,Corey,Schafer,COREYMSCHAFER@GMAIL.COM,
1,Jane,Doe,JANEDOE@EMAIL.COM,
2,John,Doe,JOHNDOE@EMAIL.COM,Smith


In [45]:
df['email']=df['email'].apply(lambda x: x.lower())
df

Unnamed: 0,first_name,last_name,email,last
0,Corey,Schafer,coreymschafer@gmail.com,
1,Jane,Doe,janedoe@email.com,
2,John,Doe,johndoe@email.com,Smith


In [46]:
#How apply works on DataFrames

In [47]:
df['email'].apply(len)

0    23
1    17
2    17
Name: email, dtype: int64

In [48]:
df.apply(len, axis='columns')

0    4
1    4
2    4
dtype: int64

In [53]:
len(df['email'])

3

In [54]:
#ApplyMap works only for DataFrames and it is used for every individual elements

In [59]:
df.applymap()

TypeError: the first argument must be callable

In [52]:
df.applymap(str.lower)

TypeError: descriptor 'lower' for 'str' objects doesn't apply to a 'float' object

In [56]:
#MAP only for series Object
df['first'].map({'Corey':'Chris','Jane':'Mary'})

KeyError: 'first'

In [57]:
#in case if you want to change only two element and not the other one then we use Replace
df['first']= df['first'].replace({'Corey':'Chris','Jane':'Mary'})


KeyError: 'first'

In [58]:

df

Unnamed: 0,first_name,last_name,email,last
0,Corey,Schafer,coreymschafer@gmail.com,
1,Jane,Doe,janedoe@email.com,
2,John,Doe,johndoe@email.com,Smith


# Add/Remove Rows and Columns