In [1]:
person = {
"first": "Corey",
"last": "Schafer",
"email": "CoreyMSchafer@gmail.com"
}

In [2]:
people = {
    "first": ["Corey"],
    "last": ["Schafer"],
    "email": ["CoreyMSchafer@gmail.com"]
}

In [7]:
people = {
    "first": ["Corey", "Jane", "John"],
    "last": ["Schafer", "Doe", "Doe"],
    "email": ["CoreyMSchafer@gmail.com", "JaneDoe@email.com", "JohnDoe@email.com"]
}

# The keys are the rows and the values are the columns

In [8]:
people["email"]

['CoreyMSchafer@gmail.com', 'JaneDoe@email.com', 'JohnDoe@email.com']

In [9]:
import pandas as pd

In [10]:
df = pd.DataFrame(people)

In [11]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [12]:
df["email"]

0    CoreyMSchafer@gmail.com
1          JaneDoe@email.com
2          JohnDoe@email.com
Name: email, dtype: object

In [13]:
type(df["email"])

pandas.core.series.Series

In [14]:
# A series is basically a list of data but just like with a dataframe, 
# it has more functionality than just that. You can think of a dataframe
# as being rows and columns and a series as being rows of a single column.
# A dataframe is basically a container for multiple of these series object.

In [15]:
# Accessing a single column of a datafame is like accessing a key 
# in a dictionary, and we can also use dot notation

df.email

0    CoreyMSchafer@gmail.com
1          JaneDoe@email.com
2          JohnDoe@email.com
Name: email, dtype: object

In [16]:
# In order to access multuple columns, we can use the bracket notation
# and pass in the list of the column that we want
# Note: That a Series is basically a single columns of rows

df[["last", "email"]]

Unnamed: 0,last,email
0,Schafer,CoreyMSchafer@gmail.com
1,Doe,JaneDoe@email.com
2,Doe,JohnDoe@email.com


In [17]:
# If you have a lot of columns and you want to see them easily

df.columns

Index(['first', 'last', 'email'], dtype='object')

In [18]:
# In order to get rows...
# loc: allows us to access rows by label
# iloc: allows us to access rows by integer location and it uses 
# integers only

# To get the first row
df.iloc[0]

first                      Corey
last                     Schafer
email    CoreyMSchafer@gmail.com
Name: 0, dtype: object

In [19]:
# We can select multiple rows by passing in a list of integers

# If we want the first and second row
df.iloc[[0, 1]]

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com


In [20]:
# If we want to grab the email address of the first two rows 
# then we can grab the column at index 2

df.iloc[[0, 1], 2]

0    CoreyMSchafer@gmail.com
1          JaneDoe@email.com
Name: email, dtype: object

In [21]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [22]:
# 0 is the label for the first row, so accessing it will be:

df.loc[0]

first                      Corey
last                     Schafer
email    CoreyMSchafer@gmail.com
Name: 0, dtype: object

In [23]:
df.loc[[0, 1]]

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com


In [24]:
# with loc we use values-strings instead of integers to pass 
# in a second value as an indexer

df.loc[[0, 1], "email"]

0    CoreyMSchafer@gmail.com
1          JaneDoe@email.com
Name: email, dtype: object

In [25]:
# We can also pass in a list for the columns as well, so if we want 
# the last name and email for these rows...

df.loc[[0, 1], ["email", "last"]]

Unnamed: 0,email,last
0,CoreyMSchafer@gmail.com,Schafer
1,JaneDoe@email.com,Doe


In [26]:
# INDEXES

In [27]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [28]:
df["email"]

0    CoreyMSchafer@gmail.com
1          JaneDoe@email.com
2          JohnDoe@email.com
Name: email, dtype: object

In [29]:
# to set email addresses as the index for this dataframe temporarily

df.set_index("email")

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
CoreyMSchafer@gmail.com,Corey,Schafer
JaneDoe@email.com,Jane,Doe
JohnDoe@email.com,John,Doe


In [30]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [32]:
# to set email addresses as the index for this dataframe permanently

df.set_index("email", inplace=True)

In [33]:
df

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
CoreyMSchafer@gmail.com,Corey,Schafer
JaneDoe@email.com,Jane,Doe
JohnDoe@email.com,John,Doe


In [34]:
df.index

Index(['CoreyMSchafer@gmail.com', 'JaneDoe@email.com', 'JohnDoe@email.com'], dtype='object', name='email')

In [36]:
# the email address as an index gives a nice unique identifier
# for our row 

# Now the 'loc' label is not an index but an email
df.loc["CoreyMSchafer@gmail.com"]

first      Corey
last     Schafer
Name: CoreyMSchafer@gmail.com, dtype: object

In [37]:
# we can still pass in values for the specific columns as well

df.loc["CoreyMSchafer@gmail.com", "last"]

'Schafer'

In [39]:
df.iloc[0]

first      Corey
last     Schafer
Name: CoreyMSchafer@gmail.com, dtype: object

In [69]:
# To reset the index

df.reset_index(inplace=True)
df

Unnamed: 0,index,first,last,email
0,0,CoreyMSchafer@gmail.com,Corey,Schafer
1,1,JaneDoe@email.com,Jane,Doe
2,2,JohnDoe@email.com,John,Doe


In [41]:
# PART 4: FILTER ROWS & COLUMNS

In [42]:
df['last'] == "Doe"

0    False
1     True
2     True
Name: last, dtype: bool

In [43]:
# putting parenthesis doesn't change anything

filt = (df["last"] == "Doe")

In [44]:
# applying the filter to the dataframe

df[filt]

Unnamed: 0,email,first,last
1,JaneDoe@email.com,Jane,Doe
2,JohnDoe@email.com,John,Doe


In [45]:
# We can get the same results by doing

df.loc[filt]

Unnamed: 0,email,first,last
1,JaneDoe@email.com,Jane,Doe
2,JohnDoe@email.com,John,Doe


In [46]:
# '.loc[]' doesn't just look up rows and columns by label,
# if you pass in a series of booleans, like we did above,
# it can also filter data out and it's very good cause we 
# can still grab specific columns that we want as well

# If we want the email column...
df.loc[filt, "email"]

1    JaneDoe@email.com
2    JohnDoe@email.com
Name: email, dtype: object

In [47]:
# Symbols for pandas filters '&' and '|', they are using 
# JavaScript conventions o

filt = (df["last"] == "Doe") & (df["first"]  == "John")

In [49]:
df.loc[filt, "email"]

2    JohnDoe@email.com
Name: email, dtype: object

In [52]:
filt = (df["last"] == "Schafer") | (df["first"]  == "John")

In [53]:
df.loc[filt, "email"]

0    CoreyMSchafer@gmail.com
2          JohnDoe@email.com
Name: email, dtype: object

In [54]:
# Get the complete opposite of the filter where we want all 
# of the rows where the last name isn't Schafer and the first
# name isn't John

df.loc[-filt, "email"]

1    JaneDoe@email.com
Name: email, dtype: object

In [55]:
# PART 5: UPDATE DATA WITHIN ROWS AND COLUMNS

In [56]:
df.columns

Index(['email', 'first', 'last'], dtype='object')

In [57]:
# Rename all columns by using an assignment

df.columns = ["first_name", "last_name", "email"]

In [58]:
df

Unnamed: 0,first_name,last_name,email
0,CoreyMSchafer@gmail.com,Corey,Schafer
1,JaneDoe@email.com,Jane,Doe
2,JohnDoe@email.com,John,Doe


In [59]:
# Uppercase all of the column names by using list comprehension

df.columns = [x.upper() for x in df.columns]
df

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL
0,CoreyMSchafer@gmail.com,Corey,Schafer
1,JaneDoe@email.com,Jane,Doe
2,JohnDoe@email.com,John,Doe


In [60]:
df.columns = df.columns.str.replace("_", " ")
df

Unnamed: 0,FIRST NAME,LAST NAME,EMAIL
0,CoreyMSchafer@gmail.com,Corey,Schafer
1,JaneDoe@email.com,Jane,Doe
2,JohnDoe@email.com,John,Doe


In [61]:
df.columns = df.columns.str.replace(" ", "_")
df

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL
0,CoreyMSchafer@gmail.com,Corey,Schafer
1,JaneDoe@email.com,Jane,Doe
2,JohnDoe@email.com,John,Doe


In [63]:
df.columns = [x.lower() for x in df.columns]
df

Unnamed: 0,first_name,last_name,email
0,CoreyMSchafer@gmail.com,Corey,Schafer
1,JaneDoe@email.com,Jane,Doe
2,JohnDoe@email.com,John,Doe


In [64]:
# renaming some columns
# The key is going to be the old value of the column while the 
# value is going to be the new value of the column

df.rename(columns={"first_name": "first", "last_name": "last"})

Unnamed: 0,first,last,email
0,CoreyMSchafer@gmail.com,Corey,Schafer
1,JaneDoe@email.com,Jane,Doe
2,JohnDoe@email.com,John,Doe


In [66]:
# to make the changes go through

df.rename(columns={"first_name": "first", "last_name": "last"}, inplace=True)

In [67]:
df

Unnamed: 0,first,last,email
0,CoreyMSchafer@gmail.com,Corey,Schafer
1,JaneDoe@email.com,Jane,Doe
2,JohnDoe@email.com,John,Doe


In [70]:
# how to update a single value in a row

df.loc[2]

index                    2
first    JohnDoe@email.com
last                  John
email                  Doe
Name: 2, dtype: object

In [71]:
df

Unnamed: 0,index,first,last,email
0,0,CoreyMSchafer@gmail.com,Corey,Schafer
1,1,JaneDoe@email.com,Jane,Doe
2,2,JohnDoe@email.com,John,Doe


In [72]:
df.columns

Index(['index', 'first', 'last', 'email'], dtype='object')

In [73]:
df.drop('index', axis=1, inplace=True)

In [74]:
df

Unnamed: 0,first,last,email
0,CoreyMSchafer@gmail.com,Corey,Schafer
1,JaneDoe@email.com,Jane,Doe
2,JohnDoe@email.com,John,Doe


In [75]:
df.columns = ["email", "first", "last"]
df

Unnamed: 0,email,first,last
0,CoreyMSchafer@gmail.com,Corey,Schafer
1,JaneDoe@email.com,Jane,Doe
2,JohnDoe@email.com,John,Doe


In [77]:
df.loc[2] = ["JohnSmith@email.com", "John", "Smith"]


In [78]:
df

Unnamed: 0,email,first,last
0,CoreyMSchafer@gmail.com,Corey,Schafer
1,JaneDoe@email.com,Jane,Doe
2,JohnSmith@email.com,John,Smith


In [79]:
# If we had a lot of columns but we wanted to change just 
# a couple of values 

df.loc[2, ["last", "email"]] = ["Doe", "JohnDoe@email.com"]

In [80]:
df

Unnamed: 0,email,first,last
0,CoreyMSchafer@gmail.com,Corey,Schafer
1,JaneDoe@email.com,Jane,Doe
2,JohnDoe@email.com,John,Doe


In [81]:
df.loc[2, "last"] = "Smith"

In [82]:
df

Unnamed: 0,email,first,last
0,CoreyMSchafer@gmail.com,Corey,Schafer
1,JaneDoe@email.com,Jane,Doe
2,JohnDoe@email.com,John,Smith


In [84]:
# Pandas has an indexer called 'at', and this is specifically meant
# for changing or looking up a single value but '.loc[]' can be used
# most of the time

df.at[2, "last"] = "Doe"
df

Unnamed: 0,email,first,last
0,CoreyMSchafer@gmail.com,Corey,Schafer
1,JaneDoe@email.com,Jane,Doe
2,JohnDoe@email.com,John,Doe


In [89]:
# an error that can occur when changing values wrongly

filt = (df["email"] == "JohnDoe@email.com")
df[filt]["last"] = "Smith"

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


In [102]:
# the right way to change values 

filt = (df["email"] == "JohnDoe@email.com")
df.loc[filt, "last"] = "Smith"

df

Unnamed: 0,email,first,last
0,coreymschafer@gmail.com,Corey,Schafer
1,janedoe@gmail.com,Jane,Doe
2,johndoe@gmail.com,John,Smith


In [103]:
# Updating multiple rows of data at once, maybe change all email addresses
# to lower case

df["email"] = df["email"].str.lower()
df

Unnamed: 0,email,first,last
0,coreymschafer@gmail.com,Corey,Schafer
1,janedoe@gmail.com,Jane,Doe
2,johndoe@gmail.com,John,Smith


In [100]:
df.loc[2, "email"] = "JohnDoe@gmail.com"
df

Unnamed: 0,email,first,last
0,CoreyMSchafer@gmail.com,Corey,Schafer
1,JaneDoe@gmail.com,Jane,Doe
2,JohnDoe@gmail.com,John,Smith


In [104]:
# 4 advanced way of updating multiple rows of data
# apply
# map
# applymap
# replace 

In [105]:
# apply -> can apply a function to every value in our series
# say we want to see the length of our email addresses cause 
# we have a website and we want to make sure none of the 
# email addresses are too long 

df["email"].apply(len)

0    23
1    17
2    17
Name: email, dtype: int64

In [106]:
# we can also use the 'apply' method to update values as well
# e.g create a return function that returns the uppercase value 
# of our email

def update_email(email):
    return email.upper()

In [107]:
df["email"].apply(update_email)

0    COREYMSCHAFER@GMAIL.COM
1          JANEDOE@GMAIL.COM
2          JOHNDOE@GMAIL.COM
Name: email, dtype: object

In [108]:
df["email"] = df["email"].apply(update_email)
df

Unnamed: 0,email,first,last
0,COREYMSCHAFER@GMAIL.COM,Corey,Schafer
1,JANEDOE@GMAIL.COM,Jane,Doe
2,JOHNDOE@GMAIL.COM,John,Smith


In [109]:
df["email"] = df["email"].apply(lambda x: x.lower())
df

Unnamed: 0,email,first,last
0,coreymschafer@gmail.com,Corey,Schafer
1,janedoe@gmail.com,Jane,Doe
2,johndoe@gmail.com,John,Smith


In [110]:
# run 'apply' on a dataframe runs a function on each row and 
# column of the dataframe

df["email"].apply(len)

0    23
1    17
2    17
Name: email, dtype: int64

In [111]:
df.apply(len)

email    3
first    3
last     3
dtype: int64

In [114]:
len(df['email'])

3

In [116]:
df.apply(len, axis="columns")

0    3
1    3
2    3
dtype: int64

In [117]:
# to see the minimum value alphabetically in each series

df.apply(pd.Series.min)

email    coreymschafer@gmail.com
first                      Corey
last                         Doe
dtype: object

In [118]:
# to see the minimum value alphabetically in each series using lambda

df.apply(lambda x: x.min())

email    coreymschafer@gmail.com
first                      Corey
last                         Doe
dtype: object

In [119]:
# running apply on a series runs applies a function to every value
# in the series and running apply to a dataframe applies a function
# to every series in the dataframe

In [120]:
# we use the 'applymap' to apply a function to every individual
# element in the dataframe. It only works on dataframes, series
# objects don't have the applymap method 

# for example
df.applymap(len)

Unnamed: 0,email,first,last
0,23,5,7
1,17,4,3
2,17,4,5


In [123]:
df.applymap(str.title)

Unnamed: 0,email,first,last
0,Coreymschafer@Gmail.Com,Corey,Schafer
1,Janedoe@Gmail.Com,Jane,Doe
2,Johndoe@Gmail.Com,John,Smith


In [124]:
df.applymap(str.lower)

Unnamed: 0,email,first,last
0,coreymschafer@gmail.com,corey,schafer
1,janedoe@gmail.com,jane,doe
2,johndoe@gmail.com,john,smith


In [127]:
# the map method only works for series and it's used for substituting
# each value in a series with another value 

# say we want to substitute a couple of our first names
# map converts numbers that are not substituted automatically 
# to NaN but the replace method doesn't

df["first"].map({"Corey": "Chris", "Jane": "Mary"})

0    Chris
1     Mary
2      NaN
Name: first, dtype: object

In [128]:
df["first"].replace({"Corey": "Chris", "Jane": "Mary"})

0    Chris
1     Mary
2     John
Name: first, dtype: object

In [129]:
df["first"] = df["first"].replace({"Corey": "Chris", "Jane": "Mary"})
df

Unnamed: 0,email,first,last
0,coreymschafer@gmail.com,Chris,Schafer
1,janedoe@gmail.com,Mary,Doe
2,johndoe@gmail.com,John,Smith


In [130]:
# ADD AND REMOVE COLUMNS


In [132]:
df = pd.DataFrame(people)
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [134]:
df["first"] + " " + df["last"]

0    Corey Schafer
1         Jane Doe
2         John Doe
dtype: object

In [135]:
# adding a column

df["full_name"] = df["first"] + " " + df["last"]
df

Unnamed: 0,first,last,email,full_name
0,Corey,Schafer,CoreyMSchafer@gmail.com,Corey Schafer
1,Jane,Doe,JaneDoe@email.com,Jane Doe
2,John,Doe,JohnDoe@email.com,John Doe


In [136]:
# removing a column

df.drop(columns=["first", "last"])

Unnamed: 0,email,full_name
0,CoreyMSchafer@gmail.com,Corey Schafer
1,JaneDoe@email.com,Jane Doe
2,JohnDoe@email.com,John Doe


In [137]:
# if we are happy with the changes, we can set the inplace to true

df.drop(columns=["first", "last"], inplace=True)

In [138]:
df

Unnamed: 0,email,full_name
0,CoreyMSchafer@gmail.com,Corey Schafer
1,JaneDoe@email.com,Jane Doe
2,JohnDoe@email.com,John Doe


In [139]:
# spliting the full name column into two different columns

df["full_name"].str.split(" ")

0    [Corey, Schafer]
1         [Jane, Doe]
2         [John, Doe]
Name: full_name, dtype: object

In [140]:
df["full_name"].str.split(" ", expand=True)

Unnamed: 0,0,1
0,Corey,Schafer
1,Jane,Doe
2,John,Doe


In [143]:
df[["first", "last"]] = df["full_name"].str.split(" ", expand=True)
df

Unnamed: 0,email,full_name,first,last
0,CoreyMSchafer@gmail.com,Corey Schafer,Corey,Schafer
1,JaneDoe@email.com,Jane Doe,Jane,Doe
2,JohnDoe@email.com,John Doe,John,Doe


In [144]:
# Adding and removing rows of data

# First, We might just want to add a single row to our dataframe 
# of new data and 

# Second, maybe we want to combine two dataframes together into 
# a single dataframe by appending the rows of one into another

In [145]:
# Adding a single row of data using the append method

df.append({"first": "Tony"}, ignore_index=True)

Unnamed: 0,email,full_name,first,last
0,CoreyMSchafer@gmail.com,Corey Schafer,Corey,Schafer
1,JaneDoe@email.com,Jane Doe,Jane,Doe
2,JohnDoe@email.com,John Doe,John,Doe
3,,,Tony,


In [149]:
people = {
    "first": ["Tony", "Steve"],
    "last": ["Stark", "Rogers"],
    "email": ["IronMan@avenge.com", "Cap@avenge.com"]
}

df2 = pd.DataFrame(people)

df2

Unnamed: 0,first,last,email
0,Tony,Stark,IronMan@avenge.com
1,Steve,Rogers,Cap@avenge.com


In [150]:
df.append(df2, ignore_index=True)

Unnamed: 0,email,full_name,first,last
0,CoreyMSchafer@gmail.com,Corey Schafer,Corey,Schafer
1,JaneDoe@email.com,Jane Doe,Jane,Doe
2,JohnDoe@email.com,John Doe,John,Doe
3,IronMan@avenge.com,,Tony,Stark
4,Cap@avenge.com,,Steve,Rogers


In [151]:
# 'sort=False' was for the previous version of pandas, but in 
# the recent version of pandas, it wasn't needed anymore, that
# was why the code before this one ran smoothly

df.append(df2, ignore_index=True, sort=False)

Unnamed: 0,email,full_name,first,last
0,CoreyMSchafer@gmail.com,Corey Schafer,Corey,Schafer
1,JaneDoe@email.com,Jane Doe,Jane,Doe
2,JohnDoe@email.com,John Doe,John,Doe
3,IronMan@avenge.com,,Tony,Stark
4,Cap@avenge.com,,Steve,Rogers


In [153]:
df = df.append(df2, ignore_index=True, sort=False)

In [154]:
df.loc[4]

email        Cap@avenge.com
full_name               NaN
first                 Steve
last                 Rogers
Name: 4, dtype: object

In [155]:
df.loc[4, "full_name"] = "Steve Rogers"

In [156]:
df

Unnamed: 0,email,full_name,first,last
0,CoreyMSchafer@gmail.com,Corey Schafer,Corey,Schafer
1,JaneDoe@email.com,Jane Doe,Jane,Doe
2,JohnDoe@email.com,John Doe,John,Doe
3,IronMan@avenge.com,,Tony,Stark
4,Cap@avenge.com,Steve Rogers,Steve,Rogers


In [157]:
# removing the rows by dropping the index, we'd need to set the
# inplace to true to make it permanent

df.drop(index=4)

Unnamed: 0,email,full_name,first,last
0,CoreyMSchafer@gmail.com,Corey Schafer,Corey,Schafer
1,JaneDoe@email.com,Jane Doe,Jane,Doe
2,JohnDoe@email.com,John Doe,John,Doe
3,IronMan@avenge.com,,Tony,Stark


In [158]:
# drop all of the rows where the last name is equal to Doe

filt = df["last"] == "Doe"
df.drop(index=df[filt].index)

Unnamed: 0,email,full_name,first,last
0,CoreyMSchafer@gmail.com,Corey Schafer,Corey,Schafer
3,IronMan@avenge.com,,Tony,Stark
4,Cap@avenge.com,Steve Rogers,Steve,Rogers


In [159]:
# PART 7: SORTING DATA

In [177]:
people = {
    "first": ["Corey", "Jane", "John", "Adam"],
    "last": ["Schafer", "Doe", "Doe", "Doe"],
    "email": ["CoreyMSchafer@gmail.com", "JaneDoe@email.com", "JohnDoe@email.com", "A@email.com"]
}


In [178]:
import pandas as pd

In [179]:
df = pd.DataFrame(people)

In [180]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com
3,Adam,Doe,A@email.com


In [181]:
# sorting the dataframe by its last name

df.sort_values(by="last")

Unnamed: 0,first,last,email
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com
3,Adam,Doe,A@email.com
0,Corey,Schafer,CoreyMSchafer@gmail.com


In [182]:
# sorting in descending order

df.sort_values(by="last", ascending=False)

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com
3,Adam,Doe,A@email.com


In [186]:
# sometimes sorting might be complicated, maybe you
# want to sort on multiple columns, now you do this
# when the first value that you sort on has identical
# values and then you want to sort on a second value,
# in order to do this, we can just pass in a list for
# these columns that we want to sort on



# sort the last name first, then sort the first name

# after the last name has been sorted, what will need
# sorting next will be if there are same names as last
# names, so that's what will end up being sorted

df.sort_values(by=["last", "first"], ascending=False)

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
2,John,Doe,JohnDoe@email.com
1,Jane,Doe,JaneDoe@email.com
3,Adam,Doe,A@email.com


In [188]:
# if we want to sort the dataframe by last name in descending order,
# then we want the first names to be in ascending order

df.sort_values(by=["last", "first"], ascending=[False, True], inplace=True)

In [189]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
3,Adam,Doe,A@email.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [190]:
# if we want to set it back to have our index reordered/resorted

df.sort_index()

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com
3,Adam,Doe,A@email.com


In [191]:
# if we want to see the sorted last names and not the entire dataframes

df["last"].sort_values()

3        Doe
1        Doe
2        Doe
0    Schafer
Name: last, dtype: object