In [None]:
import pandas as pd

In [None]:
bond = pd.read_csv("jamesbond.csv")
bond.head()

# The `set_index()` and `reset_index()` methods

In [None]:
bond.set_index(keys="Film", inplace=True)
bond.head()

In [None]:
bond.reset_index(inplace=True)
bond.head()

In [None]:
bond.set_index("Film", inplace=True)
bond.head()

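To switch to a different index, first call `reset_index()` so that the current index (`Film`) goes back to being a regular column; otherwise `set_index()` would replace it and the `Film` values would be discarded.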

In [None]:
bond.reset_index(inplace=True)
bond.set_index("Year", inplace=True)
bond.head()

# Retrieve Rows by Index Label with the `.loc[]` Accessor

In [None]:
# use film as index
bond = pd.read_csv("jamesbond.csv", index_col="Film")
# sort index for optimization
bond.sort_index(inplace=True)
bond.head()

In [None]:
bond.loc["Goldfinger"]

In [None]:
bond.loc["Casino Royale"]

In [None]:
bond.loc["Diamonds Are Forever":"From Russia with Love"]

In [None]:
# in steps of two
bond.loc["Diamonds Are Forever":"From Russia with Love":2]

In [None]:
bond.loc["GoldenEye":]

In [None]:
bond.loc[["Die Another Day", "Octopussy"]]

In [None]:
# Membership test against the index labels; "Gold Bond" is presumably not a
# film in the dataset, so this should display False.
"Gold Bond" in bond.index
# Left commented out on purpose: in current pandas, passing .loc a list that
# contains a missing label raises KeyError. Uncomment to see the error.
# bond.loc[["Die Another Day", "Octopussy", "Gold Bond"]]

# Retrieve Rows by Index Position with the `.iloc[]` Accessor

In [None]:
bond = pd.read_csv("jamesbond.csv")
bond.head()

In [None]:
bond.iloc[1]

In [None]:
bond.iloc[1:5]  # the upper bound (position 5) is not included

In [None]:
bond.iloc[[1, 3]]

In [None]:
bond.set_index("Film", inplace=True)
bond.sort_index(inplace=True)
bond.head()

In [None]:
bond.loc["Casino Royale"]

In [None]:
bond.iloc[0]

# Second Arguments to the `.loc[]` and `.iloc[]` Accessors

The first argument represents the rows whereas the second argument represents the columns.

In [None]:
bond = pd.read_csv("jamesbond.csv", index_col="Film")
bond.sort_index(inplace=True)
bond.head()

In [None]:
bond.loc["Moonraker", "Actor"]

In [None]:
bond.loc["Casino Royale", "Director"]

In [None]:
bond.loc["Goldfinger", ["Actor", "Director"]]

In [None]:
bond.loc[["Moonraker", "A View to a Kill"], ["Actor", "Director"]]

In [None]:
bond.loc["Moonraker", "Director":"Budget"]

In [None]:
bond.loc["Moonraker":"Thunderball", "Director":"Budget"]

In [None]:
bond.loc["Moonraker":, "Director":]

In [None]:
bond.head()

In [None]:
bond.iloc[0, 2]

In [None]:
bond.iloc[1, 0:3]

# Retrieve Rows by Index Label with the `.loc[]` Accessor

In [None]:
# use film as index
bond = pd.read_csv("jamesbond.csv", index_col="Film")
# sort index for optimization
bond.sort_index(inplace=True)
bond.head()

In [None]:
bond.loc["Goldfinger"]

In [None]:
bond.loc["Casino Royale"]

In [None]:
bond.loc["Diamonds Are Forever":"From Russia with Love"]

In [None]:
# in steps of two
bond.loc["Diamonds Are Forever":"From Russia with Love":2]

In [None]:
bond.loc["GoldenEye":]

In [None]:
bond.loc[["Die Another Day", "Octopussy"]]

In [None]:
# Show both membership checks as a tuple: a cell only displays its final
# expression, so two bare expressions on separate lines would silently drop
# the first result.
"Octopussy" in bond.index, "Gold Bond" in bond.index
# Left commented out on purpose: in current pandas, passing .loc a list that
# contains a missing label raises KeyError. Uncomment to see the error.
# bond.loc[["Die Another Day", "Octopussy", "Gold Bond"]]

# Retrieve Rows by Index Position with the `.iloc[]` Accessor

In [None]:
bond = pd.read_csv("jamesbond.csv")
bond.head()

In [None]:
bond.iloc[1]

In [None]:
bond.iloc[1:5]  # the upper bound (position 5) is not included

In [None]:
bond.iloc[[1, 3]]

In [None]:
bond.set_index("Film", inplace=True)
bond.sort_index(inplace=True)
bond.head()

In [None]:
bond.loc["Casino Royale"]

In [None]:
bond.iloc[0]

# Second Arguments to the `.loc[]` and `.iloc[]` Accessors

The first argument represents the rows whereas the second argument represents the columns.

In [None]:
bond = pd.read_csv("jamesbond.csv", index_col="Film")
bond.sort_index(inplace=True)
bond.head()

In [None]:
bond.loc["Moonraker", "Actor"]

In [None]:
bond.loc["Casino Royale", "Director"]

In [None]:
bond.loc["Goldfinger", ["Actor", "Director"]]

In [None]:
bond.loc[["Moonraker", "A View to a Kill"], ["Actor", "Director"]]

In [None]:
bond.loc["Moonraker", "Director":"Budget"]

In [None]:
bond.loc["Moonraker":"Thunderball", "Director":"Budget"]

In [None]:
bond.loc["Moonraker":, "Director":]

In [None]:
bond.head()

In [None]:
bond.iloc[0, 2]

In [None]:
bond.iloc[1, 0:3]

# Set New Value for a Specific Cell

In [None]:
bond = pd.read_csv("jamesbond.csv", index_col="Film")
bond.sort_index(inplace=True)
bond.head()

In [None]:
bond.loc["Dr. No", "Actor"] = "Sir Sean Connery"

In [None]:
bond.loc["Dr. No"]

In [None]:
bond.loc["Dr. No", ["Box Office", "Budget", "Bond Actor Salary"]] = [
    448800000,
    7000000,
    600000,
]

In [None]:
bond.loc["Dr. No", ["Box Office", "Budget", "Bond Actor Salary"]]

# Set Multiple Values in DataFrame

In [None]:
bond = pd.read_csv("jamesbond.csv", index_col="Film")
bond.sort_index(inplace=True)
bond.head()

In [None]:
# I want to change Sean Connery to Sir Sean Connery
actor_is_sean_connery = bond["Actor"] == "Sean Connery"

In [None]:
bond.loc[actor_is_sean_connery, "Actor"] = "Sir Sean Connery"

In [None]:
bond

# Rename Index Labels or Column Names in a `DataFrame`

In [None]:
bond = pd.read_csv("jamesbond.csv", index_col="Film")
bond.sort_index(inplace=True)

In [None]:
bond.rename(index={"GoldenEye": "Golden Eye"})

In [None]:
bond.rename(columns={"Year": "Release Date", "Box Office": "Revenue"})

In [None]:
bond.columns

In [None]:
bond.columns = [
    "Release Date",
    "Actor",
    "Director",
    "Revenue",
    "Budget",
    "Bond Actor Salary",
]

In [None]:
bond.head()

# Delete Rows or Columns from a `DataFrame`

## `drop()` method
### Removing Rows

In [None]:
bond = pd.read_csv("jamesbond.csv", index_col="Film")
bond.sort_index(inplace=True)
bond.head()

In [None]:
bond.drop("A View to a Kill")

In [None]:
bond.drop(["A View to a Kill", "Diamonds Are Forever", "Die Another Day"])

In [None]:
bond.drop("Casino Royale")

### Removing columns

In [None]:
bond.drop(["Box Office", "Budget"], axis=1)  # or axis="columns"

## The `pop()` method

In [None]:
# pop() removes the "Actor" column from bond in place and returns it as a
# Series. Re-running this cell without reloading bond fails, because the
# column is already gone.
actor = bond.pop("Actor")
actor

In [None]:
bond = pd.read_csv("jamesbond.csv", index_col="Film")
bond.sort_index(inplace=True)
bond.head()

In [None]:
del bond["Actor"]  # delete the "Actor" column

In [None]:
bond.head()

# Create a Random Sample with the `.sample()` Method

In [None]:
# Draw three random rows. The fixed random_state keeps the output reproducible
# across kernel restarts; remove it to get a different sample on every run.
bond.sample(n=3, axis=0, random_state=42)

In [None]:
# Draw three random columns. The fixed random_state keeps the output
# reproducible across kernel restarts; remove it to get a different sample on
# every run.
bond.sample(n=3, axis=1, random_state=42)

# The `.nlargest()` and `.nsmallest()` Methods

In [None]:
bond.head()

In [None]:
# extract the three movies with the highest box office
bond.nlargest(n=3, columns="Box Office")

In [None]:
# extract the three movies with the lowest box office
bond.nsmallest(n=3, columns="Box Office")

In [None]:
# another way
bond["Box Office"].nlargest(3)

# Filtering with the `where()` method

In [None]:
bond = pd.read_csv("jamesbond.csv", index_col="Film")
bond.sort_index(inplace=True)

In [None]:
mask1 = bond["Actor"] == "Sean Connery"
bond[mask1]

In [None]:
bond.where(mask1)

In [None]:
mask2 = bond["Box Office"] > 800
bond.where(mask2)

In [None]:
bond.where(mask1 & mask2)

# The `query()` method

In [None]:
bond.head()

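Unquoted column names in a `query()` expression cannot contain spaces (a name with spaces has to be wrapped in backticks inside the query string, e.g. `` `Box Office` > 800 ``). To keep the expressions simple, we first replace the spaces in the column names with underscores.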

In [None]:
# Swap every space in the column labels for an underscore so the names can be
# written directly inside query strings.
bond.columns = list(map(lambda label: label.replace(" ", "_"), bond.columns))
bond.head()

In [None]:
bond.query('Actor == "Sean Connery"')

In [None]:
bond.query('Director == "Terence Young"')

In [None]:
bond.query('Actor != "Sean Connery"')

In [None]:
bond.query("Box_Office > 800")

In [None]:
bond.query("Actor == 'Daniel Craig' and Director == 'Martin Campbell'")

In [None]:
bond.query("Actor == 'Daniel Craig' or Director == 'Martin Campbell'")

In [None]:
bond.query("Actor in ['Timothy Dalton', 'Daniel Craig', 'George Lazenby']")

In [None]:
bond.query("Actor not in ['Timothy Dalton', 'Daniel Craig', 'George Lazenby']")

# A Review of the `apply()` Method on Single Columns

In [None]:
bond.head(3)

In [None]:
def convert_to_string_and_add_millions(num):
    """Return the text form of ``num`` followed by the suffix " MILLIONS!".

    Parameters
    ----------
    num : object
        Any value; it is converted with ``str()``.

    Returns
    -------
    str
        ``str(num)`` and the literal " MILLIONS!" joined together.
    """
    return str(num) + " MILLIONS!"

In [None]:
bond["Box_Office"].apply(convert_to_string_and_add_millions)

In [None]:
# Columns that get the " MILLIONS!" suffix applied.
columns = ["Box_Office", "Budget", "Bond_Actor_Salary"]

# print() is needed here: a cell only auto-displays its last expression, and a
# for loop is a statement, so nothing would be shown otherwise.
for column in columns:
    print(bond[column].apply(convert_to_string_and_add_millions))

# Apply a Function to Every DataFrame Row with the `apply` Method

In [None]:
bond = pd.read_csv("jamesbond.csv", index_col="Film")
bond.sort_index(inplace=True)
bond.head()

In [None]:
def good_movie(row):
    """Return a short text review for a single film row.

    Parameters
    ----------
    row : pandas.Series or mapping
        One film record exposing the "Actor" and "Budget" labels, as produced
        by ``DataFrame.apply(..., axis="columns")``.

    Returns
    -------
    str
        "The best" when the actor is Daniel Craig; "Enjoyable" when the actor
        is Sean Connery and the budget is at least 40; otherwise
        "I have no clue".
    """
    # Look values up by column label, not by position: integer keys on a
    # label-indexed Series are deprecated in recent pandas, and labels keep
    # working if the column order ever changes.
    actor = row["Actor"]
    budget = row["Budget"]

    if actor == "Daniel Craig":
        return "The best"
    if actor == "Sean Connery" and budget >= 40:
        return "Enjoyable"
    return "I have no clue"

In [None]:
bond["Review"] = bond.apply(good_movie, axis="columns")

In [None]:
bond

# The `copy()` Method

In [None]:
bond = pd.read_csv("jamesbond.csv", index_col="Film")
bond.sort_index(inplace=True)
bond.head()

In [None]:
directors = bond["Director"].copy()
directors.head(3)

In [None]:
directors["A View to a Kill"] = "Mister John Glen"

In [None]:
directors.head()

In [None]:
bond.head()