In [None]:
import pandas as pd

In [None]:
# Load the NBA roster from the CSV file in the working directory.
nba = pd.read_csv("nba.csv")

In [None]:
# First 10 rows instead of the default 5.
nba.head(10)

In [None]:
# Last 5 rows (the default).
nba.tail()

In [None]:
# Row labels of the DataFrame.
nba.index

In [None]:
# Underlying data as a NumPy array.
nba.values

In [None]:
# Tuple of (number of rows, number of columns).
nba.shape

In [None]:
# Data type of every column, returned as a Series.
nba.dtypes

In [None]:
nba.dtypes.value_counts()  # count how many columns share each data type

# Dataframe exclusive attributes

In [None]:
# Column labels of the DataFrame.
nba.columns

In [None]:
# List holding the row index followed by the column index.
nba.axes

In [None]:
# Summary of the frame: index range, per-column non-null counts and dtypes, memory usage.
nba.info()

## Differences between shared methods

In [None]:
# Use the "Date" column as the row index instead of the default integer range.
rev = pd.read_csv("revenue.csv", index_col="Date")
rev.head(3)

In [None]:
# A small Series to compare against the DataFrame behaviour of the same method.
s = pd.Series([1, 2, 3, 4])
s

In [None]:
# On a Series, sum() collapses all the values into a single scalar.
s.sum()

In [None]:
# On a DataFrame, sum() needs a direction. axis=1 and axis="columns" are two
# spellings of the same thing: add across the columns, one total per row.
row_totals = rev.sum(axis="columns")
# A cell only displays its last expression, so verify the equivalence
# explicitly instead of computing one spelling and throwing it away.
assert row_totals.equals(rev.sum(axis=1))
row_totals

In [None]:
# axis=0 and axis="index" are equivalent as well: add down the rows,
# producing one total per column.
column_totals = rev.sum(axis="index")
# Verify the equivalence explicitly; only the last expression is displayed.
assert column_totals.equals(rev.sum(axis=0))
column_totals

# Select One Column from a `DataFrame`

In [None]:
# Preview the frame to recall the available column names.
nba.head()

## First option: `.` syntax

In [None]:
# Attribute access: the column label is used as if it were an attribute.
nba.Name

When we extract a single column from a `DataFrame`, we get a `Series`.

In [None]:
type(nba.Name)  # a single extracted column is a pandas Series

## Second option: *brackets* syntax
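This method is safer: it also works when a column name contains spaces or collides with an existing `DataFrame` attribute or method, where the `.` syntax fails.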

In [None]:
# Bracket access looks the column up by its label.
nba["Name"]

# Selecting two or more columns

In [None]:
# Preview the frame to recall the available column names.
nba.head()

In [None]:
nba[["Name", "Team"]]  # two pairs of square brackets: the inner pair is a list of column names

In [None]:
# The list of column names can also be stored in a variable first.
antropo = ["Age", "Height", "Weight"]
nba[antropo]

# Add a new column

In [None]:
# Assigning a scalar to a new column label broadcasts it to every row;
# the new column is appended at the right edge of the frame.
nba["Sport"] = "Basketball"
nba.head()

In [None]:
# Same technique with a second constant column.
nba["League"] = "National Basketball Association"
nba.head()

In [None]:
# Reload the file to start again from the original columns.
nba = pd.read_csv("nba.csv")
nba.head()

In [None]:
nba.insert(loc=3, column="Sports", value="Basketball")
nba.head()

In [None]:
nba.head(3)

# Broadcasting Operations

In [None]:
nba["Age"].add(5)  # add 5 to every value using the method form

In [None]:
nba["Age"] + 5  # same result using the `+` operator

In [None]:
# convert pounds to kilograms (1 lb = 0.453592 kg)
nba["Weight in Kilograms"] = nba["Weight"] * 0.453592

In [None]:
nba.head()

In [None]:
# express salary in millions
nba["Salary (millions $)"] = nba["Salary"] / 1e06

In [None]:
nba.head(3)

# Review `value_counts()`

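Here we call it on a single column (a `Series`). Since pandas 1.1 there is also a `DataFrame.value_counts()`, but it counts unique rows rather than the values of one column.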

In [None]:
# Number of rows per team, most frequent first (the default order).
nba["Team"].value_counts()

In [None]:
# ascending=True lists the least frequent position first.
nba["Position"].value_counts(ascending=True)

# Drop cells with null values

In [None]:
# Reload the raw file so the columns added earlier are gone.
nba = pd.read_csv("nba.csv")
nba.shape

In [None]:
nba.dropna(how="any").tail()  # how="any" (the default) drops every row containing at least one null value

In [None]:
nba.dropna(how="all", inplace=True)  # remove a row only if all of its columns are null; modifies nba itself

In [None]:
nba.tail()

In [None]:
nba.dropna(axis=1)  # drop every column that contains at least one null value

In [None]:
nba.dropna(
    subset=["Salary", "College"]
)  # drop a row if either its College or its Salary value is NaN

# Fill NaN values with `fillna()` method

In [None]:
# Reload again: the fillna() examples need the original NaN values.
nba = pd.read_csv("nba.csv")
nba.head()

In [None]:
# Returns a new frame with every NaN replaced by 0; nba itself is unchanged.
nba.fillna(value=0)

In [None]:
# Assign the filled column back instead of calling fillna(inplace=True) on
# nba["Salary"]: that form is chained assignment on an intermediate Series,
# which emits a FutureWarning in pandas 2.x and leaves nba unchanged once
# Copy-on-Write is active.
nba["Salary"] = nba["Salary"].fillna(value=0)
nba.head()

In [None]:
# Assign the filled column back rather than using inplace=True on the
# selected column: in-place fillna on nba["College"] is chained assignment
# and does not update nba once Copy-on-Write is active.
nba["College"] = nba["College"].fillna(value="No college")
nba.head()

# The `astype()` method

In [None]:
# Drop the rows in which every column is NaN, then fill the remaining gaps in
# Salary and College. The filled columns are assigned back explicitly:
# fillna(inplace=True) on a selected column is chained assignment and would
# leave the NaN values in place under Copy-on-Write, breaking the integer
# cast of Salary further down.
nba = pd.read_csv("nba.csv").dropna(how="all")
nba["Salary"] = nba["Salary"].fillna(value=0)
nba["College"] = nba["College"].fillna(value="None")
nba.head(6)

In [None]:
# Column dtypes before any conversion.
nba.dtypes

In [None]:
# Same information plus non-null counts and memory usage; useful as a
# baseline to compare against after the casts.
nba.info()

In [None]:
# Convert Salary to int
nba["Salary"] = nba["Salary"].astype(dtype="int", copy=True, errors="raise")

In [None]:
nba["Number"] = nba["Number"].astype(dtype="int", copy=True, errors="raise")

In [None]:
nba["Age"] = nba["Age"].astype(dtype="int", copy=True, errors="raise")

In [None]:
nba["Weight"] = nba["Weight"].astype(dtype="int", copy=True, errors="raise")

In [None]:
# Position only has 5 possible values, so it is an ideal candidate for the category dtype
nba["Position"].value_counts()

In [None]:
# Cast both columns to category; Team is presumably low-cardinality too - verify with value_counts().
nba["Position"] = nba["Position"].astype(dtype="category")
nba["Team"] = nba["Team"].astype(dtype="category")

In [None]:
# Compare dtypes and memory usage with the info() output taken before the casts.
nba.info()

# Sort a Dataframe with the `sort_values()` method - Part 1 

In [None]:
nba = pd.read_csv("nba.csv")
nba.head()

In [None]:
nba.sort_values(by="Name")  # we have to choose the column to sort values.
nba.head()
# by default sort_values places the NaN values at the very last
# this can be changed using the first parameter.

# Sort a DataFrame with the `sort_values()` method - Part 2

In [None]:
# Sort by team (ascending order), then by player name (descending order) within each team.
# The `by` and `ascending` lists are matched position by position.
nba.sort_values(by=["Team", "Name"], ascending=[True, False]).head(10)

# The `.sort_index()` method

In [None]:
# Sort in place so that the row labels end up out of their original order.
nba.sort_values(by=["Team", "Name"], ascending=[True, False], inplace=True)

In [None]:
nba.tail()  # index labels are no longer in ascending order

In [None]:
# restore the original row order by sorting on the index labels
nba.sort_index(inplace=True)
nba.head()

# Rank values with the `rank()` method

In [None]:
# Load the data and discard the rows in which every column is NaN.
nba = pd.read_csv("nba.csv").dropna(how="all")
# Replace the missing salaries with 0, then store the column as integers.
nba["Salary"] = nba["Salary"].fillna(value=0).astype("int")
nba.head()

In [None]:
nba["Salary_Rank"] = nba["Salary"].rank(ascending=False).astype("int")
nba.head()

In [None]:
nba.sort_values(by=["Salary"], ascending=False)