In [None]:
import pandas as pd

In [None]:
# Load data/nba.csv (path relative to the working directory) into the
# DataFrame `nba`, which the cells below inspect.
nba = pd.read_csv("data/nba.csv")

## Shared methods and Attributes

In [None]:
# Peek at both ends of the frame: the first 3 rows, then the last 4 rows.
# NOTE(review): with default notebook settings only the last expression of a
# cell is rendered, so head(3) is evaluated but probably not shown — confirm.
nba.head(3)
nba.tail(4)

In [None]:
# Row labels (the Index object) of the DataFrame.
nba.index

In [None]:
# The frame's data as a NumPy array (the pandas docs recommend .to_numpy()
# for new code; .values is shown here because Series has it too).
nba.values

In [None]:
# Dimensions of the frame as a (rows, columns) tuple.
nba.shape

In [None]:
# Data type of every column, returned as a new Series indexed by column name.
nba.dtypes

In [None]:
# Index object that holds the column names.
nba.columns

In [None]:
# List of the two axes that make up the DataFrame: the row index first,
# then the column index.
nba.axes

In [None]:
# Print a summary of the DataFrame: index range, each column's non-null
# count and dtype, and memory usage.
nba.info()

In [None]:
# Number of columns per data type.
# DataFrame.get_dtype_counts() was deprecated in pandas 0.25 and removed in
# 1.0 (it now raises AttributeError); counting the values of .dtypes gives the
# same per-dtype tally and works on every pandas version.
nba.dtypes.value_counts()

## Difference between shared methods

In [None]:
# Load data/revenue.csv, using its "Date" column as the row index, and show
# the first rows.
rev = pd.read_csv("data/revenue.csv", index_col = "Date")
rev.head()

In [None]:
# With no arguments, sum() totals each column and returns the totals as a
# new Series indexed by column name.
rev.sum()

# A DataFrame can be summed in two directions, row-wise or column-wise; a Series has no such choice

In [None]:
# axis=0 (or axis="index"): move down the rows, producing one total per column.
rev.sum(axis=0)

In [None]:
# axis=1 (or axis="columns"): move across the columns, producing one total
# per row.
rev.sum(axis=1)

## Select one column from DataFrame

In [None]:
# Dot (attribute) notation: only usable when the column name is a valid
# Python identifier and does not clash with an existing DataFrame attribute.

# The selected column comes back as a Series.
nba.Name.head()

In [None]:
# Bracket notation: works for any column name, including names with spaces.
nba["Salary"].tail()

## Selecting Two or more Columns from DataFrame

In [None]:
# Passing a list of column names returns a new DataFrame with those columns.

nba[["Name", "Salary"]].head()

In [None]:
# The list of names can be built first and passed in; the result's columns
# follow the order of the list, not the order in the original frame.
column_name = ['Salary', 'Team', 'Name']
nba[column_name].head(3)

## Adding new column to DataFrame

In [None]:
# Assigning a scalar to a new column name appends that column at the end,
# with the value 'BasketBall' repeated in every row.
nba["Sport"] = "BasketBall"
nba.head(3)

In [None]:
# Same scalar-assignment pattern for a second new column; tail() shows the
# last rows of the frame with it.
nba["League"] = "National BasketBall Association"
nba.tail()

In [None]:
# DataFrame.insert puts a new column at a chosen position rather than at the end.
# Reload first so the frame no longer carries the columns added by assignment.
nba = pd.read_csv("data/nba.csv")

# loc is the 0-based position of the new column; the scalar fills every row.
nba.insert(loc=3, column="Sport", value="Basketball")
nba.head(n=3)

In [None]:
# Insert "League" at 0-based position 7, filling every row with the same value.
# insert() modifies `nba` in place and raises ValueError if the column already
# exists, so re-running this cell without reloading the frame fails.
nba.insert(loc=7, column="League", value="National Basketball Association")
nba.tail()

In [None]:
# First five rows, to check where the inserted columns landed.
nba.head()

## Broadcasting operations

In [None]:
# Start this section from a fresh copy of the data.
nba = pd.read_csv("data/nba.csv")
nba.head()

In [None]:
# .add(5) adds 5 to every value of the column (broadcasting) and returns a
# new Series; `nba` itself is not modified.
nba["Age"].add(5).head()

In [None]:
# The arithmetic operators are shorthand for the methods: `+` does what
# .add() does.
# NOTE(review): with default notebook settings only the final expression of
# this cell is rendered; the earlier results are evaluated and discarded —
# confirm whether they were meant to be shown.
nba = pd.read_csv("data/nba.csv")
nba["Age"] + 5

# The same holds for the other arithmetic methods, e.g. .sub() and `-`.
nba["Salary"].sub(500000)
nba["Salary"] - 500000

In [None]:
# Scale every Weight value by 0.453592 (the pounds-to-kilograms factor;
# assumes Weight is recorded in pounds) and store the resulting new Series
# as the "Weight in Kgs" column.
nba["Weight in Kgs"] = nba["Weight"] * 0.453592
nba.head()

## .value_counts() method

In [None]:
# Start this section from a fresh copy of the data.
nba = pd.read_csv("data/nba.csv")
nba.head()

In [None]:
# Count the rows per team, smallest counts first, and keep the three teams
# with the fewest rows.
nba["Team"].value_counts(ascending=True).head(3)

## Drop Rows with null values

In [None]:
# Returns a new DataFrame without any row that has at least one missing
# value; `nba` itself is unchanged.
nba.dropna()

In [None]:
# The `how` parameter defaults to 'any'; how='all' drops only the rows in
# which every value is missing.
nba.dropna(how='all')

In [None]:
# inplace=True modifies `nba` itself (and returns None) instead of returning
# a new DataFrame, so the drop persists in later cells.
nba.dropna(how='all', inplace=True)

In [None]:
# (rows, columns) after the in-place drop.
nba.shape

In [None]:
# axis=1 drops columns (instead of rows) that contain any missing value.
nba.dropna(axis = 1)

In [None]:
# `subset` restricts the null check to the listed columns: drop only the rows
# whose Salary is missing.
nba.dropna(subset=["Salary"])

## Fill null values with .fillna() method

In [None]:
# Start this section from a fresh copy of the data.
nba = pd.read_csv("data/nba.csv")
nba.head()

In [None]:
# Replace missing salaries with 0.
# Assign the result back instead of calling fillna(..., inplace=True) on
# nba["Salary"]: an in-place call on a selected column is chained assignment,
# which warns on recent pandas 2.x releases and no longer updates `nba` under
# Copy-on-Write (the default from pandas 3.0).
nba["Salary"] = nba["Salary"].fillna(0)
nba.head()

In [None]:
# Replace missing colleges with the placeholder "No College".
# Assign the result back instead of calling fillna(..., inplace=True) on
# nba["College"]: an in-place call on a selected column is chained assignment,
# which warns on recent pandas 2.x releases and no longer updates `nba` under
# Copy-on-Write (the default from pandas 3.0).
nba["College"] = nba["College"].fillna("No College")
nba.head()

## .astype() method

In [None]:
# astype converts the data type of a Series. Converting to int fails while
# the column still holds missing values, so clean the data first.

# Drop the rows that are entirely empty, then fill the remaining gaps.
# The fills are assigned back rather than done with fillna(..., inplace=True)
# on a selected column: that chained in-place call warns on recent pandas 2.x
# releases and no longer updates `nba` under Copy-on-Write (pandas 3.0 default).
nba = pd.read_csv("data/nba.csv").dropna(how='all')
nba["Salary"] = nba["Salary"].fillna(0)
nba["College"] = nba["College"].fillna("No College")
nba.head(6)

In [None]:
# Inspect dtypes and non-null counts before converting.
nba.info()

In [None]:
# astype returns a converted copy; on its own this line leaves `nba` untouched.
nba["Salary"].astype("int")

# astype has no inplace parameter, so assign the result back to the column.
nba["Salary"] = nba["Salary"].astype("int")
nba["Age"] = nba["Age"].astype("int")
nba.info()

In [None]:
# Convert Position to the category dtype, which stores each distinct value
# once and suits columns with few unique values; info() shows the new dtype
# and the memory usage.
nba["Position"] = nba["Position"].astype("category")
nba.info()

In [None]:
# Number of distinct teams (missing values are not counted); a small number
# relative to the row count makes Team a candidate for the category dtype.
nba["Team"].nunique()

In [None]:
# Convert Team to the category dtype as well and re-check dtypes and memory
# usage.
nba["Team"] = nba["Team"].astype("category")
nba.info()

## .sort_values() method

In [None]:
# Start this section from a fresh copy of the data.
nba = pd.read_csv("data/nba.csv")
nba.head()

In [None]:
# Sort the rows by Name in descending (Z to A) order; returns a new DataFrame
# and leaves `nba` unchanged.
nba.sort_values(by="Name", ascending=False)

In [None]:
# Sort on several columns: rows are ordered by Team first, and Name breaks
# ties within each team.
nba.sort_values(by=["Team", "Name"]).head(n=5)

In [None]:
# `ascending` also accepts one boolean per sort column:
# Team ascending, Name descending within each team.
nba.sort_values(
    by=["Team", "Name"],
    ascending=[True, False],
).head(n=10)

## Sorting DataFrame with .sort_index()

In [None]:
# Start this section from a fresh copy of the data.
nba = pd.read_csv("data/nba.csv")
nba.head()

In [None]:
# Sort by the row index, highest label first, and show the top three rows.
nba.sort_index(ascending=False).head(3)

## Rank values with .rank() Method

In [None]:
# Build the frame for ranking in one chain: load the CSV, drop the rows that
# are entirely empty, then replace missing salaries with 0 and cast the
# column to int.
nba = (
    pd.read_csv("data/nba.csv")
    .dropna(how="all")
    .assign(Salary=lambda frame: frame["Salary"].fillna(0).astype("int"))
)
nba.head(3)

In [None]:
# Rank salaries so that the highest salary gets rank 1.
# method="min" gives tied salaries the lowest rank of their group, so every
# rank is a whole number and the int cast loses nothing. The default
# method="average" yields fractional ranks for ties (e.g. 3.5), which
# astype("int") would silently truncate.
nba["Salary Rank"] = (
    nba["Salary"].rank(ascending=False, method="min").astype("int")
)
nba.head()

In [None]:
# Order the rows by Salary, highest first, to compare against the
# "Salary Rank" column.
nba.sort_values(by="Salary", ascending=False)