# LEVEL 1
- 1] Select: one column, many columns, row slice, row by index.  
- 2] Filter: single condition, two conditions, OR, NOT.  
- 3] Create & reuse boolean masks.  
- 4] Drop columns and rows.

In [196]:
import pandas as pd
df =  pd.read_csv("nba.csv")

- 1] Select: one column, many columns, row slice, row by index.  

In [197]:
# columns
df["Team"]                              # one column
df[["Team", "Height", "Weight"]]        # many column
df.filter(items=["Name", "Team"])       # name-based
df.take([0,1,2,3,4], axis=1)            # by column positions

# rows
df[2:7]                                 # row slice by position
df.loc[25]                              # row by label
df.iloc[25]                             # row by position
df.loc[2:6, ["Name", "Team"]]           # row + column    
df.iloc[2:6, [0,1]]                     # row + column    
df.take([0,3])                          # rows bu position



Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0


- Filter: single condition, two conditions, OR, NOT.  

In [198]:
# Boolean condition
df[df["Weight"] > 180]

# Multiple conditions, OR, NOT
cond1 = df["Weight"] > 200
cond2 = df["Age"] < 20
df[cond1 | cond2]


# Types of filters
df.query("Age < 21 and Team == 'Boston Celtics'")   # query()
df[df["Team"].isin(["Boston Celtics"])]             # isin()
df[df["Age"].between(18,20)]                        # between()
df[df["Name"].isna()]                               # isna()
df[df["Name"].notna()]                              # notna()
df.dropna(inplace=True)
df[df["Name"].str.startswith("M")]

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
9,Marcus Smart,Boston Celtics,36.0,PG,22.0,6-4,220.0,Oklahoma State,3431040.0
16,Markel Brown,Brooklyn Nets,22.0,SG,24.0,6-3,190.0,Oklahoma State,845059.0
88,Marreese Speights,Golden State Warriors,5.0,C,28.0,6-10,255.0,Florida,3815000.0
119,Metta World Peace,Los Angeles Lakers,37.0,SF,36.0,6-7,260.0,St. John's,947276.0
154,Mike Dunleavy,Chicago Bulls,34.0,SG,35.0,6-9,230.0,Duke,4500000.0
166,Matthew Dellavedova,Cleveland Cavaliers,8.0,PG,25.0,6-4,198.0,Saint Mary's,1147276.0
180,Mo Williams,Cleveland Cavaliers,52.0,PG,33.0,6-1,198.0,Alabama,2100000.0
194,Marcus Morris,Detroit Pistons,13.0,PF,26.0,6-9,235.0,Kansas,5000000.0
208,Myles Turner,Indiana Pacers,33.0,PF,20.0,6-11,243.0,Texas,2357760.0
213,Michael Carter-Williams,Milwaukee Bucks,5.0,PG,24.0,6-6,190.0,Syracuse,2399040.0


- Drop columns and rows

In [199]:
df1 = df
# columns
df1.drop(["Number", "Position"], axis=1, inplace=True)         # by column name
df1.drop("Height", axis=1, inplace=True)                       # single

# rows
df1.drop([1,3], inplace=True)
df1.drop(df.index[[69,68,67]], inplace=True)
df1 = df1[(df1["Age"] > 25) & (df1["Age"] < 35)]
df1["Age"].describe()

count    169.000000
mean      28.751479
std        2.321691
min       26.000000
25%       27.000000
50%       28.000000
75%       30.000000
max       34.000000
Name: Age, dtype: float64

In [200]:
df
chain2 = df["Age"] > 30
chain1 = df["Team"] == "Oklahoma City Thunder"
chain3 = df["Salary"] > 222888
chain4 = df["Weight"] < 250
chain5 = df["Age"] < 30 
df[chain1][chain2][chain3][chain4][chain5]

  df[chain1][chain2][chain3][chain4][chain5]
  df[chain1][chain2][chain3][chain4][chain5]
  df[chain1][chain2][chain3][chain4][chain5]
  df[chain1][chain2][chain3][chain4][chain5]


Unnamed: 0,Name,Team,Age,Weight,College,Salary


# Problems

In [201]:
# Extract age between 20 to 35
df[df["Age"].between(20,35)]

# Remove bottom 20 rows
df = df[:-20]
df
# ---- better way:
df.drop(df.tail(20).index, inplace=True)



# specific Columns
df.take([0,1,3,4,5], axis=1)

Unnamed: 0,Name,Team,Weight,College,Salary
0,Avery Bradley,Boston Celtics,180.0,Texas,7730337.0
6,Jordan Mickey,Boston Celtics,235.0,LSU,1170960.0
7,Kelly Olynyk,Boston Celtics,238.0,Gonzaga,2165160.0
8,Terry Rozier,Boston Celtics,190.0,Louisville,1824360.0
9,Marcus Smart,Boston Celtics,220.0,Oklahoma State,3431040.0
...,...,...,...,...,...
402,Zach LaVine,Minnesota Timberwolves,189.0,UCLA,2148360.0
403,Shabazz Muhammad,Minnesota Timberwolves,223.0,UCLA,2056920.0
404,Adreian Payne,Minnesota Timberwolves,237.0,Michigan State,1938840.0
406,Tayshaun Prince,Minnesota Timberwolves,212.0,Kentucky,947276.0


Unnamed: 0,Name,Team,Age,Weight,College,Salary
0,Avery Bradley,Boston Celtics,25.0,180.0,Texas,7730337.0
1,Jordan Mickey,Boston Celtics,21.0,235.0,LSU,1170960.0
2,Kelly Olynyk,Boston Celtics,25.0,238.0,Gonzaga,2165160.0
3,Terry Rozier,Boston Celtics,22.0,190.0,Louisville,1824360.0
4,Marcus Smart,Boston Celtics,22.0,220.0,Oklahoma State,3431040.0
...,...,...,...,...,...,...
314,Zach LaVine,Minnesota Timberwolves,21.0,189.0,UCLA,2148360.0
315,Shabazz Muhammad,Minnesota Timberwolves,23.0,223.0,UCLA,2056920.0
316,Adreian Payne,Minnesota Timberwolves,25.0,237.0,Michigan State,1938840.0
317,Tayshaun Prince,Minnesota Timberwolves,36.0,212.0,Kentucky,947276.0
