## Pandas Library

###### *Inspired by a Pandas cheat sheet from dataquest.io*
---

### Import pandas

In [None]:
import pandas as pd

### Generate Dataframes

In [None]:
df = pd.read_csv('ibm_dataset.csv') # load the CSV file into a DataFrame named df

### Viewing your dataframe

In [None]:
df.head() # show the first 5 rows (the default; pass n for a different count)

In [None]:
df.tail() # show the last 5 rows (the default; pass n for a different count)

In [None]:
df.shape # (number of rows, number of columns) as a tuple

In [None]:
df.info() # print each column's dtype and non-null count, plus memory usage

In [None]:
df['make'].value_counts() # count how many times each distinct value occurs in the column

In [None]:
df.columns # the Index of column names

### Selection

In [None]:
df['make'] # select a single column (as a Series) with df['column']

In [None]:
df[['drive-wheels', 'body-style', 'make']] # select multiple columns by passing a list of names

In [None]:
df.iloc[2] # view the row at integer position 2, i.e. the 3rd row (positions start at 0)

In [None]:
df.loc[2] # view the record whose index label is 2 (label-based, unlike iloc)

### Add/Drop Columns:

In [None]:
df['test_column'] = 47 # create test_column (modifies df in place), setting every row to 47

In [None]:
df = df.drop(columns=['test_column']) # drop test_column; the result is assigned back to df

### Null Values

#### Columns:

In [None]:
df.dropna(axis=1) # drop columns that contain any null values (returns a new DataFrame; df is unchanged)

In [None]:
df.dropna(axis=1, how='all') # drop columns that contain only null values (returns a new DataFrame; df is unchanged)

#### Rows:

In [None]:
df.dropna() # drop rows that contain any null values (returns a new DataFrame; df is unchanged)

In [None]:
df.dropna(how='all') # drop rows that contain only null values (returns a new DataFrame; df is unchanged)

In [None]:
df.isnull() # returns True for null values, False for non-null values. Useful for boolean masking.

In [None]:
df.notnull() # returns True for non-null values, False for null values. Useful for boolean masking.

In [None]:
df.fillna('returning to baker street') # replace NaN values with a value of your choosing (returns a new DataFrame; df is unchanged)

### Rename & Replace

In [None]:
# Change the first three column names by assigning a complete new label list.
# Writing into df.columns.values mutates the Index's backing array directly;
# an Index is meant to be immutable, so that can leave pandas' cached label
# lookups out of sync with the visible names.
df.columns = ['Unnamed: 0', 'symboling', 'normalized-losses'] + list(df.columns[3:])

In [None]:
# Rename specific columns (returns a new DataFrame; df is unchanged).
# Keys must match existing column names exactly: rename silently ignores keys
# that match nothing, so the full name 'normalized-losses' is required here.
df.rename(columns={'symboling': 'butterscotch', 'normalized-losses': 'summer'})

In [None]:
df.set_index(df['make']) # use a list or Series as the new index (returns a new DataFrame; df is unchanged)

In [None]:
df.reset_index() # convert the current index into a column, and generate a fresh index

In [None]:
df['num-of-doors'].replace('two', 2) # find and replace values (returns a new Series; df is unchanged)

In [None]:
df['symboling'].astype(str) # convert column to the specified type (returns a new Series; df is unchanged)

### Filter & Sort

In [None]:
# NOTE(review): 'volkswagon' may be a misspelling of 'volkswagen'; an exact-match
# filter returns no rows if the spelling differs. Confirm with df['make'].value_counts().
df[df['make']=='volkswagon'] # rows where make equals volkswagon

In [None]:
df[(df['make']=='audi') & (df['horsepower']>110)] # rows where make equals audi AND horsepower is greater than 110

In [None]:
df[(df['body-style']=='hatchback') | (df['body-style']=='wagon')] # rows where body-style is hatchback OR wagon

In [None]:
df.sort_values(by=['city-mpg'], ascending=False) # sort by city-mpg from largest to smallest (returns a new DataFrame; df is unchanged)

### Statistics:

In [None]:
# Mean of each numeric column. numeric_only=True is required because df has
# string columns (e.g. 'make'); pandas >= 2.0 no longer drops them silently
# and raises TypeError instead.
df.mean(numeric_only=True)

In [None]:
# Median of each numeric column. numeric_only=True is required because df has
# string columns (e.g. 'make'); pandas >= 2.0 no longer drops them silently
# and raises TypeError instead.
df.median(numeric_only=True)

In [None]:
df.count() # number of non-null values in each column

In [None]:
df.max() # maximum value of each column (string columns are compared alphabetically)

In [None]:
df.min() # minimum value of each column (string columns are compared alphabetically)

In [None]:
# Standard deviation of each numeric column. numeric_only=True is required
# because df has string columns (e.g. 'make'); pandas >= 2.0 no longer drops
# them silently and raises TypeError instead.
df.std(numeric_only=True)

In [None]:
df.describe() # summary statistics (count, mean, std, min, quartiles, max) for numerical columns

In [None]:
# Grid of pairwise Pearson correlation coefficients between numeric columns.
# numeric_only=True (available since pandas 1.5) is required because df has
# string columns (e.g. 'make'); pandas >= 2.0 raises on them instead of
# dropping them silently.
df.corr(numeric_only=True)