In [None]:
import pandas as pd

In [None]:
## DATAFRAME CREATION

In [None]:
# Build a DataFrame from a dict of Series (a dict of dicts is accepted too):
# each key becomes a column, and rows are matched up by the Series index labels.
df_from_dict_of_series = pd.DataFrame(
    data={
        'one': pd.Series(data=[2.0, 1.0, 3.0], index=['a', 'b', 'c']),
        'two': pd.Series(data=[7.0, 9.0, 6.0, 8.0], index=['a', 'b', 'c', 'd']),
    }
)
df_from_dict_of_series

In [None]:
# Create a DataFrame from a list of dicts: every dict is one row, and the
# columns are the keys seen across all dicts (a key missing from a row shows as NaN).
list_of_dicts = [
    {'a': 1, 'b': 2},
    {'a': 1, 'b': 2, 'c': 3},
    {'a': 1}
]
df_from_list_of_dicts = pd.DataFrame(list_of_dicts)
# Backward-compatible alias: this frame was previously (mis)named after a
# "dict of dicts" although it is built from a list of dicts.
df_from_dict_of_dicts = df_from_list_of_dicts
df_from_list_of_dicts

In [None]:
# Passing `columns` keeps only the listed keys, here just column 'b'
df_one_column_only = pd.DataFrame(data=list_of_dicts, columns=['b'])
df_one_column_only

In [None]:
# `columns` may also list names that no dict provides ('d' and 'e' here):
# those columns are still created and come out empty (all NaN)
df_extra_columns = pd.DataFrame(
    data=list_of_dicts,
    columns=['a', 'b', 'c', 'd', 'e'],
)
df_extra_columns

In [None]:
## DATAFRAME METADATA
# Work on an independent copy: later cells add and remove columns on `df`
# in place, and a plain assignment would only create a second name for the
# same object, so those edits would also change df_from_dict_of_series.
df = df_from_dict_of_series.copy()

In [None]:
# Row labels (df.index) and column labels (df.columns), displayed together as a tuple
df.index, df.columns 

In [None]:
# Dimensions of the DataFrame as a tuple: (number of rows, number of columns)
df.shape

In [None]:
## COLUMNS ALGEBRA

In [None]:
# You can add a column by combining existing ones: the arithmetic is applied
# element-wise and the result is stored under the new label 'three'.
# Note: this assignment modifies df in place.
df['three'] = df['one'] + 0.5 * df['two']
df

In [None]:
# A column can also be created from a scalar, which is repeated on every row.
# Note: this assignment modifies df in place.
df['four'] = 3.14
df

In [None]:
# You can delete a column in place with `del`.
# (Re-running this cell raises KeyError, since 'four' no longer exists.)
del df['four']
df

In [None]:
# `pop` removes a column from the DataFrame and returns it as a Series.
# (Like `del`, it only works while the column still exists in df.)
column_three = df.pop('three')
column_three

In [None]:
## QUICK PEEK

In [None]:
df.head(n=1)  # first n rows (here n=1)

In [None]:
df.tail(n=1)  # last n rows (here n=1)

In [None]:
## SELECTION AND SLICING

In [None]:
# Select a single column by passing its name in square brackets (returns a Series)
df['two']

In [None]:
# Slice rows by position with start:stop (stop is excluded); returns a DataFrame
df[0:2]

In [None]:
# Extract rows and columns by integer position: .iloc[row_selector, column_selector]
df.iloc[1:,1:]  # from the second row on and from the second column on

In [None]:
df.iloc[:, 0]  # all rows, first column only

In [None]:
df.iloc[0, :]  # first row only, all columns

In [None]:
# Columns can also be extracted by label with .loc (":" keeps every row)
df.loc[:, "two"]

In [None]:
## USEFUL OPERATIONS

In [None]:
# Statistical summary (count, mean, std, min, quartiles, max) of the numeric columns
df.describe()

In [None]:
# Sort the rows by the values of column 'two', smallest first
df.sort_values(by='two', ascending=True)

In [None]:
# Transpose: rows become columns and columns become rows
df.T

In [None]:
# Mean of each column (one value per column)
df.mean(axis=0)

In [None]:
# Mean of each row (one value per row)
df.mean(axis=1)

In [None]:
# Flag missing data: True where a cell is NaN, False elsewhere
df.isna()

In [None]:
# Replace every missing value with 999; the result is a new DataFrame
df.fillna(999)