#### Syntax: Creating DataFrames
#### Specify values for each column:

In [None]:
import pandas as pd

# Build a 3x3 frame column by column: every dict key becomes a column label
# and its list supplies that column's values. Rows are labelled 1, 2, 3.
df = pd.DataFrame(
    data={"a": [4, 5, 6], "b": [7, 8, 9], "c": [10, 11, 12]},
    index=[1, 2, 3],
)

#### Specify values for each row:

In [None]:
# Build the same 3x3 frame row by row: each inner list is one row, with the
# column labels and the row labels passed separately.
df = pd.DataFrame(
    data=[[4, 7, 10], [5, 8, 11], [6, 9, 12]],
    columns=["a", "b", "c"],
    index=[1, 2, 3],
)

#### Create DataFrame with a MultiIndex:

In [None]:
# Build the frame with a two-level row index: the levels are named 'n' and
# 'v', and each tuple gives one row's (n, v) label pair.
df = pd.DataFrame(
    data={"a": [4, 5, 6], "b": [7, 8, 9], "c": [10, 11, 12]},
    index=pd.MultiIndex.from_tuples(
        [("d", 1), ("d", 2), ("e", 2)],
        names=["n", "v"],
    ),
)

#### Reshaping Data: Change the layout of a data set:

In [None]:
# Gather columns into rows (unpivot df from wide to long format).
# The result is only displayed here; it is not assigned to a name.
pd.melt(df)

In [None]:
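# Append the columns of one DataFrame to another (side by side).
# Left commented out because df1 and df2 are not defined in the cells above.
# pd.concat([df1, df2], axis=1)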

In [None]:
# Rename the columns of a DataFrame: the dict maps old label 'y' to new label 'year'.
# The result is only displayed here; it is not assigned back to df.
# NOTE(review): the frames built above have columns a, b, c and no 'y' -- confirm
# which frame this example is meant to run against.
df.rename(columns = {'y':'year'})

In [None]:
# Sort the rows of a DataFrame by their index labels.
# The sorted frame is only displayed here; it is not assigned back to df.
df.sort_index()

In [None]:
# Reset the index of a DataFrame to row numbers, moving the old index into columns.
# The result is only displayed here; it is not assigned back to df.
df.reset_index()

#### Subset Observations (Rows)

In [None]:
# Extract rows that meet a logical criterion: `df.Length > 7` builds a boolean
# mask and indexing df with it keeps only the rows where the mask is True.
# NOTE(review): the frames built above have no 'Length' column -- confirm which
# frame this example is meant to run against.
df[df.Length > 7]

In [None]:
# Remove duplicate rows. Rows are compared on their column values only;
# index labels are not taken into account.
df.drop_duplicates()

In [None]:
# Select the first n rows.
# `n` is a placeholder: it is not defined in the cells above and must be set
# to an integer before this cell is run.
df.head(n)

In [None]:
# Select the last n rows.
# `n` is a placeholder: it is not defined in the cells above and must be set
# to an integer before this cell is run.
df.tail(n)

In [None]:
# Randomly select a fraction of the rows (here frac=0.5, i.e. half of them).
# No random seed is passed, so the selection differs from run to run.
df.sample(frac=0.5) 

In [None]:
# Randomly select n rows (here n=10).
# NOTE(review): the frames built above have only 3 rows; sampling 10 rows without
# replacement needs a frame with at least 10 rows -- confirm the intended frame.
df.sample(n=10)

In [None]:
# Select rows by integer position: positions 10 up to, but not including, 20.
df.iloc[10:20]

#### Subset Variables (Columns)

In [None]:
# Select multiple columns with specific names by passing a list of labels.
# NOTE(review): 'width', 'length' and 'species' are not columns of the frames
# built above -- confirm which frame this example is meant to run against.
df[['width','length','species']]

In [None]:
# Select a single column with a specific name.
# Bracket access and attribute access are two spellings of the same lookup.
# They are written as separate expressions because joining them with `or`
# would ask for the truth value of a Series, which raises ValueError.
df['width']
df.width

#### Handling Missing Data

In [None]:
# Drop every row in which at least one column holds NA/null data.
# The result is only displayed here; it is not assigned back to df.
df.dropna()

In [None]:
# Replace all NA/null data with value.
# `value` is a placeholder: it is not defined in the cells above and must be
# set to the replacement value before this cell is run.
df.fillna(value)

#### Make New Columns

In [None]:
# Compute and append one or more new columns: here a single 'Area' column
# computed as Length * Height.
# The lambda's parameter is also called `df`; inside the lambda it refers to the
# frame that assign() passes in, shadowing the outer name.
# The result is only displayed here; it is not assigned back to df.
df.assign(Area=lambda df: df.Length*df.Height)

In [None]:
# Add a single column: stores Length * Height * Depth in df under the label
# 'Volume'. Unlike the display-only cells, this assignment modifies df itself.
df['Volume'] = df.Length*df.Height*df.Depth

#### Combine Data Sets
#### Standard Joins

In [None]:
# Left join on column 'x1': keep every row of adf and attach the matching
# rows from bdf.
pd.merge(adf, bdf,
    how='left', on='x1')


In [None]:
# Right join on column 'x1': keep every row of bdf and attach the matching
# rows from adf.
pd.merge(adf, bdf,
    how='right', on='x1')

In [None]:
# Inner join on column 'x1': retain only rows whose 'x1' value appears in
# both adf and bdf.
pd.merge(adf, bdf,
how='inner', on='x1')

In [None]:
# Outer join on column 'x1': retain all values and all rows from both adf
# and bdf, whether or not they have a match on the other side.
pd.merge(adf, bdf,
    how='outer', on='x1')

#### Group Data

In [None]:
# Return a GroupBy object, grouped by the values in the column named "col".
# NOTE(review): "col" is a placeholder label; the frames built above have no
# such column.
df.groupby(by="col")

In [None]:
# Return a GroupBy object, grouped by the values in the index level named "ind".
# NOTE(review): "ind" is a placeholder; the MultiIndex built above names its
# levels 'n' and 'v'.
df.groupby(level="ind")

All of pandas' summary functions (for example `sum()`, `mean()`, `count()`, `min()`, `max()`) can be applied to a group. Additional GroupBy functions:

In [None]:
# Size of each group: the number of rows that fall into each group.
# size() is a method of a GroupBy object, so it is called on the result of
# groupby(); "col" is the same placeholder column label used in the
# grouping examples.
df.groupby(by="col").size()

In [None]:
# Aggregate each group using a function.
# agg() is a method of a GroupBy object, so it is called on the result of
# groupby(). `function` is a placeholder for the aggregation to apply and
# must be defined before this cell is run.
df.groupby(by="col").agg(function)

#### Plotting

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Three numeric columns (X, Y, Z) with five observations each.
df = pd.DataFrame({'X':[78,85,96,80,86], 'Y':[84,94,89,83,86],'Z':[86,97,96,72,83]})
print(df)

# Draw the histograms of all columns; pandas plots through matplotlib.
df.plot.hist()
# Render the figure explicitly so the cell also works outside a notebook.
plt.show()

In [None]:
# Scatter plot of randomly generated (X, Y) point pairs.
import matplotlib.pyplot as plt
import numpy as np

# Draw 200 standard-normal samples for each axis. numpy.random.randn is the
# function pylab re-exports as `randn`, imported here from its real home.
X = np.random.randn(200)
Y = np.random.randn(200)

# Plot the pairs as red markers, label both axes and render the figure.
plt.scatter(X, Y, color='r')
plt.xlabel("X")
plt.ylabel("Y")
plt.show()