# Pandas

See: http://pandas.pydata.org/

*Author: Francesco Mosconi*

*Copyright &copy; 2017 CATALIT LLC*

In [None]:
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt

## 1. Read data

In [None]:
# Load the CSV into a DataFrame; the path is relative to the current working
# directory, so the notebook must be run from the folder that contains `data/`.
df = pd.read_csv('data/iris-2-classes.csv')

## 2. Quick look at data

In [None]:
# Check the Python type of the object returned by read_csv.
type(df)

In [None]:
# Print a summary of the frame: index range, column names, non-null counts,
# dtypes and memory usage.
df.info()

In [None]:
# Show the first rows (5 by default).
df.head()

In [None]:
# Show the last rows (5 by default).
df.tail()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the
# numeric columns.
df.describe()

## 3. Indexing

In [None]:
# by index label: .loc looks rows up by label, not by position. The frame was
# read without an index column, so label 3 is the integer row label 3.
df.loc[3]

In [None]:
# by row, column: .loc slices are label-based and INCLUDE the end label,
# so 0:2 selects the rows labelled 0, 1 and 2 of the given column.
df.loc[0:2,'sepal_length_cm']

In [None]:
# multiple columns: indexing with a list of names returns a DataFrame
# containing just those columns (a single name would return a Series).
df[['sepal_length_cm', 'petal_length_cm']].head()

## 4. Selections

In [None]:
# Boolean-mask selection: the comparison yields a True/False Series and
# indexing with it keeps only the rows where it is True.
# NOTE(review): if every sepal length in this file exceeds 3 cm the filter
# keeps all rows and demonstrates nothing - confirm the threshold against the data.
df[df['sepal_length_cm'] > 3]

In [None]:
# Combine two masks with & (element-wise AND). Each comparison needs its own
# parentheses because & binds more tightly than == in Python.
# NOTE(review): this relies on exact float equality; it works when the stored
# values parse to exactly 6.9 and 3.1, but is fragile for computed columns.
df[(df['sepal_length_cm'] == 6.9) & (df['sepal_width_cm'] == 3.1)]

## 5. Unique values

In [None]:
# Distinct values of the column, in order of first appearance.
df['iris_type'].unique()

In [None]:
# Number of rows for each distinct value, sorted by descending count.
df['iris_type'].value_counts()

## 6. Groupby and Pivot tables

In [None]:
# Mean sepal width computed separately for each iris type.
df.groupby('iris_type')['sepal_width_cm'].mean()

In [None]:
# Mean of every other column, computed separately for each iris type.
# NOTE(review): assumes all non-key columns are numeric; on pandas >= 2.0
# mean() raises a TypeError if a non-numeric column is present.
df.groupby('iris_type').mean()

In [None]:
# Same per-type means expressed as a pivot table: the iris types become the
# columns and the aggregated value columns become the rows.
df.pivot_table(columns='iris_type',
               aggfunc='mean')

## 7. Maps and apply

In [None]:
# Lookup table from iris type name to integer class label.
label_dict = {'versicolor': 0, 'virginica': 1}

In [None]:
# Series.map with a dict replaces each value by its dict entry; values that
# are not keys of the dict become NaN.
df['iris_type'].head().map(label_dict)

In [None]:
def label_func(s):
    """Encode an iris type name as an integer class label.

    Returns 0 for 'versicolor', 1 for 'virginica' and -1 for any other value.
    """
    # The position of each known name in this tuple is its label.
    for code, known_name in enumerate(('versicolor', 'virginica')):
        if s == known_name:
            return code
    # Anything unrecognised gets the sentinel label.
    return -1

In [None]:
# Series.apply calls label_func once per element and collects the results;
# unlike the dict-based map, unknown values become -1 rather than NaN.
df['iris_type'].head().apply(label_func)

## 8. Visual exploration

In [None]:
# Default DataFrame plot: one line per numeric column, with the row index
# on the x axis.
df.plot(title='Line plot')

In [None]:
# Histograms of the numeric columns drawn on shared axes with 20 bins;
# alpha < 1 keeps overlapping distributions visible.
df.plot(kind='hist',
        bins=20,
        title='Histogram',
        alpha=0.6)

In [None]:
import seaborn as sns

In [None]:
# Grid of pairwise plots of the numeric columns, with points colored by
# iris type.
sns.pairplot(df, hue='iris_type')

## Exercise

- Add a new binary (0/1) column called `target` that is 0 if the record is `versicolor` and 1 if the record is `virginica`
- Create 2 new DataFrames `X` and `y` that contain the 4 features and the target respectively
- Save the `X` and `y` in `.csv` format without an index

- Visit: http://pandas.pydata.org/pandas-docs/stable/10min.html if you need help
