# Using Pandas

Pandas is a powerful and easy-to-use library for data analysis. It has two main objects to represent data:

- Series
- DataFrame


# Import libraries

In [1]:
import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
%matplotlib inline

# Working with Series

A `Series` is a one-dimensional, array-like object.

In [None]:
# Build a Series from a plain Python list; displaying it shows each value
# next to its index label.
x = pd.Series(data=[1, 2, 3, 4, 5])
x

Notice that pandas automatically generated an index for your items.

## Basic Operation

In [None]:
x + 100


In [None]:
(x ** 2) + 100

In [None]:
x > 2

## `any()` and `all()`

In [None]:
larger_than_2 = x > 2
larger_than_2

In [None]:
larger_than_2.any()

In [None]:
larger_than_2.all()

## `apply()`

In [None]:
def f(x):
    """Return ``x`` doubled when it is even, tripled otherwise."""
    factor = 2 if x % 2 == 0 else 3
    return x * factor

x.apply(f)

**Avoid looping over your data**

The next two cells use `%%timeit` to compare a `for` loop with `apply()`.

In [None]:
%%timeit
# Pattern to avoid, shown for timing only: visit the Series one label at a
# time and write each transformed value back through item assignment.
# The Series is rebuilt inside the timed body, so its construction is part of
# the measurement.
ds = pd.Series(range(10000))

for counter in range(len(ds)):
    ds[counter] = f(ds[counter])

In [None]:
%%timeit

# Applies f to every element through Series.apply() instead of an explicit
# Python loop. The Series is rebuilt inside the timed body here as well.
ds = pd.Series(range(10000))

ds = ds.apply(f)

## `copy()`

In [None]:
# Plain assignment binds a second name to the same Series object; no data is
# copied. Use x.copy() when an independent Series is needed.
y = x

In [None]:
y[0]

In [None]:
y[0] = 100

In [None]:
y

In [None]:
x

# DataFrame

In [None]:
# One-column frame holding the integers 1 through 9 under the label "x".
data = list(range(1, 10))
df = pd.DataFrame({"x": data})

In [None]:
df

## Selecting Data

In [None]:
df["x"]

In [None]:
df["x"][0]

## Adding extra columns

In [None]:
df["x_plus_2"] = df["x"] + 2
df

In [None]:
# Element-wise square of the "x" column.
df["x_square"] = df["x"] ** 2
# Use the standard-library factorial: the `np.math` alias was deprecated in
# NumPy 1.25 and removed in NumPy 2.0, so `np.math.factorial` fails on
# current releases.
df["x_factorial"] = df["x"].apply(math.factorial)
df

In [None]:
# NOTE: despite the column name, this stores x % 2, i.e. 1 for odd values and
# 0 for even values.
df["is_even"] = df["x"] % 2
df

### `map()`

In [None]:
df["odd_even"] = df["is_even"].map({1:"odd", 0:"even"})
df

### `drop()`

In [None]:
# Remove the helper column by name. The column is selected with the explicit
# `columns=` keyword because passing the axis positionally
# (`df.drop("is_even", 1)`) is rejected by pandas 2.x.
df = df.drop(columns="is_even")
df

## Multi Column Select

In [None]:
df[["x", "odd_even"]]

## Controlling display options

In [None]:
pd.options.display.max_columns= 60
pd.options.display.max_rows= 6
pd.options.display.notebook_repr_html = False
df

## Filtering

In [None]:
df[df["odd_even"] == "odd"]

In [None]:
df[df.odd_even == "even"]

### Chaining Filters

#### `|` OR

In [None]:
df[(df.odd_even == "even") | (df.x_square < 20)]

#### `&` AND

In [None]:
df[(df.odd_even == "even") & (df.x_square < 20)]

# `scatter_matrix()`

In [None]:
# scatter_matrix lives in the pandas.plotting namespace; the old top-level
# `pd.scatter_matrix` alias no longer exists in current pandas.
# The trailing semicolon suppresses the repr of the returned axes array.
pd.plotting.scatter_matrix(df, diagonal="kde", figsize=(10, 10));

In [None]:
df.describe()

# Reading Data from CSV/TSV Files

In [3]:
# Original remote source, left disabled; the data is read from a local CSV
# file instead.
#url = "http://www.google.com/finance/historical?q=TADAWUL:TASI&output=csv"
#stocks_data = pd.read_csv(url)

# Load the stock data from the CSV file stored under ./Datasets.
stocks_data = pd.read_csv('./Datasets/stocks.csv')

In [4]:
stocks_data

Unnamed: 0,﻿Date,Open,High,Low,Close,Volume
0,21-Jul-16,6630.61,6648.10,6597.88,6601.00,183874848
1,20-Jul-16,6646.86,6655.54,6622.36,6630.61,166216847
2,19-Jul-16,6681.97,6686.63,6635.99,6646.86,180509004
3,18-Jul-16,6680.79,6702.99,6658.04,6681.97,206421139
4,17-Jul-16,6661.65,6698.13,6661.65,6680.79,173648935
5,14-Jul-16,6691.23,6698.88,6653.64,6661.65,259311234
6,13-Jul-16,6625.78,6694.05,6624.03,6691.23,308953769
7,12-Jul-16,6570.34,6627.04,6570.34,6625.78,252310156
8,11-Jul-16,6555.87,6584.70,6546.41,6570.34,241224883
9,10-Jul-16,6499.88,6556.31,6494.47,6555.87,242408432


In [None]:
# Absolute move per row: closing price minus opening price.
stocks_data["change_amount"] = stocks_data["Close"] - stocks_data["Open"]
# Percentage change is measured against the starting (opening) price, not the
# closing price, and is scaled by 100 so the column really holds percent.
stocks_data["change_percentage"] = (
    stocks_data["change_amount"] / stocks_data["Open"] * 100
)
stocks_data