In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
x = pd.Series([1,2,3,4,5,6,7,8,9,0])
x

0    1
1    2
2    3
3    4
4    5
5    6
6    7
7    8
8    9
9    0
dtype: int64

In [3]:
x + 100

0    101
1    102
2    103
3    104
4    105
5    106
6    107
7    108
8    109
9    100
dtype: int64

In [4]:
(x ** 2) + 100

0    101
1    104
2    109
3    116
4    125
5    136
6    149
7    164
8    181
9    100
dtype: int64

In [5]:
x > 2

0    False
1    False
2     True
3     True
4     True
5     True
6     True
7     True
8     True
9    False
dtype: bool

# `any()` and `all()`

In [6]:
larger_than_2 = x > 2
larger_than_2

0    False
1    False
2     True
3     True
4     True
5     True
6     True
7     True
8     True
9    False
dtype: bool

In [7]:
larger_than_2.any()

True

In [8]:
larger_than_2.all()

False

## `apply()`

In [9]:
def f(x):
    if x % 2 == 0:
        return x * 2
    else:
        return x * 3

x.apply(f)

0     3
1     4
2     9
3     8
4    15
5    12
6    21
7    16
8    27
9     0
dtype: int64

**Avoid looping over your data**

This is a `%%timeit` results from `apply()` and a for loop.

In [10]:
%%timeit

ds = pd.Series(range(10000))

for counter in range(len(ds)):
    ds[counter] = f(ds[counter])

1 loop, best of 3: 731 ms per loop


In [11]:
%%timeit

ds = pd.Series(range(10000))

ds = ds.apply(f)

10 loops, best of 3: 24.9 ms per loop


## `astype()`

In [12]:
x.astype(np.float64)

0    1.0
1    2.0
2    3.0
3    4.0
4    5.0
5    6.0
6    7.0
7    8.0
8    9.0
9    0.0
dtype: float64

## `copy()`

In [13]:
y = x

In [14]:
y[0]

1

In [15]:
y[5]

6

In [16]:
y[0] = 100

In [17]:
y

0    100
1      2
2      3
3      4
4      5
5      6
6      7
7      8
8      9
9      0
dtype: int64

In [18]:
x

0    100
1      2
2      3
3      4
4      5
5      6
6      7
7      8
8      9
9      0
dtype: int64

**Avoid using copy (is you can) to save memory**

In [19]:
y = x.copy()

In [20]:
x[0]=1

In [21]:
x

0    1
1    2
2    3
3    4
4    5
5    6
6    7
7    8
8    9
9    0
dtype: int64

In [22]:
y

0    100
1      2
2      3
3      4
4      5
5      6
6      7
7      8
8      9
9      0
dtype: int64

In [23]:
x.describe()

count    10.00000
mean      4.50000
std       3.02765
min       0.00000
25%       2.25000
50%       4.50000
75%       6.75000
max       9.00000
dtype: float64

# DataFrame
#### pd.DataFrame(self, data=None, index=None, columns=None, dtype=None, copy=False)

In [24]:
data = [1,2,3,4,5,6,7,8,9]
df = pd.DataFrame(data, columns=["x"])

In [25]:
df

Unnamed: 0,x
0,1
1,2
2,3
3,4
4,5
5,6
6,7
7,8
8,9


## Selecting Data

In [26]:
df["x"]

0    1
1    2
2    3
3    4
4    5
5    6
6    7
7    8
8    9
Name: x, dtype: int64

In [27]:
df["x"][0]

1

## Adding extra columns

In [28]:
df["x_plus_2"] = df["x"] + 2
df

Unnamed: 0,x,x_plus_2
0,1,3
1,2,4
2,3,5
3,4,6
4,5,7
5,6,8
6,7,9
7,8,10
8,9,11


In [29]:
df["x_square"] = df["x"] ** 2
df["x_factorial"] = df["x"].apply(np.math.factorial)
df

Unnamed: 0,x,x_plus_2,x_square,x_factorial
0,1,3,1,1
1,2,4,4,2
2,3,5,9,6
3,4,6,16,24
4,5,7,25,120
5,6,8,36,720
6,7,9,49,5040
7,8,10,64,40320
8,9,11,81,362880


In [30]:
df["is_even"] = df["x"] % 2
df

Unnamed: 0,x,x_plus_2,x_square,x_factorial,is_even
0,1,3,1,1,1
1,2,4,4,2,0
2,3,5,9,6,1
3,4,6,16,24,0
4,5,7,25,120,1
5,6,8,36,720,0
6,7,9,49,5040,1
7,8,10,64,40320,0
8,9,11,81,362880,1


### `map()`

In [31]:
df["odd_even"] = df["is_even"].map({1:"odd", 0:"even"})
df

Unnamed: 0,x,x_plus_2,x_square,x_factorial,is_even,odd_even
0,1,3,1,1,1,odd
1,2,4,4,2,0,even
2,3,5,9,6,1,odd
3,4,6,16,24,0,even
4,5,7,25,120,1,odd
5,6,8,36,720,0,even
6,7,9,49,5040,1,odd
7,8,10,64,40320,0,even
8,9,11,81,362880,1,odd


### `drop()`

In [32]:
df = df.drop("is_even", 1)
df

Unnamed: 0,x,x_plus_2,x_square,x_factorial,odd_even
0,1,3,1,1,odd
1,2,4,4,2,even
2,3,5,9,6,odd
3,4,6,16,24,even
4,5,7,25,120,odd
5,6,8,36,720,even
6,7,9,49,5040,odd
7,8,10,64,40320,even
8,9,11,81,362880,odd


## Multi Column Select

In [33]:
df[["x", "odd_even"]]

Unnamed: 0,x,odd_even
0,1,odd
1,2,even
2,3,odd
3,4,even
4,5,odd
5,6,even
6,7,odd
7,8,even
8,9,odd


## Controlling display options

In [34]:
pd.options.display.max_columns= 60
pd.options.display.max_rows= 6
pd.options.display.notebook_repr_html = False
df

    x  x_plus_2  x_square  x_factorial odd_even
0   1         3         1            1      odd
1   2         4         4            2     even
2   3         5         9            6      odd
.. ..       ...       ...          ...      ...
6   7         9        49         5040      odd
7   8        10        64        40320     even
8   9        11        81       362880      odd

[9 rows x 5 columns]

## Filtering

In [35]:
df[df["odd_even"] == "odd"]

   x  x_plus_2  x_square  x_factorial odd_even
0  1         3         1            1      odd
2  3         5         9            6      odd
4  5         7        25          120      odd
6  7         9        49         5040      odd
8  9        11        81       362880      odd

In [36]:
df[df.odd_even == "even"]

   x  x_plus_2  x_square  x_factorial odd_even
1  2         4         4            2     even
3  4         6        16           24     even
5  6         8        36          720     even
7  8        10        64        40320     even

### Chaining Filters

#### `|` OR

In [37]:
df[(df.odd_even == "even") | (df.x_square < 20)]

   x  x_plus_2  x_square  x_factorial odd_even
0  1         3         1            1      odd
1  2         4         4            2     even
2  3         5         9            6      odd
3  4         6        16           24     even
5  6         8        36          720     even
7  8        10        64        40320     even

#### `&` AND

In [38]:
df[(df.odd_even == "even") & (df.x_square < 20)]

   x  x_plus_2  x_square  x_factorial odd_even
1  2         4         4            2     even
3  4         6        16           24     even

### Furter Chaining

In [39]:
df[(df.odd_even == "even") & (df.x_square < 20)]["x_plus_2"][:1]

1    4
Name: x_plus_2, dtype: int64

# `scatter_matrix()`

In [40]:
pd.scatter_matrix(df, diagonal="kde", figsize=(10,10));

In [41]:
df.describe()

              x   x_plus_2   x_square    x_factorial
count  9.000000   9.000000   9.000000       9.000000
mean   5.000000   7.000000  31.666667   45457.000000
std    2.738613   2.738613  28.080242  119758.341137
...         ...        ...        ...            ...
50%    5.000000   7.000000  25.000000     120.000000
75%    7.000000   9.000000  49.000000    5040.000000
max    9.000000  11.000000  81.000000  362880.000000

[8 rows x 4 columns]

# Reading Data from CSV/TSV Files

In [42]:
url = "http://www.google.com/finance/historical?q=TADAWUL:TASI&output=csv"
stocks_data = pd.read_csv(url)

In [43]:
stocks_data

         ﻿Date     Open     High      Low    Close     Volume
0    27-Mar-17  6874.12  6874.92  6838.23  6852.13   97311482
1    26-Mar-17  6878.68  6913.28  6866.14  6874.12  106139670
2    23-Mar-17  6832.30  6892.63  6832.30  6878.68  131270868
..         ...      ...      ...      ...      ...        ...
246  31-Mar-16  6215.65  6232.93  6153.55  6223.13  185368502
247  30-Mar-16  6176.84  6222.97  6163.99  6215.65  187742803
248  29-Mar-16  6268.71  6268.35  6141.98  6176.84  204334935

[249 rows x 6 columns]

In [44]:
stocks_data["change_amount"] = stocks_data["Close"] - stocks_data["Open"]
stocks_data["change_percentage"] = stocks_data["change_amount"] / stocks_data["Close"]
stocks_data

         ﻿Date     Open     High      Low    Close     Volume  change_amount  \
0    27-Mar-17  6874.12  6874.92  6838.23  6852.13   97311482         -21.99   
1    26-Mar-17  6878.68  6913.28  6866.14  6874.12  106139670          -4.56   
2    23-Mar-17  6832.30  6892.63  6832.30  6878.68  131270868          46.38   
..         ...      ...      ...      ...      ...        ...            ...   
246  31-Mar-16  6215.65  6232.93  6153.55  6223.13  185368502           7.48   
247  30-Mar-16  6176.84  6222.97  6163.99  6215.65  187742803          38.81   
248  29-Mar-16  6268.71  6268.35  6141.98  6176.84  204334935         -91.87   

     change_percentage  
0            -0.003209  
1            -0.000663  
2             0.006743  
..                 ...  
246           0.001202  
247           0.006244  
248          -0.014873  

[249 rows x 8 columns]