# Pandas
Pandas is an open source library providing high-performance, easy-to-use
data structures and data analysis tools for the Python programming
language.

## Features
- the concept of a DataFrame, which is an in-memory object with integrated indexing
- easy ways to import and export data from and to other formats (CSV, ...)
- merging and joining of data sets
- fancy slicing, indexing, and subsetting of data
- ways to reshape data

In [1]:
import pandas as pd

In [None]:
# Three rows of the form (n, 2n, 3n) for n = 0, 1, 2; each tuple becomes one
# row of the resulting DataFrame.
three = [(n, 2 * n, 3 * n) for n in range(3)]
pd.DataFrame(data=three)

In [None]:
# 10 x 10 multiplication table as a list of rows: the entry at
# (row, col) holds row * col, with both indices starting at 0.
mult_table = [[row * col for col in range(10)] for row in range(10)]

pd.DataFrame(mult_table)

In [None]:
def mult_table(size):
    """Return a ``size`` x ``size`` multiplication table as a DataFrame.

    The cell at row ``r`` and column ``c`` holds ``r * c``; both indices
    start at 0, so the first row and the first column are all zeros.
    """
    rows = [[r * c for c in range(size)] for r in range(size)]
    return pd.DataFrame(rows)

# Build a 1000 x 1000 multiplication table so there is a larger frame to explore.
big_table = mult_table(1000)

In [None]:
# Per-column summary statistics of the big table.
big_table.describe()

In [None]:
import sqlite3
from os import path, remove
DB_NAME = "stocks.db"

# Start every run from a fresh database file. The removal is attempted
# directly and a missing file is ignored; checking for existence first would
# leave a window in which the file could appear or vanish between the check
# and the removal.
try:
    remove(DB_NAME)
except FileNotFoundError:
    pass

# sqlite3 creates the database file if it does not exist yet.
connection = sqlite3.connect(DB_NAME)

In [None]:
# Cursor on the SQLite connection; the following cells use it to run SQL statements.
cursor = connection.cursor()

In [None]:
# Create the `stocks` table: date, trans and symbol are text columns, qty and price are real numbers.
cursor.execute('''CREATE TABLE stocks (date text, trans text, symbol text, qty real, price real)''')

In [None]:
# Seed the table with one sample row: a BUY of 100 RHAT at 35.14 on 2006-01-05.
cursor.execute("INSERT INTO stocks VALUES ('2006-01-05','BUY','RHAT',100,35.14)")

In [None]:
# Load the whole `stocks` table into a DataFrame through the open connection,
# then display it.
stocks = pd.read_sql(sql="SELECT * FROM stocks", con=connection)
stocks

In [None]:
# Show the dtype pandas assigned to each column of the query result.
stocks.dtypes

In [None]:
# Column dtypes of `stocks` (same inspection as the previous cell).
stocks.dtypes

In [None]:
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
# Bar chart of the `qty` column; only one row has been inserted so far.
boring_plot = stocks.qty.plot(kind='bar')

![more data](https://s-media-cache-ak0.pinimg.com/736x/cf/93/eb/cf93ebaa36952a708f6e1851a8e26e9b.jpg)

In [None]:
# Column dtypes of the `stocks` frame loaded from the database.
stocks.dtypes

In [None]:
import random
from datetime import date, timedelta

# Populate the table with 1000 random trades dated in 2015.
# NOTE: the rows are appended to whatever the table already holds; run
# cursor.execute('DELETE FROM stocks') first to start from an empty table.
first_day = date(2015, 1, 1)
rows = []
for _ in range(1000):
    # Offsetting from 1 January only ever yields real calendar days and covers
    # the whole year (2015 has 365 days). isoformat() zero-pads month and day,
    # so sorting the text column also sorts chronologically.
    rand_date = (first_day + timedelta(days=random.randrange(365))).isoformat()
    trans = random.choice(['BUY', 'SELL'])
    symbol = random.choice(['APL', 'RHAT', 'MSFT'])
    qty = random.randrange(1, 1000)
    price = random.random() * 100
    rows.append((rand_date, trans, symbol, qty, price))

# Bind the values through placeholders instead of formatting them into the
# SQL text, so quoting is handled by sqlite3 rather than by repr().
cursor.executemany('INSERT INTO stocks VALUES (?, ?, ?, ?, ?)', rows)

In [None]:
# Re-read the table after the random trades were added and preview the
# first ten rows.
new_stocks = pd.read_sql(sql="SELECT * FROM stocks", con=connection)
new_stocks[:10]

In [None]:
# Plot the price of the APL ("apple") trades over time.
# Keep only the APL rows and the two columns the plot needs.
apple_stocks = new_stocks[new_stocks.symbol == 'APL'][['date', 'price']]

# DataFrame.sort() was removed from pandas; sort_values() is its replacement.
# NOTE: `date` is stored as text, so this ordering is lexicographic.
apple_stocks.sort_values('date').plot(x='date', y='price')

### And finally export to Excel

In [None]:
new_stocks.to_excel('output.xls')

In [None]:
!libreoffice output.xls