# Pandas Learning

In [1]:
import pandas as pd
import numpy as np

## Object Creation

#### Creating a Series by passing a list of values, letting pandas create a default integer index:

In [12]:
# The np.nan entry stands in for a missing value; pandas assigns the default
# 0..n-1 integer index because none is supplied.
s = pd.Series(data=[1, 3, 5, np.nan])
s

0    1.0
1    3.0
2    5.0
3    NaN
dtype: float64

#### Creating a DataFrame by passing a NumPy array, with a datetime index and labeled columns:

In [13]:
# Six consecutive calendar days starting on 2013-01-01; used as the row index
# of the DataFrame built in the next cell.
dates = pd.date_range(start='2013-01-01', periods=6, freq='D')
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [15]:
# Draw the 6x4 standard-normal sample from a locally seeded generator so that
# this frame -- and every output derived from it further down -- is identical
# on each Restart & Run All. The unseeded np.random.randn call produced
# different numbers on every run.
rng = np.random.default_rng(seed=42)
df = pd.DataFrame(rng.standard_normal((6, 4)), index=dates, columns=list('ABCD'))
df

Unnamed: 0,A,B,C,D
2013-01-01,0.868306,0.864877,-0.816898,0.640774
2013-01-02,0.062956,0.114967,0.795878,-0.670062
2013-01-03,-0.575924,0.381107,-2.84547,-1.086379
2013-01-04,-1.665099,-0.211675,0.164674,0.254328
2013-01-05,0.062845,0.340384,1.264154,-0.995906
2013-01-06,-1.388159,-0.565981,-1.186834,-0.415051


#### Creating a DataFrame by passing a dict of objects that can each be converted to a Series-like structure:

In [17]:
# Each dict key becomes a column. The scalar values (A, B, F) are repeated for
# every row, while C, D and E each supply four values of their own.
df2 = pd.DataFrame(
    data={
        'A': 1.0,
        'B': pd.Timestamp('20130102'),
        'C': pd.Series(1, index=list(range(4)), dtype='float32'),
        'D': np.full(4, 3, dtype='int32'),
        'E': pd.Categorical(['test', 'train'] * 2),
        'F': 'foo',
    }
)

df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [21]:
# Confirm that the dict-based constructor produced a DataFrame.
type(df2)

pandas.core.frame.DataFrame

In [22]:
# Unlike the all-float frame `df`, each column of df2 has its own dtype.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

## Viewing Data

In [24]:
# Preview the top of the frame; with no argument head() returns the first 5 rows.
df.head()

Unnamed: 0,A,B,C,D
2013-01-01,0.868306,0.864877,-0.816898,0.640774
2013-01-02,0.062956,0.114967,0.795878,-0.670062
2013-01-03,-0.575924,0.381107,-2.84547,-1.086379
2013-01-04,-1.665099,-0.211675,0.164674,0.254328
2013-01-05,0.062845,0.340384,1.264154,-0.995906


In [25]:
# Row labels: the DatetimeIndex that was passed as `index=dates`.
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [26]:
# Column labels: the 'A'..'D' names passed to the constructor.
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [32]:
# Per-column summary statistics: count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,-0.439179,0.153946,-0.437416,-0.378716
std,0.962894,0.499054,1.502213,0.693977
min,-1.665099,-0.565981,-2.84547,-1.086379
25%,-1.1851,-0.130015,-1.09435,-0.914445
50%,-0.25654,0.227675,-0.326112,-0.542557
75%,0.062928,0.370927,0.638077,0.086984
max,0.868306,0.864877,1.264154,0.640774
