In [1]:
import numpy as np
import pandas as pd


In [2]:
s = pd.Series([1, 3, 5, np.nan, 6, 8])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [3]:
dates = pd.date_range('20130101', periods=6)
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [4]:
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
df

Unnamed: 0,A,B,C,D
2013-01-01,-2.286647,0.626342,-0.100411,0.648326
2013-01-02,0.963287,0.131503,-1.720733,0.882626
2013-01-03,0.859876,0.247565,-1.611934,-0.37436
2013-01-04,-0.068866,-0.093035,-1.019193,-0.115774
2013-01-05,-0.124179,-0.952984,-0.756521,0.23356
2013-01-06,-0.442669,2.101494,0.502043,1.843593


In [5]:
df2 = pd.DataFrame({'A': 1.,
                    'B': pd.Timestamp('20130102'),
                    'C': pd.Series(1, index=list(range(4)), dtype='float32'),
                    'D': np.array([3] * 4, dtype='int32'),
                    'E': pd.Categorical(["test", "train", "test", "train"]),
                    'F': 'foo'}) 
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [6]:
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [7]:
df2.columns

Index(['A', 'B', 'C', 'D', 'E', 'F'], dtype='object')

In [8]:
df.head()

Unnamed: 0,A,B,C,D
2013-01-01,-2.286647,0.626342,-0.100411,0.648326
2013-01-02,0.963287,0.131503,-1.720733,0.882626
2013-01-03,0.859876,0.247565,-1.611934,-0.37436
2013-01-04,-0.068866,-0.093035,-1.019193,-0.115774
2013-01-05,-0.124179,-0.952984,-0.756521,0.23356


In [9]:
df.tail(3)

Unnamed: 0,A,B,C,D
2013-01-04,-0.068866,-0.093035,-1.019193,-0.115774
2013-01-05,-0.124179,-0.952984,-0.756521,0.23356
2013-01-06,-0.442669,2.101494,0.502043,1.843593


In [10]:
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [11]:
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [12]:
df.to_numpy()

array([[-2.28664674,  0.62634211, -0.10041056,  0.64832646],
       [ 0.96328687,  0.13150278, -1.72073278,  0.88262615],
       [ 0.85987581,  0.24756453, -1.61193378, -0.37435998],
       [-0.06886595, -0.09303496, -1.01919328, -0.11577375],
       [-0.12417879, -0.95298441, -0.75652138,  0.23356031],
       [-0.44266925,  2.10149446,  0.50204257,  1.84359301]])

In [13]:
df2.to_numpy()

array([[1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo']],
      dtype=object)

In [14]:
# create a dataframe with geo-numeric data
data = pd.DataFrame({'region': ['east', 'west', 'north', 'south'],
                     'gdp': [2000, 3245, 3421, 2099]})
data['gdp']

0    2000
1    3245
2    3421
3    2099
Name: gdp, dtype: int64

In [15]:
data.columns

Index(['region', 'gdp'], dtype='object')

In [16]:
data.iloc(0)

<pandas.core.indexing._iLocIndexer at 0x114c745e0>

In [17]:
for x in data.iloc[0]:
    print(x)

east
2000


In [18]:
data['gdp'] = 2 * data['gdp']

In [19]:
data['gdp']

0    4000
1    6490
2    6842
3    4198
Name: gdp, dtype: int64

In [20]:
# sorting on a Dataframe
data.sort_values(by='gdp', ascending=False)

Unnamed: 0,region,gdp
2,north,6842
1,west,6490
3,south,4198
0,east,4000


In [21]:
gdp = data.gdp.iloc[:]
gdp

0    4000
1    6490
2    6842
3    4198
Name: gdp, dtype: int64

In [23]:
np.average(gdp, axis=0)

5382.5