In [1]:
import numpy as np
import pandas as pd


In [2]:
s = pd.Series([1, 3, 5, np.nan, 6, 8])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [3]:
dates = pd.date_range('20130101', periods=6)
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [4]:
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
df

Unnamed: 0,A,B,C,D
2013-01-01,1.649093,1.334174,1.423481,1.220944
2013-01-02,0.250969,-0.336454,0.496243,-0.335951
2013-01-03,0.671674,0.903201,1.646538,-0.095725
2013-01-04,1.640086,0.435207,1.169858,-0.781295
2013-01-05,1.268859,-1.000114,-0.995906,0.007204
2013-01-06,-0.017515,-0.221595,2.138903,1.605365


In [5]:
df2 = pd.DataFrame({'A': 1.,
                    'B': pd.Timestamp('20130102'),
                    'C': pd.Series(1, index=list(range(4)), dtype='float32'),
                    'D': np.array([3] * 4, dtype='int32'),
                    'E': pd.Categorical(["test", "train", "test", "train"]),
                    'F': 'foo'}) 
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [6]:
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [7]:
df2.columns

Index(['A', 'B', 'C', 'D', 'E', 'F'], dtype='object')

In [8]:
df.head()

Unnamed: 0,A,B,C,D
2013-01-01,1.649093,1.334174,1.423481,1.220944
2013-01-02,0.250969,-0.336454,0.496243,-0.335951
2013-01-03,0.671674,0.903201,1.646538,-0.095725
2013-01-04,1.640086,0.435207,1.169858,-0.781295
2013-01-05,1.268859,-1.000114,-0.995906,0.007204


In [9]:
df.tail(3)

Unnamed: 0,A,B,C,D
2013-01-04,1.640086,0.435207,1.169858,-0.781295
2013-01-05,1.268859,-1.000114,-0.995906,0.007204
2013-01-06,-0.017515,-0.221595,2.138903,1.605365


In [10]:
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [11]:
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [12]:
df.to_numpy()

array([[ 1.64909253,  1.33417397,  1.42348091,  1.22094383],
       [ 0.25096891, -0.33645417,  0.49624311, -0.33595139],
       [ 0.6716744 ,  0.90320116,  1.64653794, -0.09572536],
       [ 1.64008625,  0.43520741,  1.1698577 , -0.78129512],
       [ 1.2688587 , -1.00011425, -0.99590623,  0.00720419],
       [-0.01751549, -0.221595  ,  2.13890267,  1.60536509]])

In [13]:
df2.to_numpy()

array([[1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo']],
      dtype=object)

In [14]:
# create a dataframe with geo-numeric data
data = pd.DataFrame({'region': ['east', 'west', 'north', 'south'],
                     'gdp': [2000, 3245, 3421, 2099]})
data['gdp']

0    2000
1    3245
2    3421
3    2099
Name: gdp, dtype: int64

In [15]:
data.columns

Index(['region', 'gdp'], dtype='object')

In [16]:
data.iloc(0)

<pandas.core.indexing._iLocIndexer at 0x11af1c950>

In [18]:
for x in data.iloc[0]:
    print(x)

east
2000


In [19]:
data['gdp'] = 2 * data['gdp']

In [20]:
data['gdp']

0    4000
1    6490
2    6842
3    4198
Name: gdp, dtype: int64

In [21]:
# sorting on a Dataframe
data.sort_values(by='gdp', ascending=False)

Unnamed: 0,region,gdp
2,north,6842
1,west,6490
3,south,4198
0,east,4000


In [22]:
gdp = data.gdp.iloc[:]
gdp

0    4000
1    6490
2    6842
3    4198
Name: gdp, dtype: int64

In [23]:
np.average(gdp, axis=0)

5382.5