In [1]:
import pandas as pd
import numpy as np

We saw that if we don't supply an index (a set of row keys, which do not have to be unique), pandas automatically creates a default one equivalent to `np.arange(n)`. Let's initialize a DataFrame and see what the index does for us.  

In [7]:
# Two yearly observations per city; rows are labelled by year (as strings)
# instead of the default positional 0..n-1 index.
years = ['2012', '2013']
area_dict = {
    'Copenhagen': np.array([423967, 540000]),
    'Oslo': np.array([454552, 2342341]),
}
df = pd.DataFrame(data=area_dict, index=years)
print(df)

      Copenhagen     Oslo
2012      423967   454552
2013      540000  2342341


In [8]:
# Show the row labels of `df`: an Index object holding the year strings.
print(df.index)

Index(['2012', '2013'], dtype='object')


Let's try to change the index 2012 to 2011.

In [9]:
# A pandas Index is immutable, so item assignment is expected to fail.
# The TypeError is caught and printed so that the demonstration stays
# visible while the notebook can still be run top to bottom ("Run All").
try:
    df.index[0] = 2011
except TypeError as err:
    print("TypeError:", err)

TypeError: Index does not support mutable operations

This makes it "safe" to share an index between multiple DataFrames, e.g. df2 = pd.DataFrame(np.array([[123,23],[123,15]]), index=df.index)

In [11]:
N = 20
SEED = 42  # fixed seed: the random columns are identical on every run

# A single seeded Generator feeds every random column, so re-running the
# notebook reproduces exactly the same frame.
rng = np.random.default_rng(SEED)

df = pd.DataFrame({
    'A': pd.date_range(start='2016-01-01', periods=N, freq='D'),
    'x': np.linspace(0, stop=N-1, num=N),
    'y': rng.random(N),
    'C': rng.choice(['Low', 'Medium', 'High'], size=N).tolist(),
    'D': rng.normal(100, 10, size=N).tolist(),
})

print(df)

# reindex() returns a NEW DataFrame. The result of this call is discarded on
# purpose: printing `df` right after shows that the original is untouched.
df.reindex(index=[0, 2, 5], columns=['A', 'C', 'B'])
print(df)

# This time keep the result. Only rows 0, 2 and 5 are selected; 'B' is not a
# column of `df`, so it is created and filled with NaN.
df_reindexed = df.reindex(index=[0, 2, 5], columns=['A', 'C', 'B'])

print(df_reindexed)
print(df)

            A     x         y       C           D
0  2016-01-01   0.0  0.353108  Medium  102.330961
1  2016-01-02   1.0  0.476267     Low   98.144807
2  2016-01-03   2.0  0.934286     Low   99.963559
3  2016-01-04   3.0  0.229305     Low   96.339177
4  2016-01-05   4.0  0.428432     Low   74.617710
5  2016-01-06   5.0  0.704079     Low   91.886425
6  2016-01-07   6.0  0.663186    High  105.101330
7  2016-01-08   7.0  0.671174    High  117.767696
8  2016-01-09   8.0  0.410049    High  108.324860
9  2016-01-10   9.0  0.327010    High   95.709702
10 2016-01-11  10.0  0.453116  Medium  109.682390
11 2016-01-12  11.0  0.980820     Low  114.095773
12 2016-01-13  12.0  0.557528    High   95.541339
13 2016-01-14  13.0  0.525147  Medium  106.705714
14 2016-01-15  14.0  0.235280  Medium  103.221339
15 2016-01-16  15.0  0.738980    High   85.608438
16 2016-01-17  16.0  0.514501     Low  107.532875
17 2016-01-18  17.0  0.307569     Low  118.031047
18 2016-01-19  18.0  0.486414     Low   89.081890


In a DataFrame, time is often the natural index, i.e. the data form a time series. Let's see if we can use the index system with datetime objects.

In [12]:
from datetime import datetime, date, time

In [13]:
# Spell out the calendar and clock components, then assemble the timestamp.
year, month, day = 2015, 1, 20
hour, minute, second = 7, 28, 15

dt = datetime(year=year, month=month, day=day,
              hour=hour, minute=minute, second=second)

In [17]:
# Each time component is a plain attribute; the tuple is the cell's output.
dt.hour, dt.minute, dt.second

(7, 28, 15)

In [16]:
# .date() drops the time-of-day part and returns a datetime.date.
dt.date()

datetime.date(2015, 1, 20)

Let's print it out in a string format.

In [18]:
# Format as month/day/year hour:minute (zero-padded, 24-hour clock).
dt.strftime('%m/%d/%Y %H:%M')

'01/20/2015 07:28'

We can also convert a string to a datetime.

In [19]:
# Parse a compact YYYYMMDD string; fields absent from the format (hour,
# minute, ...) default to 0, i.e. midnight.
datetime.strptime('20150120', '%Y%m%d')

datetime.datetime(2015, 1, 20, 0, 0)

`datetime` can also give us the current time, which lets us compute how much time has elapsed since `dt`.

In [21]:
# datetime.now() reads the current local time, so the printed value changes
# on every run.
dt_now = datetime.now()
# Subtracting two datetimes yields the elapsed time between them.
delta = dt_now - dt
print(delta)

2039 days, 7:48:32.504124


Another good thing is that pandas has some datetime functions of its own.

In [23]:
# Parse a YYYYMMDD string into a pandas Timestamp.
# errors='ignore' was dropped: it is deprecated in recent pandas and only
# affected unparseable input (returning it unchanged instead of raising),
# which cannot happen for this literal.
pd.to_datetime('20150120', format='%Y%m%d')

Timestamp('2015-01-20 00:00:00')

In [24]:
# Three consecutive hourly timestamps starting at midnight.
# The lowercase alias 'h' is used because the uppercase 'H' is deprecated in
# recent pandas; newer versions therefore show freq='h' in the repr.
pd.date_range('2018-01-01', periods=3, freq='h')

DatetimeIndex(['2018-01-01 00:00:00', '2018-01-01 01:00:00',
               '2018-01-01 02:00:00'],
              dtype='datetime64[ns]', freq='H')

In [28]:
# Three consecutive days starting 2000-01-01 serve as the row labels.
daily_index = pd.date_range('2000', periods=3, freq='D')
s = pd.Series(data=range(3), index=daily_index)
print(s)

2000-01-01    0
2000-01-02    1
2000-01-03    2
Freq: D, dtype: int64


In [29]:
# The index of `s` is displayed as the cell output.
s.index

DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03'], dtype='datetime64[ns]', freq='D')

In [33]:
# Label-based lookups on the DatetimeIndex, written with explicit .loc.

# A date string that identifies one day selects a single value.
print(s.loc['1/1/2000'])

# A datetime object is accepted as a label too.
print(s.loc[datetime(2000, 1, 1)])

# A year-only string selects every entry that falls within that year.
print(s.loc['2000'])

0
0
2000-01-01    0
2000-01-02    1
2000-01-03    2
Freq: D, dtype: int64


We have thus seen that pandas is essentially NumPy with an index: we can label the data through the index and store a different dtype in each column.  