In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
%matplotlib notebook

In [3]:
df = pd.DataFrame({'key1': ['a','b','b','a','a'],
                   'key2': ['one', 'two', 'one', 'two', 'two'],
                  'data1': np.random.randn(5),
                  'data2': np.random.randn(5)})

In [4]:
df

Unnamed: 0,key1,key2,data1,data2
0,a,one,1.073478,0.224846
1,b,two,-0.498127,1.439343
2,b,one,-0.447999,-0.405143
3,a,two,-1.056031,-1.181328
4,a,two,1.055335,1.468757


In [6]:
df['data1'].groupby(df['key1']).mean()

key1
a    0.357594
b   -0.473063
Name: data1, dtype: float64

In [7]:
df.groupby(df['key1']).mean()

Unnamed: 0_level_0,data1,data2
key1,Unnamed: 1_level_1,Unnamed: 2_level_1
a,0.357594,0.170758
b,-0.473063,0.5171


In [8]:
df.groupby(['key1', 'key2']).max()

Unnamed: 0_level_0,Unnamed: 1_level_0,data1,data2
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,one,1.073478,0.224846
a,two,1.055335,1.468757
b,one,-0.447999,-0.405143
b,two,-0.498127,1.439343


In [9]:
df.groupby(['key1', 'key2']).max().unstack()

Unnamed: 0_level_0,data1,data1,data2,data2
key2,one,two,one,two
key1,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1.073478,1.055335,0.224846,1.468757
b,-0.447999,-0.498127,-0.405143,1.439343


In [10]:
for key, group in df.groupby('key1'):
    print(key, group)
    print('-' * 50)

a   key1 key2     data1     data2
0    a  one  1.073478  0.224846
3    a  two -1.056031 -1.181328
4    a  two  1.055335  1.468757
--------------------------------------------------
b   key1 key2     data1     data2
1    b  two -0.498127  1.439343
2    b  one -0.447999 -0.405143
--------------------------------------------------


In [13]:
for (key1, key2), group in df.groupby(['key1', 'key2']):
    print(key1, key2, group)
    print('-' * 50)

a one   key1 key2     data1     data2
0    a  one  1.073478  0.224846
--------------------------------------------------
a two   key1 key2     data1     data2
3    a  two -1.056031 -1.181328
4    a  two  1.055335  1.468757
--------------------------------------------------
b one   key1 key2     data1     data2
2    b  one -0.447999 -0.405143
--------------------------------------------------
b two   key1 key2     data1     data2
1    b  two -0.498127  1.439343
--------------------------------------------------


In [15]:
list(df.groupby('key1'))

[('a',   key1 key2     data1     data2
  0    a  one  1.073478  0.224846
  3    a  two -1.056031 -1.181328
  4    a  two  1.055335  1.468757), ('b',   key1 key2     data1     data2
  1    b  two -0.498127  1.439343
  2    b  one -0.447999 -0.405143)]

In [16]:
dict(list(df.groupby('key1')))

{'a':   key1 key2     data1     data2
 0    a  one  1.073478  0.224846
 3    a  two -1.056031 -1.181328
 4    a  two  1.055335  1.468757, 'b':   key1 key2     data1     data2
 1    b  two -0.498127  1.439343
 2    b  one -0.447999 -0.405143}

In [17]:
data_grouped = df.groupby('key1')

In [19]:
data_grouped['data1'].quantile(.1)

key1
a   -0.633758
b   -0.493115
Name: data1, dtype: float64

In [20]:
def range_min_max(a):
    return a.max() - a.min()

data_grouped.agg(range_min_max)

Unnamed: 0_level_0,data1,data2
key1,Unnamed: 1_level_1,Unnamed: 2_level_1
a,2.129509,2.650085
b,0.050128,1.844486


In [23]:
def weighted_mean(a):
    weights = np.array([.5, .4, .1])
    b = a * weights[:a.size]
    return b.sum()

In [24]:
df.groupby('key1').agg(weighted_mean)

Unnamed: 0_level_0,data1,data2
key1,Unnamed: 1_level_1,Unnamed: 2_level_1
a,0.21986,-0.213233
b,-0.428263,0.557614


In [27]:
df.groupby(['key1','key2'], as_index=False).mean()

Unnamed: 0,key1,key2,data1,data2
0,a,one,1.073478,0.224846
1,a,two,-0.000348,0.143714
2,b,one,-0.447999,-0.405143
3,b,two,-0.498127,1.439343


In [28]:
from datetime import datetime

In [30]:
now = datetime.now()

In [31]:
now.hour

18

In [32]:
from datetime import timedelta

In [33]:
start = datetime(2019, 1, 11)

In [34]:
start + timedelta(12)

datetime.datetime(2019, 1, 23, 0, 0)

In [36]:
start.strftime('%Y-%m-%d')

'2019-01-11'

In [37]:
import dateutil

In [38]:
dateutil.parser.parse('2019-1-11')

datetime.datetime(2019, 1, 11, 0, 0)

In [39]:
dateutil.parser.parse('11/1/2019', dayfirst=True)

datetime.datetime(2019, 1, 11, 0, 0)

In [42]:
datetime.strptime('Sep 2019/01', '%b %Y/%d')

datetime.datetime(2019, 9, 1, 0, 0)

In [41]:
dates = [datetime(2011, 1, 2), datetime(2011, 1, 5), datetime(2011, 1, 7), datetime(2011, 1, 8),
        datetime(2011, 1, 10), datetime(2011, 1, 12)]
ts = pd.Series(np.random.randn(6), index=dates)
ts

2011-01-02   -0.236447
2011-01-05   -1.985092
2011-01-07   -0.199936
2011-01-08    1.554183
2011-01-10    1.884657
2011-01-12   -0.925958
dtype: float64

In [43]:
ts.index

DatetimeIndex(['2011-01-02', '2011-01-05', '2011-01-07', '2011-01-08',
               '2011-01-10', '2011-01-12'],
              dtype='datetime64[ns]', freq=None)

In [44]:
ts[::2]

2011-01-02   -0.236447
2011-01-07   -0.199936
2011-01-10    1.884657
dtype: float64

In [45]:
ts + ts[::2]

2011-01-02   -0.472894
2011-01-05         NaN
2011-01-07   -0.399873
2011-01-08         NaN
2011-01-10    3.769314
2011-01-12         NaN
dtype: float64

In [46]:
ts['2011-1-10']

1.8846567942124686

In [47]:
ts['20110110']

1.8846567942124686

In [48]:
ts['20110110':]

2011-01-10    1.884657
2011-01-12   -0.925958
dtype: float64

In [49]:
ventas = [1, 2, 3, 2, 1, 1, 2]

In [55]:
ts_ventas = pd.Series(ventas, index=pd.date_range('2019-1-1', periods=len(ventas)))

In [56]:
ts_ventas

2019-01-01    1
2019-01-02    2
2019-01-03    3
2019-01-04    2
2019-01-05    1
2019-01-06    1
2019-01-07    2
Freq: D, dtype: int64

In [57]:
ts_ventas['2019-1-1':'2019-1-5'].mean()

1.8

In [58]:
ts_ventas.index[0].weekday()

1

In [59]:
df_ventas = pd.DataFrame({'ventas': ts_ventas})

In [60]:
df_ventas

Unnamed: 0,ventas
2019-01-01,1
2019-01-02,2
2019-01-03,3
2019-01-04,2
2019-01-05,1
2019-01-06,1
2019-01-07,2


In [61]:
df_ventas['workday'] = df_ventas.index.weekday < 5

In [62]:
df_ventas

Unnamed: 0,ventas,workday
2019-01-01,1,True
2019-01-02,2,True
2019-01-03,3,True
2019-01-04,2,True
2019-01-05,1,False
2019-01-06,1,False
2019-01-07,2,True


In [64]:
df_ventas[['workday']].applymap(lambda x: 'Laborable' if x else 'Finde')

Unnamed: 0,workday
2019-01-01,Laborable
2019-01-02,Laborable
2019-01-03,Laborable
2019-01-04,Laborable
2019-01-05,Finde
2019-01-06,Finde
2019-01-07,Laborable


In [66]:
pd.date_range(end='2012-04-1', periods=20)

DatetimeIndex(['2012-03-13', '2012-03-14', '2012-03-15', '2012-03-16',
               '2012-03-17', '2012-03-18', '2012-03-19', '2012-03-20',
               '2012-03-21', '2012-03-22', '2012-03-23', '2012-03-24',
               '2012-03-25', '2012-03-26', '2012-03-27', '2012-03-28',
               '2012-03-29', '2012-03-30', '2012-03-31', '2012-04-01'],
              dtype='datetime64[ns]', freq='D')

In [67]:
pd.date_range(end='2012-04-1', periods=20, freq='H')

DatetimeIndex(['2012-03-31 05:00:00', '2012-03-31 06:00:00',
               '2012-03-31 07:00:00', '2012-03-31 08:00:00',
               '2012-03-31 09:00:00', '2012-03-31 10:00:00',
               '2012-03-31 11:00:00', '2012-03-31 12:00:00',
               '2012-03-31 13:00:00', '2012-03-31 14:00:00',
               '2012-03-31 15:00:00', '2012-03-31 16:00:00',
               '2012-03-31 17:00:00', '2012-03-31 18:00:00',
               '2012-03-31 19:00:00', '2012-03-31 20:00:00',
               '2012-03-31 21:00:00', '2012-03-31 22:00:00',
               '2012-03-31 23:00:00', '2012-04-01 00:00:00'],
              dtype='datetime64[ns]', freq='H')

In [68]:
pd.date_range(end='2012-04-1', periods=20, freq='1h30min')

DatetimeIndex(['2012-03-30 19:30:00', '2012-03-30 21:00:00',
               '2012-03-30 22:30:00', '2012-03-31 00:00:00',
               '2012-03-31 01:30:00', '2012-03-31 03:00:00',
               '2012-03-31 04:30:00', '2012-03-31 06:00:00',
               '2012-03-31 07:30:00', '2012-03-31 09:00:00',
               '2012-03-31 10:30:00', '2012-03-31 12:00:00',
               '2012-03-31 13:30:00', '2012-03-31 15:00:00',
               '2012-03-31 16:30:00', '2012-03-31 18:00:00',
               '2012-03-31 19:30:00', '2012-03-31 21:00:00',
               '2012-03-31 22:30:00', '2012-04-01 00:00:00'],
              dtype='datetime64[ns]', freq='90T')

In [69]:
import pytz
pytz.common_timezones

['Africa/Abidjan', 'Africa/Accra', 'Africa/Addis_Ababa', 'Africa/Algiers', 'Africa/Asmara', 'Africa/Bamako', 'Africa/Bangui', 'Africa/Banjul', 'Africa/Bissau', 'Africa/Blantyre', 'Africa/Brazzaville', 'Africa/Bujumbura', 'Africa/Cairo', 'Africa/Casablanca', 'Africa/Ceuta', 'Africa/Conakry', 'Africa/Dakar', 'Africa/Dar_es_Salaam', 'Africa/Djibouti', 'Africa/Douala', 'Africa/El_Aaiun', 'Africa/Freetown', 'Africa/Gaborone', 'Africa/Harare', 'Africa/Johannesburg', 'Africa/Juba', 'Africa/Kampala', 'Africa/Khartoum', 'Africa/Kigali', 'Africa/Kinshasa', 'Africa/Lagos', 'Africa/Libreville', 'Africa/Lome', 'Africa/Luanda', 'Africa/Lubumbashi', 'Africa/Lusaka', 'Africa/Malabo', 'Africa/Maputo', 'Africa/Maseru', 'Africa/Mbabane', 'Africa/Mogadishu', 'Africa/Monrovia', 'Africa/Nairobi', 'Africa/Ndjamena', 'Africa/Niamey', 'Africa/Nouakchott', 'Africa/Ouagadougou', 'Africa/Porto-Novo', 'Africa/Sao_Tome', 'Africa/Tripoli', 'Africa/Tunis', 'Africa/Windhoek', 'America/Adak', 'America/Anchorage', 'Amer

In [70]:
ts.resample('2d').sum()

2019-01-01    3
2019-01-03    5
2019-01-05    2
2019-01-07    2
Freq: 2D, dtype: int64