In [1]:
import pandas as pd

# Group an ID by consecutive dates
Calculate the number of consecutive days for a given ID. If there is a gap of days for an ID, we should capture both streaks as different rows

In [26]:
df1 = pd.DataFrame({'ID': [1, 1, 1, 1, 2, 2, 2, 2],
                    'Date': ['2017-01-07', '2017-01-08', '2017-01-09', '2017-01-23',
                             '2017-01-05', '2017-01-06', '2017-01-10', '2017-01-11']
                    })
df1['Date'] = pd.to_datetime(df1['Date'])
df1

Unnamed: 0,ID,Date
0,1,2017-01-07
1,1,2017-01-08
2,1,2017-01-09
3,1,2017-01-23
4,2,2017-01-05
5,2,2017-01-06
6,2,2017-01-10
7,2,2017-01-11


## Method 1: using diff for datetime datatype

In [41]:
# Solution: Create a Series which checks for the difference between Dates within each ID. Check if that's not 1 day (ne method), and then groupby the ID and the cumulative sum of that Series.

# 1. Is there more than 1 day difference with the previous day? (use the not equal method ne(1))
df1['is_there_more_than_one_day_difference'] = df1.groupby('ID')['Date'].diff().dt.days.ne(1)

# 2. Group the booleans by using cumsum()
df1['grouped_streaks'] = df1['is_there_more_than_one_day_difference'].cumsum()

# Calculate the size of each grouped_streaks by ID
df1['size_of_streaks'] = df1.groupby(['ID', 'grouped_streaks'])['grouped_streaks'].transform('size')

# With this we could extract, for each ID, what is the longest streak
df1['longest_streak'] = df1.groupby('ID')['size_of_streaks'].rank(method='dense', ascending=False)
df1

Unnamed: 0,ID,Date,is_there_more_than_one_day_difference,grouped_streaks,size_of_streaks,longest_streak
0,1,2017-01-07,True,1,3,1.0
1,1,2017-01-08,False,1,3,1.0
2,1,2017-01-09,False,1,3,1.0
3,1,2017-01-23,True,2,1,2.0
4,2,2017-01-05,True,3,2,1.0
5,2,2017-01-06,False,3,2,1.0
6,2,2017-01-10,True,4,2,1.0
7,2,2017-01-11,False,4,2,1.0


In [45]:
df1[['ID', 'size_of_streaks', 'longest_streak']].drop_duplicates().sort_values(['ID','longest_streak'])

Unnamed: 0,ID,size_of_streaks,longest_streak
0,1,3,1.0
3,1,1,2.0
4,2,2,1.0


# Groupby an ID by consecutive events
For example, wins and losses

In [64]:
df2 = pd.DataFrame({'Group':['A','A', 'A','A','A','A','B','B','B','B','B','B','B'],
                   'Score':['win', 'loss', 'loss', 'loss', 'win', 'win', 'win', 'win', 'win', 'loss', 'win', 'loss', 'loss']})
df2

Unnamed: 0,Group,Score
0,A,win
1,A,loss
2,A,loss
3,A,loss
4,A,win
5,A,win
6,B,win
7,B,win
8,B,win
9,B,loss


## Overall win streak

In [65]:
# 1. Extract previous score by using the shift() method
df2['previous_score'] = df2['Score'].shift(periods=1)

# 2. Compare if they are equal
df2['is_score_equal_to_previous'] = df2['Score'] != df2['previous_score']

# 3. Calculate the grouped scores streaks by using cumsum() and the booleans from is_score_equal_to_previous
df2['equal_grouped_scores'] = df2['is_score_equal_to_previous'].cumsum()

# 4. Calculate the streaks
df2['streaks'] = df2.groupby('equal_grouped_scores')['Score'].cumcount()+1

df2

Unnamed: 0,Group,Score,previous_score,is_score_equal_to_previous,equal_grouped_scores,streaks
0,A,win,,True,1,1
1,A,loss,win,True,2,1
2,A,loss,loss,False,2,2
3,A,loss,loss,False,2,3
4,A,win,loss,True,3,1
5,A,win,win,False,3,2
6,B,win,win,False,3,3
7,B,win,win,False,3,4
8,B,win,win,False,3,5
9,B,loss,win,True,4,1


## Win streak by group

In [68]:
df2 = pd.DataFrame({'Group':['A','A', 'A','A','A','A','B','B','B','B','B','B','B'],
                   'Score':['win', 'loss', 'loss', 'loss', 'win', 'win', 'win', 'win', 'win', 'loss', 'win', 'loss', 'loss']})

# 1. Extract previous score by using the shift() method
df2['previous_score'] = df2.groupby(['Group'])['Score'].shift(periods=1)

# 2. Compare if they are equal
df2['is_score_equal_to_previous'] = df2['Score'] != df2['previous_score']

# 3. Calculate the grouped scores streaks by using cumsum() and the booleans from is_score_equal_to_previous
df2['equal_grouped_scores'] = df2['is_score_equal_to_previous'].cumsum()

# 4. Calculate the streaks
df2['streaks'] = df2.groupby('equal_grouped_scores')['Score'].cumcount()+1

df2

Unnamed: 0,Group,Score,previous_score,is_score_equal_to_previous,equal_grouped_scores,streaks
0,A,win,,True,1,1
1,A,loss,win,True,2,1
2,A,loss,loss,False,2,2
3,A,loss,loss,False,2,3
4,A,win,loss,True,3,1
5,A,win,win,False,3,2
6,B,win,,True,4,1
7,B,win,win,False,4,2
8,B,win,win,False,4,3
9,B,loss,win,True,5,1
