In [2]:
import pandas as pd
import warnings
warnings.filterwarnings("ignore")
import numpy as np

# Sample fail_date values, one row per date, stored as datetime64[ns].
failed = pd.DataFrame(
    {'fail_date': ['2018-12-28', '2018-12-29', '2019-01-04', '2019-01-05']}
).astype({'fail_date': 'datetime64[ns]'})

# Sample success_date values, one row per date, stored as datetime64[ns].
succeeded = pd.DataFrame(
    {'success_date': ['2018-12-30', '2018-12-31', '2019-01-01',
                      '2019-01-02', '2019-01-03', '2019-01-06']}
).astype({'success_date': 'datetime64[ns]'})

# Bounds of the date range of interest, as Timestamps.
range_start = pd.Timestamp("2019-01-01")
range_end = pd.Timestamp("2019-12-31")

display(failed, succeeded)

Unnamed: 0,fail_date
0,2018-12-28
1,2018-12-29
2,2019-01-04
3,2019-01-05


Unnamed: 0,success_date
0,2018-12-30
1,2018-12-31
2,2019-01-01
3,2019-01-02
4,2019-01-03
5,2019-01-06


**Step 1: Add a state Column to the failed DataFrame**
- A new column, state, is added to the failed DataFrame with the value 'failed' for all rows.

**Step 2: Rename the fail_date Column in the failed DataFrame**
- The column fail_date in the failed DataFrame is renamed to date.

In [3]:
# Bring the frame to the shared (date, state) layout: rename fail_date to date
# and label every row with the constant state 'failed'.
failed = (
    failed
    .rename(columns={'fail_date': 'date'})
    .assign(state='failed')
)
display(failed)

Unnamed: 0,date,state
0,2018-12-28,failed
1,2018-12-29,failed
2,2019-01-04,failed
3,2019-01-05,failed


**Step 3: Add a state Column to the succeeded DataFrame**
- A new column, state, is added to the succeeded DataFrame with the value 'succeeded' for all rows.

**Step 4: Rename the success_date Column in the succeeded DataFrame**
- The column success_date in the succeeded DataFrame is renamed to date.


In [4]:
# Bring the frame to the shared (date, state) layout: rename success_date to date
# and label every row with the constant state 'succeeded'.
succeeded = (
    succeeded
    .rename(columns={'success_date': 'date'})
    .assign(state='succeeded')
)
display(succeeded)

Unnamed: 0,date,state
0,2018-12-30,succeeded
1,2018-12-31,succeeded
2,2019-01-01,succeeded
3,2019-01-02,succeeded
4,2019-01-03,succeeded
5,2019-01-06,succeeded


**Step 5: Combine the failed and succeeded DataFrames**
- The two DataFrames, failed and succeeded, are concatenated into a single DataFrame, df, stacking their rows.

**Step 6: Sort the Combined DataFrame by the date Column**
- The combined DataFrame df is sorted in ascending order by the date column.

In [5]:
# Stack the two labelled frames (their original index labels are kept, so labels
# repeat) and order the rows by date, earliest first.
df = (
    pd.concat([failed, succeeded])
    .sort_values('date')
)
display(df)

Unnamed: 0,date,state
0,2018-12-28,failed
1,2018-12-29,failed
0,2018-12-30,succeeded
1,2018-12-31,succeeded
2,2019-01-01,succeeded
3,2019-01-02,succeeded
4,2019-01-03,succeeded
2,2019-01-04,failed
3,2019-01-05,failed
5,2019-01-06,succeeded


**Step 7: Filter Rows by Date Range**
- Rows in df are filtered to include only dates between '2019-01-01' and '2019-12-31' (both endpoints inclusive).

In [6]:
# Keep only the rows whose date lies inside the range of interest; `between`
# includes both endpoints by default.
# The bounds come from the range_start / range_end Timestamps defined in the first
# cell instead of repeating the same dates as string literals here.
in_range = df['date'].between(range_start, range_end)
# .copy() makes the result an independent frame, so later cells can add columns
# to it without pandas treating it as a slice of the unfiltered data.
df = df[in_range].copy()
display(df)

Unnamed: 0,date,state
2,2019-01-01,succeeded
3,2019-01-02,succeeded
4,2019-01-03,succeeded
2,2019-01-04,failed
3,2019-01-05,failed
5,2019-01-06,succeeded


**Step 8: Assign a Unique Period Identifier for Consecutive States**
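- A new column, period, is added to identify consecutive groups of the same state: each row's state is compared with the previous row's state (period_state_previous), every change is flagged with a 1 in period_switch, and the cumulative sum of those flags becomes the period number.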

In [7]:
# Number each run of consecutive rows that share the same state.
# Columns are added with .assign, which builds a new frame, rather than by
# writing into the frame produced by the boolean filter in the previous cell
# (in-place writes there raise SettingWithCopyWarning on pandas 2.x).

# State of the row directly above; the first row has no predecessor, so it is missing.
df = df.assign(period_state_previous=df["state"].shift(periods=1))
# 1 where the state differs from the previous row (always 1 on the first row,
# because its previous state is missing), 0 otherwise.
df = df.assign(
    period_switch=np.where(df["state"] != df["period_state_previous"], 1, 0)
)
# Running total of the switch flags: rows in the same run share one period number.
df = df.assign(period=df["period_switch"].cumsum())
display(df)

Unnamed: 0,date,state,period_state_previous,period_switch,period
2,2019-01-01,succeeded,,1,1
3,2019-01-02,succeeded,succeeded,0,1
4,2019-01-03,succeeded,succeeded,0,1
2,2019-01-04,failed,succeeded,1,2
3,2019-01-05,failed,failed,0,2
5,2019-01-06,succeeded,failed,1,3


**Step 9: Group by period and state and Aggregate Start/End Dates**
- The DataFrame is grouped by period and state, and the date column is aggregated to find:
  - The earliest date (min) as start_date.
  - The latest date (max) as end_date.

**Step 10: Reset Index and Rename Columns**
- The index is reset so that period and state become regular columns again, and the state column is renamed to period_state.

In [8]:
# Collapse every (period, state) group to its earliest and latest date, then turn
# the group keys back into ordinary columns and expose state as period_state.
df = (
    df.groupby(['period', 'state'])
    .agg(
        start_date=pd.NamedAgg(column='date', aggfunc='min'),
        end_date=pd.NamedAgg(column='date', aggfunc='max'),
    )
    .reset_index()
    .rename(columns={'state': 'period_state'})
)
display(df)

Unnamed: 0,period,period_state,start_date,end_date
0,1,succeeded,2019-01-01,2019-01-03
1,2,failed,2019-01-04,2019-01-05
2,3,succeeded,2019-01-06,2019-01-06


**Step 11: Select and Reorganize Columns**
- The DataFrame is reduced to three columns: period_state, start_date, and end_date.

In [9]:
# Keep only the three report columns, in this order; the period column is dropped.
df = df.loc[:, ['period_state', 'start_date', 'end_date']]
display(df)

Unnamed: 0,period_state,start_date,end_date
0,succeeded,2019-01-01,2019-01-03
1,failed,2019-01-04,2019-01-05
2,succeeded,2019-01-06,2019-01-06


References:
[1] https://leetcode.com/problems/report-contiguous-dates/description/?lang=pythondata