In [1]:
import sqlite3
import pandas as pd

In [2]:
# Open the SQLite database file and keep a cursor for the raw SQL statements run below.
DATABASE_FILE = 'shonen_jump.sqlite3'
db_connection = sqlite3.connect(DATABASE_FILE)
cursor = db_connection.cursor()

In [3]:
# Same preparation as in update_batches(): number every magazine issue in release order.
# The query returns one row per distinct release date, already sorted, so the default
# 0..n-1 row index doubles as the issue number.
release_dates = pd.read_sql_query(
    "SELECT DISTINCT release_date FROM chapters ORDER BY release_date",
    db_connection,
)
# Turn the row index into an 'issue' column, key the rows by date, and keep only
# the resulting Series (release_date -> issue number).
issues = (
    release_dates
    .reset_index()
    .rename(columns={'index': 'issue'})
    .set_index('release_date')['issue']
)

In [4]:
# Display the Series to check the release_date -> issue number mapping.
issues

release_date
2022-01-16      0
2022-01-23      1
2022-01-30      2
2022-02-06      3
2022-02-13      4
             ... 
2025-09-21    178
2025-09-28    179
2025-10-05    180
2025-10-12    181
2025-10-19    182
Name: issue, Length: 183, dtype: int64

In [5]:
# Read the full absences table; the cells below decide which absences form hiatuses.
absences_query = 'SELECT * FROM absences'
df = pd.read_sql_query(absences_query, db_connection)
df

Unnamed: 0,series,issue_date
0,Ayashimon,2022-02-20
1,Black Clover,2022-02-27
2,Black Clover,2022-03-27
3,Black Clover,2022-05-08
4,Black Clover,2022-05-15
...,...,...
324,WITCH WATCH,2022-09-04
325,WITCH WATCH,2023-11-05
326,WITCH WATCH,2024-07-07
327,WITCH WATCH,2024-11-03


In [6]:
# Order the absences by series first and by issue date within each series.
# The rows keep their original index labels; only their order changes.
sort_keys = ['series', 'issue_date']
df = df.sort_values(by=sort_keys)
df

Unnamed: 0,series,issue_date
0,Ayashimon,2022-02-20
1,Black Clover,2022-02-27
2,Black Clover,2022-03-27
3,Black Clover,2022-05-08
4,Black Clover,2022-05-15
...,...,...
324,WITCH WATCH,2022-09-04
325,WITCH WATCH,2023-11-05
326,WITCH WATCH,2024-07-07
327,WITCH WATCH,2024-11-03


In [7]:
# We will create a column which tracks what group an absence belongs to.
# We will say two absences belong to the same group if A. They are from the same series and
# B. They occur in consecutive issues.
#
# Each row is compared with the row directly above it in the frame's current (sorted) order.
# The comparison is positional on purpose: sort_values() keeps the original index labels, so
# looking rows up by the labels 0..n-1 would walk them in their pre-sort order.

# Issue number of every absence, aligned to the frame's rows.
# .loc raises a KeyError if an absence falls on a date that is not a known issue.
issue_numbers = pd.Series(
    issues.loc[df['issue_date'].tolist()].to_numpy(),
    index=df.index,
)

# A new group starts when the series differs from the previous row (always true for the
# first row) or when more than one issue separates this absence from the previous one.
series_changed = df['series'].ne(df['series'].shift())
issue_gap = issue_numbers.diff().gt(1)
starts_new_group = series_changed | issue_gap

# The running count of group starts is the group number; subtract 1 so numbering begins at 0.
# This also works on an empty frame, where it simply produces an empty column.
df['grouping'] = starts_new_group.cumsum() - 1

In [8]:
# Attach to every absence the number of absences in its group.
# transform('count') broadcasts the per-group count back onto each row of that group.
grouping_by_group = df.groupby('grouping')['grouping']
df['length'] = grouping_by_group.transform('count')

In [9]:
# A hiatus is defined here as a run of three or more consecutive missed issues.
# Keep only the absences whose group is at least that long.
belongs_to_hiatus = df['length'] >= 3
output_df = df[belongs_to_hiatus]

In [None]:
# We create a temporary table of absences belonging to hiatuses.
# 'replace' (instead of 'fail') lets this cell be re-run when an interrupted earlier run
# left temp_hiatuses behind, since the table is only dropped in a later cell.
output_df.to_sql(name='temp_hiatuses', con=db_connection, if_exists='replace', index=False)
# Next we empty then fill the table of hiatuses anew, one row per group:
# the series name, the first and last missed issue dates, and the number of missed issues.
# Using the connection as a context manager commits when both statements succeed and rolls
# back if either raises, so a failed INSERT cannot leave hiatuses emptied by the DELETE.
with db_connection:
    cursor.execute("""DELETE FROM hiatuses;""")
    cursor.execute("""INSERT OR IGNORE INTO hiatuses(series, start_date, end_date, length)
               SELECT MAX(series), MIN(issue_date), MAX(issue_date), COUNT(grouping)
               FROM temp_hiatuses
               GROUP BY grouping;""")
# NOTE: the absences that belong to hiatuses are NOT removed from the absences table here;
# the next cell only drops the temporary table.

In [11]:
# Remove the scratch table used to build hiatuses.
# IF EXISTS keeps this cell safe to re-run after the table has already been dropped.
cursor.execute("""DROP TABLE IF EXISTS temp_hiatuses""")
db_connection.commit()

In [12]:
# Release the database connection now that all changes have been committed.
db_connection.close()