In [15]:
import re

def extract_numbers(title):
    return [int(num) for num in re.findall(r'\b\d+\b|(?<=[:\-])\d+', title)]
    # return [int(num) for num in re.findall(r'(?<!\d)\d+(?!\d)', title)]

In [16]:
from src.pipeline.data_processor import DataProcessor, CleanedData
from enum import Enum
from datetime import date, timedelta
import pandas as pd


dp = DataProcessor()
calendar = dp.load_table(CleanedData.CALENDAR)
print(calendar.columns)

class LearningCycle(Enum):
    DAF = ["Daf Yomi", "category_Gemara", "d_masechta", "d_num"]
    WEEKLY_DAF = ["Daf Hashvua", "category_Gemara", "dw_masechta", "dw_num"]
    MISHNAH = ["Mishna Yomi LZN Daniel Ari ben Avraham Kadesh", "category_Mishna", "m_masechta", "m_num1", "m_num2"]
    PARSHA = ["category_Parsha", "parashat"]
    NACH = ["Nach Yomi", "category_Nach", "n_sefer", "n_num"]
    YERUSHALMI = ["Yerushalmi Yomi", "category_Yerushalmi", "y_masechta", "y_num"]

def get_learning_cycle_recommendations(cycle:LearningCycle, date:date=date.today()):
     if str(date) not in calendar['date'].values:
          return []
     date_data = calendar[calendar['date'] == str(date)]
     if cycle in [LearningCycle.DAF, LearningCycle.WEEKLY_DAF, LearningCycle.NACH, LearningCycle.YERUSHALMI]:
          df = get_standard_learning(cycle, date_data)
     elif cycle == LearningCycle.PARSHA:
          df = get_parsha_recommendations(cycle, date_data)
     elif cycle == LearningCycle.MISHNAH:
          df = get_mishna_recommendation(cycle, date_data)
     else:
          return []
     # df.sort_values(by='date', inplace=True, ascending=False)
     return(df["shiur"].tolist())

def get_standard_learning(cycle:LearningCycle, row:pd.DataFrame):
     subcategory = row.iloc[0][cycle.value[2]]
     subcategory = f'[{subcategory}]' if ' ' in subcategory else subcategory
     df_categories = dp.load_table(CleanedData.CATEGORIES)
     df_shiurim = dp.load_table(CleanedData.SHIURIM)
     df_merged = pd.merge(df_categories, df_shiurim, on='shiur', suffixes=('_cat', '_shiur'))
     df = df_merged.loc[
     (df_merged[cycle.value[1]] == 1) & 
     (df_merged[row.iloc[0][cycle.value[2]]] == 1) &
     (df_merged['series_name'] == cycle.value[0])
     ].copy()
     df.loc[:, 'numbers'] = df['title'].apply(extract_numbers)
     cycle_value1 = int(row[cycle.value[3]].item() if hasattr(row[cycle.value[3]], 'item') else row[cycle.value[3]])
     filtered_df = df[df['numbers'].apply(lambda x: x[0] == cycle_value1 if len(x) > 0 else False)]
     filtered_df = filtered_df.drop(columns=['numbers'])
     return filtered_df

def get_parsha_recommendations(cycle:LearningCycle, row:pd.DataFrame):
     subcategory = row.iloc[0][cycle.value[1]]
     subcategory = f'[{subcategory}]' if ' ' in subcategory else subcategory
     df_categories = dp.load_table(CleanedData.CATEGORIES)
     df_shiurim = dp.load_table(CleanedData.SHIURIM)
     df_merged = pd.merge(df_categories, df_shiurim, on='shiur', suffixes=('_cat', '_shiur'))
     filtered_df = df_merged[
     (df_merged[cycle.value[0]] == 1) & 
     (df_merged[subcategory] == 1)
     ]
     return filtered_df

def get_mishna_recommendation(cycle:LearningCycle, row:pd.DataFrame):
     subcategory = row.iloc[0][cycle.value[2]]
     subcategory = f'[{subcategory}]' if ' ' in subcategory else subcategory
     df_categories = dp.load_table(CleanedData.CATEGORIES)
     df_shiurim = dp.load_table(CleanedData.SHIURIM)
     df_merged = pd.merge(df_categories, df_shiurim, on='shiur', suffixes=('_cat', '_shiur'))
     df = df_merged.loc[
     (df_merged[cycle.value[1]] == 1) & 
     (df_merged[row.iloc[0][cycle.value[2]]] == 1) &
     (df_merged['series_name'] == cycle.value[0])
     ].copy()
     df.loc[:, 'numbers'] = df['title'].apply(extract_numbers)
     print(df[['title', 'numbers']])
     cycle_value1 = int(row[cycle.value[3]].item() if hasattr(row[cycle.value[3]], 'item') else row[cycle.value[3]])
     cycle_value2 = int(row[cycle.value[4]].item() if hasattr(row[cycle.value[4]], 'item') else row[cycle.value[4]])
     filtered_df = df[df['numbers'].apply(lambda x: (x[0] == cycle_value1 and x[1] == cycle_value2) if len(x) > 1 else False)]
     filtered_df = filtered_df.drop(columns=['numbers'])
     return filtered_df

print(get_learning_cycle_recommendations(LearningCycle.MISHNAH))

2024-07-23 17:03:16,326 - root - INFO - DataProcessor instance created
2024-07-23 17:03:16,347 - root - INFO - Loading data from: cycles_calendar
2024-07-23 17:03:16,461 - root - INFO - Loading data from: categories_cleaned


Index(['date', 'holiday', 'parashat', 'roshchodesh', 'n_sefer', 'n_num',
       'd_masechta', 'd_num', 'dw_masechta', 'dw_num', 'y_masechta', 'y_num',
       'm_masechta', 'm_num1', 'm_num2'],
      dtype='object')


2024-07-23 17:04:03,200 - root - INFO - Loading data from: shiurim_cleaned


                                        title    numbers
1962                           Kidushin 41314    [41314]
1963                           Kidushin 41112    [41112]
1964                            Kidushin 4910     [4910]
1965                             Kidushin 478      [478]
1966                             Kidushin 456      [456]
1967                             Kidushin 434      [434]
1968                             Kidushin 412      [412]
1969                           Kidushin 31213    [31213]
1970                           Kidushin 31011    [31011]
1971                             Kidushin 389      [389]
1973                             Kidushin 367      [367]
1974                             Kidushin 345      [345]
1975                             Kidushin 323      [323]
1976                           Kidushin 21031    [21031]
1977                             Kidushin 289      [289]
1978                             Kidushin 267      [267]
1979                           

In [19]:
def get_holiday(start_date:date=date.today(), end_date:date=date.today()+timedelta(3)):
      if str(start_date) not in calendar['date'].values:
            return []
      holiday_data = calendar[(calendar['date'] >= str(start_date)) & (calendar['date'] <= str(end_date))]
      no_holiday = holiday_data['holiday'].isna().all()
      no_roshchodesh = holiday_data['roshchodesh'].isna().all()
      if no_holiday == False:
            first_holiday = holiday_data['holiday'].dropna().iloc[0]
            df_categories = dp.load_table(CleanedData.CATEGORIES)
            df_shiurim = dp.load_table(CleanedData.SHIURIM)
            df_merged = pd.merge(df_categories, df_shiurim, on='shiur', suffixes=('_cat', '_shiur'))
            filtered_df = df_merged[(df_merged[first_holiday] == 1)]
            return(filtered_df["shiur"].tolist())
      elif no_roshchodesh == False:
            first_roshchodesh = holiday_data['holiday'].dropna().iloc[0]
            df_categories = dp.load_table(CleanedData.CATEGORIES)
            df_shiurim = dp.load_table(CleanedData.SHIURIM)
            df_merged = pd.merge(df_categories, df_shiurim, on='shiur', suffixes=('_cat', '_shiur'))
            filtered_df = df_merged[(df_merged[first_roshchodesh] == 1)]
            return(filtered_df["shiur"].tolist())
      else:
            return []
print(get_holiday())


2024-07-23 17:10:29,386 - root - INFO - Loading data from: categories_cleaned
2024-07-23 17:11:12,914 - root - INFO - Loading data from: shiurim_cleaned


[1075363, 1071128, 1070688, 1070686, 1070563, 1070415, 1070083, 1069552, 1069457, 1069454, 1069436, 1069435, 1069411, 1069409, 1069396, 1069376, 1069348, 1069338, 1069335, 1069330, 1069328, 1069318, 1069315, 1069302, 1069296, 1069291, 1069198, 1069159, 1069089, 1069079, 1068715, 1068658, 1059710, 1053099, 1047968, 1042645, 1039336, 1039297, 1039284, 1039264, 1039262, 1039257, 1039247, 1039113, 1039111, 1039110, 1039080, 1038666, 1038534, 1038489, 1038228, 1018968, 1017863, 1013512, 1007861, 1006471, 1006013, 1005338, 1005196, 1005194, 1005073, 1005010, 1005009, 1005005, 1004974, 1004962, 1004957, 1004954, 1004929, 1004917, 1004818, 1004810, 1004797, 1004795, 1004717, 1004697, 1004618, 1004602, 1004453, 1004362, 1004241, 1004075, 991336, 972892, 967905, 967898, 965933, 965913, 965311, 964298, 963289, 963034, 962955, 962929, 962871, 962828, 962814, 962811, 962782, 962773, 962760, 962754, 962737, 962733, 962703, 962702, 962694, 962692, 962451, 962438, 961716, 961561, 961038, 958469, 94434