In [6]:
from src.pipeline.data_processor import DataProcessor, CleanedData
from enum import Enum
from datetime import date, timedelta, datetime
import pandas as pd
import re


class LearningCycle(Enum):
    """Lookup keys for each recurring learning cycle.

    Every member's value is a list of strings that ``CycleRecommendations``
    reads by position.

    For DAF, WEEKLY_DAF, NACH, YERUSHALMI and MISHNAH:
        [0] value compared against the ``series_name`` column of the shiurim,
        [1] category flag column that must equal 1,
        [2] calendar column whose cell names the subcategory column,
        [3] calendar column holding the number matched against the first
            number found in a shiur title,
        [4] (MISHNAH only) calendar column holding the number matched against
            the second number found in the title.

    PARSHA is shorter and laid out differently:
        [0] category flag column that must equal 1,
        [1] calendar column whose cell names the subcategory column.
    """

    DAF = [
        "Daf Yomi",
        "category_Gemara",
        "d_masechta",
        "d_num",
    ]
    WEEKLY_DAF = [
        "Daf Hashvua",
        "category_Gemara",
        "dw_masechta",
        "dw_num",
    ]
    MISHNAH = [
        "Mishna Yomi LZN Daniel Ari ben Avraham Kadesh",
        "category_Mishna",
        "m_masechta",
        "m_num1",
        "m_num2",
    ]
    PARSHA = [
        "category_Parsha",
        "parashat",
    ]
    NACH = [
        "Nach Yomi",
        "category_Nach",
        "n_sefer",
        "n_num",
    ]
    YERUSHALMI = [
        "Yerushalmi Yomi",
        "category_Yerushalmi",
        "y_masechta",
        "y_num",
    ]


class CycleRecommendations:
    """Recommend shiurim matching the learning cycles and holidays of a date.

    The constructor loads three cleaned tables through ``DataProcessor``: the
    cycles calendar (kept as ``self.calendar``) and the categories and shiurim
    tables, which are merged on ``'shiur'`` into ``self.df_merged``.
    """

    def __init__(self):
        self.dp = DataProcessor()
        self.calendar = self.dp.load_table(CleanedData.CALENDAR)
        df_categories = self.dp.load_table(CleanedData.CATEGORIES)
        df_shiurim = self.dp.load_table(CleanedData.SHIURIM)
        self.df_merged = pd.merge(df_categories, df_shiurim, on='shiur', suffixes=('_cat', '_shiur'))

    @staticmethod
    def _coerce_date(value):
        """Normalise ``value`` to a ``datetime.date``.

        ``None`` means "today" and is resolved at call time, so a class defined
        in a long-lived kernel does not keep returning a stale day. Strings are
        parsed as ``YYYY-MM-DD``; ``datetime`` instances are truncated to their
        date so that ``str()`` of the result has the calendar's format.
        """
        if value is None:
            return datetime.now().date()
        if isinstance(value, str):
            return datetime.strptime(value, "%Y-%m-%d").date()
        if isinstance(value, datetime):
            return value.date()
        return value

    def _topic_column(self, name):
        """Return the ``df_merged`` column for topic ``name``, or ``None``.

        Names containing a space are tried in bracketed form (``[name]``)
        first and then as-is, so either spelling of the column is found.
        Non-string cells (e.g. NaN in the calendar) have no column.
        """
        if not isinstance(name, str):
            return None
        candidates = [f'[{name}]', name] if ' ' in name else [name]
        for column in candidates:
            if column in self.df_merged.columns:
                return column
        return None

    @staticmethod
    def _calendar_int(row, column):
        """Read ``column`` from the first calendar row as an int (``None`` if missing)."""
        value = row[column].iloc[0]
        if pd.isna(value):
            return None
        return int(value)

    def _match_by_title_numbers(self, cycle, row, number_columns):
        """Select the cycle's shiurim whose title starts with the calendar numbers.

        Rows must have the category flag ``cycle.value[1]`` set to 1, the topic
        column named by the calendar cell ``cycle.value[2]`` set to 1, and
        ``series_name`` equal to ``cycle.value[0]``. Of those, only rows whose
        leading title numbers equal the calendar values in ``number_columns``
        (in order) are kept. An empty frame with ``df_merged``'s columns is
        returned when the topic column or a calendar number is missing.
        """
        topic_column = self._topic_column(row[cycle.value[2]].iloc[0])
        targets = [self._calendar_int(row, column) for column in number_columns]
        if topic_column is None or None in targets:
            return self.df_merged.iloc[0:0]

        candidates = self.df_merged.loc[
            (self.df_merged[cycle.value[1]] == 1)
            & (self.df_merged[topic_column] == 1)
            & (self.df_merged['series_name'] == cycle.value[0])
        ]
        # An explicit bool-dtype mask keeps this a row filter even when there
        # are no candidates; an empty object-dtype indexer would be treated by
        # pandas as a (empty) column selection instead.
        keep = pd.Series(
            [
                self.__extract_numbers(title)[:len(targets)] == targets
                for title in candidates['title']
            ],
            index=candidates.index,
            dtype=bool,
        )
        return candidates.loc[keep]

    def get_all_recommendations(self, date: date = None):
        """Collect recommendations for every learning cycle plus holidays.

        Args:
            date: A ``datetime.date``, a ``YYYY-MM-DD`` string, or ``None``
                for today's date.

        Returns:
            A list of ``'shiur'`` values: the matches of each ``LearningCycle``
            in declaration order, followed by the holiday matches for the
            window from ``date`` to three days later. Duplicates are kept.
        """
        day = self._coerce_date(date)
        all_recommendations = []
        for cycle in LearningCycle:
            all_recommendations.extend(self.get_learning_cycle_recommendations(cycle, day))
        all_recommendations.extend(self.get_holiday(day, day + timedelta(3)))
        return all_recommendations

    def get_learning_cycle_recommendations(self, cycle: "LearningCycle", date: date = None):
        """Return the ``'shiur'`` values matching one learning cycle on a date.

        Args:
            cycle: The ``LearningCycle`` member to look up.
            date: A ``datetime.date``, a ``YYYY-MM-DD`` string, or ``None``
                for today's date.

        Returns:
            A list of ``'shiur'`` values; empty when the date is not in the
            calendar or the cycle is not recognised.
        """
        day = str(self._coerce_date(date))
        date_data = self.calendar[self.calendar['date'] == day]
        if date_data.empty:
            return []
        if cycle in [LearningCycle.DAF, LearningCycle.WEEKLY_DAF, LearningCycle.NACH, LearningCycle.YERUSHALMI]:
            df = self.get_standard_learning(cycle, date_data)
        elif cycle == LearningCycle.PARSHA:
            df = self.get_parsha_recommendations(cycle, date_data)
        elif cycle == LearningCycle.MISHNAH:
            df = self.get_mishna_recommendation(cycle, date_data)
        else:
            return []
        return df["shiur"].tolist()

    def get_standard_learning(self, cycle: "LearningCycle", row: pd.DataFrame):
        """Match shiurim whose first title number equals the calendar's number.

        Args:
            cycle: Cycle whose value lists series name, category column,
                calendar topic column and calendar number column.
            row: Calendar rows for the day; only the first row is read.

        Returns:
            The matching rows of ``df_merged`` (possibly empty).
        """
        return self._match_by_title_numbers(cycle, row, [cycle.value[3]])

    def get_parsha_recommendations(self, cycle: "LearningCycle", row: pd.DataFrame):
        """Match shiurim flagged with the parsha category and the day's parsha.

        Args:
            cycle: Cycle whose value lists the category column and the calendar
                column naming the topic.
            row: Calendar rows for the day; only the first row is read.

        Returns:
            The matching rows of ``df_merged`` (empty when the topic has no column).
        """
        topic_column = self._topic_column(row[cycle.value[1]].iloc[0])
        if topic_column is None:
            return self.df_merged.iloc[0:0]
        return self.df_merged[
            (self.df_merged[cycle.value[0]] == 1)
            & (self.df_merged[topic_column] == 1)
        ]

    def get_mishna_recommendation(self, cycle: "LearningCycle", row: pd.DataFrame):
        """Match shiurim whose first two title numbers equal the calendar's pair.

        Args:
            cycle: Cycle whose value lists series name, category column,
                calendar topic column and two calendar number columns.
            row: Calendar rows for the day; only the first row is read.

        Returns:
            The matching rows of ``df_merged`` (possibly empty).
        """
        return self._match_by_title_numbers(cycle, row, [cycle.value[3], cycle.value[4]])

    def __extract_numbers(self, title):
        """Return the integers found in ``title``; non-string titles yield ``[]``."""
        if not isinstance(title, str):
            return []
        return [int(num) for num in re.findall(r'\b\d+\b|(?<=[:\-])\d+', title)]

    def get_holiday(self, start_date: date = None, end_date: date = None):
        """Return ``'shiur'`` values for the first holiday in a date window.

        If no holiday falls inside the window, the Rosh Chodesh entry of the
        window's first calendar row is used instead.

        Args:
            start_date: A ``datetime.date``, a ``YYYY-MM-DD`` string, or
                ``None`` for today's date.
            end_date: Same accepted forms; ``None`` means three days after
                ``start_date``. Parsed independently of ``start_date``.

        Returns:
            A list of ``'shiur'`` values from rows flagged with both the
            occasion's column and ``category_Holidays``; empty when the start
            date is not in the calendar, nothing is scheduled, or the occasion
            has no column in ``df_merged``.
        """
        start = self._coerce_date(start_date)
        end = start + timedelta(3) if end_date is None else self._coerce_date(end_date)

        dates = self.calendar['date']
        if str(start) not in dates.values:
            return []
        window = self.calendar[(dates >= str(start)) & (dates <= str(end))]
        if window.empty:
            # Happens when end precedes start.
            return []

        holidays = window['holiday'].dropna()
        if not holidays.empty:
            occasion = holidays.iloc[0]
        elif not pd.isna(window['roshchodesh'].iloc[0]):
            # NOTE(review): assumes the 'roshchodesh' cell holds the name of a
            # df_merged column, like 'holiday' does - confirm against the
            # calendar schema.
            occasion = window['roshchodesh'].iloc[0]
        else:
            return []

        column = self._topic_column(occasion)
        if column is None:
            return []
        matches = self.df_merged[
            (self.df_merged[column] == 1)
            & (self.df_merged['category_Holidays'] == 1)
        ]
        return matches["shiur"].tolist()


In [7]:
# Build the recommender; its constructor loads the calendar, categories and
# shiurim tables through DataProcessor and merges the latter two on 'shiur'.
model = CycleRecommendations()


2024-07-25 20:55:19,747 - root - INFO - DataProcessor instance created
2024-07-25 20:55:19,751 - root - INFO - Loading data from: cycles_calendar
2024-07-25 20:55:19,886 - root - INFO - Loading data from: categories_cleaned
2024-07-25 20:56:06,234 - root - INFO - Loading data from: shiurim_cleaned


In [15]:
# No date is passed, so the method's default date applies. The printed list
# holds 'shiur' values for every LearningCycle followed by holiday matches.
print(model.get_all_recommendations())

[873341, 873340, 873338, 873337, 873335, 873334, 1099057, 1098910, 1094170, 1093170, 1090947, 1090263, 1090195, 1087541, 1085979, 1085453, 1081408, 1080511, 1079655, 1079253, 1070914, 1070444, 1069914, 1069802, 1069790, 1069789, 1069761, 1069717, 1069608, 1069543, 1069481, 1069473, 1069462, 1069461, 1069460, 1069455, 1069437, 1069433, 1069430, 1069422, 1069420, 1069418, 1069415, 1069411, 1069410, 1069409, 1069408, 1069405, 1069403, 1069391, 1069388, 1069382, 1069381, 1069377, 1069372, 1069364, 1069363, 1069354, 1069342, 1069338, 1069337, 1069334, 1069329, 1069328, 1069327, 1069324, 1069304, 1069245, 1069198, 1069195, 1069194, 1069191, 1069145, 1069144, 1069140, 1069139, 1069127, 1069109, 1069095, 1069092, 1069088, 1069082, 1069072, 1069030, 1069010, 1069004, 1068980, 1068977, 1064958, 1061213, 1057724, 1055965, 1055411, 1055018, 1054349, 1053722, 1047419, 1046459, 1046456, 1044520, 1044516, 1040323, 1040212, 1040211, 1040051, 1040017, 1039877, 1039872, 1039845, 1039791, 1039769, 103976