# טעינת ספריות

In [35]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from datetime import datetime, timedelta
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras import Input
from tensorflow.keras.layers import LSTM, Dropout, Dense, Bidirectional, BatchNormalization
from tensorflow.keras.models import load_model


# טעינת הנתונים

In [36]:
cc_df = pd.read_csv("CC_2020-2025_New.csv")
holidays_df = pd.read_csv("Holidays_New.csv")

# תהליך EDA

In [37]:
print("🔹 מידע כללי:")
print(cc_df.info())
print("\n🔹 תיאור סטטיסטי:")
print(cc_df.describe())
print("\n🔹 ערכים חסרים:")
print(cc_df.isna().sum())

🔹 מידע כללי:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 34088 entries, 0 to 34087
Data columns (total 12 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   QueueStartDate        34088 non-null  object
 1   QueueStartDateNumber  34088 non-null  int64 
 2   QueueStartDateName    34088 non-null  object
 3   HourInterval          34088 non-null  object
 4   HalfHourInterval      34088 non-null  object
 5   TotalCallsOffered     34088 non-null  int64 
 6   TotalCallsAnswered    34088 non-null  int64 
 7   TotalCallsAbandoned   34088 non-null  int64 
 8   TotalCB               34088 non-null  int64 
 9   TotalTransfered       34088 non-null  int64 
 10  TotalWaitDuration     34088 non-null  int64 
 11  TotalAgents           34088 non-null  int64 
dtypes: int64(8), object(4)
memory usage: 3.1+ MB
None

🔹 תיאור סטטיסטי:
       QueueStartDateNumber  TotalCallsOffered  TotalCallsAnswered  \
count          34088.000000       3

In [38]:
# דוגמה לגרף לפי ממוצע נציגים פר שעה
fig = px.line(cc_df.groupby('HourInterval')['TotalAgents'].mean().reset_index(),
              x='HourInterval', y='TotalAgents',
              title="ממוצע נציגים פר שעה")
fig.show()

## המרת תאריכים

In [39]:
cc_df['QueueStartDate'] = pd.to_datetime(cc_df['QueueStartDate'], format="%d/%m/%Y")
cc_df.rename(columns={'QueueStartDate': 'Date'}, inplace=True)
holidays_df['Date'] = pd.to_datetime(holidays_df['Date'])

## שינוי שמות עמודות (לשמות באנגלית אחידים)

In [40]:
cc_df.columns = [col.strip().replace(" ", "_").replace("-", "_") for col in cc_df.columns]
holidays_df.columns = [col.strip().replace(" ", "_").replace("-", "_") for col in holidays_df.columns]

## מיזוג חגים לפי תאריך

In [41]:
cc_df = cc_df.merge(holidays_df[['Date', 'IsHoliday', 'IsHolidayEve']], on='Date', how='left')

In [42]:
# אם יש ערכים חסרים – נמלא ב-0
cc_df['IsHoliday'] = cc_df['IsHoliday'].fillna(0).astype(int)
cc_df['IsHolidayEve'] = cc_df['IsHolidayEve'].fillna(0).astype(int)

## יצירת עמודה מספרית Interval

In [43]:
cc_df['Interval'] = cc_df['HourInterval'].apply(lambda x: int(x.split(':')[0]))

## הכנת פיצ'רים

In [44]:
if 'Weekday' not in cc_df.columns and 'QueueStartDateNumber' in cc_df.columns:
    weekday_map = {
        1: 6, 2: 0, 3: 1, 4: 2, 5: 3, 6: 4, 7: 5
    }
    cc_df['Weekday'] = cc_df['QueueStartDateNumber'].map(weekday_map)

In [45]:
cc_df['HourSin'] = np.sin(2 * np.pi * cc_df['Interval']/24)
cc_df['HourCos'] = np.cos(2 * np.pi * cc_df['Interval']/24)
cc_df['WeekdaySin'] = np.sin(2 * np.pi * cc_df['Weekday']/7)
cc_df['WeekdayCos'] = np.cos(2 * np.pi * cc_df['Weekday']/7)

# Lag מינימלי
cc_df = cc_df.sort_values(['Date','Interval'])
cc_df['Lag1'] = cc_df['TotalAgents'].shift(24)
cc_df.bfill(inplace=True)


## סינון עמודות רוונטיות

In [46]:
feature_cols = [
    'Interval','Weekday','IsHoliday','IsHolidayEve',
    'HourSin','HourCos','WeekdaySin','WeekdayCos',
    'Lag1','TotalAgents'
]
data = cc_df[feature_cols]


## נורמליזציה

In [47]:
scaler = MinMaxScaler()
scaled_features = scaler.fit_transform(data)
scaled_df = pd.DataFrame(scaled_features, columns=feature_cols)
scaled_df['Date'] = cc_df['Date'].values

# בניית מודל LSTM

##  הכנת הנתונים למודל


In [48]:
def create_sequences(df, seq_length=24):
    X, y = [], []
    for i in range(len(df) - seq_length):
        X.append(df.iloc[i:i+seq_length, :-1].values)
        y.append(df.iloc[i+seq_length, -1])
    return np.array(X), np.array(y)

scaled_df = scaled_df.sort_values('Date')
X, y = create_sequences(scaled_df[feature_cols])

split = int(len(X)*0.8)
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

## בניית המודל

In [49]:
model = Sequential([
    Input(shape=(X_train.shape[1], X_train.shape[2])),
    LSTM(64, return_sequences=True, dropout=0.2),
    LSTM(32, dropout=0.2),
    Dense(16, activation='relu'),
    Dense(1)
])

In [50]:
model.summary()

In [51]:
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss='mse')

In [52]:
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)

In [53]:
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=32,
    callbacks=[early_stop, reduce_lr],
    verbose=1
)

Epoch 1/50
[1m852/852[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 9ms/step - loss: 0.0538 - val_loss: 0.0686 - learning_rate: 0.0010
Epoch 2/50
[1m852/852[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 8ms/step - loss: 0.0377 - val_loss: 0.0620 - learning_rate: 0.0010
Epoch 3/50
[1m852/852[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 7ms/step - loss: 0.0349 - val_loss: 0.0612 - learning_rate: 0.0010
Epoch 4/50
[1m852/852[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 8ms/step - loss: 0.0330 - val_loss: 0.0618 - learning_rate: 0.0010
Epoch 5/50
[1m852/852[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 8ms/step - loss: 0.0325 - val_loss: 0.0515 - learning_rate: 0.0010
Epoch 6/50
[1m852/852[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 7ms/step - loss: 0.0314 - val_loss: 0.0609 - learning_rate: 0.0010
Epoch 7/50
[1m852/852[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 10ms/step - loss: 0.0304 - val_loss: 0.0534 - learning_r

## הערכת ביצועים

In [59]:
fig_loss = go.Figure()
fig_loss.add_trace(go.Scatter(y=history.history['loss'], mode='lines', name='Train Loss'))
fig_loss.add_trace(go.Scatter(y=history.history['val_loss'], mode='lines', name='Validation Loss'))
fig_loss.update_layout(title="Training vs Validation Loss", xaxis_title="Epoch", yaxis_title="Loss")
fig_loss.show()

In [54]:
y_pred = model.predict(X_test)
y_pred_rescaled = scaler.inverse_transform(
    np.concatenate([np.zeros((len(y_pred), len(feature_cols)-1)), y_pred], axis=1)
)[:, -1]
y_test_rescaled = scaler.inverse_transform(
    np.concatenate([np.zeros((len(y_test), len(feature_cols)-1)), y_test.reshape(-1,1)], axis=1)
)[:, -1]

mae = mean_absolute_error(y_test_rescaled, y_pred_rescaled)
rmse = np.sqrt(mean_squared_error(y_test_rescaled, y_pred_rescaled))
r2 = r2_score(y_test_rescaled, y_pred_rescaled)
print(f"MAE: {mae:.2f}, RMSE: {rmse:.2f}, R²: {r2:.2f}")


[1m213/213[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
MAE: 17.92, RMSE: 24.58, R²: 0.55


In [55]:
# גרף השוואה

fig = go.Figure()
fig.add_trace(go.Scatter(y=y_test_rescaled[:200], mode='lines', name='Actual'))
fig.add_trace(go.Scatter(y=y_pred_rescaled[:200], mode='lines', name='Predicted'))
fig.update_layout(title="Actual vs Predicted", xaxis_title="Time", yaxis_title="TotalAgents")
fig.show()

## שמירת המודל

In [60]:
model.save("lstm_model.keras", include_optimizer=False)
print("✅ Model saved as lstm_model.keras")

✅ Model saved as lstm_model.keras


# פונקציה לחיזוי יומי

In [61]:
def predict_future(date_str):
    model_loaded = load_model("lstm_model.keras", compile=False)
    holidays_new_df = pd.read_csv("Holidays_New.csv")
    holidays_new_df['Date'] = pd.to_datetime(holidays_new_df['Date'])

    date_obj = pd.to_datetime(date_str, format="%d/%m/%Y")
    preds = []

    for hour in range(24):
        weekday = date_obj.dayofweek
        is_holiday = 1 if date_obj in holidays_new_df['Date'].values else 0
        is_holiday_eve = 1 if date_obj - pd.Timedelta(days=1) in holidays_new_df['Date'].values else 0

        hour_sin = np.sin(2 * np.pi * hour/24)
        hour_cos = np.cos(2 * np.pi * hour/24)
        weekday_sin = np.sin(2 * np.pi * weekday/7)
        weekday_cos = np.cos(2 * np.pi * weekday/7)

        df_row = pd.DataFrame([[hour, weekday, is_holiday, is_holiday_eve,
                                hour_sin, hour_cos, weekday_sin, weekday_cos,
                                0,0]],
                              columns=feature_cols)

        scaled_row = scaler.transform(df_row)[0]
        row_9_features = scaled_row[:-1]
        row_reshaped = np.array(row_9_features).reshape((1,1,len(feature_cols)-1))
        pred = model_loaded.predict(row_reshaped, verbose=0)[0][0]
        preds.append(pred)

    preds = np.array(preds).reshape(-1,1)
    preds_rescaled = scaler.inverse_transform(
        np.concatenate([np.zeros((24, len(feature_cols)-1)), preds], axis=1)
    )[:, -1]

    predictions = pd.DataFrame({
        'Date':[date_obj.strftime("%d/%m/%Y")]*24,
        'HourInterval':[f"{h:02d}:00 - {h+1:02d}:00" for h in range(24)],
        'PredictedAgents':preds_rescaled.astype(int)
    })
    print(f"✅ תחזית ליום {date_obj.strftime('%d/%m/%Y')}")
    return predictions


In [62]:
predictions =predict_future("20/07/2025")
print(predictions)

✅ תחזית ליום 20/07/2025
          Date   HourInterval  PredictedAgents
0   20/07/2025  00:00 - 01:00               10
1   20/07/2025  01:00 - 02:00               31
2   20/07/2025  02:00 - 03:00               24
3   20/07/2025  03:00 - 04:00               24
4   20/07/2025  04:00 - 05:00               24
5   20/07/2025  05:00 - 06:00                7
6   20/07/2025  06:00 - 07:00               28
7   20/07/2025  07:00 - 08:00               19
8   20/07/2025  08:00 - 09:00               31
9   20/07/2025  09:00 - 10:00               18
10  20/07/2025  10:00 - 11:00               39
11  20/07/2025  11:00 - 12:00               36
12  20/07/2025  12:00 - 13:00               38
13  20/07/2025  13:00 - 14:00               33
14  20/07/2025  14:00 - 15:00               15
15  20/07/2025  15:00 - 16:00               13
16  20/07/2025  16:00 - 17:00               23
17  20/07/2025  17:00 - 18:00               21
18  20/07/2025  18:00 - 19:00               10
19  20/07/2025  19:00 - 20:00       