In [14]:
import pandas as pd
import numpy as np
import pickle
from sklearn.preprocessing import LabelEncoder

# === 🧠 Load test data
test_original = pd.read_csv("test.csv")  # 👈 kept as loaded; used again at the end
test_df = test_original.copy()           # 👈 working copy that the steps below operate on

# === 🕐 Step 1: Time-based features
def prepare_datetime_features(df):
    """Expand the ``date`` column of *df* into calendar and cyclical features.

    The frame is modified in place and the very same object is returned.

    Columns written:
        date                 -- parsed with ``pd.to_datetime``
        year                 -- calendar year
        is_weekend           -- 1 when the weekday number is 5 or 6
                                (Saturday/Sunday, pandas counts Monday as 0)
        month_sin, month_cos -- month mapped onto a 12-step circle
        day_sin, day_cos     -- day of month mapped onto a 31-step circle
        dow_sin, dow_cos     -- weekday mapped onto a 7-step circle

    No ``month``, ``day`` or ``day_of_week`` column is left on the frame
    when the function returns.
    """
    stamps = pd.to_datetime(df['date'])
    df['date'] = stamps

    month_no = stamps.dt.month
    day_no = stamps.dt.day
    weekday_no = stamps.dt.dayofweek

    df['year'] = stamps.dt.year
    df['is_weekend'] = weekday_no.isin([5, 6]).astype(int)

    # Each calendar part becomes a sin/cos pair so that the last value of a
    # cycle sits next to the first one (e.g. December next to January).
    for prefix, values, period in (
        ('month', month_no, 12),
        ('day', day_no, 31),
        ('dow', weekday_no, 7),
    ):
        df[prefix + '_sin'] = np.sin(2 * np.pi * values / period)
        df[prefix + '_cos'] = np.cos(2 * np.pi * values / period)

    # The raw calendar parts are never part of the result; remove any column
    # of that name the caller's frame may have carried in.
    leftovers = [
        name for name in ('month', 'day', 'day_of_week') if name in df.columns
    ]
    if leftovers:
        df.drop(columns=leftovers, inplace=True)
    return df

# Add the date-derived feature columns to the working copy.
test_df = prepare_datetime_features(test_df)

# === 🔠 Step 2: Encode 'family'
def encode_family_column(df, mode="transform", encoder_path="family_label_encoder.pkl"):
    """Replace the ``family`` column of *df* with an encoded ``family_encoded``.

    Args:
        df: DataFrame containing a ``family`` column. Modified in place.
        mode: Only ``"transform"`` is supported: a previously saved encoder
            is loaded from ``encoder_path`` and applied to ``df['family']``.
        encoder_path: Path of the pickled encoder. The loaded object only
            needs a ``transform`` method (presumably a fitted scikit-learn
            ``LabelEncoder`` -- confirm against the training code).

    Returns:
        The same ``df`` object, with ``family_encoded`` added and ``family``
        dropped.

    Raises:
        ValueError: If ``mode`` is anything other than ``"transform"``.
            Previously such a call returned the frame unchanged, which let
            the rest of the pipeline run on unencoded data without notice.
    """
    # Validate before touching the filesystem or the frame.
    if mode != "transform":
        raise ValueError(
            f"Unsupported mode {mode!r}; only 'transform' is implemented"
        )

    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file. Only point encoder_path at artifacts produced by a trusted run.
    with open(encoder_path, "rb") as f:
        encoder = pickle.load(f)

    df['family_encoded'] = encoder.transform(df['family'])
    df.drop(columns=['family'], inplace=True)
    return df

# Swap 'family' for 'family_encoded' using the pickled encoder at the default path.
test_df = encode_family_column(test_df, mode="transform")

# === 🔢 Step 3: Scale inputs
# Columns handed to the input scaler, in exactly this order.
# NOTE(review): 'family_encoded' and 'store_nbr' are not in this list, so the
# model input does not contain them -- confirm this matches how the scaler
# and model were trained.
numerical_cols = [
    'onpromotion', 'year', 'is_weekend',
    'month_sin', 'month_cos',
    'day_sin', 'day_cos',
    'dow_sin', 'dow_cos',
]

with open("x_scaler.pkl", "rb") as f:
    x_scaler = pickle.load(f)

feature_frame = test_df[numerical_cols]
X_test = x_scaler.transform(feature_frame)

# === 🤖 Step 4: Load model & predict
with open("best_model.pkl", "rb") as f:
    model = pickle.load(f)

# Reshape the predictions into a single column, the 2-D layout that is
# passed to the target scaler in the next step.
raw_pred = model.predict(X_test)
y_pred_scaled = raw_pred.reshape(-1, 1)

# === 🔁 Step 5: Inverse scale y
with open("y_scaler.pkl", "rb") as f:
    y_scaler = pickle.load(f)

y_pred = y_scaler.inverse_transform(y_pred_scaled)

# === 📝 Step 6: Add sales to original test & save
# Flatten back to 1-D so the values can be stored as one column of the
# untouched test frame.
sales_values = y_pred.flatten()
test_original["sales"] = sales_values
test_original.head()


Unnamed: 0,id,date,store_nbr,family,onpromotion,sales
0,3000888,2017-08-16,1,AUTOMOTIVE,0,53.571266
1,3000889,2017-08-16,1,BABY CARE,0,53.571266
2,3000890,2017-08-16,1,BEAUTY,2,102.192978
3,3000891,2017-08-16,1,BEVERAGES,20,937.10791
4,3000892,2017-08-16,1,BOOKS,0,53.571266


In [None]:
# Write the test frame, including the added "sales" column, to submission.csv
# without the DataFrame index.
test_original.to_csv("submission.csv", index=False)
# The message (Arabic) says submission.csv is ready and holds the original
# test columns plus sales.
print("submission.csv جاهز وفيه نفس أعمدة test الأصلية + sales")

submission.csv فيه كل بيانات test + sales جاهز 
