# LOAD TRAINED MODELS


In [7]:
import joblib
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from sklearn.preprocessing import LabelEncoder

lgb_sales = joblib.load("/kaggle/input/new_v/other/default/1/model_sales (1).pkl")
lgb_profit = joblib.load("/kaggle/input/new_v/other/default/1/model_profit (1).pkl")
encoders = joblib.load("/kaggle/input/encoder/other/default/1/label_encoders (1).pkl")

print(f"Encoders available for: {list(encoders.keys())}")

model_features = lgb_sales.booster_.feature_name()
print(f"\nModel expects {len(model_features)} features:")
print(model_features)


Encoders available for: ['Region', 'Category', 'Sub-Category']

Model expects 28 features:
['Region_x', 'Category_x', 'Sub-Category_x', 'year', 'month', 'day', 'dayofweek', 'quarter', 'sales_lag_1', 'sales_lag_7', 'profit_lag_1', 'profit_lag_7', 'sales_roll_Mean_7', 'sales_roll_std_7', 'sales_roll_max_7', 'sales_roll_min_7', 'sales_roll_sum_7', 'profit_roll_Mean_7', 'profit_roll_std_7', 'profit_roll_max_7', 'profit_roll_min_7', 'profit_roll_sum_7', 'Region_y', 'Category_y', 'Sub-Category_y', 'Region', 'Category', 'Sub-Category']


# Handle Test Inputs

In [8]:
# Handle test case
def create_test_input(date, region, category, sub_category, 
                      sales_lag_1, sales_lag_7, 
                      profit_lag_1, profit_lag_7):
    if isinstance(date, str):
        date = pd.to_datetime(date)
    
    # Rolling Features
    sales_roll_mean = np.mean([sales_lag_1, sales_lag_7])
    sales_roll_std = np.std([sales_lag_1, sales_lag_7])
    sales_roll_max = max(sales_lag_1, sales_lag_7)
    sales_roll_min = min(sales_lag_1, sales_lag_7)
    sales_roll_sum = sales_lag_1 + sales_lag_7

    profit_roll_mean = np.mean([profit_lag_1, profit_lag_7])
    profit_roll_std = np.std([profit_lag_1, profit_lag_7])
    profit_roll_max = max(profit_lag_1, profit_lag_7)
    profit_roll_min = min(profit_lag_1, profit_lag_7)
    profit_roll_sum = profit_lag_1 + profit_lag_7

    # Date features
    year = date.year
    month = date.month
    day = date.day
    dayofweek = date.weekday()
    quarter = (month - 1) // 3 + 1

    # All features
    features = {
        'year': year,
        'month': month,
        'day': day,
        'dayofweek': dayofweek,
        'quarter': quarter,
        'sales_lag_1': sales_lag_1,
        'sales_lag_7': sales_lag_7,
        'profit_lag_1': profit_lag_1,
        'profit_lag_7': profit_lag_7,
        'sales_roll_Mean_7': sales_roll_mean,
        'sales_roll_std_7': sales_roll_std,
        'sales_roll_max_7': sales_roll_max,
        'sales_roll_min_7': sales_roll_min,
        'sales_roll_sum_7': sales_roll_sum,
        'profit_roll_Mean_7': profit_roll_mean,
        'profit_roll_std_7': profit_roll_std,
        'profit_roll_max_7': profit_roll_max,
        'profit_roll_min_7': profit_roll_min,
        'profit_roll_sum_7': profit_roll_sum,
        'Region': region,
        'Category': category,
        'Sub-Category': sub_category,
        'Region_x': region,
        'Category_x': category,
        'Sub-Category_x': sub_category,
        'Region_y': region,
        'Category_y': category,
        'Sub-Category_y': sub_category
    }

    return pd.DataFrame([features])

#  7-DAY Prediction Function


In [9]:
def predict_sales_profit(input_df, encoders, lgb_sales, lgb_profit):
    input_df = input_df.copy()

    # Encode categorical columns safely
    for col in input_df.columns:
        if any(name in col for name in ['Region', 'Category', 'Sub-Category']):
            for base_col in ['Region', 'Category', 'Sub-Category']:
                if base_col in col:
                    if base_col in encoders:
                        val = input_df[col].iloc[0]
                        if isinstance(val, str):
                            if val in encoders[base_col].classes_:
                                input_df[col] = encoders[base_col].transform([val])[0]
                            else:
                                input_df[col] = -1
                        break
                    else:
                        print(f"Encoder for '{base_col}' not found, skipping.")
                        break

    # Ensure feature alignment with model
    X_input = input_df[lgb_sales.feature_name_]

    # Predict
    sales_pred = lgb_sales.predict(X_input)[0]
    profit_pred = lgb_profit.predict(X_input)[0]

    return sales_pred, profit_pred

# Test Case

In [10]:
# Forecast settings
start_date = datetime(2017, 12, 1)
region = 'East'
category = 'Technology'
sub_category = 'Phones'

sales_lag_1 = 1500.0
sales_lag_7 = 1200.0
profit_lag_1 = 450.0
profit_lag_7 = 380.0

forecast = []

date = start_date + timedelta(days=1)
test_input = create_test_input( 
    date=date,
    region=region,
    category=category,
    sub_category=sub_category, 
    sales_lag_1=sales_lag_1,
    sales_lag_7=sales_lag_7,
    profit_lag_1=profit_lag_1,
    profit_lag_7=profit_lag_7
)

# Predict
sales_pred, profit_pred = predict_sales_profit(test_input, encoders, lgb_sales, lgb_profit)

forecast.append({
    'Date': date.strftime('%Y-%m-%d'),
    'Predicted_Sales': round(sales_pred, 2),
    'Predicted_Profit': round(profit_pred, 2),
})


# Show forecast results
forecast_df = pd.DataFrame(forecast)
print("7-Day Forecast Results:")
print(forecast_df.to_string(index=False))



7-Day Forecast Results:
      Date  Predicted_Sales  Predicted_Profit
2017-12-02          1812.52            331.28


In [18]:
from datetime import datetime, timedelta
import pandas as pd
import numpy as np

def get_input(prompt, default=None, dtype=str):
    while True:
        val = input(f"{prompt} (default: {default}): ").strip()
        if val == "":
            return default
        try:
            if dtype == float:
                return float(val)
            elif dtype == int:
                return int(val)
            elif dtype == datetime:
                return datetime.strptime(val, "%Y-%m-%d")
            else:
                return val
        except ValueError:
            print(f"Invalid input. Please enter a valid {dtype.__name__} value.")

# Collect user inputs 
print("=== Forecast Inputs ===")

start_date = get_input("Enter start date (YYYY-MM-DD)", datetime(2017, 12, 1), dtype=datetime)
region = get_input("Enter region", "East")
category = get_input("Enter category", "Technology")
sub_category = get_input("Enter sub-category", "Paper")

sales_lag_1 = get_input("Enter sales_lag_1", 0.0, dtype=float)
sales_lag_7 = get_input("Enter sales_lag_7", 0.0, dtype=float)
profit_lag_1 = get_input("Enter profit_lag_1", 0.0, dtype=float)
profit_lag_7 = get_input("Enter profit_lag_7", 0.0, dtype=float)

# Forecast generation loop

forecast = []

for i in range(7):
    date = start_date + timedelta(days=i+1)

    test_input = create_test_input(
        date=date,
        region=region,
        category=category,
        sub_category=sub_category,
        sales_lag_1=sales_lag_1,
        sales_lag_7=sales_lag_7,
        profit_lag_1=profit_lag_1,
        profit_lag_7=profit_lag_7
    )

    # Predict
    sales_pred, profit_pred = predict_sales_profit(test_input, encoders, lgb_sales, lgb_profit)

    # Store results
    forecast.append({
        'Date': date.strftime('%Y-%m-%d'),
        'Predicted_Sales': round(sales_pred, 2),
        'Predicted_Profit': round(profit_pred, 2),
    })

    # Update lags for recursive prediction
    sales_lag_7 = sales_lag_1
    sales_lag_1 = sales_pred
    profit_lag_7 = profit_lag_1
    profit_lag_1 = profit_pred

# Display forecast results
forecast_df = pd.DataFrame(forecast)
print("\n 7-Day Forecast Results:")
print(forecast_df.to_string(index=False))

=== Forecast Inputs ===


Enter start date (YYYY-MM-DD) (default: 2017-12-01 00:00:00):  
Enter region (default: East):  
Enter category (default: Technology):  
Enter sub-category (default: Paper):  
Enter sales_lag_1 (default: 0.0):  100
Enter sales_lag_7 (default: 0.0):  
Enter profit_lag_1 (default: 0.0):  15
Enter profit_lag_7 (default: 0.0):  



 7-Day Forecast Results:
      Date  Predicted_Sales  Predicted_Profit
2017-12-02           509.22             28.24
2017-12-03           297.76             38.78
2017-12-04           387.20             51.29
2017-12-05           399.06             48.07
2017-12-06           183.12             39.70
2017-12-07           353.58             51.11
2017-12-08           362.58             54.63
