In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import TimeSeriesSplit
import matplotlib.pyplot as plt


class RegularizedPredictor(nn.Module):
    """Fully connected regressor with batch norm, dropout and an L2 term.

    The first linear layer consumes ``input_size * 2`` features per sample
    and the final layer emits 6 values per sample.
    """

    def __init__(self, input_size=4, dropout_rate=0.2):
        super().__init__()
        wide, narrow, n_outputs = 32, 16, 6
        stack = [
            nn.Linear(input_size * 2, wide),
            nn.BatchNorm1d(wide),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(wide, narrow),
            nn.BatchNorm1d(narrow),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(narrow, n_outputs),
        ]
        self.fc = nn.Sequential(*stack)

        # Multiplier applied to the summed parameter norms in get_l2_loss().
        self.l2_lambda = 0.01

    def forward(self, x):
        """Flatten every sample to a single row and run it through ``fc``."""
        flattened = x.view(x.size(0), -1)
        return self.fc(flattened)

    def get_l2_loss(self):
        """Return ``l2_lambda`` times the sum of the 2-norms of all parameters."""
        norm_total = sum(torch.norm(tensor, 2) for tensor in self.parameters())
        return self.l2_lambda * norm_total

class EarlyStopping:
    """Flag when the validation loss has stopped improving.

    Call the instance once per epoch with the validation loss. After
    ``patience`` calls in a row whose loss is greater than
    ``best_loss - min_delta``, ``early_stop`` becomes True.
    """

    def __init__(self, patience=7, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_loss = None
        self.early_stop = False

    def __call__(self, val_loss):
        # The first observation only seeds the reference loss.
        if self.best_loss is None:
            self.best_loss = val_loss
            return

        stalled = val_loss > self.best_loss - self.min_delta
        if not stalled:
            self.best_loss = val_loss
            self.counter = 0
            return

        self.counter += 1
        if self.counter >= self.patience:
            self.early_stop = True

def train_with_validation(model, X_train, y_train, epochs=100, batch_size=32, validation_split=0.2):
    """Train ``model`` in place, using the tail of the data for early stopping.

    The last ``int(len(X_train) * validation_split)`` samples are held out
    (no shuffling, so the hold-out is the most recent part of the sequence)
    and the remainder is used for mini-batch training with Adam.

    Args:
        model: module exposing ``get_l2_loss()``; its output is compared with
            ``y_train`` through ``nn.MSELoss``.
        X_train: array-like of inputs accepted by ``torch.FloatTensor``.
        y_train: array-like of targets, same length as ``X_train``.
        epochs: maximum number of passes over the training part.
        batch_size: mini-batch size for the training loader.
        validation_split: fraction of samples held out for validation.

    Returns:
        The same ``model`` object after training.
    """
    n_val = int(len(X_train) * validation_split)
    # Split on an explicit index. The `[-n_val:]` / `[:-n_val]` idiom breaks
    # when n_val == 0: `[-0:]` is the whole array and `[:-0]` is empty, which
    # would validate on everything and train on nothing.
    split_at = len(X_train) - n_val
    X_val, y_val = X_train[split_at:], y_train[split_at:]
    X_train, y_train = X_train[:split_at], y_train[:split_at]

    train_dataset = torch.utils.data.TensorDataset(
        torch.FloatTensor(X_train),
        torch.FloatTensor(y_train)
    )
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True
    )

    # Without hold-out samples there is nothing to early-stop on; in that
    # case all `epochs` are run.
    has_validation = n_val > 0
    if has_validation:
        # Converted once instead of on every epoch.
        val_inputs = torch.FloatTensor(X_val)
        val_targets = torch.FloatTensor(y_val)

    # Weights are penalised twice: through Adam's weight_decay and through
    # the explicit model.get_l2_loss() term added to the batch loss below.
    optimizer = torch.optim.Adam(model.parameters(), weight_decay=0.01)
    criterion = nn.MSELoss()
    early_stopping = EarlyStopping(patience=5)

    for epoch in range(epochs):
        model.train()
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)

            total_loss = loss + model.get_l2_loss()

            total_loss.backward()
            optimizer.step()

        model.eval()
        if not has_validation:
            continue

        with torch.no_grad():
            # .item() hands the early-stopping tracker a plain float rather
            # than a tensor.
            val_loss = criterion(model(val_inputs), val_targets).item()

        early_stopping(val_loss)
        if early_stopping.early_stop:
            break

    return model

def prepare_sliding_windows(data, window_size=4):
    """Build (window, next value) pairs from a sequence.

    Each row of the first returned array holds ``window_size`` consecutive
    values; the matching entry of the second array is the value that
    immediately follows that window. Both arrays are empty when ``data`` has
    no more than ``window_size`` elements.
    """
    starts = range(len(data) - window_size)
    windows = [data[start:start + window_size] for start in starts]
    targets = [data[start + window_size] for start in starts]
    return np.array(windows), np.array(targets)


def calculate_moving_stats(data, window=4):
    """Return (mean of positives, std of positives, mean step, zero share).

    ``window`` is accepted but not used by the computation. The mean and
    standard deviation are taken over strictly positive values only and fall
    back to 0 when there are none; the trend is the mean first difference of
    the whole series (0 for fewer than two points).
    """
    series = pd.Series(data)
    positives = series[series > 0]
    has_positives = len(positives) > 0

    non_zero_mean = positives.mean() if has_positives else 0
    non_zero_std = positives.std() if has_positives else 0

    trend = 0 if len(series) <= 1 else np.mean(np.diff(series))
    zero_ratio = (series == 0).mean()

    return non_zero_mean, non_zero_std, trend, zero_ratio

def detect_zero_pattern(data):
    """Summarise where zeros occur in a demand series.

    NOTE: this file defines ``detect_zero_pattern`` a second time further
    down; when the module is executed that later definition replaces this
    one, so this version only matters if the later one is removed.

    Args:
        data: 1-D array-like of demand values.

    Returns:
        dict with ``zero_ratio``, ``consecutive_zeros``, ``ends_with_zero``,
        ``zero_runs``, ``non_zero_runs``, ``last_non_zero``, ``avg_gap``,
        ``max_gap``, ``avg_zero_run`` and ``avg_non_zero_run``; plus
        ``non_zero_gaps`` when at least one value is non-zero.
    """
    data = np.asarray(data)  # lets plain lists be masked like arrays
    zeros = data == 0
    non_zero_indices = np.flatnonzero(~zeros)
    has_data = len(data) > 0

    patterns = {
        'zero_ratio': zeros.mean() if has_data else 0,
        'consecutive_zeros': bool(np.any(zeros[:-1] & zeros[1:])),
        'ends_with_zero': zeros[-1] if has_data else False,
        'zero_runs': [],
        'non_zero_runs': [],
        'last_non_zero': data[~zeros][-1] if len(non_zero_indices) > 0 else 0
    }

    if len(non_zero_indices) > 0:
        patterns['non_zero_gaps'] = np.diff(non_zero_indices)

    if len(non_zero_indices) > 1:
        patterns['avg_gap'] = np.mean(patterns['non_zero_gaps'])
        patterns['max_gap'] = np.max(patterns['non_zero_gaps'])
    else:
        # Zero or one non-zero value means there is no gap to measure
        # (np.max of an empty array would raise), so fall back to the
        # series length.
        patterns['avg_gap'] = len(data)
        patterns['max_gap'] = len(data)

    current_run = 1
    for i in range(1, len(data)):
        if zeros[i] == zeros[i - 1]:
            current_run += 1
            continue
        # A run just ended; it has the kind of the *previous* element.
        finished = patterns['zero_runs'] if zeros[i - 1] else patterns['non_zero_runs']
        finished.append(current_run)
        current_run = 1
    # NOTE(review): the run still open at the end of the series is not
    # recorded (same as before this change) — confirm whether it should be.

    patterns['avg_zero_run'] = np.mean(patterns['zero_runs']) if patterns['zero_runs'] else 1
    patterns['avg_non_zero_run'] = np.mean(patterns['non_zero_runs']) if patterns['non_zero_runs'] else 1

    return patterns

def calculate_safe_correlation(data1, data2):
    """Pearson correlation that returns a number instead of NaN or an error.

    Args:
        data1, data2: numeric sequences to correlate.

    Returns:
        0 when the lengths differ; 0.0 for empty input; when both inputs are
        constant, 1.0 if they are element-wise equal and 0.0 otherwise; 0.0
        when exactly one is constant or the coefficient cannot be computed;
        otherwise the correlation coefficient as a float.
    """
    if len(data1) != len(data2):
        return 0
    if len(data1) == 0:
        # np.std / np.corrcoef on empty input only produce NaN and warnings.
        return 0.0

    std1 = np.std(data1)
    std2 = np.std(data2)

    if std1 == 0 or std2 == 0:
        if std1 == 0 and std2 == 0:
            return 1.0 if np.array_equal(data1, data2) else 0.0
        return 0.0

    try:
        with np.errstate(divide='ignore', invalid='ignore'):
            correlation = np.corrcoef(data1, data2)[0, 1]
    except (TypeError, ValueError, FloatingPointError):
        # Named exceptions only, so KeyboardInterrupt/SystemExit propagate.
        return 0.0

    return 0.0 if np.isnan(correlation) else float(correlation)

def calculate_stats(data):
    """Compute the summary statistics that drive the heuristic forecaster.

    Args:
        data: 1-D array-like of demand values.

    Returns:
        dict with ``zero_pattern`` (from ``detect_zero_pattern``), the
        minimum / median / mean of the positive values, ``recent_value``
        (last element), ``recent_trend`` (mean step over the last three
        points), ``volatility`` (std / mean of the positive values),
        ``is_zero_heavy`` (zero share between 0.3 and 0.7), ``is_stable``
        (volatility below 0.4) and ``seasonal_pattern`` (lag-2 correlation).
    """
    data = np.asarray(data)  # allows lists; masks below need an ndarray

    if len(data) == 0:
        return {
            'is_zero_heavy': False,
            'is_stable': False,
            # Carries every key should_predict_zero() and
            # predict_non_zero_value() read, so an empty-series result can be
            # passed to them without a KeyError.
            'zero_pattern': {
                'zero_ratio': 0,
                'consecutive_zeros': False,
                'ends_with_zero': False,
                'zero_runs': [],
                'non_zero_runs': [],
                'last_non_zero': 0,
                'avg_gap': 0,
                'max_gap': 0,
                'regular_spacing': float('inf'),
                'is_alternating': False,
                'last_non_zero_position': -1
            },
            'min_non_zero': 0,
            'median_non_zero': 0,
            'mean_non_zero': 0,
            'recent_value': 0,
            'recent_trend': 0,
            'volatility': 0,
            'seasonal_pattern': 0
        }

    non_zero_data = data[data > 0]
    has_non_zero = len(non_zero_data) > 0

    zero_pattern = detect_zero_pattern(data)

    # Mean step over the last three observations; 0 with fewer than three.
    recent_trend = 0
    if len(data) >= 3:
        recent_trend = np.mean(np.diff(data[-3:]))

    stats = {
        'zero_pattern': zero_pattern,
        'min_non_zero': float(non_zero_data.min()) if has_non_zero else 0,
        'median_non_zero': float(np.median(non_zero_data)) if has_non_zero else 0,
        'mean_non_zero': float(np.mean(non_zero_data)) if has_non_zero else 0,
        'recent_value': float(data[-1]),
        'recent_trend': float(recent_trend),
        'volatility': float(np.std(non_zero_data) / np.mean(non_zero_data)) if has_non_zero and np.mean(non_zero_data) > 0 else 0
    }

    stats['is_zero_heavy'] = 0.3 <= zero_pattern['zero_ratio'] <= 0.7
    stats['is_stable'] = stats['volatility'] < 0.4

    # Correlate the series with itself shifted by two periods.
    if len(data) >= 4:
        stats['seasonal_pattern'] = calculate_safe_correlation(data[:-2], data[2:])
    else:
        stats['seasonal_pattern'] = 0

    return stats

def detect_zero_pattern(data):
    """Describe how zeros are distributed in a 1-D demand array.

    Returns a dict with the share of zeros, whether two zeros ever sit next
    to each other, whether the series ends in a zero, the last non-zero value
    and its position, gap statistics between non-zero positions, the lengths
    of completed runs longer than one element, and an ``is_alternating`` flag
    that is True when more than 70% of adjacent pairs switch between zero and
    non-zero (series of at least four points only).
    """
    n_points = len(data)
    if n_points == 0:
        return {
            'zero_ratio': 0,
            'consecutive_zeros': False,
            'ends_with_zero': False,
            'zero_runs': [],
            'non_zero_runs': [],
            'last_non_zero': 0,
            'avg_gap': 0,
            'max_gap': 0,
            'regular_spacing': float('inf'),
            'is_alternating': False,
            'last_non_zero_position': -1
        }

    zeros = data == 0
    non_zero_indices = np.where(~zeros)[0]
    any_non_zero = len(non_zero_indices) > 0
    neighbours = list(zip(zeros[:-1], zeros[1:]))

    patterns = {
        'zero_ratio': float(np.mean(zeros)),
        'consecutive_zeros': any(left and right for left, right in neighbours),
        'ends_with_zero': bool(zeros[-1]),
        'zero_runs': [],
        'non_zero_runs': [],
        'last_non_zero': float(data[non_zero_indices[-1]]) if any_non_zero else 0,
        'last_non_zero_position': int(non_zero_indices[-1]) if any_non_zero else -1
    }

    # Spacing between successive non-zero positions; needs at least two.
    if len(non_zero_indices) > 1:
        gaps = np.diff(non_zero_indices)
        mean_gap = float(np.mean(gaps))
        patterns['avg_gap'] = mean_gap
        patterns['max_gap'] = float(np.max(gaps))
        patterns['regular_spacing'] = float(np.std(gaps) / mean_gap) if mean_gap > 0 else float('inf')
    else:
        patterns['avg_gap'] = float(n_points)
        patterns['max_gap'] = float(n_points)
        patterns['regular_spacing'] = float('inf')

    # Walk adjacent pairs; when the zero/non-zero state flips, the run that
    # just ended is stored only if it was longer than one element. The run
    # still open at the end of the series is not stored.
    run_length = 1
    for was_zero, is_zero in neighbours:
        if was_zero == is_zero:
            run_length += 1
            continue
        if run_length > 1:
            bucket = 'non_zero_runs' if is_zero else 'zero_runs'
            patterns[bucket].append(run_length)
        run_length = 1

    zero_runs = patterns['zero_runs']
    non_zero_runs = patterns['non_zero_runs']
    patterns['avg_zero_run'] = float(np.mean(zero_runs)) if zero_runs else 1
    patterns['avg_non_zero_run'] = float(np.mean(non_zero_runs)) if non_zero_runs else 1

    if n_points >= 4:
        flips = sum(1 for was_zero, is_zero in neighbours if was_zero != is_zero)
        patterns['is_alternating'] = (flips / (n_points - 1)) > 0.7
    else:
        patterns['is_alternating'] = False

    return patterns

def calculate_historical_variation(data):
    """Return the std of period-over-period relative changes of positive demand.

    Zeros and negatives are dropped first, so each change is measured between
    consecutive *positive* observations: ``(next - current) / current``.

    Args:
        data: 1-D array-like of demand values.

    Returns:
        0 when fewer than two positive values exist or no finite change can
        be computed, otherwise the population standard deviation of the
        relative changes as a float.
    """
    data = np.asarray(data)  # allows lists; the mask below needs an ndarray
    non_zero_data = data[data > 0]
    if len(non_zero_data) < 2:
        return 0

    try:
        variations = np.diff(non_zero_data) / non_zero_data[:-1]
        # Drops NaN and +/-inf in one pass.
        variations = variations[np.isfinite(variations)]
    except (TypeError, ValueError, ZeroDivisionError):
        # Named exceptions only, so KeyboardInterrupt/SystemExit propagate.
        return 0

    if len(variations) == 0:
        return 0

    return float(np.std(variations))

def should_predict_zero(stats, position, periods_since_last_non_zero):
    """Decide whether the forecast for step ``position`` should be zero.

    A score is built from the zero pattern in ``stats`` and compared with a
    threshold; both are modulated by sine terms of ``position``. Returns True
    when the score exceeds the threshold.
    """
    pattern = stats['zero_pattern']

    if pattern['is_alternating']:
        # Alternating series: expect the opposite of the latest observation.
        score = 0.8 if stats['recent_value'] > 0 else 0.2
    else:
        recency_factor = 1.2 if stats['recent_value'] == 0 else 0.8
        score = 0.6 * recency_factor + 0.4 * pattern['zero_ratio']

        # Demand is "due" once the usual gap has elapsed.
        if periods_since_last_non_zero >= pattern['avg_gap']:
            score *= 0.7

        if pattern['regular_spacing'] < 0.3:
            cycle = max(2, round(pattern['avg_gap']))
            slot = (periods_since_last_non_zero + 1) % cycle
            score *= 1.3 if slot == 0 else 0.7

    final_score = score * (1 + 0.2 * np.sin(position * np.pi / 3))

    cutoff = 0.45
    if pattern['consecutive_zeros']:
        cutoff *= 0.9
    if stats['is_stable']:
        cutoff *= 1.1
    cutoff = cutoff + 0.05 * np.sin(position * np.pi / 2)

    return final_score > cutoff

def predict_non_zero_value(stats, position, historical_data):
    """Forecast a positive demand value for step ``position``.

    Series whose positive values average below 30 use the median of the last
    three positives plus a damped sine offset; other series combine the
    statistics in ``stats`` with the historical relative variation. The
    result is clipped to a range derived from the history, optionally blended
    with the most recent value, and rounded. Returns 1 when the history has
    no positive value.
    """
    positives = historical_data[historical_data > 0]
    if len(positives) == 0:
        return 1

    if np.mean(positives) < 30:
        # Small-volume branch: anchor on the latest positives.
        base = np.median(positives[-3:])

        spread = np.ptp(positives)
        if spread == 0:
            spread = base * 0.4

        # The offset amplitude decays the further ahead we forecast.
        amplitude = spread * 0.25 * np.exp(-position * 0.3)
        prediction = base + np.sin(position * np.pi / 3) * amplitude

        lower = max(min(positives) * 0.8, 1)
        upper = max(positives) * 1.2
    else:
        last_non_zero = stats['zero_pattern']['last_non_zero']
        recent_non_zero = last_non_zero if last_non_zero > 0 else stats['median_non_zero']

        historical_variation = calculate_historical_variation(historical_data)

        if stats['is_stable']:
            base = stats['median_non_zero']
            variation_factor = historical_variation * 0.3
        else:
            # 70% latest non-zero value, 30% long-run mean.
            base = (recent_non_zero * 0.7 + stats['mean_non_zero'] * 0.3)
            variation_factor = historical_variation * 0.35

        swing = np.sin(position * np.pi / 4) * variation_factor
        drift = stats['recent_trend'] * position * 0.02
        prediction = base * (1 + swing) + drift

        lower = max(stats['min_non_zero'] * 0.8, 1)
        upper = stats['mean_non_zero'] * 1.3

    prediction = np.clip(prediction, lower, upper)

    # Irregularly spaced demand: pull the forecast towards the latest value.
    irregular = stats['zero_pattern']['regular_spacing'] > 0.5
    if irregular and stats['recent_value'] > 0:
        blend_factor = 0.6
        prediction = (prediction * blend_factor + stats['recent_value'] * (1 - blend_factor))

    return round(prediction)

def make_predictions(historical_data, n_periods=6):
    """Forecast ``n_periods`` steps with the zero / non-zero heuristics.

    For each step ``should_predict_zero`` decides whether the forecast is 0;
    otherwise ``predict_non_zero_value`` supplies the value. This is a
    best-effort routine: if the statistics cannot be computed an all-zero
    forecast is returned, and a step whose heuristics fail is forecast as 0.

    Args:
        historical_data: 1-D array of past demand.
        n_periods: number of future periods to forecast.

    Returns:
        NumPy array of length ``n_periods``.
    """
    if len(historical_data) == 0:
        return np.zeros(n_periods)

    try:
        stats = calculate_stats(historical_data)
    except Exception:
        # `Exception` rather than a bare except, so KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        return np.zeros(n_periods)

    predictions = []
    periods_since_non_zero = 0

    for i in range(n_periods):
        try:
            if should_predict_zero(stats, i, periods_since_non_zero):
                pred = 0
                periods_since_non_zero += 1
            else:
                pred = predict_non_zero_value(stats, i, historical_data)
                periods_since_non_zero = 0
        except Exception:
            pred = 0

        predictions.append(pred)

    return np.array(predictions)

def calculate_error_metrics(actuals, predictions):
    """Return ``(MAD, MAD / mean of non-zero actuals, RMSE)`` for a forecast.

    Args:
        actuals: 1-D array-like of observed values.
        predictions: 1-D array-like of forecasts, same shape as ``actuals``.

    Returns:
        Tuple ``(total_mad, mad_ratio, rmse)``. ``total_mad`` and ``rmse``
        cover every period. ``mad_ratio`` is the mean absolute error over the
        periods whose actual is non-zero divided by the mean of those
        actuals, or 0 when there are none or that mean is not positive.

    Raises:
        ValueError: if the inputs are empty, differ in shape, or contain
            NaN / infinite values.
    """
    # Coerce first: with plain lists `actuals == 0` is a single bool, and the
    # masks built from it would silently select the wrong elements.
    actuals = np.asarray(actuals, dtype=float)
    predictions = np.asarray(predictions, dtype=float)

    if actuals.shape != predictions.shape:
        raise ValueError(
            f"actuals and predictions must have the same shape, got {actuals.shape} and {predictions.shape}"
        )
    if actuals.size == 0:
        raise ValueError("actuals and predictions must not be empty")
    if not (np.all(np.isfinite(actuals)) and np.all(np.isfinite(predictions))):
        raise ValueError("actuals and predictions must not contain NaN or infinity")

    errors = predictions - actuals
    abs_errors = np.abs(errors)

    non_zero_mask = actuals != 0
    mad_ratio = 0
    if np.any(non_zero_mask):
        non_zero_mean = np.mean(actuals[non_zero_mask])
        if non_zero_mean > 0:
            mad_ratio = np.mean(abs_errors[non_zero_mask]) / non_zero_mean

    total_mad = np.mean(abs_errors)
    rmse = np.sqrt(np.mean(errors ** 2))

    return total_mad, mad_ratio, rmse

def make_predictions_with_cv(historical_data, n_periods=6):
    """Forecast ``n_periods`` steps, trying a cross-validated network first.

    Sliding windows of length 2 are built from ``historical_data``. With
    fewer than four windows, or when no fold yields predictions, or when any
    exception is raised during cross-validation, the function returns
    ``make_predictions(historical_data, n_periods)`` instead. Otherwise the
    fold predictions are averaged and combined with the zero heuristics.

    Returns:
        NumPy array of length ``n_periods``.
    """
    predictions = []
    stats = calculate_stats(historical_data)

    # X has one row per window with 2 columns; y is the value after each window.
    X, y = prepare_sliding_windows(historical_data, window_size=2)

    # Too few windows to split: use the heuristic forecaster directly.
    if len(X) < 4:
        return make_predictions(historical_data, n_periods)

    try:
        tscv = TimeSeriesSplit(n_splits=2)
        cv_predictions = []

        for train_idx, val_idx in tscv.split(X):
            if len(train_idx) == 0 or len(val_idx) == 0:
                continue

            X_train, X_val = X[train_idx], X[val_idx]
            # NOTE(review): y_val is assigned but never read in this function.
            y_train, y_val = y[train_idx], y[val_idx]

            if len(X_train) < 2 or len(X_val) < 1:
                continue

            # The scaler is fitted on the training fold only and then applied
            # to the validation fold.
            scaler = StandardScaler()
            X_train_scaled = scaler.fit_transform(X_train)
            X_val_scaled = scaler.transform(X_val)

            # NOTE(review): RegularizedPredictor's first layer is
            # nn.Linear(input_size * 2, 32), i.e. 8 input features for
            # input_size=4, while X has 2 columns (window_size=2). The forward
            # pass therefore looks likely to raise a shape error that the
            # except clause below turns into the heuristic fallback — TODO
            # confirm and align the sizes.
            # NOTE(review): the network emits 6 values per sample whereas
            # y_train holds one value per sample — confirm the intended
            # target shape.
            model = RegularizedPredictor(input_size=4)
            model = train_with_validation(
                model,
                X_train_scaled,
                y_train,
                epochs=100,
                batch_size=min(32, len(X_train))
            )

            with torch.no_grad():
                model.eval()
                val_pred = model(torch.FloatTensor(X_val_scaled))
                cv_predictions.append(val_pred.numpy())

        if cv_predictions:
            # Element-wise mean across folds.
            ensemble_predictions = np.mean(cv_predictions, axis=0)
        else:
            return make_predictions(historical_data, n_periods)

    except Exception as e:
        print(f"Cross-validation failed, falling back to simple prediction: {str(e)}")
        return make_predictions(historical_data, n_periods)

    for i in range(n_periods):
        # NOTE(review): should_predict_zero's third parameter is named
        # periods_since_last_non_zero, but len(predictions) is the number of
        # forecasts made so far; make_predictions passes a counter that is
        # reset after each non-zero forecast — confirm which is intended.
        if should_predict_zero(stats, i, len(predictions)):
            pred = 0
        else:
            if i < len(ensemble_predictions):
                # NOTE(review): ensemble_predictions is indexed along its
                # first axis here; if each fold prediction is 2-D this is a
                # row, not a scalar, and max(0, row) would fail — TODO confirm.
                pred = round(max(0, ensemble_predictions[i]))
            else:
                pred = predict_non_zero_value(stats, i, historical_data)
        predictions.append(pred)

    return np.array(predictions)

def process_skus_full_history(df):
    """Back-test and forecast every SKU found in ``df``.

    For each SKU with at least four observations this:

    1. rolls a 4-period window over the history and forecasts the next period
       from each window (one-step-ahead back-test, scored with MAD, MAD/mean
       and RMSE);
    2. forecasts six periods from the last four observations, storing the
       first four as ``Retention_Predictions`` and the rest as
       ``Additional_Predictions``;
    3. records summary flags computed from the last four observations.

    Rows are used in the order they appear in ``df``; no sorting is applied.
    SKUs with fewer than four observations, or whose processing raises, are
    reported with a printed warning and skipped.

    Args:
        df: DataFrame with at least the columns ``sku`` and ``demanda``.

    Returns:
        dict mapping each processed SKU to a dict of histories, predictions
        and metrics.
    """
    skus = df['sku'].unique()
    results = {}

    for sku in skus:
        try:
            sku_data = np.abs(df[df['sku'] == sku]['demanda'].values)

            if len(sku_data) < 4:
                print(f"Warning: SKU {sku} has insufficient data points ({len(sku_data)})")
                continue

            full_history = sku_data.copy()

            # One-step-ahead back-test: forecast period i from periods i-4..i-1.
            all_predictions = []
            for i in range(4, len(sku_data)):
                window_data = sku_data[i-4:i]
                if len(window_data) == 4:
                    try:
                        pred = make_predictions_with_cv(window_data, n_periods=1)[0]
                        all_predictions.append(pred)
                    except Exception as e:
                        print(f"Warning: Prediction failed for window in SKU {sku}: {str(e)}")
                        # NaN keeps the list aligned with sku_data[4:].
                        all_predictions.append(np.nan)

            actual_for_comparison = sku_data[4:]

            if len(all_predictions) > 0 and len(actual_for_comparison) > 0:
                # Score only the windows whose forecast succeeded.
                valid_mask = ~np.isnan(all_predictions)
                valid_predictions = np.array(all_predictions)[valid_mask]
                valid_actuals = actual_for_comparison[valid_mask]

                if len(valid_predictions) > 0:
                    mad = np.mean(np.abs(valid_actuals - valid_predictions))
                    non_zero_mask = valid_actuals != 0
                    if np.any(non_zero_mask):
                        mad_ratio = mad / np.mean(valid_actuals[non_zero_mask])
                    else:
                        mad_ratio = 0
                    rmse = np.sqrt(mean_squared_error(valid_actuals, valid_predictions))
                else:
                    mad = mad_ratio = rmse = 0
            else:
                mad = mad_ratio = rmse = 0

            # NOTE(review): the forecasts below are produced *from*
            # retention_data and then scored *against* retention_data —
            # confirm this is the intended hold-out comparison.
            retention_data = sku_data[-4:]
            if len(retention_data) == 4:
                try:
                    predictions = make_predictions_with_cv(retention_data)
                    retention_predictions = predictions[:4]
                    additional_predictions = predictions[4:]
                except Exception as e:
                    print(f"Warning: CV prediction failed for SKU {sku}, using simple prediction: {str(e)}")
                    all_preds = make_predictions(retention_data, n_periods=6)
                    retention_predictions = all_preds[:4]
                    additional_predictions = all_preds[4:]
            else:
                # One 6-step forecast split 4 + 2, as in the branches above.
                # Previously this asked for only 2 periods and then sliced
                # [4:], which always produced an empty array.
                fallback_preds = make_predictions(retention_data, n_periods=6)
                retention_predictions = fallback_preds[:4]
                additional_predictions = fallback_preds[4:]

            if len(retention_data) > 0 and len(retention_predictions) > 0:
                retention_mad, retention_mad_ratio, retention_rmse = calculate_error_metrics(
                    retention_data, retention_predictions
                )
            else:
                retention_mad = retention_mad_ratio = retention_rmse = 0

            try:
                stats = calculate_stats(retention_data)
            except Exception as e:
                print(f"Warning: Stats calculation failed for SKU {sku}: {str(e)}")
                # Minimal stand-in with just the keys read below.
                stats = {
                    'is_zero_heavy': False,
                    'is_stable': False,
                    'zero_pattern': {'zero_ratio': 0}
                }

            results[sku] = {
                'Full_History': full_history,
                'Full_Predictions': all_predictions,
                'Full_MAD': mad,
                'Full_MAD/Mean': mad_ratio,
                'Full_RMSE': rmse,

                'MAD': retention_mad,
                'MAD/Mean': retention_mad_ratio,
                'RMSE': retention_rmse,
                'Retention_Data': retention_data,
                'Retention_Predictions': retention_predictions,
                'Additional_Predictions': additional_predictions,
                'Is_Zero_Heavy': stats['is_zero_heavy'],
                'Is_Stable': stats['is_stable'],
                'Zero_Ratio': stats['zero_pattern']['zero_ratio'],
                'Data_Points': len(retention_data)
            }

        except Exception as e:
            print(f"Warning: Error processing SKU {sku}: {str(e)}")
            continue

    return results
def plot_predictions(results, output_dir='plots'):
    """Write two PNG charts per SKU into ``output_dir``.

    The first chart compares the retention data with the retention
    predictions; the second extends the prediction line with the additional
    predictions. ``output_dir`` is created when it does not exist.
    """
    import os
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    plt.style.use('ggplot')

    def _decorate(title):
        # Shared title, axis labels, legend and grid for both charts.
        plt.title(title)
        plt.xlabel('Period')
        plt.ylabel('Demand')
        plt.legend()
        plt.grid(True, alpha=0.3)
        plt.xticks(rotation=45)

    def _label(x_index, value, y_offset):
        # Write the rounded value next to its marker.
        plt.annotate(f'{value:.0f}', (x_index, value), textcoords="offset points", xytext=(0, y_offset), ha='center')

    def _save(path):
        plt.tight_layout()
        plt.savefig(path, dpi=300, bbox_inches='tight')
        plt.close()

    for sku, metrics in results.items():
        # Chart 1: retention window, actual vs predicted.
        plt.figure(figsize=(12, 6))

        actuals = metrics['Retention_Data']
        periods = [f'Period {i+1}' for i in range(len(actuals))]

        plt.plot(periods, actuals, 'b-o', label='Actual Data', linewidth=2)
        plt.plot(periods, metrics['Retention_Predictions'], 'r--o', label='Predictions', linewidth=2)

        _decorate(f'Retention Data vs Predictions - SKU: {sku}')

        for i, (actual, pred) in enumerate(zip(actuals, metrics['Retention_Predictions'])):
            _label(i, actual, 10)
            _label(i, pred, -15)

        _save(f'{output_dir}/retention_comparison_{sku}.png')

        # Chart 2: same window plus the additional forecast periods.
        plt.figure(figsize=(12, 6))

        n_actual = len(metrics['Retention_Data'])
        n_total = n_actual + len(metrics['Additional_Predictions'])
        all_periods = [f'Period {i+1}' for i in range(n_total)]

        plt.plot(all_periods[:n_actual],
                 metrics['Retention_Data'],
                 'b-o',
                 label='Historical Data',
                 linewidth=2)

        all_predictions = np.concatenate([metrics['Retention_Predictions'], metrics['Additional_Predictions']])
        plt.plot(all_periods,
                 all_predictions,
                 'r--o',
                 label='Predictions',
                 linewidth=2)

        _decorate(f'Full Demand Predictions - SKU: {sku}')

        for i, pred in enumerate(all_predictions):
            if i < len(metrics['Retention_Data']):
                _label(i, metrics['Retention_Data'][i], 10)
            _label(i, pred, -15)

        _save(f'{output_dir}/full_predictions_{sku}.png')
def plot_full_history_predictions(results, output_dir='plots'):
    """Write one PNG per SKU comparing the full history with its back-test.

    SKUs without back-test predictions are skipped. Predictions start at the
    fifth period; NaN predictions are left out of the line and the labels. A
    failure while plotting one SKU is printed and the loop continues.
    """
    import os
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    plt.style.use('ggplot')

    for sku, metrics in results.items():
        if len(metrics['Full_Predictions']) == 0:
            continue

        try:
            plt.figure(figsize=(15, 7))

            full_history = metrics['Full_History']
            predictions = np.array(metrics['Full_Predictions'])

            all_periods = [f'Period {i+1}' for i in range(len(full_history))]
            # The first four periods only seed the first window.
            prediction_periods = all_periods[4:]

            plt.plot(all_periods, full_history, 'b-o', label='Actual Data', linewidth=2)

            valid_mask = ~np.isnan(predictions)
            if np.any(valid_mask):
                valid_periods = [label for i, label in enumerate(prediction_periods) if valid_mask[i]]
                plt.plot(valid_periods, predictions[valid_mask], 'r--o', label='Predictions', linewidth=2)

            plt.title(f'Full History Demand vs Predictions - SKU: {sku}\nMAD/Mean: {metrics["Full_MAD/Mean"]:.2f}, RMSE: {metrics["Full_RMSE"]:.2f}')
            plt.xlabel('Period')
            plt.ylabel('Demand')
            plt.legend()
            plt.grid(True, alpha=0.3)
            plt.xticks(rotation=45)

            for i, val in enumerate(full_history):
                if np.isnan(val):
                    continue
                plt.annotate(f'{val:.0f}', (i, val), textcoords="offset points", xytext=(0, 10), ha='center')

            # Offset by 4 so each label sits under the period it forecasts.
            for i, pred in enumerate(predictions):
                if np.isnan(pred):
                    continue
                plt.annotate(f'{pred:.0f}', (i+4, pred), textcoords="offset points", xytext=(0, -15), ha='center', color='red')

            plt.tight_layout()
            plt.savefig(f'{output_dir}/full_history_comparison_{sku}.png', dpi=300, bbox_inches='tight')
            plt.close()
        except Exception as e:
            print(f"Warning: Error plotting SKU {sku}: {str(e)}")

def main(file_path):
    """Run the whole pipeline for the workbook at ``file_path`` and print a report.

    Reads the ``periodo``, ``sku`` and ``demanda`` columns, takes the
    absolute value of the demand, processes every SKU, writes the plots and
    prints the per-SKU metrics together with a period-by-period table. Any
    exception is caught and printed rather than raised.
    """
    try:
        df = pd.read_excel(file_path, usecols=['periodo', 'sku', 'demanda'])
        df['demanda'] = df['demanda'].abs()

        results = process_skus_full_history(df)

        plot_predictions(results)
        plot_full_history_predictions(results)

        print("\nResultados:")
        for sku, metrics in results.items():
            try:
                print(f"\nSKU: {sku}")

                print("\nMétricas de Retención (últimos 4 períodos):")
                print(f"MAD/Media: {metrics['MAD/Mean']:.2f}")
                print(f"RMSE: {metrics['RMSE']:.2f}")

                print("\nPredicciones de Retención:")
                retention_pairs = zip(metrics['Retention_Data'], metrics['Retention_Predictions'])
                for period_no, (actual, pred) in enumerate(retention_pairs, start=1):
                    print(f"Periodo {period_no}: Actual: {actual:.0f}, Predicción: {pred:.0f}")

                print("\nPredicciones Adicionales:")
                for period_no, pred in enumerate(metrics['Additional_Predictions'], start=5):
                    print(f"Periodo {period_no}: Predicción: {pred:.0f}")

                print("\nComparación de Historia Completa (36 períodos):")
                print("Periodo  |  Actual  |  Predicción")
                print("-" * 40)

                full_history = metrics['Full_History']
                full_predictions = metrics['Full_Predictions']

                table_actuals = []
                table_predictions = []

                for idx, actual in enumerate(full_history):
                    if idx < 4:
                        # No back-test forecast exists for the first four
                        # periods: use the running mean of the data so far.
                        seen = full_history[:idx+1]
                        pred = np.mean(seen) if len(seen) > 0 else actual
                    elif idx - 4 < len(full_predictions):
                        pred = full_predictions[idx-4]
                    else:
                        pred = actual

                    print(f"  {idx+1:02d}    |   {actual:5.0f}  |    {pred:5.0f}")

                    table_actuals.append(actual)
                    table_predictions.append(pred)

                table_actuals = np.array(table_actuals)
                table_predictions = np.array(table_predictions)
                residuals = table_actuals - table_predictions

                mad = np.mean(np.abs(residuals))
                mean_actual = np.mean(table_actuals)
                mad_mean_ratio = mad / mean_actual if mean_actual > 0 else 0

                rmse = np.sqrt(np.mean(residuals ** 2))

                # Two pairs of figures are printed: the ones computed over
                # the table above, then the back-test metrics stored by
                # process_skus_full_history.
                print("-" * 50)
                print(f"MAD/Media: {mad_mean_ratio:.2f}")
                print(f"RMSE: {rmse:.2f}")

                print(f"MAD/Media: {metrics['Full_MAD/Mean']:.2f}")
                print(f"RMSE: {metrics['Full_RMSE']:.2f}")
                print("-" * 50)
            except Exception as e:
                print(f"Warning: Error printing results for SKU {sku}: {str(e)}")

        print("\nGráficas se encuentran en 'plots'.")

    except Exception as e:
        print(f"Error: {str(e)}")

if __name__ == "__main__":
    # Demand workbook passed to main().
    file_path = (
        'https://github.com/Dabut6412/Forecast-IA/raw/refs/heads/main/'
        'FNN/demanda.xlsx'
    )
    main(file_path)


Resultados:

SKU: JSAB5

Métricas de Retención (últimos 4 períodos):
MAD/Media: 0.04
RMSE: 37.41

Predicciones de Retención:
Periodo 1: Actual: 717, Predicción: 734
Periodo 2: Actual: 816, Predicción: 749
Periodo 3: Actual: 727, Predicción: 755
Periodo 4: Actual: 742, Predicción: 748

Predicciones Adicionales:
Periodo 5: Predicción: 732
Periodo 6: Predicción: 715

Comparación de Historia Completa (36 períodos):
Periodo  |  Actual  |  Predicción
----------------------------------------
  01    |     517  |      517
  02    |     536  |      526
  03    |     575  |      543
  04    |     568  |      549
  05    |     614  |      552
  06    |     453  |      572
  07    |     572  |      572
  08    |     602  |      570
  09    |     669  |      587
  10    |     564  |      587
  11    |     623  |      587
  12    |     630  |      612
  13    |     613  |      626
  14    |     566  |      618
  15    |     599  |      618
  16    |     712  |      606
  17    |     667  |      606

In [None]:
import sys
# Print the full version string of the running Python interpreter.
print(sys.version)

3.10.12 (main, Sep 11 2024, 15:47:36) [GCC 11.4.0]


In [None]:
from importlib import metadata

# Report the installed version of each dependency in `name==version` form.
# Distribution names are used here, which can differ from import names:
# the package imported as `sklearn` is distributed as `scikit-learn`, so
# looking up 'sklearn' reports it as missing even when it is installed.
# importlib.metadata is the standard-library replacement for the deprecated
# pkg_resources API.
for package in ['pandas', 'numpy', 'torch', 'scikit-learn', 'matplotlib']:
    try:
        version = metadata.version(package)
        print(f'{package}=={version}')
    except metadata.PackageNotFoundError:
        print(f'{package}: No instalado')

pandas==2.2.2
numpy==1.26.4
torch==2.5.1+cu121
sklearn: No instalado
matplotlib==3.8.0
