# 📅 Time Series Analysis: patient_demographics

**Generated:** 2025-12-06 14:58:18  
**Type:** Time Series Modeling  
**Dataset:** patient_demographics

## 🎯 Objective
This notebook provides a comprehensive time series analysis workflow including decomposition, stationarity testing, forecasting, and model evaluation.

## 📋 Workflow Steps
1. **Data Loading & Date Parsing**
2. **Time Series Visualization**
3. **Trend & Seasonality Decomposition**
4. **Stationarity Testing**
5. **Autocorrelation Analysis**
6. **Train-Test Split**
7. **Forecasting Models**
8. **Model Evaluation & Comparison**
9. **Future Predictions**
10. **Summary & Next Steps**

In [None]:
# Import essential libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

# Time series specific imports
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error
from sklearn.preprocessing import MinMaxScaler

# Set plotting style
plt.style.use('default')
plt.rcParams['figure.figsize'] = (14, 6)
%matplotlib inline

print("‚úÖ All libraries imported successfully!")
print("\nüìö Available Time Series Methods:")
print("‚Ä¢ Seasonal Decomposition")
print("‚Ä¢ Stationarity Tests (ADF, KPSS)")
print("‚Ä¢ Exponential Smoothing")
print("‚Ä¢ Moving Averages")
print("‚Ä¢ Prophet (if installed)")

## 1. 📁 Data Loading & Date Parsing

In [None]:
# Load the dataset.
# ⚠️ UPDATE: point DATA_PATH at your actual CSV file.
DATA_PATH = 'your_file.csv'
df = pd.read_csv(DATA_PATH)

print("=== DATASET OVERVIEW ===")
print(f"Dataset shape: {df.shape}")
print(f"\nColumn names: {list(df.columns)}")
print(f"\nData types:\n{df.dtypes}")

display(df.head(10))

# IMPORTANT: every later cell reads these two names, so set them to the
# matching column names of your file before continuing.
date_column = 'date'      # ⚠️ UPDATE: Column containing dates/timestamps
value_column = 'value'    # ⚠️ UPDATE: Column containing values to forecast

print("\n⚠️  Please update the following variables:")
print(f"   date_column = '{date_column}'")
print(f"   value_column = '{value_column}'")

In [None]:
# Parse dates, build the datetime-indexed frame `df_ts`, and report basic facts
# about its time axis: span, dominant spacing, duplicate stamps and gaps.
if date_column in df.columns and value_column in df.columns:

    # Convert to datetime and order chronologically
    df[date_column] = pd.to_datetime(df[date_column])
    df = df.sort_values(date_column)

    # Keep only the value column, indexed by date
    df_ts = df.set_index(date_column)[[value_column]].copy()

    print("✅ Date parsing successful!")
    print("\n=== TIME SERIES INFO ===")
    print(f"Start Date: {df_ts.index.min()}")
    print(f"End Date: {df_ts.index.max()}")
    print(f"Duration: {df_ts.index.max() - df_ts.index.min()}")
    print(f"Total observations: {len(df_ts)}")

    # Dominant spacing between consecutive observations. Zero-length steps
    # (repeated timestamps) are excluded so they cannot win the mode.
    most_common_diff = None
    if len(df_ts) > 1:
        time_diffs = pd.Series(df_ts.index).diff().dropna()
        positive_diffs = time_diffs[time_diffs > pd.Timedelta(0)]
        if not positive_diffs.empty:
            most_common_diff = positive_diffs.mode()[0]
            print(f"Detected frequency: {most_common_diff}")

    # Repeated timestamps are reported separately from gaps
    n_duplicates = int(df_ts.index.duplicated().sum())
    if n_duplicates > 0:
        print(f"\n⚠️  Duplicate timestamps detected: {n_duplicates}")

    # Gaps are measured against the detected spacing rather than a fixed daily
    # grid: a step of roughly k spacings counts as k - 1 missing observations.
    # Rounding keeps month lengths of 28-31 days from registering as gaps.
    if most_common_diff is not None:
        skipped_steps = (positive_diffs / most_common_diff).round() - 1
        n_missing = int(skipped_steps.clip(lower=0).sum())
        if n_missing > 0:
            print(f"\n⚠️  Missing dates detected: {n_missing}")
        else:
            print("\n✅ No missing dates in the series")
    else:
        print("\n⚠️  Not enough distinct timestamps to check for missing dates")

    display(df_ts.head())
    display(df_ts.describe())

else:
    print("❌ Columns not found!")
    print(f"Available columns: {list(df.columns)}")
    print("\nPlease update date_column and value_column variables.")

## 2. 📊 Time Series Visualization

In [None]:
# Visualize the time series: raw line, distribution, rolling statistics,
# a grouped box plot and a lag-1 scatter.

def adaptive_window(n_obs, cap=30):
    """Return the rolling-window length for a series of `n_obs` points.

    The window is a tenth of the series, capped at `cap`, and never below 2 so
    that a rolling standard deviation can be computed.
    """
    return max(2, min(cap, n_obs // 10))


if 'df_ts' in locals():

    values = df_ts[value_column]
    fig, axes = plt.subplots(3, 2, figsize=(16, 12))

    # Original time series
    axes[0, 0].plot(df_ts.index, values, linewidth=0.8)
    axes[0, 0].set_title(f'Time Series: {value_column}')
    axes[0, 0].set_xlabel('Date')
    axes[0, 0].set_ylabel(value_column)

    # Distribution (missing values are dropped before binning)
    axes[0, 1].hist(values.dropna(), bins=50, edgecolor='black', alpha=0.7)
    axes[0, 1].axvline(values.mean(), color='red', linestyle='--',
                       label=f'Mean: {values.mean():.2f}')
    axes[0, 1].set_title(f'Distribution of {value_column}')
    axes[0, 1].legend()

    # Rolling statistics
    window = adaptive_window(len(df_ts))
    rolling_mean = values.rolling(window=window).mean()
    rolling_std = values.rolling(window=window).std()

    axes[1, 0].plot(df_ts.index, values, label='Original', alpha=0.5)
    axes[1, 0].plot(df_ts.index, rolling_mean, label=f'{window}-period Moving Avg', color='red')
    axes[1, 0].set_title('Time Series with Rolling Mean')
    axes[1, 0].legend()

    axes[1, 1].plot(df_ts.index, rolling_std, color='orange')
    axes[1, 1].set_title(f'{window}-period Rolling Standard Deviation')

    # Box plot by year (more than 365 rows) or by month. The grouping column
    # lives on a temporary copy, so `df_ts` is never modified by this cell.
    group_name = 'year' if len(df_ts) > 365 else 'month'
    grouped = df_ts[[value_column]].assign(**{group_name: getattr(df_ts.index, group_name)})
    grouped.boxplot(column=value_column, by=group_name, ax=axes[2, 0])
    axes[2, 0].set_title(f'Distribution by {group_name.capitalize()}')
    # pandas puts a "Boxplot grouped by ..." suptitle on the whole figure; clear it
    fig.suptitle('')

    # Lag plot
    pd.plotting.lag_plot(values, lag=1, ax=axes[2, 1])
    axes[2, 1].set_title('Lag Plot (lag=1)')

    plt.tight_layout()
    plt.show()

    print("\n📊 TIME SERIES STATISTICS:")
    print(f"Mean: {values.mean():.4f}")
    print(f"Median: {values.median():.4f}")
    print(f"Std Dev: {values.std():.4f}")
    print(f"Min: {values.min():.4f}")
    print(f"Max: {values.max():.4f}")
    print(f"Skewness: {values.skew():.4f}")

else:
    print("❌ Please complete data loading first!")

## 3. 📈 Trend & Seasonality Decomposition

In [None]:
# Decompose the series into trend, seasonal and residual components.

def seasonal_strength_score(seasonal, resid):
    """Return the strength of seasonality, max(0, 1 - Var(R) / Var(S + R)).

    `seasonal` and `resid` are the seasonal and residual components as pandas
    Series on the same index. Points where either is NaN are ignored, so both
    variances are taken over the same observations. The result lies in [0, 1];
    0.0 is returned when the detrended series has no variance at all.
    """
    detrended = (seasonal + resid).dropna()
    detrended_var = detrended.var()
    if not detrended_var > 0:  # also catches NaN (fewer than two usable points)
        return 0.0
    remainder_var = resid[detrended.index].var()
    return float(max(0.0, 1.0 - remainder_var / detrended_var))


if 'df_ts' in locals():

    # Determine period for decomposition
    # Adjust based on your data frequency
    period = 12  # Monthly seasonality (change to 7 for weekly, 365 for yearly, etc.)

    print(f"=== SEASONAL DECOMPOSITION (period={period}) ===")
    print("Adjust 'period' variable based on your data:")
    print("• Daily data with weekly pattern: period=7")
    print("• Monthly data with yearly pattern: period=12")
    print("• Hourly data with daily pattern: period=24")

    if len(df_ts) >= 2 * period:
        try:
            # Perform decomposition
            decomposition = seasonal_decompose(df_ts[value_column], model='additive', period=period)

            # Plot components, one panel each
            fig, axes = plt.subplots(4, 1, figsize=(14, 12))
            panels = [
                (decomposition.observed, 'Original Series'),
                (decomposition.trend, 'Trend Component'),
                (decomposition.seasonal, 'Seasonal Component'),
                (decomposition.resid, 'Residual Component'),
            ]
            for ax, (component, title) in zip(axes, panels):
                component.plot(ax=ax, title=title)

            plt.tight_layout()
            plt.show()

            # Analyze components
            trend = decomposition.trend.dropna()
            seasonal = decomposition.seasonal.dropna()
            residual = decomposition.resid.dropna()

            print("\n📊 COMPONENT ANALYSIS:")
            print(f"Trend range: {trend.min():.4f} to {trend.max():.4f}")
            print(f"Seasonal amplitude: {seasonal.max() - seasonal.min():.4f}")
            print(f"Residual std: {residual.std():.4f}")

            # Seasonal strength, computed on the un-trimmed components so the
            # helper can align them itself
            seasonal_strength = seasonal_strength_score(decomposition.seasonal, decomposition.resid)
            print(f"\nSeasonal strength: {seasonal_strength:.4f}")

            if seasonal_strength > 0.7:
                print("✅ Strong seasonality detected")
            elif seasonal_strength > 0.3:
                print("📊 Moderate seasonality detected")
            else:
                print("📉 Weak or no seasonality")

        except Exception as e:
            print(f"❌ Decomposition failed: {str(e)}")
            print("Try adjusting the 'period' parameter")
    else:
        print(f"⚠️  Need at least {2*period} observations for decomposition")
        print(f"Current observations: {len(df_ts)}")

else:
    print("❌ Please complete data loading first!")

## 4. 🔍 Stationarity Testing

In [None]:
# Test for stationarity with ADF and KPSS and combine both verdicts.

def stationarity_verdict(adf_stationary, kpss_stationary):
    """Combine the ADF and KPSS outcomes into a single label.

    `adf_stationary` is a bool. `kpss_stationary` is a bool, or None when the
    KPSS test could not be computed, in which case the ADF outcome decides.
    Returns one of 'stationary', 'non-stationary', 'trend-stationary'
    (KPSS stationary, ADF not) or 'difference-stationary' (ADF stationary,
    KPSS not).
    """
    if kpss_stationary is None:
        return 'stationary' if adf_stationary else 'non-stationary'
    if adf_stationary and kpss_stationary:
        return 'stationary'
    if adf_stationary:
        return 'difference-stationary'
    if kpss_stationary:
        return 'trend-stationary'
    return 'non-stationary'


if 'df_ts' in locals():

    print("=== STATIONARITY TESTS ===")
    print("A stationary series has constant mean and variance over time.")
    print("Most forecasting models require stationary data.\n")

    series = df_ts[value_column].dropna()

    # Augmented Dickey-Fuller Test (null hypothesis: unit root, i.e. non-stationary)
    print("📊 AUGMENTED DICKEY-FULLER (ADF) TEST:")
    adf_result = adfuller(series, autolag='AIC')

    print(f"   Test Statistic: {adf_result[0]:.4f}")
    print(f"   p-value: {adf_result[1]:.4f}")
    print("   Critical Values:")
    for key, value in adf_result[4].items():
        print(f"      {key}: {value:.4f}")

    adf_stationary = adf_result[1] < 0.05
    if adf_stationary:
        print("   ✅ Result: Series is STATIONARY (reject null hypothesis)")
    else:
        print("   ⚠️  Result: Series is NON-STATIONARY (fail to reject null)")

    # KPSS Test (null hypothesis: stationary, so the p-value reads the other way)
    print("\n📊 KPSS TEST:")
    try:
        kpss_result = kpss(series, regression='c', nlags='auto')

        print(f"   Test Statistic: {kpss_result[0]:.4f}")
        print(f"   p-value: {kpss_result[1]:.4f}")
        print("   Critical Values:")
        for key, value in kpss_result[3].items():
            print(f"      {key}: {value:.4f}")

        kpss_stationary = kpss_result[1] >= 0.05
        if kpss_stationary:
            print("   ✅ Result: Series is STATIONARY (fail to reject null)")
        else:
            print("   ⚠️  Result: Series is NON-STATIONARY (reject null)")
    except Exception as e:
        # None marks "no KPSS verdict"; the summary then relies on ADF alone
        kpss_stationary = None
        print(f"   ⚠️  KPSS test failed: {str(e)}")

    # Summary
    print("\n=== STATIONARITY SUMMARY ===")
    verdict = stationarity_verdict(adf_stationary, kpss_stationary)
    if verdict == 'stationary':
        print("✅ Series appears to be stationary")
        print("Ready for modeling without differencing")
    else:
        if verdict == 'trend-stationary':
            print("⚠️  Tests disagree (KPSS: stationary, ADF: non-stationary)")
            print("Series looks trend-stationary. Recommendation: Remove the trend before modeling")
        elif verdict == 'difference-stationary':
            print("⚠️  Tests disagree (ADF: stationary, KPSS: non-stationary)")
            print("Series looks difference-stationary. Recommendation: Apply differencing")
        else:
            print("⚠️  Series is non-stationary")
            print("Recommendation: Apply differencing or detrending")

        # Show differenced series
        print("\n📊 FIRST DIFFERENCE:")
        diff_series = series.diff().dropna()
        adf_diff = adfuller(diff_series, autolag='AIC')
        print(f"   ADF p-value after differencing: {adf_diff[1]:.4f}")

        if adf_diff[1] < 0.05:
            print("   ✅ First difference is stationary")
        else:
            print("   ⚠️  May need second differencing")

else:
    print("❌ Please complete data loading first!")

## 5. 📊 Autocorrelation Analysis

In [None]:
# Analyze autocorrelation of the original and first-differenced series.

def safe_lag_count(n_obs, cap=40):
    """Return the number of lags to plot for a series of `n_obs` points.

    The count is capped at `cap` and kept strictly below half the sample size,
    the limit partial autocorrelation imposes. Returns 0 when the series is
    too short to plot any lag.
    """
    return max(0, min(cap, n_obs // 2 - 1))


if 'df_ts' in locals():

    series = df_ts[value_column].dropna()
    diff_series = series.diff().dropna()

    # The differenced series is the shorter of the two, so a lag count that is
    # valid for it is valid for the original series as well.
    n_lags = safe_lag_count(len(diff_series))

    if n_lags < 1:
        print(f"⚠️  Need at least 5 observations for ACF/PACF plots (have {len(series)})")
    else:
        fig, axes = plt.subplots(2, 2, figsize=(14, 10))

        # Top row: original series; bottom row: first difference
        plot_acf(series, lags=n_lags, ax=axes[0, 0], title='ACF - Original Series')
        plot_pacf(series, lags=n_lags, ax=axes[0, 1], title='PACF - Original Series')
        plot_acf(diff_series, lags=n_lags, ax=axes[1, 0], title='ACF - First Difference')
        plot_pacf(diff_series, lags=n_lags, ax=axes[1, 1], title='PACF - First Difference')

        plt.tight_layout()
        plt.show()

        print("📊 ACF/PACF INTERPRETATION GUIDE:")
        print("• ACF shows correlation at different lags")
        print("• PACF shows direct correlation (removing intermediate effects)")
        print("• Significant spikes suggest important lags for modeling")
        print("• Gradual decay in ACF suggests AR process")
        print("• Sharp cutoff in ACF suggests MA process")

else:
    print("❌ Please complete data loading first!")

## 6. 🚂 Train-Test Split

In [None]:
# Split data chronologically into training and testing sets.

def split_index(n_obs, test_fraction=0.2):
    """Return the row position where the test set starts.

    The position is int(n_obs * (1 - test_fraction)), clamped to
    [1, n_obs - 1] so that neither part is empty. Expects n_obs >= 2.
    """
    return min(max(int(n_obs * (1 - test_fraction)), 1), n_obs - 1)


if 'df_ts' in locals() and len(df_ts) >= 2:

    # Use last 20% for testing
    test_size = 0.2
    split_idx = split_index(len(df_ts), test_size)

    train = df_ts.iloc[:split_idx].copy()
    test = df_ts.iloc[split_idx:].copy()

    print("=== TRAIN-TEST SPLIT ===")
    print(f"Training period: {train.index.min()} to {train.index.max()}")
    print(f"Testing period: {test.index.min()} to {test.index.max()}")
    print(f"Training samples: {len(train)}")
    print(f"Testing samples: {len(test)}")

    # Visualize split
    plt.figure(figsize=(14, 6))
    plt.plot(train.index, train[value_column], label='Training', color='blue')
    plt.plot(test.index, test[value_column], label='Testing', color='orange')
    plt.axvline(x=train.index.max(), color='red', linestyle='--', label='Train/Test Split')
    plt.title('Train-Test Split')
    plt.xlabel('Date')
    plt.ylabel(value_column)
    plt.legend()
    plt.show()

elif 'df_ts' in locals():
    print(f"⚠️  Need at least 2 observations to split (have {len(df_ts)})")

else:
    print("❌ Please complete data loading first!")

## 7. 🤖 Forecasting Models

In [None]:
# Train multiple forecasting models and score each one on the test set.

def evaluate_forecast(actual, predictions):
    """Bundle a forecast with its RMSE and MAE against `actual`.

    Returns a dict with the keys 'predictions', 'rmse' and 'mae', the shape
    later cells read from `forecast_results`.
    """
    return {
        'predictions': predictions,
        'rmse': np.sqrt(mean_squared_error(actual, predictions)),
        'mae': mean_absolute_error(actual, predictions),
    }


def exp_smoothing_forecast(train_series, target_index, **model_kwargs):
    """Fit ExponentialSmoothing on `train_series` and forecast len(target_index) steps.

    `model_kwargs` are passed straight to ExponentialSmoothing. The returned
    forecast is re-indexed onto `target_index` so it lines up with the test set.
    """
    fitted = ExponentialSmoothing(train_series, **model_kwargs).fit()
    predictions = fitted.forecast(len(target_index))
    predictions.index = target_index
    return predictions


if 'train' in locals() and 'test' in locals() and len(train) > 0 and len(test) > 0:

    forecast_results = {}
    train_values = train[value_column]
    actual = test[value_column]

    print("=== TRAINING FORECASTING MODELS ===\n")

    # 1. Simple Moving Average: the mean of the last `window` training points,
    # repeated across the test horizon. The window is at least 1 so that very
    # short training sets still yield a finite forecast.
    print("📊 1. Simple Moving Average...")
    window = max(1, min(12, len(train) // 4))
    sma_forecast = train_values.rolling(window=window).mean().iloc[-1]
    sma_predictions = pd.Series([sma_forecast] * len(test), index=test.index)
    scores = evaluate_forecast(actual, sma_predictions)
    forecast_results['Simple Moving Average'] = scores
    print(f"   RMSE: {scores['rmse']:.4f}, MAE: {scores['mae']:.4f}")

    # 2-4. Exponential smoothing family: (heading, results key, model arguments)
    seasonal_period = 12  # Adjust based on your data
    smoothing_models = [
        ("2. Simple Exponential Smoothing", 'Simple Exp Smoothing',
         dict(trend=None, seasonal=None)),
        ("3. Holt's Linear Trend", "Holt's Linear",
         dict(trend='add', seasonal=None)),
        ("4. Holt-Winters Exponential Smoothing", 'Holt-Winters',
         dict(trend='add', seasonal='add', seasonal_periods=seasonal_period)),
    ]
    for heading, model_name, model_kwargs in smoothing_models:
        print(f"\n📊 {heading}...")

        # The seasonal model is only attempted with two full cycles of training data
        if 'seasonal_periods' in model_kwargs and len(train) < 2 * seasonal_period:
            print(f"   ⚠️  Not enough data for seasonal model (need {2*seasonal_period} points)")
            continue

        try:
            predictions = exp_smoothing_forecast(train_values, test.index, **model_kwargs)
            scores = evaluate_forecast(actual, predictions)
            forecast_results[model_name] = scores
            print(f"   RMSE: {scores['rmse']:.4f}, MAE: {scores['mae']:.4f}")
        except Exception as e:
            print(f"   ⚠️  Failed: {str(e)}")

    # 5. Naive Forecast (baseline): the last training value repeated
    print("\n📊 5. Naive Forecast (baseline)...")
    naive_predictions = pd.Series([train_values.iloc[-1]] * len(test), index=test.index)
    scores = evaluate_forecast(actual, naive_predictions)
    forecast_results['Naive (Baseline)'] = scores
    print(f"   RMSE: {scores['rmse']:.4f}, MAE: {scores['mae']:.4f}")

    print("\n✅ Model training completed!")

elif 'train' in locals() and 'test' in locals():
    print("⚠️  Training and testing sets must both be non-empty")

else:
    print("❌ Please complete train-test split first!")

## 8. 📈 Model Evaluation & Comparison

In [None]:
# Compare model performance and inspect the best model's errors.

def safe_mape(actual, predicted):
    """Return the mean absolute percentage error, in percent.

    Only positions where `actual` is non-zero and both values are finite take
    part, so a zero actual cannot turn the result into inf. Returns NaN when
    no position qualifies. Inputs are compared by position.
    """
    actual_arr = np.asarray(actual, dtype=float)
    predicted_arr = np.asarray(predicted, dtype=float)
    usable = (actual_arr != 0) & np.isfinite(actual_arr) & np.isfinite(predicted_arr)
    if not usable.any():
        return float('nan')
    relative_errors = (actual_arr[usable] - predicted_arr[usable]) / actual_arr[usable]
    return float(np.mean(np.abs(relative_errors)) * 100)


if 'forecast_results' in locals() and len(forecast_results) > 0:

    print("=== MODEL COMPARISON ===\n")

    # Create comparison dataframe, best (lowest RMSE) first
    comparison_df = pd.DataFrame({
        'Model': list(forecast_results.keys()),
        'RMSE': [results['rmse'] for results in forecast_results.values()],
        'MAE': [results['mae'] for results in forecast_results.values()]
    })

    comparison_df = comparison_df.sort_values('RMSE')
    display(comparison_df)

    # Best model
    best_model_name = comparison_df.iloc[0]['Model']
    best_predictions = forecast_results[best_model_name]['predictions']

    print(f"\n🏆 BEST MODEL: {best_model_name}")
    print(f"   RMSE: {forecast_results[best_model_name]['rmse']:.4f}")
    print(f"   MAE: {forecast_results[best_model_name]['mae']:.4f}")

    # Visualization
    plt.figure(figsize=(16, 10))

    # Plot 1: All forecasts comparison
    plt.subplot(2, 1, 1)
    plt.plot(train.index, train[value_column], label='Training', color='blue', alpha=0.7)
    plt.plot(test.index, test[value_column], label='Actual', color='black', linewidth=2)

    colors = plt.cm.Set2(np.linspace(0, 1, len(forecast_results)))
    for (name, results), color in zip(forecast_results.items(), colors):
        plt.plot(test.index, results['predictions'], label=f'{name}', linestyle='--', color=color)

    plt.axvline(x=train.index.max(), color='red', linestyle=':', alpha=0.5)
    plt.title('Forecast Comparison')
    plt.xlabel('Date')
    plt.ylabel(value_column)
    plt.legend(loc='best')

    # Plot 2: Best model detailed view
    plt.subplot(2, 1, 2)
    plt.plot(test.index, test[value_column], label='Actual', color='black', linewidth=2)
    plt.plot(test.index, best_predictions, label=f'{best_model_name} Forecast',
             color='red', linestyle='--', linewidth=2)

    # Error band: forecast +/- 1.96 standard deviations of the test residuals.
    # It is derived from the observed errors, not from the model, so it is
    # labelled as an error band rather than a confidence interval.
    residuals = test[value_column] - best_predictions
    std_resid = residuals.std()
    plt.fill_between(test.index,
                     best_predictions - 1.96*std_resid,
                     best_predictions + 1.96*std_resid,
                     alpha=0.2, color='red', label='Approx. 95% error band (±1.96σ of test residuals)')

    plt.title(f'Best Model: {best_model_name}')
    plt.xlabel('Date')
    plt.ylabel(value_column)
    plt.legend()

    plt.tight_layout()
    plt.show()

    # Residual analysis
    print("\n📊 RESIDUAL ANALYSIS:")
    print(f"Mean Residual: {residuals.mean():.4f} (should be ~0)")
    print(f"Std Residual: {residuals.std():.4f}")

    # Calculate MAPE over the points where it is defined
    mape = safe_mape(test[value_column], best_predictions)
    n_zero_actuals = int((test[value_column] == 0).sum())

    if np.isnan(mape):
        print("MAPE: undefined (no non-zero actual values to compare against)")
    else:
        print(f"MAPE: {mape:.2f}%")
        if n_zero_actuals > 0:
            print(f"   (excluded {n_zero_actuals} observations with an actual value of 0)")

        if mape < 10:
            print("\n✅ Excellent forecast accuracy (MAPE < 10%)")
        elif mape < 20:
            print("\n📊 Good forecast accuracy (MAPE < 20%)")
        elif mape < 30:
            print("\n⚠️  Moderate forecast accuracy (MAPE < 30%)")
        else:
            print("\n❌ Poor forecast accuracy (MAPE >= 30%)")

else:
    print("❌ Please complete model training first!")

## 9. 🔮 Future Predictions

In [None]:
# Generate future predictions with the best model, refit on the full series.

def build_future_index(index, periods):
    """Return `periods` timestamps that continue the spacing of `index`.

    A regular frequency inferred by pandas is used when there is one (so
    monthly data continues monthly). Otherwise the most common positive step
    between consecutive stamps is used, and a one-day step when even that is
    unavailable (fewer than two distinct stamps).
    """
    last_stamp = index.max()
    try:
        inferred = pd.infer_freq(index)
    except (TypeError, ValueError):
        # infer_freq refuses very short or non-datetime indexes
        inferred = None
    if inferred is not None:
        # Start on the last observed stamp and drop it, leaving only future stamps
        return pd.date_range(start=last_stamp, periods=periods + 1, freq=inferred)[1:]

    steps = pd.Series(index).diff().dropna()
    steps = steps[steps > pd.Timedelta(0)]
    step = steps.mode()[0] if not steps.empty else pd.Timedelta(days=1)
    return pd.date_range(start=last_stamp + step, periods=periods, freq=step)


if 'best_model_name' in locals() and 'df_ts' in locals():

    # Number of periods to forecast
    forecast_periods = 30  # ⚠️ Adjust based on your needs

    print(f"=== GENERATING {forecast_periods}-PERIOD FORECAST ===\n")

    # Retrain best model on full data
    full_series = df_ts[value_column]

    # Reuse the seasonal period from the model-training cell when it is defined
    hw_period = seasonal_period if 'seasonal_period' in locals() else 12

    try:
        last_date = df_ts.index.max()
        future_dates = build_future_index(df_ts.index, forecast_periods)

        # Produce the forecast with the SAME method that won the comparison, and
        # keep that method's in-sample one-step errors for the uncertainty band.
        if best_model_name == 'Naive (Baseline)':
            point_forecast = np.repeat(full_series.iloc[-1], forecast_periods)
            one_step_errors = full_series.diff()
        elif best_model_name == 'Simple Moving Average':
            sma_window = max(1, min(12, len(full_series) // 4))
            rolling_mean = full_series.rolling(window=sma_window).mean()
            point_forecast = np.repeat(rolling_mean.iloc[-1], forecast_periods)
            one_step_errors = full_series - rolling_mean.shift(1)
        else:
            if best_model_name == 'Holt-Winters':
                smoothing_kwargs = dict(trend='add', seasonal='add', seasonal_periods=hw_period)
            elif best_model_name == "Holt's Linear":
                smoothing_kwargs = dict(trend='add', seasonal=None)
            else:
                smoothing_kwargs = dict(trend=None, seasonal=None)

            final_model = ExponentialSmoothing(full_series, **smoothing_kwargs)
            final_fit = final_model.fit()
            point_forecast = np.asarray(final_fit.forecast(forecast_periods))
            one_step_errors = final_fit.resid

        future_forecast = pd.Series(point_forecast, index=future_dates)

        # Plot
        plt.figure(figsize=(14, 6))

        # Historical data (last 100 points)
        recent_data = df_ts.tail(min(100, len(df_ts)))
        plt.plot(recent_data.index, recent_data[value_column], label='Historical', color='blue')

        # Forecast
        plt.plot(future_forecast.index, future_forecast.values, label='Forecast',
                color='red', linestyle='--', linewidth=2)

        # Uncertainty band from the spread of in-sample one-step errors. Its
        # width is constant over the horizon, so it is only a rough guide.
        std = pd.Series(one_step_errors).std()
        plt.fill_between(future_forecast.index,
                        future_forecast.values - 1.96*std,
                        future_forecast.values + 1.96*std,
                        alpha=0.2, color='red', label='Approx. 95% band (±1.96σ of in-sample errors)')

        plt.axvline(x=last_date, color='green', linestyle=':', label='Forecast Start')
        plt.title(f'{forecast_periods}-Period Forecast using {best_model_name}')
        plt.xlabel('Date')
        plt.ylabel(value_column)
        plt.legend()
        plt.show()

        # Display forecast values
        print("📊 FORECAST VALUES:")
        forecast_df = pd.DataFrame({
            'Date': future_forecast.index,
            'Forecast': future_forecast.values
        })
        display(forecast_df.head(15))

        print("\n📈 Forecast Summary:")
        print(f"   Start: {future_forecast.iloc[0]:.4f}")
        print(f"   End: {future_forecast.iloc[-1]:.4f}")
        print(f"   Mean: {future_forecast.mean():.4f}")
        print(f"   Trend: {'📈 Increasing' if future_forecast.iloc[-1] > future_forecast.iloc[0] else '📉 Decreasing'}")

    except Exception as e:
        print(f"❌ Forecast generation failed: {str(e)}")

else:
    print("❌ Please complete model evaluation first!")

## 10. ✅ Summary & Next Steps

In [None]:
# Final summary of the analysis and suggested follow-ups.
print("=== 📅 TIME SERIES ANALYSIS COMPLETE ===")
print()

# No earlier cell defines `dataset_name`; fall back to the dataset named in the
# notebook title so the summary does not fail with a NameError.
if 'dataset_name' not in locals():
    dataset_name = 'patient_demographics'

if 'best_model_name' in locals() and 'forecast_results' in locals() and 'df_ts' in locals():
    best_rmse = forecast_results[best_model_name]['rmse']
    best_mae = forecast_results[best_model_name]['mae']

    print("📊 ANALYSIS SUMMARY:")
    print(f"• Dataset: {dataset_name}")
    print(f"• Time Period: {df_ts.index.min()} to {df_ts.index.max()}")
    print(f"• Observations: {len(df_ts)}")
    print(f"• Best Model: {best_model_name}")
    print(f"• RMSE: {best_rmse:.4f}")
    print(f"• MAE: {best_mae:.4f}")

    print("\n🚀 RECOMMENDED NEXT STEPS:")
    print("1. 🔧 Try ARIMA/SARIMA models for potentially better results")
    print("2. 📊 Experiment with different seasonal periods")
    print("3. 🎯 Add external regressors (holidays, events, etc.)")
    print("4. 🔄 Set up regular model retraining pipeline")
    print("5. 📈 Monitor forecast accuracy over time")

    print("\n🛠️  ADVANCED TECHNIQUES TO EXPLORE:")
    print("• ARIMA/SARIMA models (statsmodels)")
    print("• Prophet (Facebook's forecasting library)")
    print("• LSTM neural networks (for complex patterns)")
    print("• XGBoost with time-based features")
    print("• Ensemble methods combining multiple models")

    print("\n💾 SAVE YOUR MODEL:")
    print("# Uncomment to save")
    print("# import joblib")
    print("# joblib.dump(final_fit, 'timeseries_model.pkl')")

    # Only announce completion when the summary above could actually be produced
    print("\n🎉 Time series analysis workflow completed!")

else:
    print("⚠️  Analysis incomplete. Please run all previous cells.")