# In [None]:

# ITSM Incident Volume Forecasting - Quarterly & Annual (ARIMA + Exponential Smoothing)

## 📦 Step 1: Load & Clean Data

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Time series & forecasting
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error

# Load data: read the incident export and parse the opening time. Values that
# cannot be parsed become NaT (errors='coerce'); rows without a usable
# timestamp are dropped.
df = pd.read_csv('ITSM_data.csv')
df = (
    df.assign(Open_Time=pd.to_datetime(df['Open_Time'], errors='coerce'))
    .assign(timestamp=lambda frame: frame['Open_Time'])
    .dropna(subset=['timestamp'])
)

# Time features derived from the opening timestamp, followed by the category
# clean-up (missing CI_Cat values are labelled 'Unknown').
df = df.assign(
    year=lambda frame: frame['timestamp'].dt.year,
    quarter=lambda frame: frame['timestamp'].dt.quarter,
    year_quarter=lambda frame: frame['year'].astype(str) + 'Q' + frame['quarter'].astype(str),
    year_month=lambda frame: frame['timestamp'].dt.to_period('M'),
    CI_Cat=lambda frame: frame['CI_Cat'].fillna('Unknown'),
)

## 📊 Step 2: Aggregate Quarterly & Annual Ticket Volume

# Quarterly: number of incidents per (quarter, category). count() only counts
# rows whose Incident_ID is present.
quarterly = (
    df.groupby(['year_quarter', 'CI_Cat'])['Incident_ID']
    .count()
    .reset_index()
    .rename(columns={'CI_Cat': 'category', 'Incident_ID': 'ticket_count'})
)
# Split the 'YYYYQn' label back into numbers and stamp every quarter with the
# first day of its closing month (Q1 -> March, Q2 -> June, ...).
quarterly = quarterly.assign(
    year=lambda table: table['year_quarter'].str.slice(stop=4).astype(int),
    quarter=lambda table: table['year_quarter'].str.slice(start=-1).astype(int),
    date=lambda table: pd.to_datetime(
        (table['year'] * 100 + table['quarter'] * 3).astype(str), format='%Y%m'
    ),
)

# Annual: number of incidents per (year, category), dated January 1st of the year.
annual = (
    df.groupby(['year', 'CI_Cat'])['Incident_ID']
    .count()
    .reset_index()
    .rename(columns={'CI_Cat': 'category', 'Incident_ID': 'ticket_count'})
)
annual = annual.assign(date=lambda table: pd.to_datetime(table['year'], format='%Y'))

## 📈 Step 3: Visualize Quarterly & Annual Ticket Volumes

# Side-by-side overview of historical ticket volume: quarterly counts on the
# left panel, annual counts on the right, one line per category.
plt.figure(figsize=(16, 6))
panels = (
    (quarterly, 'Quarterly Ticket Volume', 'Quarter'),
    (annual, 'Annual Ticket Volume', 'Year'),
)
for panel_no, (volume, panel_title, x_label) in enumerate(panels, start=1):
    plt.subplot(1, 2, panel_no)
    for cat in volume['category'].unique():
        cat_rows = volume[volume['category'] == cat]
        plt.plot(cat_rows['date'], cat_rows['ticket_count'], marker='o', label=cat)
    plt.title(panel_title)
    plt.xlabel(x_label)
    plt.ylabel('Tickets')
    # Every panel gets its own legend: the category order (and with it the
    # line colour) is taken from each table separately, so the legend of one
    # panel cannot be relied on to decode the other.
    plt.legend()
    plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

## 🤖 Step 4: Forecast Next 4 Quarters (ARIMA + Exponential Smoothing)

# Per-category model selection and 4-quarter forecast.
#
# Outputs (module level, consumed by the plotting step):
#   models     - '<category>_arima' / '<category>_es' -> model selected on the
#                training split (None when no candidate could be fitted)
#   forecasts  - category -> {'ARIMA' / 'Exponential Smoothing' -> pd.Series
#                of 4 forecast values indexed by future quarter dates}
#   categories - all category labels found in `quarterly`
models = {}
forecasts = {}
categories = quarterly['category'].unique()

# Candidate Holt-Winters specifications. 'name' is a display label only and
# must be removed before the dict is passed on: ExponentialSmoothing() does
# not accept a 'name' keyword and would raise TypeError for every candidate.
configs = [
    {'trend': None, 'seasonal': None, 'name': 'Simple'},
    {'trend': 'add', 'seasonal': None, 'name': 'Holt'},
    {'trend': 'add', 'seasonal': 'add', 'seasonal_periods': 4, 'name': 'Additive'},
    {'trend': 'add', 'seasonal': 'mul', 'seasonal_periods': 4, 'name': 'Multiplicative'}
]

for cat in categories:
    cat_data = quarterly[quarterly['category'] == cat].copy()
    if len(cat_data) < 8:
        continue  # Skip short series
    ts = cat_data.set_index('date')['ticket_count'].sort_index()
    if ts.std() == 0:
        continue  # Constant series: nothing to model

    # Model selection uses the first 80% of the series only.
    train_size = int(len(ts) * 0.8)
    train, test = ts[:train_size], ts[train_size:]

    # ARIMA: small (p, d, q) grid, keep the fit with the lowest AIC.
    best_model = None
    best_order = None
    best_aic = np.inf
    for p in range(3):
        for d in range(2):
            for q in range(3):
                try:
                    model = ARIMA(train, order=(p, d, q)).fit()
                except Exception:
                    # Best effort: an order that cannot be fitted is skipped.
                    continue
                if model.aic < best_aic:
                    best_aic = model.aic
                    best_model = model
                    best_order = (p, d, q)
    models[f'{cat}_arima'] = best_model

    # Exponential Smoothing: keep the specification with the lowest AIC.
    best_es = None
    best_es_kwargs = None
    best_es_aic = np.inf
    for cfg in configs:
        es_kwargs = {key: value for key, value in cfg.items() if key != 'name'}
        # Seasonal variants are only tried with at least two full seasonal cycles.
        if es_kwargs['seasonal'] and len(train) < 2 * es_kwargs['seasonal_periods']:
            continue
        try:
            model = ExponentialSmoothing(train, **es_kwargs).fit()
        except Exception:
            # Best effort: e.g. the multiplicative form can be rejected for some data.
            continue
        if model.aic < best_es_aic:
            best_es_aic = model.aic
            best_es = model
            best_es_kwargs = es_kwargs
    models[f'{cat}_es'] = best_es

    # Forecast dates continue the convention of the historical index (first day
    # of each quarter's closing month), three months apart.
    last_date = ts.index[-1]
    future_dates = pd.DatetimeIndex(
        [last_date + pd.DateOffset(months=3 * step) for step in range(1, 5)]
    )
    forecasts[cat] = {}

    # The selected models only know the training split, so their 4-step
    # forecast would describe the hold-out quarters, not the quarters after
    # the end of the series. Refit the chosen specification on the full series;
    # if that refit fails, forecast through the hold-out and keep the last 4.
    if best_model is not None:
        try:
            arima_forecast = np.asarray(ARIMA(ts, order=best_order).fit().forecast(steps=4))
        except Exception:
            arima_forecast = np.asarray(best_model.forecast(steps=len(test) + 4))[-4:]
        forecasts[cat]['ARIMA'] = pd.Series(arima_forecast, index=future_dates)
    if best_es is not None:
        try:
            es_forecast = np.asarray(
                ExponentialSmoothing(ts, **best_es_kwargs).fit().forecast(steps=4)
            )
        except Exception:
            es_forecast = np.asarray(best_es.forecast(steps=len(test) + 4))[-4:]
        forecasts[cat]['Exponential Smoothing'] = pd.Series(es_forecast, index=future_dates)

## 📊 Step 5: Plot Forecasts

# One figure per category that has a forecast entry: the historical quarterly
# counts as a solid line and every available model forecast as a dashed line.
for cat in categories:
    if cat in forecasts:
        in_category = quarterly['category'] == cat
        history = quarterly.loc[in_category].set_index('date')['ticket_count']
        if len(history) >= 8:
            plt.figure(figsize=(10, 5))
            plt.plot(history.index, history.values, 'o-', label='Historical')
            for label, predicted in forecasts[cat].items():
                plt.plot(predicted.index, predicted.values, 'o--', label=f'{label} Forecast')
            plt.title(f'Forecast for {cat}')
            plt.xlabel('Quarter')
            plt.ylabel('Tickets')
            plt.legend()
            plt.grid(True, alpha=0.3)
            plt.tight_layout()
            plt.show()
