<a href="https://colab.research.google.com/github/Reben80/MCEnrolmentProjection/blob/main/Projection_before_05_15_2024.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA

In [None]:
# Load the cleaned enrollment table from the project's GitHub repository.
ENROLLMENT_CSV_URL = "https://raw.githubusercontent.com/Reben80/MCEnrolmentProjection/main/cleaned_enrollment_data.csv"
data = pd.read_csv(ENROLLMENT_CSV_URL)

In [None]:

# Remove rows that are entirely empty, then drop the leftover 'Unnamed: 0'
# column that came along with the CSV.
data_cleaned = data.dropna(how='all')
data_cleaned = data_cleaned.drop(columns=['Unnamed: 0'])

# Tidy the column headers and label the five remaining rows by position.
# NOTE(review): this assumes the CSV rows arrive in exactly this order — confirm.
data_cleaned.columns = data_cleaned.columns.str.strip()
data_cleaned.index = [
    'Germantown',
    'Rockville',
    'Takoma Park/Silver Spring',
    'Grand Total',
    'Unduplicated Headcount',
]

# The rest of the notebook works only with the unduplicated headcount row.
unduplicated_headcount = data_cleaned.loc['Unduplicated Headcount']

terms = ['Summer 1', 'Summer 2', 'Fall', 'Winter', 'Spring']

# One figure per term, each showing every column whose label mentions the term.
for term in terms:
    term_columns = [name for name in unduplicated_headcount.index if term in name]
    term_data = unduplicated_headcount.loc[term_columns]

    plt.figure(figsize=(10, 6))
    ax = term_data.plot(marker='o', linestyle='-', title=f'Unduplicated Headcount for {term}')
    ax.set_xlabel('Years')
    ax.set_ylabel('Unduplicated Headcount')
    ax.grid(True)
    plt.xticks(rotation=90)
    plt.tight_layout()
    plt.show()


In [None]:
# Same per-term history as the previous cell, laid out on a single 3x2 grid.
terms = ['Summer 1', 'Summer 2', 'Fall', 'Winter', 'Spring']

fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(15, 18))
axes = axes.flatten()  # 1-D view so each term can take the next panel

for i, (ax, term) in enumerate(zip(axes, terms)):
    # Columns whose label mentions this term
    term_columns = [name for name in unduplicated_headcount.index if term in name]
    term_data = unduplicated_headcount.loc[term_columns]

    term_data.plot(ax=ax, marker='o', linestyle='-', title=f'Unduplicated Headcount for {term}')
    ax.set_xlabel('Years')
    ax.set_ylabel('Unduplicated Headcount')
    ax.grid(True)
    ax.tick_params(axis='x', rotation=90)

# The grid has more panels than terms; drop the unused ones.
for j in range(len(terms), len(axes)):
    fig.delaxes(axes[j])

plt.tight_layout()
plt.show()

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.holtwinters import ExponentialSmoothing

# Holt-Winters projection for a single term.
# Requires `unduplicated_headcount` from the data-cleaning cell above.

term = 'Fall'  # switch to any other term name to project it instead
term_columns = [name for name in unduplicated_headcount.index if term in name]

# Numeric copy of the selected columns (.T is kept from the original pipeline).
term_data = unduplicated_headcount[term_columns].astype(float).T

# Re-index by the first four-digit run found in each column label, read as a year.
term_data.index = term_data.index.str.extract(r'(\d{4})')[0]
term_data.index = pd.to_datetime(term_data.index, format='%Y')

# Additive trend with a multiplicative seasonal cycle of length 4.
# NOTE(review): the series holds one term only, so a 4-step "season" spans
# four observations of that term, not four terms within a year — confirm intent.
hw_model = ExponentialSmoothing(term_data, trend='additive', seasonal='multiplicative', seasonal_periods=4)
hw_fit = hw_model.fit()

# Project 8 steps ahead. Each step is one more observation of this single-term
# series (presumably one per year — TODO confirm), so this is not "2 years of
# 4 terms" as an earlier note suggested.
forecast = hw_fit.forecast(steps=8)

# Observed history and projection on one set of axes.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(term_data.index, term_data, label='Observed')
ax.plot(forecast.index, forecast, label='Forecast', linestyle='--')
ax.set_title(f'Holt-Winters Forecast for {term}')
ax.set_xlabel('Years')
ax.set_ylabel('Unduplicated Headcount')
ax.legend()
ax.grid(True)
plt.xticks(rotation=90)
plt.tight_layout()
plt.show()


# ARIMA does not seem to perform well on this series.

In [None]:

# ARIMA(1, 1, 1) projection for a single term.
# Requires `unduplicated_headcount` from the data-cleaning cell above.

term = 'Fall'  # switch to any other term name to project it instead
term_columns = [name for name in unduplicated_headcount.index if term in name]

# Numeric copy of the selected columns (.T is kept from the original pipeline).
term_data = unduplicated_headcount[term_columns].astype(float).T

# Re-index by the first four-digit run found in each column label, read as a year.
term_data.index = term_data.index.str.extract(r'(\d{4})')[0]
term_data.index = pd.to_datetime(term_data.index, format='%Y')

arima_model = ARIMA(term_data, order=(1, 1, 1))
arima_fit = arima_model.fit()

# Project 8 steps ahead. Each step is one more observation of this single-term
# series (presumably one per year — TODO confirm), not "2 years of 4 terms".
forecast = arima_fit.forecast(steps=8)

# Dates used to place the projection on the x-axis: 8 yearly stamps generated
# from the last observed date.
forecast_index = pd.date_range(start=term_data.index[-1], periods=8, freq='Y')

# Observed history and projection on one set of axes.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(term_data.index, term_data, label='Observed')
ax.plot(forecast_index, forecast, label='Forecast', linestyle='--')
ax.set_title(f'ARIMA Forecast for {term}')
ax.set_xlabel('Years')
ax.set_ylabel('Unduplicated Headcount')
ax.legend()
ax.grid(True)
plt.xticks(rotation=90)
plt.tight_layout()
plt.show()


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.holtwinters import ExponentialSmoothing

# Fit, print and plot a Holt-Winters projection for one term
def forecast_holt_winters(term_data, term, forecast_years):
    """Fit Holt-Winters to one term's series, then print and plot the projection.

    Parameters
    ----------
    term_data : pandas.Series
        Headcount for a single term on a DatetimeIndex. It is resampled to
        year-start frequency, so every model step is one year.
    term : str
        Term name, used only in the plot title.
    forecast_years : int
        Number of years (model steps) to project; must be at least 1.

    Raises
    ------
    ValueError
        If ``forecast_years`` is smaller than 1.
    """
    if forecast_years < 1:
        raise ValueError("forecast_years must be at least 1")

    # One observation per year ('YS' is the current spelling of the 'AS' alias).
    term_data = term_data.asfreq('YS')

    # NOTE(review): seasonal_periods=4 makes the model look for a cycle that
    # repeats every four observations (years) of this single term — confirm
    # that this is intended.
    hw_model = ExponentialSmoothing(term_data, seasonal='additive', trend='additive', seasonal_periods=4)
    hw_fit = hw_model.fit()

    # The series is annual, so N years ahead is N steps. Multiplying by 4 and
    # stamping the result with quarterly dates mislabelled the projection and
    # left the printed frame filled with NaN after re-indexing.
    forecast = hw_fit.forecast(steps=forecast_years)
    forecast_index = forecast.index
    forecast_df = forecast.to_frame(name='Forecast')

    # Print the forecasted values
    print(f"\nForecasted values for the next {forecast_years} years:")
    print(forecast_df)

    # Plot the original data and the forecast
    plt.figure(figsize=(12, 6))
    plt.plot(term_data.index, term_data, label='Observed')
    plt.plot(forecast_index, forecast, label='Forecast', linestyle='--')
    plt.title(f'Holt-Winters Forecast for {term}')
    plt.xlabel('Years')
    plt.ylabel('Unduplicated Headcount')
    plt.legend()
    plt.grid(True)
    plt.xticks(rotation=90)
    plt.tight_layout()
    plt.show()

# Interactive single-term projection.
# Requires `unduplicated_headcount` from the data-cleaning cell above.

term = 'Fall'  # switch to any other term name to project it instead
term_columns = [name for name in unduplicated_headcount.index if term in name]

# Numeric copy of the selected columns (.T is kept from the original pipeline).
term_data = unduplicated_headcount[term_columns].astype(float).T

# Re-index by the first four-digit run found in each column label, read as a year.
year_labels = term_data.index.str.extract(r'(\d{4})')[0]
term_data.index = pd.to_datetime(year_labels, format='%Y')

# Show the prepared series and its missing-value count before modelling.
print("Term Data:\n", term_data)
print("Missing values in the data:", term_data.isnull().sum())

# The horizon is typed in by the user; non-numeric input raises ValueError.
forecast_years = int(input("Enter the number of years to forecast: "))

forecast_holt_winters(term_data, term, forecast_years)


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.holtwinters import ExponentialSmoothing

# Function to forecast and plot using Holt-Winters Exponential Smoothing
def forecast_holt_winters(term_data, term, forecast_years):
    """Fit Holt-Winters to one term's series and plot the projection.

    Parameters
    ----------
    term_data : pandas.Series
        Headcount for a single term on a DatetimeIndex. It is resampled to
        year-start frequency, so every model step is one year.
    term : str
        Term name, used only in the plot title.
    forecast_years : int
        Number of years (model steps) to project; must be at least 1.

    Raises
    ------
    ValueError
        If ``forecast_years`` is smaller than 1.
    """
    if forecast_years < 1:
        raise ValueError("forecast_years must be at least 1")

    # One observation per year ('YS' is the current spelling of the 'AS' alias).
    term_data = term_data.asfreq('YS')

    # NOTE(review): seasonal_periods=4 makes the model look for a cycle that
    # repeats every four observations (years) of this single term — confirm
    # that this is intended.
    hw_model = ExponentialSmoothing(term_data, seasonal='additive', trend='additive', seasonal_periods=4)
    hw_fit = hw_model.fit()

    # The series is annual, so N years ahead is N steps; the model supplies the
    # matching yearly dates. (Previously 4*N steps were drawn on invented
    # quarterly dates.)
    forecast = hw_fit.forecast(steps=forecast_years)
    forecast_index = forecast.index

    # Plot the original data and the forecast
    plt.figure(figsize=(12, 6))
    plt.plot(term_data.index, term_data, label='Observed')
    plt.plot(forecast_index, forecast, label='Forecast', linestyle='--')
    plt.title(f'Holt-Winters Forecast for {term}')
    plt.xlabel('Years')
    plt.ylabel('Unduplicated Headcount')
    plt.legend()
    plt.grid(True)
    plt.xticks(rotation=90)
    plt.tight_layout()
    plt.show()

# Project every term in turn, one figure per term.
# Requires `unduplicated_headcount` from the data-cleaning cell above.

terms = ['Summer 1', 'Summer 2', 'Fall', 'Winter', 'Spring']

# The horizon is typed in by the user; non-numeric input raises ValueError.
forecast_years = int(input("Enter the number of years to forecast: "))

for term in terms:
    # Columns whose label mentions this term, as floats
    # (.T is kept from the original pipeline).
    term_columns = [name for name in unduplicated_headcount.index if term in name]
    term_data = unduplicated_headcount[term_columns].astype(float).T

    # Re-index by the first four-digit run in each column label, read as a year.
    year_labels = term_data.index.str.extract(r'(\d{4})')[0]
    term_data.index = pd.to_datetime(year_labels, format='%Y')

    forecast_holt_winters(term_data, term, forecast_years)


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.holtwinters import ExponentialSmoothing

# Function to forecast using Holt-Winters Exponential Smoothing
def forecast_holt_winters(term_data, term, forecast_years):
    """Fit Holt-Winters to one term's series and return the projection.

    Parameters
    ----------
    term_data : pandas.Series
        Headcount for a single term on a DatetimeIndex. It is resampled to
        year-start frequency, so every model step is one year.
    term : str
        Term name, used only in the printed header.
    forecast_years : int
        Number of years (model steps) to project; must be at least 1.

    Returns
    -------
    tuple
        ``(term_data, forecast, forecast_index)``: the resampled input series,
        the projected values, and the dates those values belong to.

    Raises
    ------
    ValueError
        If ``forecast_years`` is smaller than 1.
    """
    if forecast_years < 1:
        raise ValueError("forecast_years must be at least 1")

    # One observation per year ('YS' is the current spelling of the 'AS' alias).
    term_data = term_data.asfreq('YS')

    # NOTE(review): seasonal_periods=4 makes the model look for a cycle that
    # repeats every four observations (years) of this single term — confirm
    # that this is intended.
    hw_model = ExponentialSmoothing(term_data, seasonal='additive', trend='additive', seasonal_periods=4)
    hw_fit = hw_model.fit()

    # The series is annual, so N years ahead is N steps. Multiplying by 4 and
    # stamping the result with quarterly dates mislabelled the projection and
    # left the printed frame filled with NaN after re-indexing.
    forecast = hw_fit.forecast(steps=forecast_years)
    forecast_index = forecast.index
    forecast_df = forecast.to_frame(name='Forecast')

    # Print the forecasted values
    print(f"\nForecasted values for {term} for the next {forecast_years} years:")
    print(forecast_df)

    return term_data, forecast, forecast_index

# Project every term and show the results together on a 3x2 grid.
# Requires `unduplicated_headcount` from the data-cleaning cell above.

terms = ['Summer 1', 'Summer 2', 'Fall', 'Winter', 'Spring']

# The horizon is typed in by the user; non-numeric input raises ValueError.
forecast_years = int(input("Enter the number of years to forecast: "))

fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(15, 18))
axes = axes.flatten()  # 1-D view so each term can take the next panel

for i, (ax, term) in enumerate(zip(axes, terms)):
    # Columns whose label mentions this term, as floats
    # (.T is kept from the original pipeline).
    term_columns = [name for name in unduplicated_headcount.index if term in name]
    term_data = unduplicated_headcount[term_columns].astype(float).T

    # Re-index by the first four-digit run in each column label, read as a year.
    year_labels = term_data.index.str.extract(r'(\d{4})')[0]
    term_data.index = pd.to_datetime(year_labels, format='%Y')

    # Show the prepared series and its missing-value count before modelling.
    print(f"\nTerm Data for {term}:\n", term_data)
    print(f"Missing values in the data for {term}: {term_data.isnull().sum()}")

    term_data, forecast, forecast_index = forecast_holt_winters(term_data, term, forecast_years)

    # Observed history and projection in this term's panel
    ax.plot(term_data.index, term_data, label='Observed')
    ax.plot(forecast_index, forecast, label='Forecast', linestyle='--')
    ax.set_title(f'Holt-Winters Forecast for {term}')
    ax.set_xlabel('Years')
    ax.set_ylabel('Unduplicated Headcount')
    ax.legend()
    ax.grid(True)
    ax.tick_params(axis='x', rotation=90)

# The grid has more panels than terms; drop the unused ones.
for j in range(len(terms), len(axes)):
    fig.delaxes(axes[j])

plt.tight_layout()
plt.show()


In [None]:

# Function to forecast using Holt-Winters Seasonal Model
def forecast_holt_winters(term_data, term, forecast_years):
    """Fit Holt-Winters to one term's series and return it with a rough band.

    Parameters
    ----------
    term_data : pandas.Series
        Headcount for a single term on a DatetimeIndex. It is resampled to
        year-start frequency, so every model step is one year.
    term : str
        Term name, used only in the printed header.
    forecast_years : int
        Number of years (model steps) to project; must be at least 1.

    Returns
    -------
    tuple
        ``(term_data, forecast, forecast_index, conf_int)`` where ``conf_int``
        is a DataFrame with ``'lower'`` and ``'upper'`` columns on the
        forecast dates.

    Raises
    ------
    ValueError
        If ``forecast_years`` is smaller than 1.
    """
    if forecast_years < 1:
        raise ValueError("forecast_years must be at least 1")

    # One observation per year ('YS' is the current spelling of the 'AS' alias).
    term_data = term_data.asfreq('YS')

    # NOTE(review): seasonal_periods=4 makes the model look for a cycle that
    # repeats every four observations (years) of this single term — confirm
    # that this is intended.
    hw_model = ExponentialSmoothing(term_data, seasonal='additive', trend='additive', seasonal_periods=4)
    hw_fit = hw_model.fit()

    # The series is annual, so N years ahead is N steps (the former "* 4"
    # projected four times as many years as requested).
    forecast = hw_fit.forecast(steps=forecast_years)

    # Rough 95% band: +/- 1.96 times the root-mean-square in-sample error.
    # The same width is applied to every step, so it does not widen with the
    # horizon.
    z_95 = 1.96
    residual_std = np.sqrt(hw_fit.sse / len(term_data))
    conf_int = pd.DataFrame(
        {'lower': forecast - z_95 * residual_std, 'upper': forecast + z_95 * residual_std},
        index=forecast.index,
    )

    # Create a DataFrame for the forecast
    forecast_df = forecast.to_frame(name='Forecast')

    # Print the forecasted values
    print(f"\nForecasted values for {term} for the next {forecast_years} years:")
    print(forecast_df)

    return term_data, forecast, forecast.index, conf_int

# Project every term with a shaded band and show the results on a 3x2 grid.
# Requires `unduplicated_headcount` from the data-cleaning cell above.

terms = ['Summer 1', 'Summer 2', 'Fall', 'Winter', 'Spring']

# The horizon is typed in by the user; non-numeric input raises ValueError.
forecast_years = int(input("Enter the number of years to forecast: "))

fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(15, 18))
axes = axes.flatten()  # 1-D view so each term can take the next panel

for i, (ax, term) in enumerate(zip(axes, terms)):
    # Columns whose label mentions this term, as floats
    # (.T is kept from the original pipeline).
    term_columns = [name for name in unduplicated_headcount.index if term in name]
    term_data = unduplicated_headcount[term_columns].astype(float).T

    # Re-index by the first four-digit run in each column label, read as a year.
    year_labels = term_data.index.str.extract(r'(\d{4})')[0]
    term_data.index = pd.to_datetime(year_labels, format='%Y')

    # Show the prepared series and its missing-value count before modelling.
    print(f"\nTerm Data for {term}:\n", term_data)
    print(f"Missing values in the data for {term}: {term_data.isnull().sum()}")

    term_data, forecast, forecast_index, conf_int = forecast_holt_winters(term_data, term, forecast_years)

    # Observed history, projection and its band in this term's panel
    ax.plot(term_data.index, term_data, label='Observed')
    ax.plot(forecast_index, forecast, label='Forecast', linestyle='--')
    ax.fill_between(forecast_index, conf_int['lower'], conf_int['upper'], color='gray', alpha=0.2)
    ax.set_title(f'Holt-Winters Seasonal Model Forecast for {term}')
    ax.set_xlabel('Years')
    ax.set_ylabel('Unduplicated Headcount')
    ax.legend()
    ax.grid(True)
    ax.tick_params(axis='x', rotation=90)

# The grid has more panels than terms; drop the unused ones.
for j in range(len(terms), len(axes)):
    fig.delaxes(axes[j])

plt.tight_layout()
plt.show()


In [None]:


# Sample data for demonstration: one column per term, ten values each.
# Each column label starts with a four-digit year followed by the term name
# in parentheses.
data = pd.DataFrame({
    '201440 (Summer 1)': [8334.0, 7833.0, 7793.0, 7638.0, 7376.0, 6856.0, 5930.0, 6284.0, 5577.0, 5800.0],
    '201410 (Summer 2)': [10440.0, 10330.0, 10220.0, 10110.0, 10000.0, 9850.0, 9720.0, 9600.0, 9480.0, 9360.0],
    '201420 (Fall)': [26160.0, 25518.0, 25323.0, 23916.0, 22875.0, 21720.0, 21260.0, 20041.0, 17284.0, 17137.0],
    '201425 (Winter)': [15000.0, 14500.0, 14000.0, 13500.0, 13000.0, 12500.0, 12000.0, 11500.0, 11000.0, 10500.0],
    '201430 (Spring)': [20200.0, 19800.0, 19400.0, 19000.0, 18600.0, 18200.0, 17800.0, 17400.0, 17000.0, 16600.0]
})

# Calendar month used to date each term.
# NOTE(review): the next cell maps Summer 1 -> 6 and Fall -> 9 instead;
# confirm which months are intended.
terms = ['Summer 1', 'Summer 2', 'Fall', 'Winter', 'Spring']
term_to_month = {
    'Summer 1': 5,
    'Summer 2': 7,
    'Fall': 8,
    'Winter': 12,
    'Spring': 3
}


def combine_terms(data):
    """Stack per-term columns into one date-indexed headcount series.

    Each column label supplies a starting year (its first four characters)
    and a term name (the text in parentheses). Row ``k`` of a column is dated
    the first day of that term's month in ``starting year + k``.

    Returns a DataFrame indexed by 'Date' (ascending) with a single
    'Unduplicated Headcount' column.
    """
    records = []
    for col in data.columns:
        start_year = int(col.split(' ')[0][:4])
        term_name = col.split('(')[1].strip(')')
        month = term_to_month[term_name]
        records.extend(
            [pd.Timestamp(year=start_year + offset, month=month, day=1), value]
            for offset, value in enumerate(data[col].astype(float))
        )
    frame = pd.DataFrame(records, columns=['Date', 'Unduplicated Headcount'])
    return frame.set_index('Date').sort_index()


# All terms merged into a single chronological series
combined_series = combine_terms(data)

# Inspect the result
print("\nCombined DataFrame:\n", combined_series)


In [None]:
plt.style.use('ggplot')

# Sample data for demonstration: one column per term, ten values each.
# Each column label starts with a four-digit year followed by the term name
# in parentheses.
data = pd.DataFrame({
    '201440 (Summer 1)': [8334.0, 7833.0, 7793.0, 7638.0, 7376.0, 6856.0, 5930.0, 6284.0, 5577.0, 5800.0],
    '201410 (Summer 2)': [10440.0, 10330.0, 10220.0, 10110.0, 10000.0, 9850.0, 9720.0, 9600.0, 9480.0, 9360.0],
    '201420 (Fall)': [26160.0, 25518.0, 25323.0, 23916.0, 22875.0, 21720.0, 21260.0, 20041.0, 17284.0, 17137.0],
    '201425 (Winter)': [15000.0, 14500.0, 14000.0, 13500.0, 13000.0, 12500.0, 12000.0, 11500.0, 11000.0, 10500.0],
    '201430 (Spring)': [20200.0, 19800.0, 19400.0, 19000.0, 18600.0, 18200.0, 17800.0, 17400.0, 17000.0, 16600.0]
})

# Calendar month used to date each term
terms = ['Summer 1', 'Summer 2', 'Fall', 'Winter', 'Spring']
term_to_month = {
    'Summer 1': 6,
    'Summer 2': 7,
    'Fall': 9,
    'Winter': 12,
    'Spring': 3
}


def combine_terms(data):
    """Stack per-term columns into one date-indexed headcount series.

    Each column label supplies a starting year (its first four characters)
    and a term name (the text in parentheses). Row ``k`` of a column is dated
    the first day of that term's month in ``starting year + k``.

    Returns a DataFrame indexed by 'Date' (ascending) with a single
    'Unduplicated Headcount' column.
    """
    rows = []
    for col in data.columns:
        first_year = int(col.split(' ')[0][:4])
        month = term_to_month[col.split('(')[1].strip(')')]
        for offset, value in enumerate(data[col].astype(float)):
            rows.append([pd.Timestamp(year=first_year + offset, month=month, day=1), value])
    stacked = pd.DataFrame(rows, columns=['Date', 'Unduplicated Headcount'])
    return stacked.set_index('Date').sort_index()


# All terms merged into a single chronological series
combined_series = combine_terms(data)

# Inspect the result
print("\nCombined DataFrame:\n", combined_series)

# Line chart of the merged series
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(combined_series.index, combined_series['Unduplicated Headcount'], marker='o', color='b')
ax.set_title('Time Series of Combined Data')
ax.set_xlabel('Date')
ax.set_ylabel('Unduplicated Headcount')
ax.grid(True)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


In [None]:

plt.style.use('ggplot')

# Hand-entered headcounts, one list of ten values per term.
# NOTE(review): in `fall`, 17284.0 and 20041.0 appear in the opposite order
# from the "Sample data" frame used earlier in this notebook — confirm against
# the source CSV.
summer_1 = [8334.0, 7833.0, 7793.0, 7638.0, 7376.0, 6856.0, 5930.0, 6284.0, 5577.0, 5800.0]
summer_2 = [6274.0, 6232.0, 6003.0, 5481.0, 5245.0, 5168.0, 5035.0, 7197.0, 4492.0, 4168.0]
fall = [26160.0, 25518.0, 25323.0, 23916.0, 22875.0, 21720.0, 21260.0, 17284.0, 20041.0, 17137.0]
winter = [1509.0, 1378.0, 1401.0, 1441.0, 1629.0, 1720.0, 1891.0, 2627.0, 2239.0, 2247.0]
spring = [24625.0, 24430.0, 23277.0, 22261.0, 20990.0, 20014.0, 19221.0, 17717.0, 15592.0, 15725.0]

plt.figure(figsize=(10, 6))  # adjust the size as needed

# One line per term, drawn against the position of each value in its list
for series, colour, label in (
    (summer_1, 'orange', 'Summer 1'),
    (summer_2, 'red', 'Summer 2'),
    (fall, 'brown', 'Fall'),
    (winter, 'blue', 'Winter'),
    (spring, 'green', 'Spring'),
):
    plt.plot(series, marker='o', linestyle='-', color=colour, label=label)

# Labels, one tick per data point, and the legend
plt.xlabel('Time Point')
plt.ylabel('Value')
plt.title('Time Series of Values Across Seasons')
plt.xticks(range(10))
plt.legend()

# Horizontal guide lines only
plt.grid(axis='y', linestyle='--')
plt.show()


In [None]:


# Merge per-term value lists into one chronological, date-indexed frame
def combine_terms(years, data_dict):
    """Build a single date-indexed series from per-term value lists.

    ``data_dict`` maps a term name to its list of values; value ``k`` of a
    term is dated the first day of that term's month (looked up in the
    module-level ``term_to_month``) in ``years[k]``.

    Returns a DataFrame indexed by 'Date' (ascending) with a single
    'Unduplicated Headcount' column.
    """
    records = [
        [pd.to_datetime(f"{years[position]}-{term_to_month[term]:02d}-01"), value]
        for term, values in data_dict.items()
        for position, value in enumerate(values)
    ]
    frame = pd.DataFrame(records, columns=['Date', 'Unduplicated Headcount'])
    return frame.set_index('Date').sort_index()

# Years covered by the per-term lists (2014 through 2023)
years = list(range(2014, 2024))

# Term name -> list of values, taken from the lists defined in an earlier cell
data_dict = {
    'Summer 1': summer_1,
    'Summer 2': summer_2,
    'Fall': fall,
    'Winter': winter,
    'Spring': spring
}
combined_series = combine_terms(years, data_dict)

# Inspect the result
print("\nCombined DataFrame:\n", combined_series)

# Keep a copy on disk, with the dates as the first column
combined_series.to_csv('combined_series.csv', index=True)

# Line chart of the merged series
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(combined_series.index, combined_series['Unduplicated Headcount'], marker='o', linestyle='-', color='b')
ax.set_title('Time Series of Combined Data')
ax.set_xlabel('Date')
ax.set_ylabel('Unduplicated Headcount')
ax.grid(True)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


In [None]:
# Function to combine data into a single time series DataFrame
def combine_terms(years, data_dict):
    """Build a single date-indexed series from per-term value lists.

    Parameters
    ----------
    years : sequence
        Year of each observation; entry ``k`` pairs with value ``k`` of every
        term.
    data_dict : dict
        Maps each name in the module-level ``terms`` list to its values.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'Date' (ascending) with one 'Unduplicated Headcount'
        column. Each value is dated the first day of its term's month, taken
        from the module-level ``term_to_month``.
    """
    combined_data = []
    # Pair values with years by position. The former years.index(year) lookup
    # rescanned the list for every row and, when a year occurred twice,
    # always returned the first occurrence's values.
    for position, year in enumerate(years):
        for term in terms:
            month = term_to_month[term]
            date = pd.to_datetime(f"{year}-{month:02d}-01")
            combined_data.append([date, data_dict[term][position]])
    combined_data = pd.DataFrame(combined_data, columns=['Date', 'Unduplicated Headcount'])
    combined_data = combined_data.set_index('Date').sort_index()
    return combined_data

# Define the years
years = list(range(2014, 2024))

# Combine the data for all terms into a single time series
data_dict = {
    'Spring': spring,
    'Summer 1': summer_1,
    'Summer 2': summer_2,
    'Fall': fall,
    'Winter': winter
}
combined_series = combine_terms(years, data_dict)

# Put the series on a regular quarterly grid (quarters starting Mar/Jun/Sep/Dec).
# NOTE(review): asfreq keeps only rows that fall on the grid. With the month
# mapping used elsewhere in this notebook (Summer 2 -> July) the Summer 2 rows
# are dropped here, so the model sees four terms per year, not five.
combined_series = combined_series.asfreq('QS-MAR')

# Check the combined DataFrame
print("\nCombined DataFrame:\n", combined_series)

# Fit the ARIMA model
arma_model = ARIMA(combined_series, order=(2, 0, 2))
arma_fit = arma_model.fit()

# Forecast 25 steps ahead. On the quarterly grid above each step is one
# quarter (NOTE(review): that is 6.25 years, not "5 years of 5 terms").
forecast_periods = 5 * 5
forecast_results = arma_fit.get_forecast(steps=forecast_periods)
forecast = forecast_results.predicted_mean
conf_int = forecast_results.conf_int()

# Dates for the forecast steps: the quarters following the last observation
forecast_index = pd.date_range(start=combined_series.index[-1] + pd.DateOffset(months=3), periods=forecast_periods, freq='QS-MAR')

# Build the frames from raw values. Passing the statsmodels results together
# with new column names makes pandas look those names up in the source
# (which calls them something else), producing columns full of NaN and an
# empty forecast line and band.
forecast_df = pd.DataFrame({'Forecast': forecast.to_numpy()}, index=forecast_index)
conf_int_df = pd.DataFrame(conf_int.to_numpy(), index=forecast_index, columns=['Lower CI', 'Upper CI'])

# Plot the original data and the forecast
plt.figure(figsize=(12, 6))
plt.plot(combined_series.index, combined_series['Unduplicated Headcount'], label='Observed', marker='o')
plt.plot(forecast_df.index, forecast_df['Forecast'], label='Forecast', linestyle='--', marker='o')
plt.fill_between(forecast_index, conf_int_df['Lower CI'], conf_int_df['Upper CI'], color='gray', alpha=0.2)
plt.title('ARIMA Forecast for Combined Data')
plt.xlabel('Date')
plt.ylabel('Unduplicated Headcount')
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA

# Hand-entered headcounts, one list of ten values per term.
# NOTE(review): in `fall`, 17284.0 and 20041.0 appear in the opposite order
# from the "Sample data" frame used earlier in this notebook — confirm against
# the source CSV.
spring = [24625.0, 24430.0, 23277.0, 22261.0, 20990.0, 20014.0, 19221.0, 17717.0, 15592.0, 15725.0]
summer_1 = [8334.0, 7833.0, 7793.0, 7638.0, 7376.0, 6856.0, 5930.0, 6284.0, 5577.0, 5800.0]
summer_2 = [6274.0, 6232.0, 6003.0, 5481.0, 5245.0, 5168.0, 5035.0, 7197.0, 4492.0, 4168.0]
fall = [26160.0, 25518.0, 25323.0, 23916.0, 22875.0, 21720.0, 21260.0, 17284.0, 20041.0, 17137.0]
winter = [1509.0, 1378.0, 1401.0, 1441.0, 1629.0, 1720.0, 1891.0, 2627.0, 2239.0, 2247.0]

# Define terms and corresponding months
terms = ['Spring', 'Summer 1', 'Summer 2', 'Fall', 'Winter']
term_to_month = {
    'Spring': 3,
    'Summer 1': 6,
    'Summer 2': 7,
    'Fall': 9,
    'Winter': 12,
}

# Function to combine data into a single time series DataFrame
def combine_terms(years, data_dict):
    """Build a single date-indexed series from per-term value lists.

    Parameters
    ----------
    years : sequence
        Year of each observation; entry ``k`` pairs with value ``k`` of every
        term.
    data_dict : dict
        Maps each name in ``terms`` to its values.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'Date' (ascending) with one 'Unduplicated Headcount'
        column. Each value is dated the first day of its term's month, taken
        from ``term_to_month``.
    """
    combined_data = []
    # Pair values with years by position. The former years.index(year) lookup
    # rescanned the list for every row and, when a year occurred twice,
    # always returned the first occurrence's values.
    for position, year in enumerate(years):
        for term in terms:
            month = term_to_month[term]
            date = pd.to_datetime(f"{year}-{month:02d}-01")
            combined_data.append([date, data_dict[term][position]])
    combined_data = pd.DataFrame(combined_data, columns=['Date', 'Unduplicated Headcount'])
    combined_data = combined_data.set_index('Date').sort_index()
    return combined_data

# Define the years
years = list(range(2014, 2024))

# Combine the data for all terms into a single time series
data_dict = {
    'Spring': spring,
    'Summer 1': summer_1,
    'Summer 2': summer_2,
    'Fall': fall,
    'Winter': winter
}
combined_series = combine_terms(years, data_dict)

# Give the model a regular index by relabelling the rows with consecutive
# quarter starts.
# NOTE(review): this keeps all five terms per year but spreads ten years of
# data over 50 quarters (12.5 calendar years), so the plotted dates no longer
# match the real term dates.
combined_series.index = pd.date_range(start=combined_series.index[0], periods=len(combined_series), freq='QS-MAR')

# Check the combined DataFrame
print("\nCombined DataFrame:\n", combined_series)

# Fit the ARIMA model
arma_model = ARIMA(combined_series, order=(2, 0, 2))
arma_fit = arma_model.fit()

# Forecasting the future values
forecast_periods = 5 * 5  # Number of periods to forecast (5 years, 5 terms per year)
forecast_results = arma_fit.get_forecast(steps=forecast_periods)
forecast = forecast_results.predicted_mean
conf_int = forecast_results.conf_int()

# Dates for the forecast steps, continuing the relabelled quarterly index
forecast_index = pd.date_range(start=combined_series.index[-1] + pd.DateOffset(months=3), periods=forecast_periods, freq='QS-MAR')

# Build the frames from raw values. Passing the statsmodels results together
# with new column names makes pandas look those names up in the source
# (which calls them something else), producing columns full of NaN and an
# empty forecast line and band.
forecast_df = pd.DataFrame({'Forecast': forecast.to_numpy()}, index=forecast_index)
conf_int_df = pd.DataFrame(conf_int.to_numpy(), index=forecast_index, columns=['Lower CI', 'Upper CI'])

# Check the forecasted DataFrame
print("\nForecasted DataFrame:\n", forecast_df)

# Plot the original data and the forecast
plt.figure(figsize=(12, 6))
plt.plot(combined_series.index, combined_series['Unduplicated Headcount'], label='Observed', marker='o')
plt.plot(forecast_df.index, forecast_df['Forecast'], label='Forecast', linestyle='--', marker='o')
plt.fill_between(forecast_index, conf_int_df['Lower CI'], conf_int_df['Upper CI'], color='gray', alpha=0.2)
plt.title('ARIMA Forecast for Combined Data')
plt.xlabel('Date')
plt.ylabel('Unduplicated Headcount')
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


In [None]:


# Function to combine data into a single time series DataFrame
def combine_terms(years, data_dict):
    """Build a single date-indexed series from per-term value lists.

    Parameters
    ----------
    years : sequence
        Year of each observation; entry ``k`` pairs with value ``k`` of every
        term.
    data_dict : dict
        Maps each name in the module-level ``terms`` list to its values.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'Date' (ascending) with one 'Unduplicated Headcount'
        column. Each value is dated the first day of its term's month, taken
        from the module-level ``term_to_month``.
    """
    combined_data = []
    # Pair values with years by position. The former years.index(year) lookup
    # rescanned the list for every row and, when a year occurred twice,
    # always returned the first occurrence's values.
    for position, year in enumerate(years):
        for term in terms:
            month = term_to_month[term]
            date = pd.to_datetime(f"{year}-{month:02d}-01")
            combined_data.append([date, data_dict[term][position]])
    combined_data = pd.DataFrame(combined_data, columns=['Date', 'Unduplicated Headcount'])
    combined_data = combined_data.set_index('Date').sort_index()
    return combined_data

# Years covered by the per-term lists (2014 through 2023)
years = list(range(2014, 2024))

# Term name -> list of values, taken from the lists defined in an earlier cell
data_dict = {
    'Spring': spring,
    'Summer 1': summer_1,
    'Summer 2': summer_2,
    'Fall': fall,
    'Winter': winter
}
combined_series = combine_terms(years, data_dict)

# Relabel the rows with consecutive quarter starts so the index is regular.
# NOTE(review): this keeps every row but spreads them over len(combined_series)
# quarters, so the plotted dates no longer match the real term dates.
quarter_starts = pd.date_range(start=combined_series.index[0], periods=len(combined_series), freq='QS-MAR')
combined_series.index = quarter_starts

# Inspect the result
print("\nCombined DataFrame:\n", combined_series)

# Holt-Winters with an additive seasonal cycle of five steps (one per term)
hw_model = ExponentialSmoothing(combined_series['Unduplicated Headcount'], seasonal='add', seasonal_periods=5).fit()

# 25 steps ahead: five cycles of five terms
forecast_periods = 5 * 5
forecast = hw_model.forecast(steps=forecast_periods)

# Dates for the forecast steps, continuing the relabelled quarterly index
first_forecast_date = combined_series.index[-1] + pd.DateOffset(months=3)
forecast_index = pd.date_range(start=first_forecast_date, periods=forecast_periods, freq='QS-MAR')
forecast_df = pd.DataFrame(forecast, index=forecast_index, columns=['Forecast'])

# Observed history and projection on one set of axes
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(combined_series.index, combined_series['Unduplicated Headcount'], label='Observed', marker='o')
ax.plot(forecast_df.index, forecast_df['Forecast'], label='Forecast', linestyle='--', marker='o')
ax.set_title('Holt-Winters Forecast for Combined Data')
ax.set_xlabel('Date')
ax.set_ylabel('Unduplicated Headcount')
ax.legend()
ax.grid(True)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# Inspect the projected values
print("\nForecasted DataFrame:\n", forecast_df)


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
from statsmodels.tsa.holtwinters import ExponentialSmoothing
import numpy as np

# Data for each term (unchanged); these lists are not read again in this cell
spring = [24625.0, 24430.0, 23277.0, 22261.0, 20990.0, 20014.0, 19221.0, 17717.0, 15592.0, 15725.0]
summer_1 = [8334.0, 7833.0, 7793.0, 7638.0, 7376.0, 6856.0, 5930.0, 6284.0, 5577.0, 5800.0]
summer_2 = [6274.0, 6232.0, 6003.0, 5481.0, 5245.0, 5168.0, 5035.0, 7197.0, 4492.0, 4168.0]
fall = [26160.0, 25518.0, 25323.0, 23916.0, 22875.0, 21720.0, 21260.0, 17284.0, 20041.0, 17137.0]
winter = [1509.0, 1378.0, 1401.0, 1441.0, 1629.0, 1720.0, 1891.0, 2627.0, 2239.0, 2247.0]

# NOTE: this cell does not rebuild the combined series. It relies on
# `combined_series` (with its regular 'QS-MAR' index) left in the kernel by the
# previous Holt-Winters cell, so that cell must be run first.

# Fit the Holt-Winters Exponential Smoothing model (additive season of 5 observations)
hw_model = ExponentialSmoothing(combined_series['Unduplicated Headcount'], seasonal='add', seasonal_periods=5).fit()

# Forecasting the future values
forecast_periods = 5 * 5  # Number of periods to forecast (5 years, 5 terms per year)
forecast = hw_model.forecast(steps=forecast_periods)

# Simulation-based 95% interval.
# `simulate` returns one row per forecast step and one column per repetition,
# so the percentiles must be taken ACROSS repetitions (axis=1) to obtain a
# lower/upper bound for every step. Averaging percentiles taken along the time
# axis would collapse the band into a single flat pair of numbers.
# A fixed random_state keeps the shaded band reproducible between runs.
simulations = hw_model.simulate(forecast_periods, repetitions=1000, error="add", random_state=42)
lower_bound = np.percentile(simulations, 2.5, axis=1)
upper_bound = np.percentile(simulations, 97.5, axis=1)

# Create a DataFrame for the forecast (quarter-start stamps following the last observation)
forecast_index = pd.date_range(start=combined_series.index[-1] + pd.DateOffset(months=3), periods=forecast_periods, freq='QS-MAR')
forecast_df = pd.DataFrame(forecast, index=forecast_index, columns=['Forecast'])

# Plot the original data, forecast, and the simulated interval
plt.figure(figsize=(12, 6))
plt.plot(combined_series.index, combined_series['Unduplicated Headcount'], label='Observed', marker='o')
plt.plot(forecast_df.index, forecast_df['Forecast'], label='Forecast', linestyle='--', marker='o')

# Shade the per-step band; both bounds hold one value per forecast period
plt.fill_between(forecast_df.index, lower_bound, upper_bound, color='gray', alpha=0.3, label='95% Confidence Interval')

plt.title('Holt-Winters Forecast with Confidence Intervals')
plt.xlabel('Date')
plt.ylabel('Unduplicated Headcount')
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


In [None]:
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
import matplotlib.pyplot as plt

# Function to combine data into a single time series DataFrame
def combine_terms(data_dict, terms, start_year=2014):
    """Combines data for multiple terms into a single time series DataFrame.

    Every value of a term is dated on the first day of that term's month
    (Spring=March, Summer 1=June, Summer 2=July, Fall=September,
    Winter=December), one value per consecutive year beginning with
    ``start_year``.

    The five term months are not evenly spaced, so the returned index carries
    no fixed frequency. Forcing a quarterly grid here would discard the July
    (Summer 2) observations, which is why no ``asfreq`` call is applied.

    Args:
        data_dict (dict): Dictionary of data, with terms as keys and lists of
            yearly values as values.
        terms (list): List of terms to include.
        start_year (int): Calendar year of the first value in each list.
            Defaults to 2014.

    Returns:
        pd.DataFrame: DataFrame sorted by date, with 'Date' as index and
        'Unduplicated Headcount' as column. One row per (term, year) value.

    Raises:
        KeyError: If a term is not one of the known term labels or is missing
            from ``data_dict``.
    """

    term_to_month = {'Spring': 3, 'Summer 1': 6, 'Summer 2': 7, 'Fall': 9, 'Winter': 12}

    # Collect (date, value) records first and build the frame once.
    records = []
    for term in terms:
        month = term_to_month[term]
        for year_offset, value in enumerate(data_dict[term]):
            # Explicit year/month construction: a 'Y' date_range would snap
            # every term to 31 December and produce duplicate dates.
            term_date = pd.Timestamp(year=start_year + year_offset, month=month, day=1)
            records.append((term_date, value))

    combined_df = pd.DataFrame(records, columns=['Date', 'Unduplicated Headcount'])
    return combined_df.set_index('Date').sort_index()

# Define the data: one list of yearly headcounts per term
data_dict = {
    'Spring': [24625.0, 24430.0, 23277.0, 22261.0, 20990.0, 20014.0, 19221.0, 17717.0, 15592.0, 15725.0],
    'Summer 1': [8334.0, 7833.0, 7793.0, 7638.0, 7376.0, 6856.0, 5930.0, 6284.0, 5577.0, 5800.0],
    'Summer 2': [6274.0, 6232.0, 6003.0, 5481.0, 5245.0, 5168.0, 5035.0, 7197.0, 4492.0, 4168.0],
    'Fall': [26160.0, 25518.0, 25323.0, 23916.0, 22875.0, 21720.0, 21260.0, 17284.0, 20041.0, 17137.0],
    'Winter': [1509.0, 1378.0, 1401.0, 1441.0, 1629.0, 1720.0, 1891.0, 2627.0, 2239.0, 2247.0]
}

# Specify the terms
terms = ['Spring', 'Summer 1', 'Summer 2', 'Fall', 'Winter']

# Combine the data
combined_series = combine_terms(data_dict, terms)

# ARIMA Model
arma_model = ARIMA(combined_series, order=(2, 0, 2))  # Adjust the order if needed
arma_fit = arma_model.fit()

# Forecasting
# NOTE(review): 20 steps are labelled as quarters below, while the input holds
# five terms per year - confirm the intended horizon.
forecast_periods = 20
forecast_results = arma_fit.get_forecast(steps=forecast_periods)
forecast = forecast_results.predicted_mean
conf_int = forecast_results.conf_int()

# Create forecast DataFrame.
# `predicted_mean` is a *named* Series; handing it to pd.DataFrame together
# with columns=['Forecast'] selects a non-existent column and yields all NaN.
# The values are therefore attached positionally to the forecast dates.
forecast_index = pd.date_range(start=combined_series.index[-1] + pd.DateOffset(months=3), periods=forecast_periods, freq='QS-MAR')
forecast_df = pd.DataFrame({'Forecast': forecast.to_numpy()}, index=forecast_index)

# Print forecast
print("\nForecasted DataFrame:\n", forecast_df.round(2))

# Plot observed data, forecast, and the model's confidence band
plt.figure(figsize=(12, 6))
plt.plot(combined_series, label='Observed', marker='o')
plt.plot(forecast_df, label='Forecast', linestyle='--', marker='o')
plt.fill_between(forecast_df.index, conf_int['lower Unduplicated Headcount'], conf_int['upper Unduplicated Headcount'], color='gray', alpha=0.2)
plt.title('ARIMA Forecast for Combined Data')
plt.xlabel('Date')
plt.ylabel('Unduplicated Headcount')
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# List of values representing a single time series (ten rows of five values)
data = [
    8334.0, 6274.0, 26160.0, 1509.0, 24625.0,
    7833.0, 6232.0, 25518.0, 1378.0, 24430.0,
    7793.0, 6003.0, 25323.0, 1401.0, 23277.0,
    7638.0, 5481.0, 23916.0, 1441.0, 22261.0,
    7376.0, 5245.0, 22875.0, 1629.0, 20990.0,
    6856.0, 5168.0, 21720.0, 1720.0, 20014.0,
    5930.0, 5035.0, 21260.0, 1891.0, 19221.0,
    6284.0, 7197.0, 17284.0, 2627.0, 17717.0,
    5577.0, 4492.0, 20041.0, 2239.0, 15592.0,
    5800.0, 4168.0, 17137.0, 2247.0, 15725.0
]

# Generate a placeholder index: one month-end stamp per value, starting from
# January 2020. These are assumed dates used only to draw the series.
# An offset object replaces the 'M' string alias, which newer pandas releases
# deprecate; the generated dates are the same month-end stamps.
date_range = pd.date_range(start='2020-01-01', periods=len(data), freq=pd.offsets.MonthEnd())

# Create a DataFrame
df = pd.DataFrame(data, index=date_range, columns=['Metric'])

# Plot the time series
plt.figure(figsize=(12, 6))
plt.plot(df.index, df['Metric'], marker='o', linestyle='-', color='b')
plt.title('Time Series Plot of Metric')
plt.xlabel('Date')
plt.ylabel('Value')
plt.grid(True)
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# List of values representing a single time series (ten rows of five values)
data = [
    8334.0, 6274.0, 26160.0, 1509.0, 24625.0,
    7833.0, 6232.0, 25518.0, 1378.0, 24430.0,
    7793.0, 6003.0, 25323.0, 1401.0, 23277.0,
    7638.0, 5481.0, 23916.0, 1441.0, 22261.0,
    7376.0, 5245.0, 22875.0, 1629.0, 20990.0,
    6856.0, 5168.0, 21720.0, 1720.0, 20014.0,
    5930.0, 5035.0, 21260.0, 1891.0, 19221.0,
    6284.0, 7197.0, 17284.0, 2627.0, 17717.0,
    5577.0, 4492.0, 20041.0, 2239.0, 15592.0,
    5800.0, 4168.0, 17137.0, 2247.0, 15725.0
]

# Generate a date range for the index
# Each year has 5 terms, and the data covers 2014 to 2023 (50 terms in total)
start_year = 2014
end_year = 2023
terms_per_year = 5
total_terms = (end_year - start_year + 1) * terms_per_year

# Placeholder spacing: one stamp every second month-end. Fifty such stamps run
# from 2014-01-31 to 2022-03-31, so the labels do not line up with the real
# 2014-2023 term calendar; they only give the series a regular axis.
# An offset object replaces the '2M' string alias, which newer pandas releases
# deprecate; the generated dates are unchanged.
date_range = pd.date_range(start=f'{start_year}-01-01', periods=total_terms, freq=pd.offsets.MonthEnd(2))

# Create a DataFrame
df = pd.DataFrame(data, index=date_range, columns=['Metric'])

# Plot the time series
plt.figure(figsize=(12, 6))
plt.plot(df.index, df['Metric'], marker='o', linestyle='-', color='b')
plt.title('Time Series Plot of Metric')
plt.xlabel('Date')
plt.ylabel('Value')
plt.grid(True)
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.holtwinters import ExponentialSmoothing

# List of values representing a single time series (ten rows of five values)
data = [
    8334.0, 6274.0, 26160.0, 1509.0, 24625.0,
    7833.0, 6232.0, 25518.0, 1378.0, 24430.0,
    7793.0, 6003.0, 25323.0, 1401.0, 23277.0,
    7638.0, 5481.0, 23916.0, 1441.0, 22261.0,
    7376.0, 5245.0, 22875.0, 1629.0, 20990.0,
    6856.0, 5168.0, 21720.0, 1720.0, 20014.0,
    5930.0, 5035.0, 21260.0, 1891.0, 19221.0,
    6284.0, 7197.0, 17284.0, 2627.0, 17717.0,
    5577.0, 4492.0, 20041.0, 2239.0, 15592.0,
    5800.0, 4168.0, 17137.0, 2247.0, 15725.0
]

# Generate a custom date range for the index
# Each year has 5 terms, spanning from 2014 to 2023
start_year = 2014
end_year = 2023
terms_per_year = 5
years = end_year - start_year + 1

# Date of the term at 0-based position p in the series:
#   year  = start_year + p // terms_per_year
#   month = (p % terms_per_year) * (12 // terms_per_year) + 1   -> 1, 3, 5, 7, 9
# The same rule is reused below for the forecast so both share one calendar.
dates = [
    pd.Timestamp(
        year=start_year + position // terms_per_year,
        month=(position % terms_per_year) * (12 // terms_per_year) + 1,
        day=1,
    )
    for position in range(len(data))
]

# Create a DataFrame
df = pd.DataFrame(data, index=pd.DatetimeIndex(dates), columns=['Metric'])

# Fit the Holt-Winters model with additive trend and a seasonal component
# of one year (terms_per_year observations)
model = ExponentialSmoothing(
    df['Metric'],
    trend='add',
    seasonal='add',
    seasonal_periods=terms_per_year,
    use_boxcox=False,
    initialization_method='estimated'
)
fit = model.fit()

# Generate forecast for the next 40 terms (8 years at 5 terms per year)
forecast_periods = 40
forecast = fit.forecast(forecast_periods)

# Create the dates for the forecast by continuing the observed term calendar.
# Stepping two months at a time from the last observation would yield six
# dates per year and drift away from the five-term seasonal cycle.
forecast_dates = [
    pd.Timestamp(
        year=start_year + position // terms_per_year,
        month=(position % terms_per_year) * (12 // terms_per_year) + 1,
        day=1,
    )
    for position in range(len(df), len(df) + forecast_periods)
]

# Plot the original time series and the forecast (values are matched to the
# forecast dates by position)
plt.figure(figsize=(12, 6))
plt.plot(df.index, df['Metric'], marker='o', linestyle='-', color='b', label='Observed')
plt.plot(forecast_dates, forecast, marker='o', linestyle='--', color='r', label='Forecast')
plt.title('Time Series Plot of Metric with Holt-Winters Forecast')
plt.xlabel('Date')
plt.ylabel('Value')
plt.legend()
plt.grid(True)
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.holtwinters import ExponentialSmoothing

# List of values representing a single time series (ten rows of five values)
data = [
    8334.0, 6274.0, 26160.0, 1509.0, 24625.0,
    7833.0, 6232.0, 25518.0, 1378.0, 24430.0,
    7793.0, 6003.0, 25323.0, 1401.0, 23277.0,
    7638.0, 5481.0, 23916.0, 1441.0, 22261.0,
    7376.0, 5245.0, 22875.0, 1629.0, 20990.0,
    6856.0, 5168.0, 21720.0, 1720.0, 20014.0,
    5930.0, 5035.0, 21260.0, 1891.0, 19221.0,
    6284.0, 7197.0, 17284.0, 2627.0, 17717.0,
    5577.0, 4492.0, 20041.0, 2239.0, 15592.0,
    5800.0, 4168.0, 17137.0, 2247.0, 15725.0
]

# Generate a custom date range for the index
# Each year has 5 terms, spanning from 2014 to 2023
start_year = 2014
end_year = 2023
terms_per_year = 5
years = end_year - start_year + 1

# Date of the term at 0-based position p in the series:
#   year  = start_year + p // terms_per_year
#   month = (p % terms_per_year) * (12 // terms_per_year) + 1   -> 1, 3, 5, 7, 9
# The same rule is reused below for the forecast so both share one calendar.
dates = [
    pd.Timestamp(
        year=start_year + position // terms_per_year,
        month=(position % terms_per_year) * (12 // terms_per_year) + 1,
        day=1,
    )
    for position in range(len(data))
]

# Create a DataFrame
df = pd.DataFrame(data, index=pd.DatetimeIndex(dates), columns=['Metric'])

# Fit the Holt-Winters model with additive trend and a seasonal component
# of one year (terms_per_year observations)
model = ExponentialSmoothing(
    df['Metric'],
    trend='add',
    seasonal='add',
    seasonal_periods=terms_per_year,
    use_boxcox=False,
    initialization_method='estimated'
)
fit = model.fit()

# Generate forecast for the next 20 terms (4 years at 5 terms per year)
forecast_periods = 20
forecast = fit.forecast(forecast_periods)

# Create the dates for the forecast by continuing the observed term calendar.
# Stepping two months at a time from the last observation would yield six
# dates per year and drift away from the five-term seasonal cycle.
forecast_dates = [
    pd.Timestamp(
        year=start_year + position // terms_per_year,
        month=(position % terms_per_year) * (12 // terms_per_year) + 1,
        day=1,
    )
    for position in range(len(df), len(df) + forecast_periods)
]

# Create a DataFrame for the forecast (one row per forecast term)
forecast_df = pd.DataFrame({'Date': forecast_dates, 'Forecast': forecast})

# Save the forecast to a CSV file in the working directory (row labels omitted)
forecast_df.to_csv('forecast.csv', index=False)

# Plot the original time series and the forecast
plt.figure(figsize=(12, 6))
plt.plot(df.index, df['Metric'], marker='o', linestyle='-', color='b', label='Observed')
plt.plot(forecast_dates, forecast, marker='o', linestyle='--', color='r', label='Forecast')
plt.title('Time Series Plot of Metric with Holt-Winters Forecast')
plt.xlabel('Date')
plt.ylabel('Value')
plt.legend()
plt.grid(True)
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# List of values representing a single time series (ten rows of five values)
data = [
    8334.0, 6274.0, 26160.0, 1509.0, 24625.0,
    7833.0, 6232.0, 25518.0, 1378.0, 24430.0,
    7793.0, 6003.0, 25323.0, 1401.0, 23277.0,
    7638.0, 5481.0, 23916.0, 1441.0, 22261.0,
    7376.0, 5245.0, 22875.0, 1629.0, 20990.0,
    6856.0, 5168.0, 21720.0, 1720.0, 20014.0,
    5930.0, 5035.0, 21260.0, 1891.0, 19221.0,
    6284.0, 7197.0, 17284.0, 2627.0, 17717.0,
    5577.0, 4492.0, 20041.0, 2239.0, 15592.0,
    5800.0, 4168.0, 17137.0, 2247.0, 15725.0
]

# Generate a regular date range for the index
# Each year has 5 terms; `start_year` is the date string the index begins from
# and `end_year` is kept for reference only (it is not used below)
start_year = '2014-01-01'
end_year = '2023-12-01'
terms_per_year = 5

# Create a date range with consistent frequency: one stamp every second
# month-end. An offset object replaces the '2M' string alias, which newer
# pandas releases deprecate; the generated dates are unchanged.
dates = pd.date_range(start=start_year, periods=len(data), freq=pd.offsets.MonthEnd(2))

# Create a DataFrame
df = pd.DataFrame(data, index=dates, columns=['Metric'])

# Ensure the frequency is set on the frame's index
df.index.freq = dates.freq

# Split the data chronologically into training (first 80%) and test sets
train_size = int(len(df) * 0.8)
train, test = df.iloc[:train_size], df.iloc[train_size:]

# Fit the Holt-Winters model on the training set
model = ExponentialSmoothing(
    train['Metric'],
    trend='add',
    seasonal='add',
    seasonal_periods=terms_per_year,
    use_boxcox=False,
    initialization_method='estimated'
)
fit = model.fit()

# Make predictions on the test set
test = test.copy()  # Ensure we are not modifying a view
# Assign by position: the forecast has exactly len(test) values in time order,
# so an index-label mismatch can never silently turn predictions into NaN
test['Predicted'] = np.asarray(fit.forecast(len(test)))

# Check for NaN values and handle them.
# Reassigning the column (instead of an in-place fillna on a column selection)
# is guaranteed to update `test`, and .ffill() avoids the deprecated
# fillna(method=...) form.
if test['Predicted'].isna().any():
    test['Predicted'] = test['Predicted'].ffill()

# Ensure there are no NaN values in the test set for metric calculation
test = test.dropna()

# Calculate performance metrics on the held-out observations
mae = mean_absolute_error(test['Metric'], test['Predicted'])
mse = mean_squared_error(test['Metric'], test['Predicted'])
rmse = np.sqrt(mse)

print(f"Mean Absolute Error (MAE): {mae}")
print(f"Mean Squared Error (MSE): {mse}")
print(f"Root Mean Squared Error (RMSE): {rmse}")

# Plot the training data, test data, and predictions
plt.figure(figsize=(12, 6))
plt.plot(train.index, train['Metric'], marker='o', linestyle='-', color='b', label='Training Data')
plt.plot(test.index, test['Metric'], marker='o', linestyle='-', color='g', label='Test Data')
plt.plot(test.index, test['Predicted'], marker='o', linestyle='--', color='r', label='Predicted Data')
plt.title('Time Series Plot of Metric with Holt-Winters Forecast')
plt.xlabel('Date')
plt.ylabel('Value')
plt.legend()
plt.grid(True)
plt.show()


In [None]:
# This cell continues from the train/test cell above and reuses its `df`,
# `terms_per_year`, and imports.
#
# Refit on the complete series before forecasting. The `fit` object from the
# evaluation cell was trained on the first 80% of the data only, so its
# forecast starts right after the training window; dating those values after
# the end of `df` would present hold-out predictions as future ones.
full_fit = ExponentialSmoothing(
    df['Metric'],
    trend='add',
    seasonal='add',
    seasonal_periods=terms_per_year,
    use_boxcox=False,
    initialization_method='estimated'
).fit()

# Generate forecast for the next 30 terms
forecast_periods = 30
forecast = full_fit.forecast(forecast_periods)

# Create a date range for the forecast: same two-month-end spacing as `df`,
# dropping the first stamp because it is the last observed date.
# An offset object replaces the '2M' string alias, which newer pandas releases
# deprecate.
forecast_dates = pd.date_range(start=df.index[-1], periods=forecast_periods + 1, freq=pd.offsets.MonthEnd(2))[1:]

# Create a DataFrame for the forecast
forecast_df = pd.DataFrame({'Date': forecast_dates, 'Forecast': forecast})

# Save the forecast to a CSV file (replaces any existing 'forecast.csv')
forecast_df.to_csv('forecast.csv', index=False)

# Plot the observed data and the extended forecast
plt.figure(figsize=(12, 6))
plt.plot(df.index, df['Metric'], marker='o', linestyle='-', color='b', label='Observed')
plt.plot(forecast_dates, forecast, marker='o', linestyle='--', color='r', label='Forecast')
plt.title('Time Series Plot of Metric with Holt-Winters Forecast (Next 30 Terms)')
plt.xlabel('Date')
plt.ylabel('Value')
plt.legend()
plt.grid(True)
plt.show()
