In [3]:
import pandas as pd
import statsmodels.api as sm

# Input data
data = {
    'Year': [2007, 2007, 2007, 2007, 2008, 2008, 2008, 2008, 2009, 2009, 2009, 2009, 2010, 2010, 2010, 2010, 2011, 2011, 2011, 2011],
    'Quarter': [1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4],
    'Quarter#': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20],
    'Sales': [3015, 2886, 3262, 5873, 4135, 4063, 4264, 6704, 4889, 4651, 5449, 9519, 7131, 6566, 7560, 12948, 9857, 9913, 10876, 17431]
}

# Create a DataFrame from the data
df = pd.DataFrame(data)

# Add an intercept column to the DataFrame
df['Intercept'] = 1

# Define the independent variables (features) and the dependent variable (target)
X = df[['Year', 'Quarter', 'Quarter#', 'Intercept']]
y = df['Sales']

# Create the multiple linear regression model
model = sm.OLS(y, X)

# Fit the model
results = model.fit()

# Predict sales for each quarter in 2012
quarter_2012 = {
    'Year': [2012] * 4,
    'Quarter': [1, 2, 3, 4],
    'Quarter#': [21, 22, 23, 24],
    'Intercept': 1
}

df_2012 = pd.DataFrame(quarter_2012)
sales_2012 = results.predict(df_2012)

# Add the predicted sales to the DataFrame
df_2012['Sales'] = sales_2012

print("Predicted Sales for Each Quarter in 2012:")
print(df_2012[['Year', 'Quarter', 'Sales']])



Predicted Sales for Each Quarter in 2012:
   Year  Quarter      Sales
0  2012        1  10923.395
1  2012        2  12396.915
2  2012        3  13870.435
3  2012        4  15343.955


In [4]:
# Print the summary of the multiple linear regression model
print("\nMultiple Linear Regression Statistics:")
print(results.summary())


Multiple Linear Regression Statistics:
                            OLS Regression Results                            
Dep. Variable:                  Sales   R-squared:                       0.818
Model:                            OLS   Adj. R-squared:                  0.796
Method:                 Least Squares   F-statistic:                     38.18
Date:                Tue, 01 Aug 2023   Prob (F-statistic):           5.16e-07
Time:                        21:25:26   Log-Likelihood:                -175.44
No. Observations:                  20   AIC:                             356.9
Df Residuals:                      17   BIC:                             359.9
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Year        

In [22]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

# Input data
data = {
    'Year': [2007, 2007, 2007, 2007, 2008, 2008, 2008, 2008, 2009, 2009, 2009, 2009, 2010, 2010, 2010, 2010, 2011, 2011, 2011, 2011],
    'Quarter': [1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4],
    'Sales': [3015, 2886, 3262, 5873, 4135, 4063, 4264, 6704, 4889, 4651, 5449, 9519, 7131, 6566, 7560, 12948, 9857, 9913, 10876, 17431]
}

# Create a DataFrame from the data
df = pd.DataFrame(data)

# Calculate the total sales for each quarter (Year-Quarter combination)
quarterly_sales = df.groupby(['Year', 'Quarter'])['Sales'].sum().reset_index()

# Calculate the average sales for each quarter (Quarterly average)
quarterly_average_sales = quarterly_sales.groupby('Quarter')['Sales'].mean().reset_index()

# Calculate the seasonality index by dividing each quarter's average sales by the overall average sales
overall_average_sales = quarterly_average_sales['Sales'].mean()
quarterly_average_sales['Seasonality Index'] = quarterly_average_sales['Sales'] / overall_average_sales

# Display the seasonality index
seasonality_index = quarterly_average_sales.set_index('Quarter')['Seasonality Index'].to_dict()
print("Seasonality Index:", seasonality_index)

# Predict quarterly sales for each quarter in 2012 using the seasonality index
year_2012 = 2012
quarters_2012 = [1, 2, 3, 4]

# Calculate the predicted sales for each quarter in 2012 using the seasonality index
predicted_sales = []
for quarter in quarters_2012:
    predicted_sales.append(quarterly_average_sales[quarterly_average_sales['Quarter'] == quarter]['Sales'].values[0] * seasonality_index[quarter])

# Create a DataFrame for the prediction
prediction_df = pd.DataFrame({
    'Year': [year_2012] * len(quarters_2012),
    'Quarter': quarters_2012,
    'Predicted_Sales': predicted_sales
})

print(prediction_df)

# Linear regression for the seasonality index
X = np.array(quarterly_average_sales['Quarter'])
X = sm.add_constant(X)
y = np.array(quarterly_average_sales['Sales'])

model = sm.OLS(y, X)
results = model.fit()

print("\nLinear Regression Statistics:")
print(results.summary())


Seasonality Index: {1: 0.8235077167498864, 2: 0.7966125737630504, 3: 0.8911427598729005, 4: 1.4887369496141625}
   Year  Quarter  Predicted_Sales
0  2012        1      4780.791699
1  2012        2      4473.616892
2  2012        3      5598.337046
3  2012        4     15624.294286

Linear Regression Statistics:
                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.676
Model:                            OLS   Adj. R-squared:                  0.514
Method:                 Least Squares   F-statistic:                     4.170
Date:                Tue, 01 Aug 2023   Prob (F-statistic):              0.178
Time:                        21:44:12   Log-Likelihood:                -33.834
No. Observations:                   4   AIC:                             71.67
Df Residuals:                       2   BIC:                             70.44
Df Model:                           1                  

  warn("omni_normtest is not valid with less than 8 observations; %i "
