In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
import xgboost as xgb
from sklearn.model_selection import train_test_split 
from sklearn.metrics import mean_squared_error, accuracy_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from statsmodels.tsa.seasonal import seasonal_decompose

In [2]:
df_laptop = pd.read_csv("../EDA/laptop_data.csv")
df_tablet = pd.read_csv("../EDA/tablet_data.csv")
df_smartphone = pd.read_csv("../EDA/smartphone_data.csv")
df_accessory = pd.read_csv("../EDA/accessory_data.csv")

In [4]:
df_laptop

Unnamed: 0,Category,Date,Sales_Volume,Price,Market_Trend_Index,Competitor_Activity_Score,Consumer_Confidence_Index
0,Laptop,2009-01-01,46.470588,85.377647,0.080248,0.218278,73.928403
1,Laptop,2009-01-02,52.500000,50.957500,1.063133,0.094953,72.052359
2,Laptop,2009-01-03,31.000000,165.760000,-0.408246,1.489919,48.705781
3,Laptop,2009-01-04,77.500000,56.342500,-0.127733,-0.462146,68.243504
4,Laptop,2009-01-05,65.875000,103.065000,0.649302,0.066341,69.895559
...,...,...,...,...,...,...,...
4358,Laptop,2020-12-25,177.000000,107.065000,-0.435945,-0.745994,51.586337
4359,Laptop,2020-12-26,210.600000,139.109000,-0.521547,-0.005733,63.129587
4360,Laptop,2020-12-27,172.000000,122.346667,-0.185838,-0.966691,72.936456
4361,Laptop,2020-12-28,163.000000,116.900000,0.960097,-0.347966,74.511546


In [6]:
# Drop unnecessary columns for time series analysis
ts_laptop = df_laptop.drop(columns=['Category', 'Price','Market_Trend_Index', 
       'Competitor_Activity_Score', 'Consumer_Confidence_Index'])
ts_laptop['Date'] = pd.to_datetime(ts_laptop['Date'])

ts_smartphone = df_smartphone.drop(columns=['Category', 'Price','Market_Trend_Index',
       'Competitor_Activity_Score', 'Consumer_Confidence_Index'])
ts_smartphone['Date'] = pd.to_datetime(ts_laptop['Date'])

ts_tablet = df_tablet.drop(columns=['Category', 'Price','Market_Trend_Index',
       'Competitor_Activity_Score', 'Consumer_Confidence_Index'])
ts_tablet['Date'] = pd.to_datetime(ts_laptop['Date'])

ts_accessory = df_accessory.drop(columns=['Category', 'Price','Market_Trend_Index',
       'Competitor_Activity_Score', 'Consumer_Confidence_Index'])
ts_accessory['Date'] = pd.to_datetime(ts_laptop['Date'])

In [8]:
# Check for stationarity
from statsmodels.tsa.stattools import adfuller
result = adfuller(ts_laptop['Sales_Volume'])
print(f"ADF Statistic: {result[0]}")
print(f"p-value: {result[1]}")

# If p-value > 0.05, the series is non-stationary
if result[1] < 0.05:
    print("Series is Stationary")
else:
    print("Series is Non-Stationary")

ADF Statistic: -5.984407154363437
p-value: 1.8064856551641487e-07
Series is Stationary


In [9]:
# Set the date as index
ts_laptop.set_index('Date', inplace=True)
ts_smartphone.set_index('Date', inplace=True)
ts_tablet.set_index('Date', inplace=True)
ts_accessory.set_index('Date', inplace=True)

In [10]:
# Check the shape of the dataset
print(f"Shape of the dataset: {ts_laptop.shape}")
print(f"Shape of the dataset: {ts_smartphone.shape}")
print(f"Shape of the dataset: {ts_tablet.shape}")
print(f"Shape of the dataset: {ts_accessory.shape}")

Shape of the dataset: (4363, 1)
Shape of the dataset: (4361, 1)
Shape of the dataset: (4371, 1)
Shape of the dataset: (4360, 1)


### **TIME SERIES ANALYSIS**

#### **Decompose Data**

##### Additive Model

In [None]:
# Specify frequency of the time series
ts_laptop.index = pd.to_datetime(ts_laptop.index)  
ts_laptop = ts_laptop.asfreq('M')

ts_smartphone.index = pd.to_datetime(ts_smartphone.index)  
ts_smartphone = ts_smartphone.asfreq('M')

ts_tablet.index = pd.to_datetime(ts_tablet.index)  
ts_tablet = ts_tablet.asfreq('M')

ts_accessory.index = pd.to_datetime(ts_accessory.index)  
ts_accessory = ts_accessory.asfreq('M')


  ts_laptop = ts_laptop.asfreq('M')
  ts_smartphone = ts_smartphone.asfreq('M')


In [None]:
ts_laptop

Unnamed: 0_level_0,Sales_Volume
Date,Unnamed: 1_level_1
2009-01-01,46.470588
2009-01-02,52.500000
2009-01-03,31.000000
2009-01-04,77.500000
2009-01-05,65.875000
...,...
2020-12-25,177.000000
2020-12-26,210.600000
2020-12-27,172.000000
2020-12-28,163.000000


In [None]:
# Decompose the time series 
decomposition_smartphone = seasonal_decompose(ts_smartphone, model='additive')
