In [50]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import acf
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

In [8]:
# Read the mandi price export, then replace the `date` column with parsed datetimes.
mandi_data = pd.read_csv("Seasonality_Analysis_data.csv")
mandi_data = mandi_data.assign(date=pd.to_datetime(mandi_data['date']))

In [12]:
# Preview the first five rows of the loaded frame.
mandi_data.head(n=5)

Unnamed: 0,APMC,commodity,Year,Month,arrivals_in_qtl,min_price,max_price,modal_price,date,district_name
0,Yeotmal,soybean,2015,April,12384,3311,3739,3527.0,2015-04-01,Yewatmal
1,Yeotmal,soybean,2016,April,20978,3661,4045,3857.0,2016-04-01,Yewatmal
2,Yeotmal,soybean,2015,August,5958,3029,3321,3240.0,2015-08-01,Yewatmal
3,Yeotmal,soybean,2016,August,3539,3089,3438,3282.0,2016-08-01,Yewatmal
4,Yeotmal,soybean,2014,December,18921,3014,3300,3163.0,2014-12-01,Yewatmal


# Creating Seasonality Type Detection Function that adds a new column to the DataFrame with-
-  Seasonality types
  -  Multiplicative ( Observed = Level* Trend* Seasonal* Noise )
  -  Additive       ( Observed = Level+ Trend+ Seasonal+ Noise )
  

#  Using the seasonal_decompose method to detect the type of Seasonality using the function below

-  We work with a quarterly frequency (a seasonal period of 3 observations) to appropriately judge seasonality.

In [67]:
def Seasonality_Type(apmc, commodity, period=3):
    """Classify the seasonality of one (APMC, commodity) modal-price series.

    The series is decomposed twice with ``seasonal_decompose`` (additive and
    multiplicative). The model whose residuals are more autocorrelated (larger
    L2 norm of the residual ACF) is treated as the worse fit, and the other
    model's name is returned.

    Parameters
    ----------
    apmc : str
        Value to match in the ``APMC`` column of ``mandi_data``.
    commodity : str
        Value to match in the ``commodity`` column of ``mandi_data``.
    period : int, default 3
        Observations per seasonal cycle passed to ``seasonal_decompose``
        (3 keeps the original "quarterly" setting).

    Returns
    -------
    str
        ``'Multiplicative'`` or ``'Additive'``.

    Raises
    ------
    KeyError
        If ``mandi_data`` has no rows for the requested pair.

    Notes
    -----
    Uses the ``period`` keyword of ``seasonal_decompose``; the older ``freq``
    spelling is rejected by current statsmodels releases.
    """
    is_pair = (mandi_data['APMC'] == apmc) & (mandi_data['commodity'] == commodity)
    # .copy() so the date conversion below never writes into a slice of mandi_data.
    dframe = mandi_data.loc[is_pair, ['date', 'modal_price']].copy()
    if dframe.empty:
        # Same exception type the previous groupby(...).get_group lookup raised.
        raise KeyError((apmc, commodity))
    dframe['date'] = pd.to_datetime(dframe['date'])
    prices = dframe.sort_values(by='date').set_index('date')['modal_price']

    acf_norm = {}
    for model in ('additive', 'multiplicative'):
        decomposition = seasonal_decompose(prices, model=model, period=period)
        # The centred moving-average trend leaves NaN residuals at both ends of
        # the series; drop them (for period=3 this is the first and last value).
        residuals = decomposition.resid.dropna()
        # Correlation function 'acf' summarises how much structure is left over.
        acf_norm[model] = np.linalg.norm(acf(residuals))

    if acf_norm['additive'] > acf_norm['multiplicative']:
        return 'Multiplicative'
    return 'Additive'

# Creating a Temporary DataFrame to extract unique pairs of APMC's and Commodities

In [36]:
# One row per (APMC, commodity) pair, with the number of observations for that pair.
temp_df = (
    mandi_data
    .groupby(['APMC', 'commodity'])
    .size()
    .reset_index(name='Frequency')
)

In [38]:
# Preview the first five (APMC, commodity) pairs and their row counts.
temp_df.head(n=5)

Unnamed: 0,APMC,commodity,Frequency
0,Aamgaon,paddy-unhusked,27
1,Akhadabalapur,soybean,27
2,Akluj,bitter gourd,27
3,Akluj,brinjal,26
4,Akluj,cabbage,27


# Detecting the Seasonality Type for each unique pair, and creating the final Dataframe

In [68]:
# Label every (APMC, commodity) group with its detected seasonality type.
# The labelled groups are collected in a list and concatenated once: calling
# pd.concat inside the loop re-copies all previously gathered rows each time.
labelled_groups = []
for (apmc, commodity), group in mandi_data.groupby(['APMC', 'commodity']):
    ses_type = Seasonality_Type(apmc, commodity)
    # .assign returns a new frame, so nothing is written into a slice of mandi_data.
    labelled_groups.append(group.assign(Seasonality_Type=ses_type))

# Reversed so the row order matches the earlier prepend-style concat (last group first).
Seasonality_Detected = (
    pd.concat(labelled_groups[::-1]) if labelled_groups else pd.DataFrame()
)
    

# Observing the Data to look for any anomaly or Algorithmic/Coding error 

In [69]:
# Collapse to one row per (APMC, commodity, Seasonality_Type) with its row count.
show_df = (
    Seasonality_Detected
    .groupby(['APMC', 'commodity', 'Seasonality_Type'])
    .size()
    .reset_index()
)

In [72]:
# Number of (APMC, commodity) pairs assigned to each seasonality type.
show_df['Seasonality_Type'].value_counts()

Additive          441
Multiplicative    404
Name: Seasonality_Type, dtype: int64

# The two types are roughly balanced (441 Additive vs 404 Multiplicative pairs), so the labels are not heavily skewed toward either type.




# Re-implementing the seasonal_decompose steps by hand in a new function

-  Manually Adding a Quarter Column as we are Analysing Quarterly

In [82]:
# Calendar quarter of each observation's date, stored as a new column.
mandi_data['Quarter'] = pd.to_datetime(mandi_data['date']).dt.quarter

In [84]:
# Preview the first five rows to confirm the new `Quarter` column.
mandi_data.head(n=5)

Unnamed: 0,APMC,commodity,Year,Month,arrivals_in_qtl,min_price,max_price,modal_price,date,district_name,Quarter
0,Yeotmal,soybean,2015,April,12384,3311,3739,3527.0,2015-04-01,Yewatmal,2
1,Yeotmal,soybean,2016,April,20978,3661,4045,3857.0,2016-04-01,Yewatmal,2
2,Yeotmal,soybean,2015,August,5958,3029,3321,3240.0,2015-08-01,Yewatmal,3
3,Yeotmal,soybean,2016,August,3539,3089,3438,3282.0,2016-08-01,Yewatmal,3
4,Yeotmal,soybean,2014,December,18921,3014,3300,3163.0,2014-12-01,Yewatmal,4


# We go through the following Process-

-  First, remove the trend from the observed prices using a rolling mean (note: the code below uses a 2-observation window rather than a full quarter)
-  Second, get the seasonal component from the detrended series by averaging it within each quarter
-  Third, get the residual component by removing the seasonal component from the detrended series
-  Lastly, apply the ACF to the residuals again to decide the type

In [111]:
def Seasonality_Detect(apmc, comm, window=2):
    """Classify seasonality for one (APMC, commodity) pair with a hand-rolled decomposition.

    Steps:
      1. Estimate the trend with a trailing rolling mean and remove it
         (subtract for the additive model, divide for the multiplicative one).
      2. Take the mean of the detrended series within each quarter as the
         seasonal component.
      3. Remove the seasonal component to obtain the residuals
         (subtract for additive, divide for multiplicative).
      4. Score each model by the L2 norm of its residual ACF; the model with
         the larger score is treated as the worse fit.

    Parameters
    ----------
    apmc : str
        Value to match in the ``APMC`` column of ``mandi_data``.
    comm : str
        Value to match in the ``commodity`` column of ``mandi_data``.
    window : int, default 2
        Rolling-mean window (in observations) used for the trend estimate.
        NOTE(review): the notebook narrative describes a quarterly window, while
        the code has always used 2; the default keeps 2 - confirm which is intended.

    Returns
    -------
    str
        ``'Multiplicative'`` or ``'Additive'``.

    Raises
    ------
    KeyError
        If ``mandi_data`` has no rows for the requested pair.
    """
    is_pair = (mandi_data['APMC'] == apmc) & (mandi_data['commodity'] == comm)
    # .copy() so the column assignments below never write into a slice of mandi_data.
    dframe = mandi_data.loc[is_pair, ['date', 'modal_price', 'Quarter']].copy()
    if dframe.empty:
        # Same exception type the previous groupby(...).get_group lookup raised.
        raise KeyError((apmc, comm))
    dframe['date'] = pd.to_datetime(dframe['date'])
    dframe = dframe.sort_values(by='date').set_index('date')

    # FIRST: trend estimate and detrended series for both models.
    dframe['Trend'] = dframe['modal_price'].rolling(window=window).mean()
    dframe['add_remove'] = dframe['modal_price'] - dframe['Trend']  # Additive type, trend subtracted
    dframe['mul_remove'] = dframe['modal_price'] / dframe['Trend']  # Multiplicative type, trend divided

    # SECOND: seasonal component = per-quarter mean of the detrended series,
    # broadcast back onto every row of that quarter.
    dframe['add_ses_com'] = dframe.groupby('Quarter')['add_remove'].transform('mean')
    dframe['mul_ses_com'] = dframe.groupby('Quarter')['mul_remove'].transform('mean')

    # THIRD: residuals. In a multiplicative model the components are factors,
    # so the seasonal part is divided out rather than subtracted.
    dframe['add_residual'] = dframe['add_remove'] - dframe['add_ses_com']
    dframe['mul_residual'] = dframe['mul_remove'] / dframe['mul_ses_com']

    # FOURTH and LAST: the first `window - 1` rows have no rolling mean, so their
    # residuals are NaN. acf does not drop missing values by default, which made
    # both scores NaN and the comparison always fall through to 'Additive'.
    add_residuals = dframe['add_residual'].dropna()
    mul_residuals = dframe['mul_residual'].dropna()
    # L2 norm, matching Seasonality_Type; the earlier `sum(acf * 2)` doubled the
    # autocorrelations instead of squaring them, letting opposite-signed lags cancel.
    detect_add = np.linalg.norm(acf(add_residuals))
    detect_mul = np.linalg.norm(acf(mul_residuals))

    if detect_add > detect_mul:
        return 'Multiplicative'
    return 'Additive'
    
    

In [112]:
# One row per (APMC, commodity) pair, with the number of observations for that pair.
temp_df = mandi_data.groupby(['APMC', 'commodity']).size().reset_index().rename(columns={0: 'Frequency'})

# Label every group with the type found by the hand-rolled decomposition.
# The labelled groups are collected in a list and concatenated once: calling
# pd.concat inside the loop re-copies all previously gathered rows each time.
labelled_groups_2 = []
for (apmc, commodity), group in mandi_data.groupby(['APMC', 'commodity']):
    ses_type = Seasonality_Detect(apmc, commodity)
    # .assign returns a new frame, so nothing is written into a slice of mandi_data.
    labelled_groups_2.append(group.assign(Seasonality_Type=ses_type))

# Reversed so the row order matches the earlier prepend-style concat (last group first).
Seasonality_Detected_2 = (
    pd.concat(labelled_groups_2[::-1]) if labelled_groups_2 else pd.DataFrame()
)

In [None]:
# Collapse to one row per (APMC, commodity, Seasonality_Type), then count the
# pairs assigned to each type.
show_df = (
    Seasonality_Detected_2
    .groupby(['APMC', 'commodity', 'Seasonality_Type'])
    .size()
    .reset_index()
)
show_df['Seasonality_Type'].value_counts()

# Finally, we export the data labelled by the first method (`Seasonality_Detected`) for deseasonalizing based on the detected type

In [115]:
# Write the labelled frame to CSV without the index column.
Seasonality_Detected.to_csv(path_or_buf='Seasonality_Type_Detected_Data.csv', index=False)