### WALMART STORE SALES PREDICTION-FORECASTING

    ********************** WALMART STORE SALES PREDICTION -FORECASTING (ANALYSIS & VISUALIZATION) *******************
                                        (PRE PROCESSING)

In [1]:
#Importing Libraries
import numpy as np
import pandas as pd
import seaborn as sns

In [2]:
#Importing datasets: the four Walmart input tables plus the sample submission,
#all read from the current working directory
dfTrain, dfFeature, dfTest, dfStores, submission = (
    pd.read_csv(csvName)
    for csvName in ('train.csv', 'features.csv', 'test.csv',
                    'stores.csv', 'sampleSubmission.csv')
)

In [3]:
#Adding information about the stores and features to test and train dataset
#(pd.merge without an "on" argument joins on the columns both frames share)
dfTrainTmp           = pd.merge(dfTrain, dfStores)
dfTestTmp            = pd.merge(dfTest, dfStores)

train                = pd.merge(dfTrainTmp, dfFeature)
test                 = pd.merge(dfTestTmp, dfFeature)

#Separating Date into separate fields, identically for train and test.
#The Date column is parsed once per frame instead of once per derived field.
for frame in (train, test):
    parsedDate       = pd.to_datetime(frame['Date'])
    frame['Year']    = parsedDate.dt.year
    frame['Month']   = parsedDate.dt.month
    frame['Day']     = parsedDate.dt.day
    #Rough position in the year: every month is counted as 30 days
    frame['Days']    = frame['Month']*30+frame['Day']

#Log of sales + 4990 as intercept
#NOTE(review): the offset presumably keeps the log argument positive for
#negative Weekly_Sales values - confirm against the data
train['logSales']    = np.log(4990+train['Weekly_Sales'])

#Print the test and train count (number of non-null Store entries per frame)
train_count = train['Store'].count()
test_count = test['Store'].count()
print("Total count of instances for train dataset : " + str(train_count))
print("Total count of instances for test dataset : " + str(test_count))

Total count of instances for train dataset : 421570
Total count of instances for train dataset : 115064


In [4]:
#Visualization of analysis (pre-processing)

def _saveRegplot(frame, xCol, fileName, yCol="logSales"):
    """Draw a seaborn regression plot of yCol against xCol and save it to fileName.

    The figure is cleared after saving so the next plot starts from an
    empty canvas instead of drawing on top of this one.
    """
    axes = sns.regplot(x=xCol, y=yCol, data=frame)
    figure = axes.get_figure()
    figure.savefig(fileName)
    figure.clf()

#Plotting Fuel_Price, Temperature and Size VS logSales
#(each file is named after the plotted column)
for xCol in ("Fuel_Price", "Temperature", "Size"):
    _saveRegplot(train, xCol, xCol + '.png')

#Transform TYPE to numeric so it can be used as a regression axis.
#replace() leaves already-converted values untouched, so re-running this
#cell is harmless.
train['Type']        = train['Type'].replace({'A': 1, 'B': 2, 'C': 3})

#Plotting Type, IsHoliday, Year and Month VS logSales
for xCol, fileName in (("Type", 'Type.png'),
                       ("IsHoliday", 'isHoliday.png'),
                       ("Year", 'Year.png'),
                       ("Month", 'Month.png')):
    _saveRegplot(train, xCol, fileName)

    *************************** WALMART STORE SALES PREDICTION -FORECASTING (PREDICTION MODEL) *********************

In [2]:
## Import Libraries
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import metrics
from sklearn.ensemble import RandomForestRegressor,AdaBoostRegressor
try:
    from sklearn.model_selection import GridSearchCV, train_test_split
except ImportError:  # older scikit-learn releases only ship the legacy module paths
    from sklearn.grid_search import GridSearchCV
    from sklearn.cross_validation import train_test_split

#######################################################################################################################
#COMMENTS:-
#Defining function prosData
    #Step 1 - Importing train, test, stores, features and sampleSubmission
    #Step 2 - Merging store and feature information into the train and test dataframes
    #Step 3 - Splitting the Date field into year, month and day, and deriving a day count (month*30 + day)
    #Step 4 - Type conversion from categorical to numeric for column 'Type'
    #Step 5 - Computing the holiday day count (IsHoliday * Days) and the log of sales + 4990
    #Step 6 - Dropping the MarkDown columns since they are available only for 1 year
#######################################################################################################################
############################################ START OF prosData ########################################################
def _addDerivedColumns(frame):
    """Add the calendar, store-type and holiday columns shared by train and test.

    The frame is modified in place and also returned. Columns are added in
    the order Year, Month, Day, Days, daysHoliday ('Type' is overwritten in
    its existing position), so train and test end up with the same layout.
    """
    parsedDate           = pd.to_datetime(frame['Date'])   #parse once, reuse for every field
    frame['Year']        = parsedDate.dt.year
    frame['Month']       = parsedDate.dt.month
    frame['Day']         = parsedDate.dt.day
    #Rough position in the year: every month is counted as 30 days
    frame['Days']        = frame['Month']*30+frame['Day']

    #Pass Type to numeric (A/B/C -> 1/2/3); any other code becomes NaN
    frame['Type']        = frame['Type'].map({'A': 1, 'B': 2, 'C': 3})

    #Days value on holiday rows, 0 on every other row
    #(this is IsHoliday * Days, not a countdown to the next holiday)
    frame['daysHoliday'] = frame['IsHoliday']*frame['Days']
    return frame


def prosData():
    """Load the Walmart CSV files and build the model-ready train/test frames.

    Reads train.csv, features.csv, test.csv, stores.csv and
    sampleSubmission.csv from the current working directory, merges the
    store and feature tables into train and test, adds the derived columns
    and drops the columns that are not used as model inputs.

    Returns:
        (train, test, submission): train carries the feature columns plus
        'logSales' = log(4990 + Weekly_Sales); test carries the same
        feature columns in the same order; submission is
        sampleSubmission.csv as read.
    """
    salesOffset = 4990   #added to Weekly_Sales before taking the log

    #Reading Database
    dfTrain = pd.read_csv('train.csv')
    dfFeature = pd.read_csv('features.csv')
    dfTest = pd.read_csv('test.csv')
    dfStores = pd.read_csv('stores.csv')
    submission = pd.read_csv('sampleSubmission.csv')

    #Merging store information, then the features, into the data [Train and Test]
    #(pd.merge without an "on" argument joins on the columns both frames share)
    train                = pd.merge(pd.merge(dfTrain, dfStores), dfFeature)
    test                 = pd.merge(pd.merge(dfTest, dfStores), dfFeature)

    #Same derived columns for both frames
    train                = _addDerivedColumns(train)
    test                 = _addDerivedColumns(test)

    #Converting the sales to log scale; this is the regression target
    train['logSales']    = np.log(salesOffset+train['Weekly_Sales'])

    #Removing Date (not numeric) and the columns that have missing data.
    #Weekly_Sales has been transformed into logSales, so it is removed too.
    unusedColumns        = ['CPI','Unemployment','Date',
                            'MarkDown1','MarkDown2','MarkDown3',
                            'MarkDown4','MarkDown5']

    train                = train.drop(unusedColumns + ['Weekly_Sales'],axis=1)
    test                 = test.drop(unusedColumns,axis=1)

    return (train,test,submission)

########################################### END OF prosData ##########################################################

######################################################################################################################
#Comments:-
#Defining main function -
    #STEP 1 - Defining the input and output file for writing prediction Results
    #STEP 2 - Calling prosData to return the train and test dataset
    #STEP 3 - Formatting train and test dataset adding count of department, stores and holiday
    #STEP 4 - Random Forest Regressor and AdaBoostRegressor to get the sales prediction
    #STEP 5 - Writing error along with accuracy score
    #STEP 6 - Writing in submission file with prediction of sales
######################################################################################################################

############################################ START OF main ###########################################################
if __name__=="__main__":

    sns.set(color_codes=True)

    SALES_OFFSET         = 4990   #offset inside logSales = log(4990 + Weekly_Sales); undone after predicting
    HOLIDAY_EXTRA_COPIES = 4      #extra copies of each holiday row added to the training data
    MIN_STORE_ROWS       = 10     #below this many rows for the store, train on the whole department
    RANDOM_STATE         = 42     #fixed seed so that re-running gives the same split and models
    METRICS_HEADER       = 'absolute_error,squared_error,sqrt(squared_error),Accuracy,Store,Department\n'

    #Process the data first, so a failure while loading does not leave
    #empty result files behind
    train,test,submission = prosData()

    #Random Forest Regressor and AdaBoostRegressor to get the sales prediction
    #Both algorithms are used so their prediction accuracy can be compared
    RFreg    = RandomForestRegressor(n_estimators=200,min_samples_split=3,n_jobs=2,
                                     random_state=RANDOM_STATE)
    ABreg    = AdaBoostRegressor(n_estimators=200,random_state=RANDOM_STATE)

    #Output files; the with-block closes all four even if an iteration fails
    with open('resultRF.csv','w') as f_Submission_RF, \
         open('resultAB.csv','w') as f_Submission_AB, \
         open('resultRFmetrics.csv','w') as fmetrics_RF, \
         open('resultABmetrics.csv','w') as fmetrics_AB:

        #Outputs Header
        f_Submission_RF.write('Id,Weekly_Sales\n')
        f_Submission_AB.write('Id,Weekly_Sales\n')
        fmetrics_RF.write(METRICS_HEADER)
        fmetrics_AB.write(METRICS_HEADER)

        #Each regressor together with the files its results are written to
        modelOutputs = ((RFreg, fmetrics_RF, f_Submission_RF),
                        (ABreg, fmetrics_AB, f_Submission_AB))

        #The submission is walked one (store, department) group at a time:
        #the Id at position i names the group, and i then jumps over all the
        #test rows of that group.
        size = submission['Id'].count()
        i = 0
        while (i < size):
            tmpStr       = submission['Id'][i].split('_')
            tmpStore     = int(tmpStr[0])                                  #Store ID
            tmpDept      = int(tmpStr[1])                                  #Dept ID
            deptRows     = train.loc[train['Dept']==tmpDept]               #Training rows of this department (all stores)
            tmpSL        = int((deptRows['Store']==tmpStore).sum())        #Training rows for this store and department
            tmpDL        = len(deptRows)                                   #Training rows for this department

            #Boosting holiday data: append extra COPIES of the holiday rows.
            #(Multiplying the frame by 4 would scale every value in those
            #rows, including Store, Dept and logSales.)
            holidayRows  = deptRows.loc[deptRows['IsHoliday']==1]
            boostedDept  = pd.concat([deptRows] + [holidayRows]*HOLIDAY_EXTRA_COPIES)
            boostedStore = boostedDept.loc[boostedDept['Store']==tmpStore] #Same data filtered to this store

            #Test rows to predict for this store and department
            testF1       = test.loc[(test['Dept']==tmpDept) & (test['Store']==tmpStore)]
            testRows     = len(testF1)
            if testRows == 0:
                #i would never advance and the loop would spin forever
                raise ValueError('No test rows for submission Id %s' % submission['Id'][i])

            #Purpose of the IF CLAUSE -
            #When the store has very few rows for this department,
            #train on the data of the whole department instead
            if (tmpSL < MIN_STORE_ROWS) and (tmpDL!=0):
                print ("Number of Store is less than 10")
                fitData = boostedDept
            else:
                print ("Number of Store is greater than 10")
                fitData = boostedStore

            features = fitData.drop(['logSales'],axis=1)
            #Keep the target as plain floats; going through fixed-width byte
            #strings would truncate every value to 6 characters
            target   = np.asarray(fitData['logSales'],dtype=float)
            X_train, X_test, y_train, y_test = train_test_split(features,target,
                                                                random_state=RANDOM_STATE)

            for reg, fmetrics, f_Submission in modelOutputs:
                #1) Hold-out evaluation. fit() returns the estimator itself, so
                #   the metrics must be taken BEFORE the same object is refitted
                #   on all rows, otherwise it is scored on data it trained on.
                reg.fit(X_train,y_train)
                holdoutPred = reg.predict(X_test)
                sqError     = metrics.mean_squared_error(y_test,holdoutPred)
                #Recording the results of the error values (all on the log
                #scale); "Accuracy" is the estimator's score() times 100
                fmetrics.write('%f,%f,%f,%f,%f,%f\n'%(
                                    metrics.mean_absolute_error(y_test,holdoutPred),
                                    sqError,
                                    np.sqrt(sqError),
                                    reg.score(X_test,y_test)*100,tmpStore,tmpDept))

                #2) Refit on every available row, then predict and undo the
                #   log transform to get sales back
                reg.fit(features,target)
                salesPred = np.exp(reg.predict(testF1)) - SALES_OFFSET

                #Writing in final submission file
                for j in range(testRows):
                    f_Submission.write('%s,%s\n'%(submission['Id'][i+j],salesPred[j]))

            i+=testRows
            print (i)
############################################## END OF main ###########################################################

Number of Store is greater than 10
39
Number of Store is greater than 10
78
Number of Store is greater than 10
117
Number of Store is greater than 10
156
Number of Store is greater than 10
195
Number of Store is greater than 10
234
Number of Store is greater than 10
273
Number of Store is greater than 10
312
Number of Store is greater than 10
351
Number of Store is greater than 10
390
Number of Store is greater than 10
429
Number of Store is greater than 10
468
Number of Store is greater than 10
507
Number of Store is greater than 10
546
Number of Store is greater than 10
585
Number of Store is greater than 10
624
Number of Store is greater than 10
656
Number of Store is greater than 10
695
Number of Store is greater than 10
734
Number of Store is greater than 10
773
Number of Store is greater than 10
812
Number of Store is greater than 10
851
Number of Store is greater than 10
890
Number of Store is greater than 10
929
Number of Store is greater than 10
968
Number of Store is greater 

7707
Number of Store is greater than 10
7746
Number of Store is greater than 10
7785
Number of Store is greater than 10
7824
Number of Store is greater than 10
7863
Number of Store is greater than 10
7902
Number of Store is greater than 10
7923
Number of Store is greater than 10
7962
Number of Store is greater than 10
8001
Number of Store is greater than 10
8040
Number of Store is greater than 10
8053
Number of Store is greater than 10
8092
Number of Store is greater than 10
8131
Number of Store is greater than 10
8170
Number of Store is greater than 10
8209
Number of Store is greater than 10
8248
Number of Store is greater than 10
8287
Number of Store is greater than 10
8326
Number of Store is greater than 10
8365
Number of Store is greater than 10
8404
Number of Store is greater than 10
8443
Number of Store is greater than 10
8482
Number of Store is greater than 10
8521
Number of Store is greater than 10
8560
Number of Store is greater than 10
8599
Number of Store is greater than 10


15053
Number of Store is greater than 10
15087
Number of Store is greater than 10
15126
Number of Store is greater than 10
15165
Number of Store is greater than 10
15204
Number of Store is greater than 10
15243
Number of Store is greater than 10
15282
Number of Store is greater than 10
15321
Number of Store is greater than 10
15360
Number of Store is greater than 10
15399
Number of Store is greater than 10
15438
Number of Store is greater than 10
15477
Number of Store is greater than 10
15516
Number of Store is greater than 10
15555
Number of Store is greater than 10
15594
Number of Store is greater than 10
15633
Number of Store is greater than 10
15672
Number of Store is greater than 10
15711
Number of Store is greater than 10
15750
Number of Store is greater than 10
15789
Number of Store is greater than 10
15828
Number of Store is greater than 10
15867
Number of Store is greater than 10
15906
Number of Store is greater than 10
15945
Number of Store is greater than 10
15984
Number of 

22542
Number of Store is greater than 10
22581
Number of Store is greater than 10
22620
Number of Store is greater than 10
22659
Number of Store is greater than 10
22698
Number of Store is greater than 10
22737
Number of Store is greater than 10
22776
Number of Store is greater than 10
22815
Number of Store is greater than 10
22854
Number of Store is greater than 10
22893
Number of Store is greater than 10
22932
Number of Store is greater than 10
22971
Number of Store is greater than 10
23010
Number of Store is greater than 10
23021
Number of Store is greater than 10
23060
Number of Store is greater than 10
23063
Number of Store is greater than 10
23097
Number of Store is greater than 10
23124
Number of Store is greater than 10
23163
Number of Store is greater than 10
23183
Number of Store is greater than 10
23222
Number of Store is greater than 10
23261
Number of Store is greater than 10
23284
Number of Store is greater than 10
23323
Number of Store is greater than 10
23362
Number of 

29861
Number of Store is greater than 10
29900
Number of Store is greater than 10
29939
Number of Store is greater than 10
29978
Number of Store is greater than 10
30017
Number of Store is greater than 10
30056
Number of Store is greater than 10
30091
Number of Store is greater than 10
30130
Number of Store is greater than 10
30169
Number of Store is greater than 10
30208
Number of Store is greater than 10
30247
Number of Store is greater than 10
30286
Number of Store is greater than 10
30325
Number of Store is greater than 10
30364
Number of Store is greater than 10
30403
Number of Store is greater than 10
30442
Number of Store is greater than 10
30481
Number of Store is greater than 10
30520
Number of Store is greater than 10
30559
Number of Store is greater than 10
30598
Number of Store is greater than 10
30637
Number of Store is greater than 10
30676
Number of Store is greater than 10
30715
Number of Store is greater than 10
30754
Number of Store is greater than 10
30793
Number of 

37295
Number of Store is greater than 10
37334
Number of Store is greater than 10
37373
Number of Store is greater than 10
37412
Number of Store is greater than 10
37451
Number of Store is greater than 10
37490
Number of Store is greater than 10
37492
Number of Store is greater than 10
37531
Number of Store is greater than 10
37570
Number of Store is greater than 10
37597
Number of Store is greater than 10
37636
Number of Store is greater than 10
37675
Number of Store is greater than 10
37714
Number of Store is greater than 10
37753
Number of Store is greater than 10
37792
Number of Store is greater than 10
37831
Number of Store is greater than 10
37870
Number of Store is greater than 10
37909
Number of Store is greater than 10
37948
Number of Store is greater than 10
37987
Number of Store is greater than 10
38026
Number of Store is greater than 10
38065
Number of Store is greater than 10
38104
Number of Store is greater than 10
38143
Number of Store is greater than 10
38182
Number of 

44619
Number of Store is greater than 10
44645
Number of Store is greater than 10
44684
Number of Store is greater than 10
44723
Number of Store is greater than 10
44760
Number of Store is greater than 10
44799
Number of Store is greater than 10
44838
Number of Store is greater than 10
44877
Number of Store is greater than 10
44916
Number of Store is greater than 10
44955
Number of Store is greater than 10
44994
Number of Store is greater than 10
45033
Number of Store is greater than 10
45072
Number of Store is greater than 10
45111
Number of Store is greater than 10
45150
Number of Store is greater than 10
45189
Number of Store is greater than 10
45228
Number of Store is greater than 10
45267
Number of Store is greater than 10
45306
Number of Store is greater than 10
45345
Number of Store is greater than 10
45384
Number of Store is greater than 10
45423
Number of Store is greater than 10
45432
Number of Store is greater than 10
45471
Number of Store is greater than 10
45510
Number of 

52035
Number of Store is greater than 10
52074
Number of Store is greater than 10
52113
Number of Store is greater than 10
52152
Number of Store is greater than 10
52191
Number of Store is greater than 10
52230
Number of Store is greater than 10
52269
Number of Store is greater than 10
52308
Number of Store is greater than 10
52347
Number of Store is greater than 10
52386
Number of Store is greater than 10
52425
Number of Store is greater than 10
52464
Number of Store is greater than 10
52503
Number of Store is greater than 10
52542
Number of Store is greater than 10
52581
Number of Store is greater than 10
52620
Number of Store is greater than 10
52659
Number of Store is greater than 10
52661
Number of Store is greater than 10
52700
Number of Store is greater than 10
52702
Number of Store is greater than 10
52741
Number of Store is greater than 10
52780
Number of Store is greater than 10
52819
Number of Store is greater than 10
52858
Number of Store is greater than 10
52893
Number of 

59450
Number of Store is greater than 10
59489
Number of Store is greater than 10
59528
Number of Store is greater than 10
59567
Number of Store is greater than 10
59606
Number of Store is greater than 10
59645
Number of Store is greater than 10
59684
Number of Store is greater than 10
59723
Number of Store is greater than 10
59757
Number of Store is greater than 10
59796
Number of Store is greater than 10
59835
Number of Store is greater than 10
59874
Number of Store is greater than 10
59913
Number of Store is greater than 10
59952
Number of Store is greater than 10
59991
Number of Store is greater than 10
60030
Number of Store is greater than 10
60069
Number of Store is greater than 10
60108
Number of Store is greater than 10
60147
Number of Store is greater than 10
60186
Number of Store is greater than 10
60225
Number of Store is greater than 10
60264
Number of Store is greater than 10
60303
Number of Store is greater than 10
60342
Number of Store is greater than 10
60381
Number of 

66884
Number of Store is greater than 10
66923
Number of Store is greater than 10
66962
Number of Store is greater than 10
67001
Number of Store is greater than 10
67040
Number of Store is greater than 10
67079
Number of Store is greater than 10
67118
Number of Store is greater than 10
67157
Number of Store is greater than 10
67196
Number of Store is greater than 10
67235
Number of Store is greater than 10
67274
Number of Store is less than 10
67276
Number of Store is greater than 10
67315
Number of Store is greater than 10
67354
Number of Store is greater than 10
67393
Number of Store is greater than 10
67432
Number of Store is greater than 10
67471
Number of Store is greater than 10
67510
Number of Store is greater than 10
67549
Number of Store is greater than 10
67588
Number of Store is greater than 10
67627
Number of Store is greater than 10
67666
Number of Store is greater than 10
67705
Number of Store is greater than 10
67744
Number of Store is greater than 10
67783
Number of Sto

74410
Number of Store is greater than 10
74414
Number of Store is greater than 10
74453
Number of Store is greater than 10
74492
Number of Store is greater than 10
74523
Number of Store is greater than 10
74562
Number of Store is greater than 10
74601
Number of Store is greater than 10
74631
Number of Store is greater than 10
74670
Number of Store is greater than 10
74709
Number of Store is greater than 10
74748
Number of Store is greater than 10
74787
Number of Store is greater than 10
74826
Number of Store is greater than 10
74865
Number of Store is greater than 10
74904
Number of Store is greater than 10
74943
Number of Store is greater than 10
74982
Number of Store is greater than 10
75021
Number of Store is greater than 10
75060
Number of Store is greater than 10
75099
Number of Store is greater than 10
75138
Number of Store is greater than 10
75177
Number of Store is greater than 10
75216
Number of Store is greater than 10
75255
Number of Store is greater than 10
75294
Number of 

81577
Number of Store is greater than 10
81616
Number of Store is greater than 10
81624
Number of Store is greater than 10
81663
Number of Store is greater than 10
81666
Number of Store is greater than 10
81705
Number of Store is greater than 10
81706
Number of Store is greater than 10
81745
Number of Store is greater than 10
81775
Number of Store is greater than 10
81814
Number of Store is greater than 10
81853
Number of Store is greater than 10
81891
Number of Store is greater than 10
81930
Number of Store is greater than 10
81969
Number of Store is greater than 10
82008
Number of Store is greater than 10
82047
Number of Store is greater than 10
82086
Number of Store is greater than 10
82125
Number of Store is less than 10
82127
Number of Store is greater than 10
82166
Number of Store is greater than 10
82205
Number of Store is greater than 10
82244
Number of Store is greater than 10
82283
Number of Store is greater than 10
82322
Number of Store is greater than 10
82361
Number of Sto

88710
Number of Store is less than 10
88712
Number of Store is greater than 10
88751
Number of Store is greater than 10
88790
Number of Store is greater than 10
88829
Number of Store is greater than 10
88868
Number of Store is greater than 10
88876
Number of Store is greater than 10
88915
Number of Store is greater than 10
88917
Number of Store is greater than 10
88956
Number of Store is greater than 10
88995
Number of Store is greater than 10
89034
Number of Store is greater than 10
89054
Number of Store is greater than 10
89093
Number of Store is greater than 10
89132
Number of Store is greater than 10
89171
Number of Store is greater than 10
89209
Number of Store is greater than 10
89248
Number of Store is greater than 10
89287
Number of Store is greater than 10
89326
Number of Store is greater than 10
89365
Number of Store is greater than 10
89404
Number of Store is greater than 10
89443
Number of Store is greater than 10
89482
Number of Store is greater than 10
89521
Number of Sto

95452
Number of Store is greater than 10
95476
Number of Store is greater than 10
95515
Number of Store is greater than 10
95516
Number of Store is greater than 10
95554
Number of Store is greater than 10
95593
Number of Store is greater than 10
95632
Number of Store is greater than 10
95671
Number of Store is less than 10
95672
Number of Store is greater than 10
95691
Number of Store is greater than 10
95730
Number of Store is greater than 10
95769
Number of Store is greater than 10
95808
Number of Store is greater than 10
95847
Number of Store is greater than 10
95886
Number of Store is greater than 10
95925
Number of Store is greater than 10
95963
Number of Store is greater than 10
96002
Number of Store is greater than 10
96041
Number of Store is greater than 10
96080
Number of Store is greater than 10
96119
Number of Store is greater than 10
96158
Number of Store is greater than 10
96197
Number of Store is greater than 10
96236
Number of Store is greater than 10
96275
Number of Sto

102492
Number of Store is greater than 10
102531
Number of Store is greater than 10
102570
Number of Store is greater than 10
102609
Number of Store is greater than 10
102612
Number of Store is greater than 10
102651
Number of Store is greater than 10
102653
Number of Store is greater than 10
102691
Number of Store is greater than 10
102724
Number of Store is greater than 10
102763
Number of Store is greater than 10
102786
Number of Store is greater than 10
102825
Number of Store is greater than 10
102864
Number of Store is greater than 10
102903
Number of Store is greater than 10
102942
Number of Store is greater than 10
102981
Number of Store is greater than 10
103020
Number of Store is greater than 10
103059
Number of Store is greater than 10
103098
Number of Store is greater than 10
103137
Number of Store is greater than 10
103176
Number of Store is greater than 10
103215
Number of Store is greater than 10
103254
Number of Store is greater than 10
103293
Number of Store is greater 

109351
Number of Store is greater than 10
109390
Number of Store is greater than 10
109429
Number of Store is greater than 10
109468
Number of Store is greater than 10
109507
Number of Store is greater than 10
109516
Number of Store is greater than 10
109555
Number of Store is less than 10
109557
Number of Store is greater than 10
109594
Number of Store is greater than 10
109612
Number of Store is greater than 10
109651
Number of Store is greater than 10
109690
Number of Store is less than 10
109693
Number of Store is greater than 10
109732
Number of Store is greater than 10
109771
Number of Store is greater than 10
109810
Number of Store is greater than 10
109849
Number of Store is greater than 10
109888
Number of Store is greater than 10
109927
Number of Store is greater than 10
109966
Number of Store is greater than 10
109976
Number of Store is greater than 10
110015
Number of Store is greater than 10
110054
Number of Store is greater than 10
110093
Number of Store is greater than 1

    ********************** WALMART STORE SALES PREDICTION -FORECASTING (ANALYSIS & VISUALIZATION) *******************
                                    (POST PROCESSING)

In [6]:
#Visualize the metrics outcome of RandomForest and AdaBoost (post-processing)
#Needs to be run after building the prediction model (WALMART_STORE_PREDICTION_MODEL_SOLUTION.ipynb),
#which writes the two metrics files read here
results_AB = pd.read_csv('resultABmetrics.csv')
results_RF = pd.read_csv('resultRFmetrics.csv')

def _saveMetricPlot(results, xCol, yCol, fileName):
    """Draw a seaborn regression plot of yCol against xCol and save it to fileName.

    The figure is cleared after saving so the next plot starts from an
    empty canvas instead of drawing on top of this one.
    """
    axes = sns.regplot(x=xCol, y=yCol, data=results)
    figure = axes.get_figure()
    figure.savefig(fileName)
    figure.clf()

#One entry per plotted pair: (x column, y column, file name stem).
#Every entry is drawn twice, once per model, and saved as <stem>_AB.png
#(AdaBoost) and <stem>_RF.png (RandomForest).
metricPlots = (
    ("Store",      "absolute_error",      'AbError_Store'),
    ("Store",      "squared_error",       'SqError_Store'),
    ("Store",      "sqrt(squared_error)", 'Sqrt_SqError_Store'),
    ("Store",      "Accuracy",            'Acuracy_Store'),    #existing file name spelling kept
    ("Department", "absolute_error",      'AbError_Department'),
    ("Department", "squared_error",       'SqError_Department'),
    ("Department", "sqrt(squared_error)", 'Sqrt_SqError_Department'),
    ("Department", "Accuracy",            'Accuracy_Department'),
)

for xCol, yCol, fileStem in metricPlots:
    for modelTag, results in (('AB', results_AB), ('RF', results_RF)):
        _saveMetricPlot(results, xCol, yCol, '%s_%s.png' % (fileStem, modelTag))

    *********************************************** THANK YOU ******************************************************