In [76]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score
import time
from datetime import datetime
import datetime

types = {'CompetitionOpenSinceYear': np.dtype(int),
         'CompetitionOpenSinceMonth': np.dtype(int),
         'CompetitionDistance' : np.dtype(int),
         'StateHoliday': np.dtype(str),
         'Promo2SinceWeek': np.dtype(int),
         'Promo2SinceYear': np.dtype(int),
         'SchoolHoliday': np.dtype(float),
         'PromoInterval': np.dtype(str)}

In [2]:
train = pd.read_csv('D://kishore//train.csv',
                    parse_dates=['Date'], dtype=types)

test = pd.read_csv('D://kishore//test.csv',
                   parse_dates=['Date'],dtype=types)

store = pd.read_csv('D://kishore//store.csv')

In [3]:
def calcDates(df):
    df['Month'] = df.Date.dt.month
    df['Year'] = df.Date.dt.year
    df['Day'] = df.Date.dt.day
    df['WeekOfYear'] = df.Date.dt.weekofyear
    # Year-Month 2015-08 
    # will be used for monthly sale calculation:
    df['YearMonth'] = df['Date'].apply(lambda x:(str(x)[:7]))
    return df


train = pd.merge(train,store,on='Store')
test = pd.merge(test,store,on='Store')

train = calcDates(train)
test = calcDates(test)

In [4]:
def cleanPromoCompetition(df,drop=False):
    # ========== Fixing promo2 ============
    df.PromoInterval.fillna(0,inplace=True)
    monthAsString = {1:'Jan', 2:'Feb', 3:'Mar', 4:'Apr', 5:'May', 6:'Jun',
                     7:'Jul', 8:'Aug', 9:'Sept', 10:'Oct', 11:'Nov', 12:'Dec'}

    # Using string format of month names to extract info from promo interval column                 
    df['SMonth'] = df.Month.map(monthAsString)
    # Fixing NaN values in promo interval when there is no promotion
    df.loc[df.PromoInterval==0,'PromoInterval'] = ''

    # New feature: 
    #     IsPromo2Month: 
    #     0 if month is not among PromoInterval
    #     1 if it is


    df['IsPromo2Month'] = 0
    for interval in df.PromoInterval.unique():
        if interval != '':
            for month in interval.split(','):
                condmatch = (df.SMonth == month) & (df.PromoInterval == interval)
                # If promo started this year, Week of Year must be > Promo2SinceWeek
                cond1 = (condmatch & (df.Year == df.Promo2SinceYear)
                         & (df.WeekOfYear >= df.Promo2SinceWeek) )
                # Or If promo started previous year, Week of Year doesn't matter
                cond2 = condmatch & (df.Year > df.Promo2SinceYear)
                fullcond = cond1 | cond2
                df.loc[fullcond, 'IsPromo2Month'] = 1

     # ======= Fixing Competition =============
    df.CompetitionOpenSinceYear.fillna(0,inplace=True)
    df.CompetitionOpenSinceMonth.fillna(0,inplace=True)

    # New feature: 
    #    Competition:
    #    1 if there exist a compettion at date = today
    #    0 otherwise

    df['Competition'] = 0
    cond1 = df.Year > df.CompetitionOpenSinceYear
    cond2 = ((df.Year == df.CompetitionOpenSinceYear)
             & (df.Month >= df.CompetitionOpenSinceMonth))
    fullcond = cond1 | cond2
    df.loc[fullcond, 'Competition'] = 1

    if (drop):
        df = df.drop(['SMonth','PromoInterval','Promo2SinceYear','Promo2SinceWeek'],1)
        df = df.drop(['CompetitionOpenSinceMonth','CompetitionOpenSinceYear'],1)

    return df

train = cleanPromoCompetition(train,drop=True)
test = cleanPromoCompetition(test,drop=True)

In [5]:
trainOpen = train[train.Open == 1][['Store','YearMonth','Sales']]
monthlySale  = trainOpen.groupby(['Store','YearMonth'],as_index=False).mean()


#====== Finding renovated stores ========

renovatedStores = []
for store in train.Store.unique():
    # Renovated stores are close before 2015 for more than 2 month
    if len(monthlySale[monthlySale.Store==store]) < 29:
        renovatedStores.append(store)


#print(renovatedStores)

def createRenovation(df,renovatedStores):

    # New features:
    # StoreRenovated: 1 if it is, 0 otherwise
    # DaysAfterRenovation: 0 if date is before renovation, 1 if it is after
    df['StoreRenovated'] = 0
    df['DaysAfterRenovation'] = 0
    for store in renovatedStores:
        df.loc[df.Store == store,'StoreRenovated'] = 1
        # Renovated stores are back to open state in 2015
        df.loc[(df.Store == store) & (df.Year == 2015), 'DaysAfterRenovation'] = 1

    return df


train = createRenovation(train,renovatedStores)
test  = createRenovation(test,renovatedStores)



monthlySale['MonthSale'] = monthlySale.Sales
monthlySale = monthlySale.drop(['Sales'],1)

# New feature: MonthSale:
# Average of monthly sale for each store
# Adding monthly sale to train set:
train = pd.merge(train,monthlySale,on=['Store','YearMonth'])


# Small NaN Fix on test, only 1 case which is in fact open
test.Open.fillna(1,inplace=True)


#train = train.sort_values(by = 'Date')
train.to_csv('D://kishore//trainCleaned.csv')
test.to_csv('D://kishore//testCleaned.csv')

In [157]:
train_r = pd.read_csv('D://kishore//trainCleaned_new.csv')
test_r = pd.read_csv('D://kishore//testCleaned_new.csv')

train_r.drop(['Unnamed: 0'], axis=1, inplace=True)
test_r.drop(['Unnamed: 0'], axis=1, inplace=True)


In [158]:
train_r['CompetitionDistance'].mean()


1270.0

In [159]:
test_r['CompetitionDistance'].mean()


1270.0

In [160]:
train_r.CompetitionDistance = train_r.CompetitionDistance.fillna(value=train_r['CompetitionDistance'].mean())
test_r.CompetitionDistance = test_r.CompetitionDistance.fillna(value=test_r['CompetitionDistance'].mean())

In [161]:
train_r.isnull().sum()
test_r.isnull().sum()

Id                     0
Store                  0
DayOfWeek              0
Date                   0
Open                   0
Promo                  0
StateHoliday           0
SchoolHoliday          0
StoreType              0
Assortment             0
CompetitionDistance    0
Promo2                 0
Month                  0
Year                   0
Day                    0
WeekOfYear             0
YearMonth              0
IsPromo2Month          0
Competition            0
StoreRenovated         0
DaysAfterRenovation    0
dtype: int64

In [162]:
train_r.columns


Index(['Store', 'DayOfWeek', 'Date', 'Sales', 'Customers', 'Open', 'Promo',
       'StateHoliday', 'SchoolHoliday', 'StoreType', 'Assortment',
       'CompetitionDistance', 'Promo2', 'Month', 'Year', 'Day', 'WeekOfYear',
       'YearMonth', 'IsPromo2Month', 'Competition', 'StoreRenovated',
       'DaysAfterRenovation', 'MonthSale'],
      dtype='object')

In [165]:
train_r.drop(train_r[train_r.Open != 1].index, inplace=True)
train_r.drop(train_r[train_r.Sales == 0].index, inplace=True)

In [166]:
train_r.drop(['YearMonth'], axis=1, inplace=True)
test_r.drop(['YearMonth'], axis=1, inplace=True)

In [167]:
train_r['YearTrend']=0
test_r['YearTrend']=0
train_r['TimeInMonth'] = (train_r['Year'] - 2013)*12 + train_r['Month']
test_r['TimeInMonth'] = (test_r['Year'] - 2013)*12 + test_r['Month']

In [168]:
growthfitnopromo = None
growthfitwithpromo = None
growthfit0nopromo = None
growthfit0withpromo = None
growthfit1nopromo = None
growthfit1withpromo = None

In [169]:
#for i in train_r['Date']:
 #   date_format = '%Y-%m-%d'
  #  a = datetime.datetime.strptime('1899-12-30', date_format)
   # b = datetime.datetime.strptime(d, date_format)
   # delta = b - a
   # train_r[i, 'date']= delta
            

In [None]:
Store =  train_r[(train_r['Store']==1) & (train_r['Month'] !=12)][['Store', 'DayOfWeek', 'Date', 'Sales', 
                                                                                'Customers', 'Open', 'Promo','StateHoliday', 
                                                                                'SchoolHoliday', 'StoreType', 'Assortment',
                                                                                'CompetitionDistance', 'Promo2', 'Month',
                                                                                'Year', 'Day', 'WeekOfYear','IsPromo2Month',
                                                                                'Competition', 'StoreRenovated',
                                                                                'DaysAfterRenovation', 'MonthSale']]

Store_test =  test_r[(test_r['Store']==1) & (test_r['Month'] !=12)][['Store', 'DayOfWeek', 'Date',  
                                                                                 'Open', 'Promo','StateHoliday', 
                                                                                'SchoolHoliday', 'StoreType', 'Assortment',
                                                                                'CompetitionDistance', 'Promo2', 'Month',
                                                                                'Year', 'Day', 'WeekOfYear','IsPromo2Month',
                                                                                'Competition', 'StoreRenovated',
                                                                                'DaysAfterRenovation']]


In [None]:
for i in train_r['Store'].unique():
    Store =  train_r[(train_r['Store']==1) & (train_r['Month'] !=12)][['Store', 'DayOfWeek', 'Date', 'Sales', 
                                                                                'Customers', 'Open', 'Promo','StateHoliday', 
                                                                                'SchoolHoliday', 'StoreType', 'Assortment',
                                                                                'CompetitionDistance', 'Promo2', 'Month',
                                                                                'Year', 'Day', 'WeekOfYear','IsPromo2Month',
                                                                                'Competition', 'StoreRenovated',
                                                                                'DaysAfterRenovation', 'MonthSale']]

    Store_test =  test_r[(test_r['Store']==1) & (test_r['Month'] !=12)][['Store', 'DayOfWeek', 'Date',  
                                                                                 'Open', 'Promo','StateHoliday', 
                                                                                'SchoolHoliday', 'StoreType', 'Assortment',
                                                                                'CompetitionDistance', 'Promo2', 'Month',
                                                                                'Year', 'Day', 'WeekOfYear','IsPromo2Month',
                                                                                'Competition', 'StoreRenovated',
                                                                                'DaysAfterRenovation']]

    if Store['StoreRenovated'].iloc[1] == 0:
        store_subset = Store.loc[Store['Promo']==0]
        x = store_subset['Date']
        x = x.reshape(-1, 1)
        y= store_subset['Sales']
        y = y.reshape(-1, 1)
        growthfitnopromo = linear_model.LinearRegression()
        model = growthfitnopromo.fit(x, y)
        print('Coefficients: \n', growthfitnopromo.coef_)
        xx_subset = train_r.loc[train_r['Store']==1]
        xx_subset = train_r.loc[train_r['Promo']==0]
        xx = xx_subset['Date']
        xx = xx.reshape(-1, 1)
        yy = regr.predict(xx)
        train_r[(train_r['Store']==1) & (train_r['Promo'] ==0)][['YearTrend']] = yy
        
        if s in test_r['Store'].unique():
            xx_test_subset = test_r.loc[test_r['Store'] == s & test_r['Promo'] == 0]
            xx_test = xx_test_subset['Date']
            yy_test = growthfitnopromo.predict(yy_test)
            test_r[(test_r['Store']== s) & (test_r['Promo'] == 0)][['YearTrend']] = yy
            
        #======= With Promo =============
        promo_subset = train_r.loc[train_r['Promo']==1]
        xx = promo_subset['Date']
        xx = xx.reshape(-1, 1)
        yy = promo_subset['Sales']
        y = y.reshape(-1, 1)
        growthfitwithpromo = linear_model.LinearRegression()
        growthfitwithpromo = growthfitwithpromo.fit(xx, yy)
        print('Coefficients: \n', growthfitwithpromo.coef_)
        xx_subset = train_r.loc[train_r['Store']==1]
        xx_subset = train_r.loc[train_r['Promo']==0]
        xx = xx_subset['Date']
        xx = xx.reshape(-1, 1)
        yy = regr.predict(xx)
        train_r[(train_r['Store']==1) & (train_r['Promo'] ==0)][['YearTrend']] = yy
        
        plt.scatter(train_r['Store'], train_r['Sales'],  color='black')
        plt.plot(train_r['Store'], train_r['Sales'], color='blue', linewidth=3)

        plt.xticks(())
        plt.yticks(())

        plt.show()
        
         if s in test_r['Store'].unique():
            xx_test_subset = test_r.loc[test_r['Store'] == s & test_r['Promo'] == 0]
            xx_test = xx_test_subset['Date']
            yy_test = growthfitwithpromo.predict(yy_test)
            test_r[(test_r['Store']== s) & (test_r['Promo'] == 0)][['YearTrend']] = yy
  
    else:
         #=======Without Promo =============
         # Fitting before renovation:
         store_subset = Store.loc[Store['DaysAfterRenovation']==0 & Store['Promo']==0]
         x = store_subset['Date']
         x = x.reshape(-1, 1)
         y= store_subset['Sales']
         y = y.reshape(-1, 1)
         growthfit0nopromo = linear_model.LinearRegression()
         growthfit0nopromo = growthfit0nopromo.fit(x, y)
         print('Coefficients: \n', growthfit0nopromo.coef_)
         xx_subset = train_r.loc[train_r['DaysAfterRenovation']==0]
         xx_subset = train_r.loc[train_r['Promo']==0]
         xx = xx_subset['Date']
         xx = xx.reshape(-1, 1)
         yy = growthfit0nopromo.predict(xx)
         train_r[(train_r['DaysAfterRenovation']==1) & (train_r['Promo'] ==0)][['YearTrend']] = yy
        
         #=======With Promo =============
         store_subset = Store.loc[Store['DaysAfterRenovation']==0 & Store['Promo']==1]
         x = store_subset['Date']
         x = x.reshape(-1, 1)
         y= store_subset['Sales']
         y = y.reshape(-1, 1)
         growthfit0withpromo = linear_model.LinearRegression()
         growthfit0withpromo = growthfit0withpromo.fit(x, y)
         print('Coefficients: \n', growthfit0withpromo.coef_)
         xx_subset = train_r.loc[train_r['DaysAfterRenovation']==0]
         xx_subset = train_r.loc[train_r['Promo']==1]
         xx = xx_subset['Date']
         xx = xx.reshape(-1, 1)
         yy = growthfit0withpromo.predict(xx)
         train_r[(train_r['DaysAfterRenovation']==1) & (train_r['Promo'] ==0)][['YearTrend']] = yy
         
         # Fitting to after renovation: 
         #=======Without Promo =============
         store_subset = Store.loc[Store['DaysAfterRenovation']== 1 & Store['Promo']==0]
         x = store_subset['Date']
         x = x.reshape(-1, 1)
         y= store_subset['Sales']
         y = y.reshape(-1, 1)
         growthfit1nopromo = linear_model.LinearRegression()
         growthfit1nopromo = growthfit1nopromo.fit(x, y)
         print('Coefficients: \n', growthfit1nopromo.coef_)
         xx_subset = train_r.loc[train_r['DaysAfterRenovation']==1]
         xx_subset = train_r.loc[train_r['Promo']==0]
         xx = xx_subset['Date']
         xx = xx.reshape(-1, 1)
         yy = growthfit1nopromo.predict(xx)
         train_r[(train_r['DaysAfterRenovation']==1) & (train_r['Promo'] ==0)][['YearTrend']] = yy
        
         if r in test_r['Store'].unique():
            xx_test_subset = test_r.loc[test_r['Store'] == r & test_r['Promo'] == 0]
            xx_test = xx_test_subset['Date']
            yy_test = growthfit1nopromo.predict(yy_test)
            test_r[(test_r['Store']== s) & (test_r['Promo'] == 0)][['YearTrend']] = yy

         #=======With Promo =============
         store_subset = Store.loc[Store['DaysAfterRenovation']== 1 & Store['Promo']==1]
         x = store_subset['Date']
         x = x.reshape(-1, 1)
         y= store_subset['Sales']
         y = y.reshape(-1, 1)
         growthfit1withpromo = linear_model.LinearRegression()
         model = growthfit1withpromo.fit(x, y)
         print('Coefficients: \n', model_after_renovation_with_promo.coef_)
         xx_subset = train_r.loc[train_r['DaysAfterRenovation']==1]
         xx_subset = train_r.loc[train_r['Promo']==1]
         xx = xx_subset['Date']
         xx = xx.reshape(-1, 1)
         yy = growthfit1withpromo.predict(xx)
         train_r[(train_r['DaysAfterRenovation']==1) & (train_r['Promo'] == 1)][['YearTrend']] = yy
        
         if r in test_r['Store'].unique():
            xx_test_subset = test_r.loc[test_r['Store'] == r & test_r['Promo'] == 0]
            xx_test = xx_test_subset['Date']
            yy_test = growthfit1withpromo.predict(yy_test)
            test_r[(test_r['Store']== s) & (test_r['Promo'] == 0)][['YearTrend']] = yy


list_of_fits = []
list_of_fits.append(growthfitnopromo)
list_of_fits.append(growthfitwithpromo)
list_of_fits.append(growthfit0nopromo)
list_of_fits.append(growthfit0withpromo)
list_of_fits.append(growthfit1nopromo)
list_of_fits.append(growthfit1withpromo)
print (list_of_fits)

train_r['Sales2'] = train_r['Sales']
train_r['Sales'] = train_r['YearTrend']


# Monthly Trend


In [None]:
monthfit = NULL
train_r['MonthTrend'] = 0
test_r['MonthTrend'] = 0
for i in train_r['Store'].unique():
    Store =  train_r[(train_r['Store']==i)][['Store', 'DayOfWeek', 'Date', 'Sales', 
                                                                                'Customers', 'Open', 'Promo','StateHoliday', 
                                                                                'SchoolHoliday', 'StoreType', 'Assortment',
                                                                                'CompetitionDistance', 'Promo2', 'Month',
                                                                                'Year', 'Day', 'WeekOfYear','IsPromo2Month',
                                                                                'Competition', 'StoreRenovated',
                                                                                'DaysAfterRenovation', 'MonthSale']]
    y = [Store['Sales'],Store['Day'],mean]
    x = int(y)
    monthfit = linear_model.LinearRegression()
    model = monthfit.fit(x, y)
    print('Coefficients: \n', monthfit.coef_)
    xx_subset = train_r.loc[train_r['Store']==i]
    xx = xx_subset['Day']
    xx = xx.reshape(-1, 1)
    yy = monthfit.predict(xx)
    train_r[(train_r['Store']==i)][['MonthTrend ']] = yy
    if r in test_r['Store'].unique():
        xx_test_subset = test_r.loc[test_r['Store'] == r]
        xx_test = xx_test_subset['Day']
        yy_test = monthfit.predict(yy_test)
        test_r[(test_r['Store']== r)[['MonthTrend']] = yy
        
    train_r['Sales3'] = train_r['Sales']
    train_r['Sales'] = train_r['Sales'] - train_r['MonthTrend']

In [None]:
# Exploring the monthly trend:
monthfit <- NULL
train$MonthTrend <- 0
test$MonthTrend <- 0
for (i in unique(train$Store)) {
  cat("doing Store =",i,"\n")
  Store <- train[train$Store==i ,]
  y <- by(Store$Sales, Store$Day, mean)
  x <- as.integer(names(y))
  monthfit[[i]] <- lm(y~ns(x,df=5))
  plot(x,y,pch=19,xlab="DayofMonth",ylab="sale variation",main = paste0("Store = ",as.character(i)))

  xx <- train[train$Store==i,]$Day
  yy <- predict(monthfit[[i]], data.frame(x=xx))
  train[train$Store==i,]$MonthTrend <- yy

  if (i %in% unique(test$Store)) {
    xx <- test[test$Store==i,]$Day
    yy <- predict(monthfit[[i]],data.frame(x=xx))
    test[test$Store==i,]$MonthTrend <- yy
    points(test[test$Store==i,]$Day,test[test$Store==i,]$MonthTrend,col="blue",pch=19)
  }
  fname<-paste0("Store_",as.character(i),"_MonthTrend.png")
  dev.copy(png,file=fname); dev.off()
}

save(list=c("monthfit"),file="~/all_monthfits.RData")

train$Sales3 <- train$Sales
train$Sales <- train$Sales - train$MonthTrend


#save(list=c("train"),file="~/train_set_with_Month_Trend.RData")
#save(list=c("test"),file="~/test_set_with_Month_Trend.RData")


train$SaleSD <- 1
test$SaleSD <- 1
for (i in unique(train$Store)) {
  sd_sale <- sd(train[train$Store==i ,]$Sales)
  cat("doing Store i=",i,"sd=",sd_sale,"\n")
  train[train$Store==i,]$SaleSD <- sd_sale
  if (i %in% unique(test$Store)) {
    test[test$Store==i,]$SaleSD <- sd_sale
  }
}

train$Sales4 <- train$Sales
# Normalizing sale of stores by their standard deviation
train$Sales <- train$Sales / train$SaleSD

write_csv(train, "~/kaggleout/rossman/train_normalized.csv")
write_csv(test,"~/kaggleout/rossman/test_normalized.csv")


In [207]:
x = Store['Date']
x = x.reshape(-1, 1)
y= Store['Sales']
y = y.reshape(-1, 1)
x_pred = Store_test['Date']
x_pred = x_pred.reshape(-1, 1)

  
  after removing the cwd from sys.path.
  


In [208]:
regr = linear_model.LinearRegression()

# Train the model using the training sets
model = regr.fit(x, y)

In [209]:
print('Coefficients: \n', regr.coef_)

Coefficients: 
 [[-0.56555049]]


In [210]:
y_pred = regr.predict(x_pred)


In [214]:
Store =  train_r[(train_r['Store']==1) & (train_r['Month'] !=12)][['Store', 'DayOfWeek', 'Date', 'Sales', 
                                                                                'Customers', 'Open', 'Promo','StateHoliday', 
                                                                                'SchoolHoliday', 'StoreType', 'Assortment',
                                                                                'CompetitionDistance', 'Promo2', 'Month',
                                                                                'Year', 'Day', 'WeekOfYear','IsPromo2Month',
                                                                                'Competition', 'StoreRenovated',
                                                                                'DaysAfterRenovation', 'MonthSale']]
x = Store['Date']
x = x.reshape(-1, 1)
y= Store['Sales']
y = y.reshape(-1, 1)
regr = linear_model.LinearRegression()

# Train the model using the training sets
model = regr.fit(x, y)
print('Coefficients: \n', regr.coef_)
xx = 

Coefficients: 
 [[-0.56555049]]


  if __name__ == '__main__':
  # This is added back by InteractiveShellApp.init_path()


In [223]:
xx_subset = train_r.loc[train_r['Store']==1]
xx_subset = train_r.loc[train_r['Promo']==0]
xx = xx_subset['Date']
xx = xx.reshape(-1, 1)

  after removing the cwd from sys.path.


In [233]:
yy = regr.predict(xx) 
yy.shape

(431, 1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [None]:
if (i in train_r['Store'].unique()):
    

In [None]:
  if (i %in% unique(test$Store)) {
    xx <- as.integer(test[test$Store==i & test$Promo==0,]$Date)
    yy <- predict(growthfitnopromo[[i]],data.frame(x=xx))
    test[test$Store==i & test$Promo==0,]$YearTrend <- yy
  }

In [82]:
Store =  train_r[(train['Store']==1) & (train['Month'] !=12)][['Store', 'DayOfWeek', 'Date', 'Sales', 
                                                                                'Customers', 'Open', 'Promo','StateHoliday', 
                                                                                'SchoolHoliday', 'StoreType', 'Assortment',
                                                                                'CompetitionDistance', 'Promo2', 'Month',
                                                                                'Year', 'Day', 'WeekOfYear','IsPromo2Month',
                                                                                'Competition', 'StoreRenovated',
                                                                                'DaysAfterRenovation', 'MonthSale']]
if Store['StoreRenovated'].iloc[1] == 0:
    date = ((Store['Date']))
    for d in date:
        date_format = '%Y-%m-%d'
        a = datetime.datetime.strptime('1899-12-30', date_format)
        b = datetime.datetime.strptime(d, date_format)
        delta = b - a
        print (delta.days) # that's it


#    y = Store[Store['Promo']Promo==0,]Store['Sales']
    

42211
42204
42197
42190
42183
42176
42169
42162
42159
42155
42149
42148
42141
42138
42134
42127
42125
42120
42113
42106
42100
42099
42097
42092
42085
42078
42071
42064
42057
42050
42043
42036
42029
42022
42015
42008
42005
41973
41966
41959
41952
41945
41938
41931
41924
41917
41915
41910
41903
41896
41889
41882
41875
41868
41861
41854
41847
41840
41833
41826
41819
41812
41809
41805
41799
41798
41791
41788
41784
41777
41770
41763
41760
41756
41750
41749
41747
41742
41735
41728
41721
41714
41707
41700
41693
41686
41679
41672
41665
41658
41651
41644
41640
41602
41595
41588
41581
41574
41567
41560
41553
41550
41546
41539
41532
41525
41518
41511
41504
41497
41490
41483
41476
41469
41462
41455
41448
41441
41434
41427
41424
41420
41414
41413
41406
41403
41399
41395
41392
41385
41378
41371
41365
41364
41362
41357
41350
41343
41336
41329
41322
41315
41308
41301
41294
41287
41280
41275


  """Entry point for launching an IPython kernel.


In [83]:
def excel_date(date1):
    date_format = '%Y-%m-%d'
    a = datetime.datetime.strptime('1899-12-30', date_format)
    b = datetime.datetime.strptime(date1, date_format)
    delta = b - a
    return delta

In [61]:
 Store =  train_r[(train['Store']==1114) & (train['Month'] !=12)][['Store', 'DayOfWeek', 'Date', 'Sales', 
                                                                                'Customers', 'Open', 'Promo','StateHoliday', 
                                                                                'SchoolHoliday', 'StoreType', 'Assortment',
                                                                                'CompetitionDistance', 'Promo2', 'Month',
                                                                                'Year', 'Day', 'WeekOfYear','IsPromo2Month',
                                                                                'Competition', 'StoreRenovated',
                                                                                'DaysAfterRenovation', 'MonthSale']]
    

  """Entry point for launching an IPython kernel.


In [88]:
x

datetime.timedelta(41275)