# 3. Assigning weights

Once we have identified meaningful patterns in the data, we can use them to assign weights to the relevant value counts to inform the predictions. For instance, we can use the amount of times a household bought groceries on a Monday as a weight that influences the probability that the algorithm will predict Monday as a potential grocery day (e.g., if they bought groceires on Monday one time out of eight, it is unlikely that the algorithm predicted that the household would buy groceries on Monday again).

### Import libraries 

In [2]:
# %matplotlib notebook
%matplotlib inline
#Pandas is a software library written for the Python programming language for data manipulation and analysis.
import pandas as pd
#NumPy is a library for the Python programming language, adding support for large, multi-dimensional arrays and matrices, along with a large collection of high-level mathematical functions to operate on these arrays
import numpy as np
# Matplotlib is a plotting library for python and pyplot gives us a MatLab like plotting framework. We will use this in our plotter function to plot data.
import matplotlib.pyplot as plt
#Seaborn is a Python data visualization library based on matplotlib. It provides a high-level interface for drawing attractive and informative statistical graphics
import seaborn as sns
import dataframe_image as dfi
from datetime import time
import matplotlib.dates as mdates
from matplotlib.ticker import StrMethodFormatter
from matplotlib.pyplot import figure
class bcolors:
    WARNING = '\033[91m'
    BOLD = '\033[1m'
    
SEED = 30
np.random.seed(SEED)

## 3.1. Days of the week


Outcome (example):
1. input: Monday,Tuesday...
2. output: 0 times, 1 time, 2 times...

### Load and view data 

In [None]:
  # data for dow
# df_period1 = pd.read_csv (r"C:\Users\20204113\OneDrive - TU Eindhoven\2_Research\1_Groceries\DATA\9th week - narrative (3rd attempt)\HH2\df\df_HH2_period1.csv")
# df_period2 = pd.read_csv (r"C:\Users\20204113\OneDrive - TU Eindhoven\2_Research\1_Groceries\DATA\9th week - narrative (3rd attempt)\HH2\df\df_HH2_period2.csv")
from DAYS_HH2weights import period1, period2

  #Function to define the limits for total visits, days, same store and type per week
from AlgorithmCOUNTS_HH2 import CountTotalVisits, CountTotalDays, CountStoreName, CountTimePerDay, CountStoreType, CountTotalPerday, CountVisitsPerDay, CountTimingPerDay

In [None]:
df_period1 = pd.concat([period1(), period2()])
df_period1

day,Monday,Tuesday,Wednesday,Thursday,Friday,Saturday,Sunday,sum,ndays,med
0,0.0,2.0,1.0,0.0,1.0,0.0,0.0,4.0,3,1.0
1,0.0,0.0,2.0,2.0,0.0,0.0,3.0,7.0,3,2.0
2,0.0,0.0,0.0,1.0,1.0,1.0,2.0,5.0,4,1.0
3,0.0,0.0,1.0,0.0,0.0,1.0,3.0,5.0,3,1.0
4,2.0,2.0,0.0,0.0,0.0,3.0,0.0,7.0,3,2.0
5,0.0,1.0,0.0,0.0,0.0,1.0,1.0,3.0,3,1.0
6,1.0,0.0,0.0,1.0,0.0,1.0,0.0,3.0,3,1.0
7,2.0,0.0,1.0,1.0,1.0,0.0,0.0,5.0,4,1.0


In [None]:
# Assign weights based on how the numbers per day are represented
df_Mondays1 = pd.DataFrame(df_period1['Monday'])
df_Mondays1['weights'] = df_Mondays1.groupby(['Monday'])['Monday'].transform('count')
df_Mondays1 = df_Mondays1.drop_duplicates()

df_Tuesdays1 = pd.DataFrame(df_period1['Tuesday'])
df_Tuesdays1['weights'] = df_Tuesdays1.groupby(['Tuesday'])['Tuesday'].transform('count')
df_Tuesdays1 = df_Tuesdays1.drop_duplicates()

df_Wednesdays1 = pd.DataFrame(df_period1['Wednesday'])
df_Wednesdays1['weights'] = df_Wednesdays1.groupby(['Wednesday'])['Wednesday'].transform('count')
df_Wednesdays1 = df_Wednesdays1.drop_duplicates()

df_Thursdays1 = pd.DataFrame(df_period1['Thursday'])
df_Thursdays1['weights'] = df_Thursdays1.groupby(['Thursday'])['Thursday'].transform('count')
df_Thursdays1 = df_Thursdays1.drop_duplicates()

df_Fridays1 = pd.DataFrame(df_period1['Friday'])
df_Fridays1['weights'] = df_Fridays1.groupby(['Friday'])['Friday'].transform('count')
df_Fridays1 = df_Fridays1.drop_duplicates()

df_Saturdays1 = pd.DataFrame(df_period1['Saturday'])
df_Saturdays1['weights'] = df_Saturdays1.groupby(['Saturday'])['Saturday'].transform('count')
df_Saturdays1 = df_Saturdays1.drop_duplicates()

df_Sundays1 = pd.DataFrame(df_period1['Sunday'])
df_Sundays1['weights'] = df_Sundays1.groupby(['Sunday'])['Sunday'].transform('count')
df_Sundays1 = df_Sundays1.drop_duplicates()

In [None]:
df_period2 = pd.concat([period1(), period2()])

In [None]:
# Assign weights based on how the numbers per day are represented
df_Mondays2 = pd.DataFrame(df_period2['Monday'])
df_Mondays2['weights'] = df_Mondays2.groupby(['Monday'])['Monday'].transform('count')
df_Mondays2 = df_Mondays2.drop_duplicates()

df_Tuesdays2 = pd.DataFrame(df_period2['Tuesday'])
df_Tuesdays2['weights'] = df_Tuesdays2.groupby(['Tuesday'])['Tuesday'].transform('count')
df_Tuesdays2 = df_Tuesdays2.drop_duplicates()

df_Wednesdays2 = pd.DataFrame(df_period2['Wednesday'])
df_Wednesdays2['weights'] = df_Wednesdays2.groupby(['Wednesday'])['Wednesday'].transform('count')
df_Wednesdays2 = df_Wednesdays2.drop_duplicates()

df_Thursdays2 = pd.DataFrame(df_period2['Thursday'])
df_Thursdays2['weights'] = df_Thursdays2.groupby(['Thursday'])['Thursday'].transform('count')
df_Thursdays2 = df_Thursdays2.drop_duplicates()

df_Fridays2 = pd.DataFrame(df_period2['Friday'])
df_Fridays2['weights'] = df_Fridays2.groupby(['Friday'])['Friday'].transform('count')
df_Fridays2 = df_Fridays2.drop_duplicates()

df_Saturdays2 = pd.DataFrame(df_period2['Saturday'])
df_Saturdays2['weights'] = df_Saturdays2.groupby(['Saturday'])['Saturday'].transform('count')
df_Saturdays2 = df_Saturdays2.drop_duplicates()

df_Sundays2 = pd.DataFrame(df_period2['Sunday'])
df_Sundays2['weights'] = df_Sundays2.groupby(['Sunday'])['Sunday'].transform('count')
df_Sundays2 = df_Sundays2.drop_duplicates()

In [None]:
# df_Thursdays1.sort_values(by=['Thursday'])
# df_Fridays1.sort_values(by=['Friday'])
# df_Saturdays1.sort_values(by=['Saturday'])
df_Sundays1.sort_values(by=['Sunday'])

Unnamed: 0,Sunday,weights
0,0.0,4
5,1.0,1
2,2.0,1
1,3.0,2


## 3.2. Store types/names

In [None]:
# Select only one day
Monday = 'Monday'
Tuesday = 'Tuesday'
Wednesday = 'Wednesday'
Thursday = 'Thursday'
Friday = 'Friday'
Saturday = 'Saturday'
Sunday = 'Sunday'

# select only these data for the df
df_Monday = df[df['day'] == 'Monday']
df_Tuesday = df[df['day'] == 'Tuesday']
df_Wednesday = df[df['day'] == 'Wednesday']
df_Thursday = df[df['day'] == 'Thursday']
df_Friday = df[df['day'] == 'Friday']
df_Saturday = df[df['day'] == 'Saturday']
df_Sunday = df[df['day'] == 'Sunday']

In [None]:
df_Mo = df_Monday.groupby(['store_name', 'store_type'])['order_ID'].nunique()
df_Mo = pd.DataFrame (df_Mo)
df_Mo = df_Mo.reset_index()
df_Mo = df_Mo.loc[~(df_Mo==0).any(axis=1)]
df_Mo = df_Mo.rename(columns={"order_ID": "weight"})

df_Tu = df_Tuesday.groupby(['store_name', 'store_type'])['order_ID'].nunique()
df_Tu = pd.DataFrame (df_Tu)
df_Tu = df_Tu.reset_index()
df_Tu = df_Tu.loc[~(df_Tu==0).any(axis=1)]
df_Tu = df_Tu.rename(columns={"order_ID": "weight"})

df_We = df_Wednesday.groupby(['store_name', 'store_type'])['order_ID'].nunique()
df_We = pd.DataFrame (df_We)
df_We = df_We.reset_index()
df_We = df_We.loc[~(df_We==0).any(axis=1)]
df_We = df_We.rename(columns={"order_ID": "weight"})

df_Th = df_Thursday.groupby(['store_name', 'store_type'])['order_ID'].nunique()
df_Th = pd.DataFrame (df_Th)
df_Th = df_Th.reset_index()
df_Th = df_Th.loc[~(df_Th==0).any(axis=1)]
df_Th = df_Th.rename(columns={"order_ID": "weight"})

df_Fr = df_Friday.groupby(['store_name', 'store_type'])['order_ID'].nunique()
df_Fr = pd.DataFrame (df_Fr)
df_Fr = df_Fr.reset_index()
df_Fr = df_Fr.loc[~(df_Fr==0).any(axis=1)]
df_Fr = df_Fr.rename(columns={"order_ID": "weight"})

df_Sa = df_Saturday.groupby(['store_name', 'store_type'])['order_ID'].nunique()
df_Sa = pd.DataFrame (df_Sa)
df_Sa = df_Sa.reset_index()
df_Sa = df_Sa.loc[~(df_Sa==0).any(axis=1)]
df_Sa = df_Sa.rename(columns={"order_ID": "weight"})

df_Su = df_Sunday.groupby(['store_name', 'store_type'])['order_ID'].nunique()
df_Su = pd.DataFrame (df_Su)
df_Su = df_Su.reset_index()
df_Su = df_Su.loc[~(df_Su==0).any(axis=1)]
df_Su = df_Su.rename(columns={"order_ID": "weight"})

In [None]:
df_Mo.to_csv (r"C:\Users\20204113\OneDrive - TU Eindhoven\2_Research\1_Groceries\DATA\9th week - narrative (3rd attempt)\HH2\df\df_HH2_Mo.csv", index = None, header=True)
df_Tu.to_csv (r"C:\Users\20204113\OneDrive - TU Eindhoven\2_Research\1_Groceries\DATA\9th week - narrative (3rd attempt)\HH2\df\df_HH2_Tu.csv", index = None, header=True)
df_We.to_csv (r"C:\Users\20204113\OneDrive - TU Eindhoven\2_Research\1_Groceries\DATA\9th week - narrative (3rd attempt)\HH2\df\df_HH2_We.csv", index = None, header=True)
df_Th.to_csv (r"C:\Users\20204113\OneDrive - TU Eindhoven\2_Research\1_Groceries\DATA\9th week - narrative (3rd attempt)\HH2\df\df_HH2_Th.csv", index = None, header=True)
df_Fr.to_csv (r"C:\Users\20204113\OneDrive - TU Eindhoven\2_Research\1_Groceries\DATA\9th week - narrative (3rd attempt)\HH2\df\df_HH2_Fr.csv", index = None, header=True)
df_Sa.to_csv (r"C:\Users\20204113\OneDrive - TU Eindhoven\2_Research\1_Groceries\DATA\9th week - narrative (3rd attempt)\HH2\df\df_HH2_Sa.csv", index = None, header=True)
df_Su.to_csv (r"C:\Users\20204113\OneDrive - TU Eindhoven\2_Research\1_Groceries\DATA\9th week - narrative (3rd attempt)\HH2\df\df_HH2_Su.csv", index = None, header=True)

## 3.3. Time

In [None]:
# aggregate afternoon and evening
df_orders['time'] = df_orders['time'].replace(['afternoon', 'evening'], 'afternoon')

# to select only one
store1 = 'Albert Heijn'
store2 = 'Sys'
store3 = 'Okay'
store4 = 'Delhaize'
store5 = 'Versavel Poelman'
store6 = 'Kruidvat'
store7 = 'Brabo'
store8 = 'Ikea'
store9 = 'Carrefour'

# select only these data for the df
df_store1 = df_orders[df_orders['store_name'] == store1]
df_store2 = df_orders[df_orders['store_name'] == store2]
df_store3 = df_orders[df_orders['store_name'] == store3]
df_store4 = df_orders[df_orders['store_name'] == store4]
df_store5 = df_orders[df_orders['store_name'] == store5]
df_store6 = df_orders[df_orders['store_name'] == store6]
df_store7 = df_orders[df_orders['store_name'] == store7]
df_store8 = df_orders[df_orders['store_name'] == store8]
df_store9 = df_orders[df_orders['store_name'] == store9]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_orders['time'] = df_orders['time'].replace(['afternoon', 'evening'], 'afternoon')


In [None]:
df_AH = df_store1.groupby(['time'])['order_ID'].nunique()
df_AH = pd.DataFrame (df_AH)
df_AH = df_AH.reset_index()
df_AH = df_AH.loc[~(df_AH==0).any(axis=1)]
df_AH = df_AH.rename(columns={"order_ID": "weight"})

df_SYS = df_store2.groupby(['time'])['order_ID'].nunique()
df_SYS= pd.DataFrame (df_SYS)
df_SYS = df_SYS.reset_index()
df_SYS = df_SYS.loc[~(df_SYS==0).any(axis=1)]
df_SYS = df_SYS.rename(columns={"order_ID": "weight"})

df_OKAY = df_store3.groupby(['time'])['order_ID'].nunique()
df_OKAY = pd.DataFrame (df_OKAY)
df_OKAY = df_OKAY.reset_index()
df_OKAY = df_OKAY.loc[~(df_OKAY==0).any(axis=1)]
df_OKAY = df_OKAY.rename(columns={"order_ID": "weight"})

df_DELHAIZE = df_store4.groupby(['time'])['order_ID'].nunique()
df_DELHAIZE = pd.DataFrame (df_DELHAIZE)
df_DELHAIZE = df_DELHAIZE.reset_index()
df_DELHAIZE = df_DELHAIZE.loc[~(df_DELHAIZE==0).any(axis=1)]
df_DELHAIZE = df_DELHAIZE.rename(columns={"order_ID": "weight"})

df_VERSAVEL = df_store5.groupby(['time'])['order_ID'].nunique()
df_VERSAVEL = pd.DataFrame (df_VERSAVEL)
df_VERSAVEL = df_VERSAVEL.reset_index()
df_VERSAVEL = df_VERSAVEL.loc[~(df_VERSAVEL==0).any(axis=1)]
df_VERSAVEL = df_VERSAVEL.rename(columns={"order_ID": "weight"})

df_KRUIDVAT = df_store6.groupby(['time'])['order_ID'].nunique()
df_KRUIDVAT = pd.DataFrame (df_KRUIDVAT)
df_KRUIDVAT = df_KRUIDVAT.reset_index()
df_KRUIDVAT = df_KRUIDVAT.loc[~(df_KRUIDVAT==0).any(axis=1)]
df_KRUIDVAT = df_KRUIDVAT.rename(columns={"order_ID": "weight"})

df_BRABO = df_store7.groupby(['time'])['order_ID'].nunique()
df_BRABO = pd.DataFrame (df_BRABO)
df_BRABO = df_BRABO.reset_index()
df_BRABO = df_BRABO.loc[~(df_BRABO==0).any(axis=1)]
df_BRABO = df_BRABO.rename(columns={"order_ID": "weight"})

df_IKEA = df_store8.groupby(['time'])['order_ID'].nunique()
df_IKEA = pd.DataFrame (df_IKEA)
df_IKEA = df_IKEA.reset_index()
df_IKEA = df_IKEA.loc[~(df_IKEA==0).any(axis=1)]
df_IKEA = df_IKEA.rename(columns={"order_ID": "weight"})

df_CARREFOUR = df_store9.groupby(['time'])['order_ID'].nunique()
df_CARREFOUR = pd.DataFrame (df_CARREFOUR)
df_CARREFOUR = df_CARREFOUR.reset_index()
df_CARREFOUR = df_CARREFOUR.loc[~(df_CARREFOUR==0).any(axis=1)]
df_CARREFOUR = df_CARREFOUR.rename(columns={"order_ID": "weight"})

In [None]:
df_AH

Unnamed: 0,time,weight
0,afternoon,1
1,morning,3
2,noon,7


In [None]:
df_AH.to_csv (r"C:\Users\20204113\OneDrive - TU Eindhoven\2_Research\1_Groceries\DATA\9th week - narrative (3rd attempt)\HH2\df\df_HH2_AHTime.csv", index = None, header=True)
df_SYS.to_csv (r"C:\Users\20204113\OneDrive - TU Eindhoven\2_Research\1_Groceries\DATA\9th week - narrative (3rd attempt)\HH2\df\df_HH2_SYTime.csv", index = None, header=True)
df_OKAY.to_csv (r"C:\Users\20204113\OneDrive - TU Eindhoven\2_Research\1_Groceries\DATA\9th week - narrative (3rd attempt)\HH2\df\df_HH2_OKTime.csv", index = None, header=True)
df_DELHAIZE.to_csv (r"C:\Users\20204113\OneDrive - TU Eindhoven\2_Research\1_Groceries\DATA\9th week - narrative (3rd attempt)\HH2\df\df_HH2_DETime.csv", index = None, header=True)
df_VERSAVEL.to_csv (r"C:\Users\20204113\OneDrive - TU Eindhoven\2_Research\1_Groceries\DATA\9th week - narrative (3rd attempt)\HH2\df\df_HH2_VETime.csv", index = None, header=True)
df_KRUIDVAT.to_csv (r"C:\Users\20204113\OneDrive - TU Eindhoven\2_Research\1_Groceries\DATA\9th week - narrative (3rd attempt)\HH2\df\df_HH2_KRTime.csv", index = None, header=True)
df_BRABO.to_csv (r"C:\Users\20204113\OneDrive - TU Eindhoven\2_Research\1_Groceries\DATA\9th week - narrative (3rd attempt)\HH2\df\df_HH2_BRTime.csv", index = None, header=True)
df_IKEA.to_csv (r"C:\Users\20204113\OneDrive - TU Eindhoven\2_Research\1_Groceries\DATA\9th week - narrative (3rd attempt)\HH2\df\df_HH2_IKTime.csv", index = None, header=True)
df_CARREFOUR.to_csv (r"C:\Users\20204113\OneDrive - TU Eindhoven\2_Research\1_Groceries\DATA\9th week - narrative (3rd attempt)\HH2\df\df_HH2_CATime.csv", index = None, header=True)

## 3.4. Items

In [3]:
df = pd.read_csv (r"/workspaces/Plenty-in-the-Pantry/database/Groceries_onehousehold1.csv")

df['weights_itemtype'] = df.groupby('item_type')['item_type'].transform('count')

df.to_csv(r"/workspaces/Plenty-in-the-Pantry/database/Groceries_onehousehold1.csv", index=False)