# Data Model for predicting the sales of a given item for a given week - Using linear regression

## Data
1. train_data.csv - A dataset with the daily sales of each item
2. validation_data.csv - A dataset of the weekly sales of a selected number of items
3. test_data.csv - A dataset to be completed with the predicted sales values

### Step 1: Organizing the data
Create a dataset of the weekly sales (named 'weekly_data.csv') of the items from the train_data.csv file

In [None]:
# Required dependencies
from csv import DictReader
from datetime import datetime as dt
from datetime import timedelta as td

In [23]:
# A function to filter out the items in each week
def filterItems(train_dataset_file:str, start_date:str, end_date:str) -> dict:
    """Aggregate daily sales rows into consecutive 7-day weekly totals per item.

    The CSV at ``train_dataset_file`` is read with ``csv.DictReader`` and must
    provide the columns ``DateID`` (``%m/%d/%Y``), ``ItemCode``,
    ``CategoryCode`` and ``DailySales`` (integer text).

    Args:
        train_dataset_file: Path to the CSV file holding the daily sales.
        start_date: First day of week 1, formatted as ``%m/%d/%Y``.
        end_date: End of the period, formatted as ``%m/%d/%Y``. Only whole
            weeks that fit between ``start_date`` and ``end_date`` are
            created; a trailing partial week is dropped.

    Returns:
        A dict keyed ``'w1'``, ``'w2'``, ... in chronological order. Each
        value is ``{'start': datetime, 'items': {...}}`` where ``items`` maps
        an item code to ``{'CategoryCode', 'ItemCode', 'WeeklySales'}``, with
        ``WeeklySales`` being the sum of ``DailySales`` over the rows dated
        within ``[start, start + 7 days)``.

    Raises:
        ValueError: If a date or a ``DailySales`` value cannot be parsed.
        KeyError: If a required column is missing from the CSV.
    """
    date_format = "%m/%d/%Y"
    first_day = dt.strptime(start_date, date_format)
    last_day = dt.strptime(end_date, date_format)
    week = td(days=7)

    # Number of complete weeks in the period (non-positive -> no weeks at all).
    num_weeks = (last_day - first_day).days // 7

    weeks = {
        f'w{j+1}': {'start': first_day + j * week, 'items': {}}
        for j in range(num_weeks)
    }

    # newline='' is what the csv module expects so that embedded newlines in
    # quoted fields are handled correctly.
    with open(train_dataset_file, newline='') as f:
        for row in DictReader(f):
            date = dt.strptime(row['DateID'], date_format)

            # Each date belongs to exactly one week; compute its index
            # directly instead of scanning every week for every row.
            week_index = (date - first_day).days // 7
            if not 0 <= week_index < num_weeks:
                continue  # outside the requested period

            items = weeks[f'w{week_index+1}']['items']
            item_code = row['ItemCode']
            record = items.get(item_code)
            if record is None:
                record = items[item_code] = {
                    'CategoryCode': row['CategoryCode'],
                    'ItemCode': item_code,
                    'WeeklySales': 0,
                }
            # Count every row, including the first one seen for the item.
            record['WeeklySales'] += int(row['DailySales'])

    return weeks

# Bucket the training data into weeks and report how many weeks were produced
weeks = filterItems(
    train_dataset_file='./train_data.csv',
    start_date='10/01/2021',
    end_date='02/13/2022',
)
print(len(weeks))


{'w{j}': {'start': datetime.datetime(2022, 2, 4, 0, 0), 'items': {'865933': {'CategoryCode': 'category_2', 'ItemCode': '865933', 'WeeklySales': 24}, '1054978': {'CategoryCode': 'category_1', 'ItemCode': '1054978', 'WeeklySales': 66}, '138742': {'CategoryCode': 'category_2', 'ItemCode': '138742', 'WeeklySales': 19}, '1044691': {'CategoryCode': 'category_1', 'ItemCode': '1044691', 'WeeklySales': 37}, '64978': {'CategoryCode': 'category_2', 'ItemCode': '64978', 'WeeklySales': 6}, '48940': {'CategoryCode': 'category_1', 'ItemCode': '48940', 'WeeklySales': 21}, '1064473': {'CategoryCode': 'category_2', 'ItemCode': '1064473', 'WeeklySales': 29}, '839374': {'CategoryCode': 'category_1', 'ItemCode': '839374', 'WeeklySales': 80}, '1003192': {'CategoryCode': 'category_4', 'ItemCode': '1003192', 'WeeklySales': 80}, '913561': {'CategoryCode': 'category_2', 'ItemCode': '913561', 'WeeklySales': 16}, '1006090': {'CategoryCode': 'category_2', 'ItemCode': '1006090', 'WeeklySales': 18}, '130993': {'Cate