## Meteo Bakery - Combine datasets
This notebook serves to combine sales data with the weather summary statistics.

### import libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

### load data

In [None]:
# read the bakery turnover workbook into a dataframe
sales_xlsx_path = '../data/neueFische_Umsaetze_Baeckerei.xlsx'
sales = pd.read_excel(sales_xlsx_path)

In [None]:
# read the pre-computed weather summary statistics
weather_csv_path = '../data/summary_stats.csv'
weather_stats = pd.read_csv(weather_csv_path)

### Feature Engineering - Sales

In [None]:
# get basic information on datatypes and missings
# (prints each column's name, non-null count and dtype for the raw sales table)
sales.info()

In [None]:
# derive a human-readable location label from the branch number
# Branch 1: U-Bahn (metro)
# Branch 2: Innenstadt (city center)
# Branch 3: Bahnhof (train station)
branch_labels = {1: 'Metro', 2: 'Center'}

# every branch value that is neither 1 nor 2 falls back to 'Train_Station'
sales['Location'] = sales.Branch.map(branch_labels).fillna('Train_Station')
sales.head()

There are three missing values in the sales data (column 'SoldTurnover').

In [None]:
# extract time features from Date column
sales['year'] = sales.Date.dt.year
sales['month'] = sales.Date.dt.month
# Series.dt.week was deprecated in pandas 1.1 and removed in pandas 2.0;
# dt.isocalendar().week is the documented replacement and yields the ISO-8601
# week number (note: the resulting column has the nullable UInt32 dtype)
sales['week'] = sales.Date.dt.isocalendar().week
sales['day_of_month'] = sales.Date.dt.day
# dayofweek counts from Monday=0 to Sunday=6
sales['day_of_week'] = sales.Date.dt.dayofweek

In [None]:
# lower-case the date column name so it matches the 'date' key used for the merge
date_column_renames = {'Date': 'date'}
sales = sales.rename(columns=date_column_renames)
sales.head()

### Merge dataframes

In [None]:
# inspect column names, non-null counts and dtypes of the weather summary table
weather_stats.info()

In [None]:
# convert the 'date' column of the weather table into datetime values
weather_stats = weather_stats.assign(
    date=lambda frame: pd.to_datetime(frame['date'])
)

In [None]:
# left-join the weather statistics onto the sales rows via the shared 'date' key,
# so every sales row is kept even when no weather record matches its date
df_joined = pd.merge(sales, weather_stats, how='left', on='date')

In [None]:
# preview the first 20 rows of the merged sales/weather frame
df_joined.head(20)

In [None]:
# persist the merged sales/weather table as a csv file
# (the row index is written as the first, unnamed column)
combined_csv_path = '../data/data_combined.csv'
df_joined.to_csv(combined_csv_path)