# 4.10 Coding Etiquette & Excel Reporting

## This script contains the following points:

### 1. Import Libraries

### 2. Import Data

### 3. Optimise Data Types

### 4. Create Customer Profile - Family 

### 5. Export Data

### 6. Visualise Profile

# 1. Import Libraries

In [None]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
import scipy

# 2. Import Data

In [None]:
# Creating path string to the project root folder; the data and visualisation
# paths used in this script are built relative to it with os.path.join
path = r'C:\Users\Admin\Desktop\Instacart Basket Analysis'

In [None]:
# Load the prepared active-customer data (with diet profiles) from csv into a df
diet_profiles_csv = os.path.join(path, '02 Data', 'Prepared Data', 'active_customer_diet_profiles.csv')
active_customer = pd.read_csv(diet_profiles_csv)

In [None]:
# Check df: number of rows and columns
active_customer.shape

In [None]:
# Check df: column names, data types and memory usage
active_customer.info()

In [None]:
# Drop the columns that are not needed for this analysis:
# 'Unnamed: 0', 'Unnamed: 0.1', 'Purchasing Profile' and '_merge'
unneeded_columns = ['Unnamed: 0', 'Unnamed: 0.1', 'Purchasing Profile', '_merge']
active_customer = active_customer.drop(columns=unneeded_columns)

# 3. Optimise Data Types

In [None]:
# Shrink the numeric columns to the smallest dtype that can hold their values.
#
# pd.to_numeric(..., downcast = ...) inspects the actual value range before
# choosing a dtype, so it cannot silently wrap around the way a hard-coded
# astype('int16') / astype('int8') does when a value exceeds the target range
# (int16 tops out at 32,767 and int8 at 127 - too small for large ID columns).
#
# 'integer'  -> smallest signed integer dtype that fits (int8 at minimum)
# 'unsigned' -> smallest unsigned integer dtype that fits (uint8 at minimum)
# 'float'    -> float32 where the values survive the conversion; float16 is
#               avoided because it keeps only ~3 significant digits, which
#               distorts prices and averages
downcast_by_column = {
    'order_id': 'integer',
    'user_id': 'integer',
    'order_number': 'integer',
    'orders_day_of_week': 'integer',
    'hour_order_placed': 'integer',
    'product_id': 'unsigned',
    'add_to_cart_order': 'float',
    'reordered': 'integer',
    'aisle_id': 'integer',
    'department_id': 'integer',
    'prices': 'float',
    'max_order': 'unsigned',
    'mean_product_price': 'float',
    'median_days_since_prior_order': 'float',
}

for column, downcast in downcast_by_column.items():
    active_customer[column] = pd.to_numeric(active_customer[column], downcast = downcast)

# 4. Create Customer Profile - Family 

In [None]:
# Check values in 'customer_profile' column (dropna = False also counts missing values)
active_customer['customer_profile'].value_counts(dropna = False)

In [None]:
# Create customer profile flag based on family: every 'customer_profile' that
# is "with Kids" is flagged 'Family', every profile "without Kids" is flagged
# 'Individual'. The lookup is applied with map() in a single pass over the
# column; a profile that is not listed in the dictionary ends up as NaN in
# 'family_profile'.
family_profile_lookup = {
    'Middle Aged Married with Kids': 'Family',
    'Older Married with Kids': 'Family',
    'Young Married with Kids': 'Family',
    'Young Individual with Kids': 'Family',
    'Middle Aged Individual without Kids': 'Individual',
    'Older Individual without Kids': 'Individual',
    'Young Individual without Kids': 'Individual',
}
active_customer['family_profile'] = active_customer['customer_profile'].map(family_profile_lookup)

In [None]:
# Check values in 'family_profile' column; dropna = False also shows how many
# rows did not receive a flag (NaN)
active_customer['family_profile'].value_counts(dropna = False)

# 5. Export Data

In [None]:
active_customer.to_csv(os.path.join(path,'02 Data','Prepared Data','active_customer_full_profiles.csv'))

# 6. Visualise Profile

In [None]:
# Create bar chart of 'family_profile' (number of rows per family flag)
family_profile_counts = active_customer['family_profile'].value_counts()
bar_family_profile = family_profile_counts.plot.bar()

In [None]:
# Export the family profile bar chart as image file
bar_family_profile_png = os.path.join(path, '04 Analysis', 'Visualisations', 'bar_family_profile.png')
bar_family_profile.figure.savefig(bar_family_profile_png)

In [None]:
# Create crosstab between 'hour_order_placed' (rows) and 'family_profile' (columns)
crosstab_family_hour = pd.crosstab(
    index=active_customer['hour_order_placed'],
    columns=active_customer['family_profile'],
    dropna=False,
)

In [None]:
# Create stacked bar chart: one bar per hour of day, stacked by family profile
stacked_family_hour = crosstab_family_hour.plot(kind='bar', stacked=True)
plt.legend(title='Family Status')

In [None]:
# Export the hour-of-day stacked bar chart as image file
stacked_family_hour_png = os.path.join(path, '04 Analysis', 'Visualisations', 'stacked_family_hour.png')
stacked_family_hour.figure.savefig(stacked_family_hour_png)

In [None]:
# Create crosstab between 'orders_day_of_week' (rows) and 'family_profile' (columns)
crosstab_family_day = pd.crosstab(
    index=active_customer['orders_day_of_week'],
    columns=active_customer['family_profile'],
    dropna=False,
)

In [None]:
# Create stacked bar chart: one bar per day of week, stacked by family profile
stacked_family_day = crosstab_family_day.plot(kind='bar', stacked=True)
plt.legend(title='Family Status')

# Label x axis: replace the tick labels at positions 0-6 with day names
values = ['Sat', 'Sun', 'Mon', 'Tue', 'Wed', 'Thur', 'Fri']
x = list(range(len(values)))
plt.xticks(x, values)

In [None]:
# Export the day-of-week stacked bar chart as image file
stacked_family_day_png = os.path.join(path, '04 Analysis', 'Visualisations', 'stacked_family_day.png')
stacked_family_day.figure.savefig(stacked_family_day_png)