# Import modules

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

# Hide warnings

In [None]:
%%javascript
// Adds a clickable link to this cell's output that shows or hides stderr
// output areas (the link text refers to these as "warnings") by rewriting the
// contents of a dedicated <style> element.
// The function is invoked with on=true, so the hiding rule is installed as
// soon as the cell runs.
// NOTE(review): relies on jQuery (`$`) and an `element` variable being provided
// by the notebook front end -- confirm the target front end supplies both.
(function(on) {
// Link placed in the cell output; its placeholder text is overwritten by the
// click handler, which is triggered once below.
const e=$( "<a>Setup failed</a>" );
// id of the <style> element that holds the hide/show rule.
const ns="js_jupyter_suppress_warnings";
// Reuse the <style> element if it already exists; otherwise create it in <head>.
var cssrules=$("#"+ns);
if(!cssrules.length) cssrules = $("<style id='"+ns+"' type='text/css'>div.output_stderr { } </style>").appendTo("head");
e.click(function() {
    var s='Showing';  
    // Start from an empty rule set, then add the hiding rule only when `on` is true.
    cssrules.empty()
    if(on) {
        s='Hiding';
        cssrules.append("div.output_stderr, div[data-mime-type*='.stderr'] { display:none; }");
    }
    e.text(s+' warnings (click to toggle)');
    // Flip the state so the next click does the opposite.
    on=!on;
}).click(); // trigger the handler once immediately to apply the initial state
$(element).append(e);
})(true);

# Read CSV files

In [None]:
# Directory that holds the input CSV files; change this one constant to
# point the notebook at a different location.
DATA_DIR = "./input"


def _read_input_csv(name):
    """Load ``<DATA_DIR>/<name>.csv`` into a DataFrame."""
    return pd.read_csv("{}/{}.csv".format(DATA_DIR, name))


# The original paths mixed "./input//" and ".//input//" prefixes; building
# them from one constant keeps them consistent and free of doubled separators.
orders_df = _read_input_csv("orders")
aisles_df = _read_input_csv("aisles")
department_df = _read_input_csv("departments")
order_products__prior_df = _read_input_csv("order_products__prior")
order_products__train_df = _read_input_csv("order_products__train")
products_df = _read_input_csv("products")
sample_submission_df = _read_input_csv("sample_submission")

In [None]:
# Preview the first rows of the orders table.
orders_df.head()

In [None]:
# (rows, columns) of the orders table.
orders_df.shape

In [None]:
# Number of missing values in each column of the orders table.
isna = orders_df.isnull().sum()
isna

In [None]:
# Non-null value count per column, restricted to rows with order_number == 1.
orders_df[orders_df["order_number"] == 1].count()

In [None]:
# How many orders fall into each value of `eval_set`.
order_value_set = orders_df.eval_set.value_counts()

# seaborn >= 0.12 raises TypeError when x/y are passed positionally, so they
# are given by keyword; an explicit Axes keeps the labels tied to this plot.
fig, ax = plt.subplots()
sns.barplot(x=order_value_set.index, y=order_value_set.values, ax=ax)
ax.set_ylabel('Number of occurrences')
plt.show()
# Printed after the figure so the exact counts appear below the chart.
print(order_value_set)

In [None]:
# Number of orders placed in each hour of the day.
order_time = orders_df.order_hour_of_day.value_counts()

# seaborn >= 0.12 raises TypeError when x/y are passed positionally, so they
# are given by keyword.
fig, ax = plt.subplots(figsize=(12,5))
sns.barplot(x=order_time.index, y=order_time.values, ax=ax)
ax.set_xlabel('hour')
ax.set_ylabel('Count')
ax.set_title('Frequency of order in hour of day')
plt.show()

In [None]:
# Distribution of `days_since_prior_order` across all orders.
# value_counts() skips NaN by default, so rows without a value are not plotted.
days_between_order = orders_df.days_since_prior_order.value_counts()

# seaborn >= 0.12 raises TypeError when x/y are passed positionally, so they
# are given by keyword.
fig, ax = plt.subplots(figsize=(15,6))
sns.barplot(x=days_between_order.index, y=days_between_order.values, ax=ax)
ax.set_xlabel('day')
ax.set_ylabel('Count')
# Title typo fixed ("betweet" -> "between").
ax.set_title('Days between prior order and last order')
plt.show()

In [None]:
# Number of orders for every (day of week, hour of day) pair, in long form.
dow_hour_counts = (
    orders_df.groupby(["order_dow", "order_hour_of_day"])["order_number"]
    .count()
    .reset_index()
)
# Reshape to a day-of-week x hour-of-day grid for the heatmap.
# pandas >= 2.0 makes pivot() arguments keyword-only, so name them explicitly.
grouped_df = dow_hour_counts.pivot(
    index='order_dow', columns='order_hour_of_day', values='order_number'
)

fig, ax = plt.subplots(figsize=(16,6))
sns.heatmap(grouped_df, ax=ax)
ax.set_title("Frequency of Day of week Vs Hour of day")
plt.show()

In [None]:
# Highest `order_number` seen for each user, then how many users share each
# maximum. (Presumably the maximum equals the user's total number of orders;
# this assumes order_number is a running counter -- verify against the data.)
# The string "max" replaces np.max: pandas >= 2.1 deprecates passing NumPy
# callables to groupby aggregation.
max_order_per_user = orders_df.groupby(by='user_id')['order_number'].max()
order__number = max_order_per_user.value_counts()

# seaborn >= 0.12 raises TypeError when x/y are passed positionally, so they
# are given by keyword.
fig, ax = plt.subplots(figsize=(20,8))
sns.barplot(x=order__number.index, y=order__number.values, ax=ax)
ax.set_xlabel('Number of orders')
ax.set_ylabel('Number of Occurrences')
plt.show()

In [None]:
# Preview the first rows of the aisles table.
aisles_df.head()

In [None]:
# (rows, columns) of the aisles table.
aisles_df.shape

In [None]:
# Preview the first rows of the departments table.
department_df.head()

In [None]:
# (rows, columns) of the departments table.
department_df.shape

In [None]:
# Preview the first 10 rows of the prior order-products table.
order_products__prior_df.head(10)

In [None]:
# Number of missing values in each column of the prior order-products table.
isna = order_products__prior_df.isnull().sum()
isna

In [None]:
# Share of rows in the prior set for each value of the `reordered` flag:
# per-value counts divided by the total number of rows.
reordered_product = order_products__prior_df["reordered"].value_counts()
ratio = reordered_product / len(order_products__prior_df)
ratio


In [None]:
# Share of rows in the train set for each value of the `reordered` flag:
# per-value counts divided by the total number of rows.
reordered_product = order_products__train_df["reordered"].value_counts()
ratio = reordered_product / len(order_products__train_df)
ratio

In [None]:
# Sum the `reordered` flag within each prior order.
orders = (
    order_products__prior_df
    .groupby("order_id")["reordered"]
    .sum()
    .reset_index()
)
# Keep the orders whose sum is non-zero.
reorders = orders.loc[orders["reordered"] != 0]

# Fraction of prior orders with a non-zero reordered sum.
ratio = len(reorders) / len(orders)
ratio

In [None]:
# Sum the `reordered` flag within each train order.
orders = (
    order_products__train_df
    .groupby("order_id")["reordered"]
    .sum()
    .reset_index()
)
# Keep the orders whose sum is non-zero.
reorders = orders.loc[orders["reordered"] != 0]

# Fraction of train orders with a non-zero reordered sum.
ratio = len(reorders) / len(orders)
ratio

In [None]:
# Largest `add_to_cart_order` in each prior order, then how many orders share
# each value. (Presumably this maximum is the number of products in the order;
# this assumes add_to_cart_order is a 1-based position -- verify against the data.)
max_cart_position = order_products__prior_df.groupby("order_id")["add_to_cart_order"].max()
purchase_in_order = max_cart_position.value_counts()

# seaborn >= 0.12 raises TypeError when x/y are passed positionally, so they
# are given by keyword.
fig, ax = plt.subplots(figsize=(18,8))
sns.barplot(x=purchase_in_order.index, y=purchase_in_order.values, ax=ax)
ax.set_ylabel('Number of Occurrences')
ax.set_xlabel('Number of products in an order', fontsize=12)
plt.show()