In [None]:
import pandas as pd

#### Load customer purchase data from a CSV file

In [None]:
# Load the raw transaction export into a DataFrame.
# A forward slash is used as the directory separator: it is accepted on both
# Windows and POSIX systems, and it avoids the unrecognized "\o" escape
# sequence that a backslash inside a normal string literal produces.
data = pd.read_csv("Customer_Purchase/online_retail.csv")

#### Step 1: Keep the required columns

In [None]:
# Columns carried forward through the rest of the pipeline; every other
# column of the loaded frame is discarded.
required_columns = [
    'Customer ID',
    'InvoiceDate',
    'Description',
    'Quantity',
    'Price',
]
data = data[required_columns]

#### Step 2: Rename the columns for convenience

In [None]:
# Map the source column headers to the names used by the later steps.
# Columns not listed here ('Quantity', 'Price') keep their names.
column_renames = {
    'Description': 'Product_Name',
    'InvoiceDate': 'Purchase_Date',
    'Customer ID': 'Customer_ID',
}
data = data.rename(columns=column_renames)

#### Step 3: Convert Purchase Date to a datetime format

In [None]:
# Parse Purchase_Date with pandas' default datetime inference and store the
# parsed values back in the same column, so it can be grouped by month later.
purchase_dates = pd.to_datetime(data['Purchase_Date'])
data['Purchase_Date'] = purchase_dates

#### Step 4: Remove duplicates

In [None]:
# Drop rows that are exact duplicates across all columns, keeping the first
# occurrence of each (these are the pandas defaults, spelled out explicitly).
data = data.drop_duplicates(subset=None, keep='first')

#### Step 5: Handle missing data
#### Fill missing Quantity values with the mean

In [None]:
# Replace missing Quantity values with the column mean.
# The result is assigned back to the column instead of calling
# fillna(..., inplace=True) on data['Quantity']: the in-place form operates
# on an intermediate Series, which is deprecated in pandas 2.x and leaves
# `data` unchanged when Copy-on-Write is enabled.
quantity_mean = data['Quantity'].mean()
data['Quantity'] = data['Quantity'].fillna(quantity_mean)

#### Step 6: Normalize product names

In [None]:
# Lower-case every product name so that names differing only in letter case
# compare equal; missing names stay missing.
product_names = data['Product_Name']
data['Product_Name'] = product_names.str.lower()

#### Step 7: Save the processed data to csv

In [None]:
# Write the cleaned transactions to disk.
# index=False keeps the row index out of the file: after de-duplication it is
# only the leftover row position from the raw file (with gaps), and writing it
# would add an unnamed first column that reappears as "Unnamed: 0" on reload.
data.to_csv('purchase_history.csv', index=False)

#### Step 8: Aggregate data to create a monthly purchase-count history for each customer

In [None]:
# Build a customer-by-month table of purchase counts.
# NOTE(review): pandas 2.2+ deprecates the 'M' alias in favour of 'ME' —
# confirm the pandas version in use before changing it.
month_bins = pd.Grouper(key='Purchase_Date', freq='M')

# Count the non-null product names per (customer, month) pair.
monthly_counts = data.groupby(['Customer_ID', month_bins])['Product_Name'].count()

# Pivot the months into columns; customer/month pairs with no rows become 0.
purchase_history = monthly_counts.unstack().fillna(0)

#### Step 9: Reset the index and convert NaN values to 0

In [None]:
# Turn the Customer_ID index into an ordinary column, then replace any
# remaining NaN cells with 0.
purchase_history = purchase_history.reset_index().fillna(0)

#### Display the processed data

In [None]:
# Bare expression as the last statement of the cell: the notebook renders
# the per-customer monthly purchase-count table built in the previous steps.
purchase_history