In [3]:
import pandas as pd
import numpy as np


In [5]:
# Read the cleaned transaction export and run InvoiceDate through
# pd.to_datetime so the date arithmetic in later cells works.
df = (
    pd.read_csv('/content/cleaned_data.csv')
    .assign(InvoiceDate=lambda raw: pd.to_datetime(raw['InvoiceDate']))
)
# Preview the first rows as the cell output.
df.head()


Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country,TotalAmount
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850,United Kingdom,15.3
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850,United Kingdom,20.34
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850,United Kingdom,22.0
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850,United Kingdom,20.34
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850,United Kingdom,20.34


In [6]:
# Reference point for recency: the most recent invoice timestamp in the data.
latest_date = df['InvoiceDate'].max()

# Recency = whole days between each customer's last invoice and latest_date
# (the sub-day remainder is dropped by `.days`).
# Vectorised timedelta arithmetic through the .dt accessor replaces a Python
# lambda that was being invoked once per customer.
recency = (latest_date - df.groupby('CustomerID')['InvoiceDate'].max()).dt.days


In [7]:
# Frequency = number of distinct invoice numbers recorded for each customer.
frequency = df['InvoiceNo'].groupby(df['CustomerID']).nunique()


In [8]:
# Monetary value = sum of TotalAmount over all of a customer's rows.
monetary = df['TotalAmount'].groupby(df['CustomerID']).sum()


In [9]:
# Assemble the per-customer RFM table. All three Series are indexed by
# CustomerID, so the DataFrame constructor aligns them on that index;
# reset_index() then turns CustomerID into an ordinary column.
rfm_columns = {
    'Recency': recency,
    'Frequency': frequency,
    'MonetaryValue': monetary,
}
customer_features = pd.DataFrame(rfm_columns).reset_index()


In [10]:
# Average spend per invoice: total monetary value divided by the number of
# distinct invoices (Frequency) for the customer.
customer_features['AvgTransactionValue'] = customer_features['MonetaryValue'].div(
    customer_features['Frequency']
)


In [11]:
# CLV proxy: average transaction value multiplied by purchase frequency.
# NOTE(review): AvgTransactionValue is MonetaryValue / Frequency, so this
# product reduces to MonetaryValue (up to floating-point rounding) and the
# column carries no new information -- confirm which CLV definition was
# actually intended (e.g. one involving an expected customer lifespan).
customer_features['CLV'] = customer_features['AvgTransactionValue'].mul(
    customer_features['Frequency']
)


In [12]:
# Order rows chronologically within each customer so the rolling window
# below walks every customer's purchases in time order.
df_sorted = df.sort_values(by=['CustomerID', 'InvoiceDate'])

# Trailing mean of TotalAmount over up to 3 consecutive rows per customer;
# min_periods=1 lets a customer's first rows average over fewer values.
# The grouped rolling result is indexed by (CustomerID, original row label);
# dropping the CustomerID level restores the original row labels so the
# assignment aligns back onto df_sorted.
df_sorted['RollingAvgAmount'] = (
    df_sorted
    .groupby('CustomerID')['TotalAmount']
    .rolling(3, min_periods=1)
    .mean()
    .droplevel(0)
)


In [13]:
# Collapse the row-level rolling averages to one mean per customer, then
# attach it to the feature table by looking each CustomerID up in that Series.
rolling_avg = (
    df_sorted['RollingAvgAmount']
    .groupby(df_sorted['CustomerID'])
    .mean()
)
customer_features['RollingAvgAmount'] = customer_features['CustomerID'].map(rolling_avg)


In [14]:
# Only a cell's final expression is rendered automatically, so the summary
# statistics have to be displayed explicitly; otherwise the describe() result
# is computed and silently thrown away.
# `display` is injected by the IPython/Jupyter kernel and needs no import.
display(customer_features.describe())

# Preview of the first rows (rendered as the cell's output value).
customer_features.head()


Unnamed: 0,CustomerID,Recency,Frequency,MonetaryValue,AvgTransactionValue,CLV,RollingAvgAmount
0,12346,325,1,77183.6,77183.6,77183.6,77183.6
1,12347,1,7,4310.0,615.714286,4310.0,23.766117
2,12348,74,4,1797.24,449.31,1797.24,57.102366
3,12349,18,1,1757.55,1757.55,1757.55,21.402283
4,12350,309,1,334.4,334.4,334.4,19.870588


In [15]:
# Persist the per-customer feature table; index=False keeps the row index
# out of the written file.
customer_features.to_csv(
    path_or_buf='/content/customer_features.csv',
    index=False,
)
