**Import of required libraries**

In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
import yellowbrick

**Records of the given dataset**

In [None]:
# Read the raw CSV into a DataFrame, decoding the bytes as ISO-8859-1.
data = pd.read_csv(
    "data.csv",
    sep=',',
    encoding="ISO-8859-1",
)
# Preview the first ten records.
data.head(n=10)

**Information about the columns in the data**

In [None]:
# Print the column names, non-null counts and dtypes of the loaded frame.
data.info()

**Description of non-numeric features**

In [None]:
# Excluding int64/float64 leaves only the non-numeric columns in the summary;
# the transpose shows one source column per row.
data.describe(exclude=['int64', 'float64']).T

**Check for null values**

In [None]:
# Number of missing values in each column.
data.isnull().sum()

**Shape of the available data**

In [None]:
# (rows, columns) of the data as loaded.
data.shape

**Delete all the missing records**

In [12]:
# Keep only fully populated records: any row with at least one missing value is discarded.
data = data.dropna(axis=0, how='any')

In [None]:
# (rows, columns) at this point, for comparison with the shape shown before the drop.
data.shape

In [None]:
# Re-check the per-column missing-value counts.
data.isnull().sum()

**Check whether negative values are present in Quantity and UnitPrice**

In [None]:
# Show the records whose Quantity is below zero.
data.loc[data['Quantity'].lt(0)]

In [None]:
# Number of records whose UnitPrice is below zero.
len(data.loc[data['UnitPrice'].lt(0)])

**Drop the records with negative quantity**

In [17]:
# Collect the index labels of rows with a negative Quantity, then remove those rows in place.
negative_quantity_labels = data.index[data['Quantity'] < 0]
data.drop(index=negative_quantity_labels, inplace=True)

In [None]:
# (rows, columns) at this point, for comparison with the earlier shapes.
data.shape

**Calculation of total amount or monetary value**

In [None]:
# Line-level revenue: quantity multiplied by the unit price.
data['Sales'] = data['Quantity'].mul(data['UnitPrice'])
# Total revenue per customer as a flat frame with columns CustomerID and Sales.
new_data = data.groupby('CustomerID', as_index=False)['Sales'].sum()
new_data

In [None]:
# Inspect the Python type of the per-customer sales table.
type(new_data)

**Calculating the number of transactions of each customer**

In [None]:
# Frequency: number of distinct invoices (transactions) per customer.
# count() tallies every row, so an invoice spanning several rows would be counted
# once per row; nunique() counts each InvoiceNo once and is identical to count()
# whenever there is exactly one row per invoice.
num_trans = data.groupby('CustomerID')['InvoiceNo'].nunique().reset_index()
num_trans

In [None]:
# Inspect the Python type of the per-customer transaction-count table.
type(num_trans)

**Calculate Last Transaction**

In [23]:
# Parse InvoiceDate into pandas datetimes so that date arithmetic is possible.
data = data.assign(InvoiceDate=pd.to_datetime(data['InvoiceDate']))

In [None]:
# Display the InvoiceDate column.
data['InvoiceDate']

In [25]:
# Reference point: the latest invoice timestamp in the data.
latest_invoice_date = data['InvoiceDate'].max()
# Whole days between each record's invoice and that reference point.
elapsed = latest_invoice_date - data['InvoiceDate']
data['Last Transaction'] = elapsed.dt.days

In [None]:
# Display the per-record day offsets stored in 'Last Transaction'.
data['Last Transaction']

In [None]:
# Preview the first ten records of `data`.
data.head(10)

In [28]:
# Recency: days since each customer's most recent purchase.
# 'Last Transaction' holds the days elapsed since every single invoice, so the
# most recent purchase is the SMALLEST value in the group; max() would instead
# return the days since the customer's first purchase.
# NOTE(review): grouping by Country too yields one row per (customer, country)
# pair - confirm every customer maps to a single country, otherwise the later
# merges on CustomerID repeat that customer's totals.
lt = data.groupby(['CustomerID','Country'])['Last Transaction'].min().reset_index()

In [None]:
# Display the per-(CustomerID, Country) 'Last Transaction' table.
lt

In [None]:
# Attach the per-customer transaction counts to the 'Last Transaction' table ...
merge_table = lt.merge(num_trans, on='CustomerID', how='inner')
# ... then attach the per-customer sales totals; inner joins keep only customers present in both sides.
new_df = merge_table.merge(new_data, on='CustomerID', how='inner')
new_df

**Removal of outliers using the IQR rule (visualized with boxplots)**

In [None]:
# Box plots of the three customer-level features on one 20x20 inch canvas.
fig, ax = plt.subplots(figsize=(20, 20))
feature_frame = new_df[['InvoiceNo', 'Last Transaction', 'Sales']]
sns.boxplot(data=feature_frame, ax=ax)
plt.show()

In [32]:
# First and third quartiles of Sales.
sales_q1 = new_df['Sales'].quantile(0.25)
sales_q3 = new_df['Sales'].quantile(0.75)
IQR = sales_q3 - sales_q1
# Fences placed 1.5 * IQR outside the quartiles.
lower_limit = sales_q1 - 1.5*IQR
upper_limit = sales_q3 + 1.5*IQR
# Keep rows whose Sales lie strictly between the two fences.
inside_fences = new_df['Sales'].lt(upper_limit) & new_df['Sales'].gt(lower_limit)
new_df_iqr = new_df[inside_fences]
new_df_iqr.shape

(1004, 5)

In [None]:
# Box plots of the same three features, drawn from the Sales-filtered frame.
fig, ax = plt.subplots(figsize=(20, 20))
filtered_features = new_df_iqr[['InvoiceNo', 'Last Transaction', 'Sales']]
sns.boxplot(data=filtered_features, ax=ax)
plt.show()

In [None]:
# Replace the index left over from filtering with a fresh 0..n-1 index, discarding the old labels.
new_df_iqr = new_df_iqr.reset_index(drop=True)
new_df_iqr

**Visualization of correlation matrix using heatmap**

In [None]:
# Pairwise correlations between the numeric columns only.
numeric_corr = new_df_iqr.select_dtypes(include='number').corr()
# Annotated heatmap of that correlation matrix.
fig, ax = plt.subplots(figsize=(20, 20))
sns.heatmap(numeric_corr, cmap="Greens", annot=True, ax=ax)
plt.show()

**Normalization of given dataset using MinMaxScaler**

In [36]:
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Columns that feed the clustering model.
feature_cols = ['Last Transaction', 'InvoiceNo', 'Sales']
new2_df = new_df_iqr[feature_cols]

# Rescale every feature to the [0, 1] range.
scaler = MinMaxScaler()

# Rebuild a DataFrame around the scaled array, carrying over the source index.
# Without it the new frame gets a fresh 0..n-1 index, and the Country assignment
# below (which aligns on index labels) would silently mismatch or produce NaN
# whenever new_df_iqr's index is not already 0..n-1.
scaled_df = pd.DataFrame(
    scaler.fit_transform(new2_df),
    columns=feature_cols,
    index=new2_df.index,
)
scaled_df['Country'] = new_df_iqr['Country']
scaled_df

**K Means Clustering - Plot the graph using elbow method**

In [None]:
# Numeric features only; Country is a label, not a clustering input.
df_k = scaled_df.drop(columns=['Country'])
# Elbow method for K-means
from yellowbrick.cluster import KElbowVisualizer
# Fixed seed so the elbow curve is reproducible across kernel restarts
# (K-means initialisation is random otherwise).
model = KMeans(random_state=42)
# k=(1, 11) evaluates cluster counts 1 through 10.
visualizer = KElbowVisualizer(model, k=(1,11), timings=False)
visualizer.fit(df_k)        # Fit data to visualizer
visualizer.show()

In [None]:
# Fit on the feature columns only. Dropping 'clusters' (if a previous run of this
# cell already added it) keeps the cell idempotent: a re-run no longer feeds the
# old labels back into the model as a feature.
cluster_features = df_k.drop(columns=['clusters'], errors='ignore')
# Fixed seed so cluster assignments are reproducible.
km = KMeans(n_clusters=3, random_state=42)
y_predicted = km.fit_predict(cluster_features)
# Store each customer's cluster label next to its features.
df_k['clusters'] = y_predicted
df_k

In [None]:
# Coordinates of the fitted cluster centres.
km.cluster_centers_

**Model Evaluation**

In [None]:
from sklearn.metrics import silhouette_samples, silhouette_score
# Score on the feature columns only. df_k also carries the 'clusters' label column
# at this point; including it would add the label itself as a distance dimension
# and artificially inflate the silhouette.
silhouette_features = df_k.drop(columns=['clusters'], errors='ignore')
score = silhouette_score(silhouette_features, km.labels_, metric='euclidean')
print(score)  #-1 to 1

**Save the model**

In [None]:
import joblib

filename = "model.pkl"
# Persist the fitted 3-cluster estimator `km`. `model` is the estimator handed to
# the elbow visualizer, not the final clustering model evaluated above.
joblib.dump(km,filename)

In [None]:
# Mount Google Drive at /content/drive (only works inside a Google Colab runtime).
from google.colab import drive
drive.mount('/content/drive')