In [None]:
import pandas as pd
import numpy as np

In [None]:
#!pip install pyxlsb
# or converting in excel to xlsx format

In [None]:
# Load the transactions workbook; the relative path is resolved against the
# current working directory.
df = pd.read_excel("DataSet.xlsx")

In [None]:
# Preview the first rows to check that the workbook loaded as expected.
df.head()

In [None]:
# (number of rows, number of columns) of the loaded frame.
df.shape

In [None]:
# Column dtypes and non-null counts — shows which columns have missing values.
df.info()

In [None]:
# Summary statistics of the frame's columns.
df.describe()

In [None]:
# Keep only rows that carry a customer id. The explicit .copy() makes `df` an
# independent frame, so the column assignments in later cells
# (df['InvoiceDay'], df['Total']) do not write into a slice of the loaded
# frame and trigger pandas' SettingWithCopyWarning.
df = df[df['CustomerID'].notnull()].copy()

In [None]:
import datetime

In [None]:
# Truncate every invoice timestamp to midnight so invoices can be compared by
# calendar day. The vectorised .dt accessor replaces a per-row Python lambda
# and lets missing timestamps (NaT) pass through instead of raising.
# pd.to_datetime is a no-op when the column is already datetime64.
df['InvoiceDay'] = pd.to_datetime(df['InvoiceDate']).dt.normalize()

In [None]:
# Inspect the first ten rows, now including the derived InvoiceDay column.
df.head(10)

In [None]:
# Count the distinct product descriptions present in the data.
df['Description'].nunique()

In [None]:
# Scratch check: a one-day timedelta — the offset that the next cell adds to
# the latest invoice day.
datetime.timedelta(1)

In [None]:
# Reference date for the recency calculation: the day after the most recent
# invoice day. Series.max() is vectorised and skips missing values, unlike the
# builtin max(), which iterates the column element by element in Python.
pin_data = df['InvoiceDay'].max() + datetime.timedelta(1)
pin_data

In [None]:
# Monetary value of each invoice line: units bought times price per unit.
df['Total'] = df['Quantity'].mul(df['UnitPrice'])
df.head()

In [None]:
from matplotlib import pyplot as plt
import seaborn as sns

In [None]:
# Order the invoice lines from the largest Quantity to the smallest.
sorted_df = df.sort_values('Quantity', ascending=False)

# The first row after sorting is the single largest purchase line.
largest_line = sorted_df.iloc[0]
customer_id = largest_line['CustomerID']
highest_quantity = largest_line['Quantity']

print(f"CustomerID {customer_id}  bought the highest quantity of {highest_quantity}.")

In [None]:
# Total units bought per customer, ordered from the biggest buyer down.
customer_totals = df.groupby('CustomerID')['Quantity'].sum()
sorted_totals = customer_totals.sort_values(ascending=False)

# The ten customers at the top of that ordering.
top_10_customers = sorted_totals.iloc[:10]

print("Top 10 Customers by Quantity:", top_10_customers, sep="\n")

In [None]:
# Bar chart of the ten customers with the largest total quantity.
sorted_totals.iloc[:10].plot.bar()
plt.legend()
plt.show()

In [None]:
# The ten product descriptions that occur on the most rows of `df`.
top_10_descriptions = df['Description'].value_counts().nlargest(10)

# Bar chart of those occurrence counts.
# NOTE(review): the y axis shows how often each description occurs, not the
# monetary 'Total' column — the 'Total' label may be worth renaming.
sns.barplot(x=top_10_descriptions.index, y=top_10_descriptions.values)
plt.title('Top 10 Product Descriptions')
plt.xlabel('Description')
plt.ylabel('Total')
plt.xticks(rotation=45)
plt.show()

In [None]:
# Ten countries with the highest mean value per invoice line.
# NOTE(review): the ranking uses the *mean* of Total, not summed revenue;
# switch to .sum() if total sales per country is what should be shown.
df_country = (
    df.groupby('Country')['Total']
    .mean()
    .sort_values(ascending=False)
    .head(10)
)
df_country.plot(kind='bar', color='green')
plt.title("Top 10 Countries by Mean Line Total")
plt.xlabel('Country')      # the x axis carries the country names
plt.ylabel('Mean Total')   # the bar height is the aggregated Total
plt.xticks(rotation=35)
plt.show()

In [None]:
# Number of distinct customers per country, largest first.
# CustomerID is an identifier, so it is counted rather than passed to
# sns.barplot as the y variable: barplot would aggregate it with a mean (plus
# bootstrapped error bars), which is meaningless for ids and slow on the full
# frame.
customers_per_country = (
    df.groupby('Country')['CustomerID']
    .nunique()
    .sort_values(ascending=False)
)

plt.figure(figsize=(18, 8))
sns.barplot(x=customers_per_country.index, y=customers_per_country.values)
plt.xlabel('Country')
plt.ylabel('Number of customers')
plt.xticks(rotation=60)
plt.show()

In [None]:
def _days_since_last_invoice(invoice_days):
    """Whole days between the latest date in ``invoice_days`` and ``pin_data``."""
    return (pin_data - invoice_days.max()).days


# One row per customer with the three raw RFM inputs.
# NOTE(review): 'count' counts the customer's non-null InvoiceNo entries (one
# per invoice line); use 'nunique' if distinct invoices are wanted instead.
RFM = df.groupby('CustomerID').agg({
    'InvoiceDay': _days_since_last_invoice,
    'InvoiceNo': 'count',
    'Total': 'sum',
})

In [None]:
# Display the per-customer aggregates (columns still carry their source names).
RFM

In [None]:
# Spot-check: list the raw invoice lines of customer 12346.
df.loc[df['CustomerID'].eq(12346.0)]

In [None]:
# Give the aggregated columns their RFM names (modifies RFM in place).
RFM.rename(
    columns=dict(InvoiceDay='Recency', InvoiceNo='Frequency', Total='Monetary'),
    inplace=True,
)
RFM

In [None]:
# Correlation between the three raw RFM metrics. The columns are selected
# explicitly so the cell still works when it is re-run after later cells have
# added score, label and cluster columns to RFM (corr() cannot handle the
# string columns among them).
sns.heatmap(RFM[['Recency', 'Frequency', 'Monetary']].corr(), annot=True);

In [None]:
# Score each metric on a 1-5 scale using quintiles. The segment patterns in
# seg_map (e.g. r'[1-2]5', r'51', r'5[4-5]') are written for scores up to 5,
# so scores that stop at 4 would leave several segments unreachable.
N_SCORE_BINS = 5

# Recency is inverted: the smallest Recency (most recent buyers) gets the top score.
r_labels = range(N_SCORE_BINS, 0, -1)
r_groups = pd.qcut(RFM['Recency'], q=N_SCORE_BINS, labels=r_labels)
f_labels = range(1, N_SCORE_BINS + 1)
# Frequency is heavily tied at low values; ranking first (ties broken by row
# order) guarantees distinct bin edges, so qcut cannot fail with
# "Bin edges must be unique".
f_groups = pd.qcut(RFM['Frequency'].rank(method='first'), q=N_SCORE_BINS, labels=f_labels)
m_labels = range(1, N_SCORE_BINS + 1)
m_groups = pd.qcut(RFM['Monetary'], q=N_SCORE_BINS, labels=m_labels)

In [None]:
# Attach the three score columns. `.values` drops each Series' index, so the
# scores are placed by row position.
RFM = RFM.assign(
    R=r_groups.values,
    F=f_groups.values,
    M=m_groups.values,
)
RFM

In [None]:
# Two-character key: the recency score followed by the frequency score.
# (The monetary score M is not part of this key.)
RFM["RFM_SCORE"] = RFM['R'].astype(str).str.cat(RFM['F'].astype(str))

In [None]:
# Display the table with the combined RFM_SCORE key added.
RFM

In [None]:
# Map each RFM_SCORE key to a named customer segment.
# Keys are regular expressions matched against the two-character score built
# earlier: the first character class is the recency score (R), the second the
# frequency score (F). The segment names are runtime labels and are kept
# exactly as written.
seg_map = {
    r'[1-2][1-2]': 'inactive',
    r'[1-2][3-4]': 'exposed',
    r'[1-2]5': 'cant_loose',
    r'3[1-2]': 'about_to_sleep',
    r'33': 'need_attention',
    r'[3-4][4-5]': 'allegiant',
    r'41': 'promising',
    r'51': 'new_customers',
    r'[4-5][2-3]': 'potential_loyalists',
    r'5[4-5]': 'crown'
}
# regex=True makes replace() treat the dict keys as patterns, not literal values.
RFM['segment'] = RFM['RFM_SCORE'].replace(seg_map, regex=True)
# Move CustomerID out of the index into a regular column (needed for the
# merge with the order lines in a later cell).
# NOTE(review): in-place, so re-running this cell resets the index again and
# adds an extra 'index' column.
RFM.reset_index(inplace=True)
RFM.head()

In [None]:
from sklearn.cluster import KMeans

In [None]:
# Cluster customers on their three quantile scores.
# random_state pins the centroid initialisation so cluster ids — which later
# cells refer to by number (e.g. kmeans_cluster == 3) — are the same on every
# run. n_init is passed explicitly because its default differs between
# scikit-learn releases.
X = RFM[['R', 'F', 'M']]
kmeans = KMeans(n_clusters=5, init='k-means++', n_init=10, max_iter=300, random_state=42)
kmeans.fit(X)

In [None]:
# Cluster id assigned to each row of X, in row order.
lbs = kmeans.labels_
print(lbs)

In [None]:
# Store each customer's cluster id next to the RFM scores.
# NOTE(review): uses whatever model `kmeans` currently refers to — later cells
# rebind that name, so re-running this cell out of order changes the labels.
RFM['kmeans_cluster'] = kmeans.labels_

In [None]:
# Column means over all customers. RFM also holds string columns (RFM_SCORE,
# segment), which recent pandas versions refuse to average, so the
# calculation is restricted to the numeric columns.
RFM.mean(numeric_only=True)

In [None]:
# Column means for the customers in cluster 3 only. numeric_only=True skips
# the string columns (RFM_SCORE, segment), which recent pandas versions
# refuse to average.
RFM[RFM['kmeans_cluster'] == 3].mean(numeric_only=True)

In [None]:
# Inertia of the fitted model: sum of squared distances of the samples to
# their closest cluster centre.
kmeans.inertia_

In [None]:
# Elbow method: record the inertia (within-cluster sum of squares) for
# k = 1..10 and plot it against k.
# A separate `elbow_model` name is used so the loop does not overwrite the
# fitted `kmeans` model that other cells read, and random_state/n_init are
# pinned so the curve is reproducible.
wcss = {}
for k in range(1, 11):
    elbow_model = KMeans(n_clusters=k, init='k-means++', n_init=10, max_iter=300, random_state=42)
    elbow_model.fit(X)
    wcss[k] = elbow_model.inertia_

sns.pointplot(x=list(wcss.keys()), y=list(wcss.values()))
plt.xlabel('Number of clusters (k)')
plt.ylabel('Inertia (WCSS)')
plt.show()

In [None]:
from sklearn.metrics import davies_bouldin_score

In [None]:
# Three-cluster model for the Davies-Bouldin comparison. random_state and
# n_init are pinned so the score printed in the next cell is reproducible.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
kmeans.fit(X)

In [None]:
# Davies-Bouldin index of the model fitted in the previous cell
# (lower values indicate better-separated clusters).
print(davies_bouldin_score(X, kmeans.labels_))

In [None]:
# Four-cluster model for the Davies-Bouldin comparison. random_state and
# n_init are pinned so the score printed in the next cell is reproducible.
kmeans = KMeans(n_clusters=4, n_init=10, random_state=42)
kmeans.fit(X)

In [None]:
# Davies-Bouldin index of the model fitted in the previous cell
# (lower values indicate better-separated clusters).
print(davies_bouldin_score(X, kmeans.labels_))

In [None]:
# Five-cluster model for the Davies-Bouldin comparison. random_state and
# n_init are pinned so the score printed in the next cell is reproducible.
kmeans = KMeans(n_clusters=5, n_init=10, random_state=42)
kmeans.fit(X)

In [None]:
# Davies-Bouldin index of the model fitted in the previous cell
# (lower values indicate better-separated clusters).
print(davies_bouldin_score(X, kmeans.labels_))

In [None]:
# One row per invoice line, reduced to the customer and the product bought.
order = df.filter(items=["CustomerID", "StockCode"], axis="columns")

In [None]:
# Attach each customer's RFM row to their order lines. No key is given, so
# the join uses every column name the two frames have in common.
order = pd.merge(order, RFM)

In [None]:
# Display the order lines enriched with the RFM columns.
order

In [None]:
# Keep only what the recommender needs: who bought which product, and the
# customer's RFM_SCORE, which is used as the "rating".
order_rating = order[["CustomerID", "StockCode", "RFM_SCORE"]]

In [None]:
# Display the (customer, product, score) triples.
order_rating

In [None]:
# Drop rarely bought products: keep a StockCode only if it occurs on more
# than 10 rows.
stock_code_counts = order_rating['StockCode'].value_counts()
frequent_stock_codes = stock_code_counts[stock_code_counts > 10].index
order_rating = order_rating[order_rating['StockCode'].isin(frequent_stock_codes)]

In [None]:
# Renumber the rows 0..n-1 after filtering. drop=True discards the old index
# instead of inserting it as a stray 'index' column, which also keeps this
# cell safe to re-run.
order_rating = order_rating.reset_index(drop=True)

In [None]:

# Customer x product matrix whose cells hold the customer's RFM score
# (0 where the customer never bought the product).
# RFM_SCORE is built by concatenating two score digits as strings, so it is
# converted to a number first: pivot_table aggregates with a mean, which fails
# (or silently yields nonsense on old pandas) for string values. A customer's
# score is the same on all of their rows, so the mean of repeated
# (customer, product) pairs is simply that score.
ratings_utility_matrix = (
    order_rating
    .assign(RFM_SCORE=pd.to_numeric(order_rating['RFM_SCORE']))
    .pivot_table(values='RFM_SCORE',
                 index='CustomerID',
                 columns='StockCode',
                 aggfunc='mean',
                 fill_value=0)
)
ratings_utility_matrix.head()

In [None]:
# (number of customers, number of products) in the utility matrix.
ratings_utility_matrix.shape

In [None]:
# Transpose so that each row is a product (StockCode) and each column a customer.
# NOTE(review): this rebinds `X`, which earlier cells use for the R/F/M score
# columns passed to KMeans; re-running those cells after this one would make
# them see the transposed utility matrix instead.
X = ratings_utility_matrix.T
X.head()

In [None]:
# (number of products, number of customers) after the transpose.
X.shape

In [None]:
from sklearn.decomposition import TruncatedSVD


In [None]:
def fitsystemrecommendation(ratings_utility_matrix, n_components=10, random_state=42):
    """Build a product-to-product correlation matrix from a utility matrix.

    The matrix is transposed so that every row describes one product, reduced
    to ``n_components`` latent dimensions with truncated SVD, and the Pearson
    correlation between every pair of reduced product rows is returned.

    Parameters
    ----------
    ratings_utility_matrix : pandas.DataFrame
        Matrix with one row per customer and one column per product.
    n_components : int, default 10
        Number of latent dimensions kept by the SVD.
    random_state : int or None, default 42
        Seed for the SVD solver, so repeated calls on the same data give the
        same correlations. Pass ``None`` for an unseeded run.

    Returns
    -------
    numpy.ndarray
        Square array with one row and one column per product, in the order of
        ``ratings_utility_matrix.columns``. A product whose reduced vector is
        constant has an undefined correlation and gets ``nan`` entries.
    """
    # Rows become products, columns become customers.
    product_matrix = ratings_utility_matrix.T
    svd = TruncatedSVD(n_components=n_components, random_state=random_state)
    decomposed_matrix = svd.fit_transform(product_matrix)
    # np.corrcoef treats each row as one variable -> product x product matrix.
    return np.corrcoef(decomposed_matrix)

In [None]:
# Product-to-product correlation matrix used by systemrecommendation().
correlation_matrix = fitsystemrecommendation(ratings_utility_matrix)

In [None]:
def systemrecommendation(prod_id, products=None, correlations=None,
                         threshold=0.70, top_n=9):
    """Recommend the products most strongly correlated with ``prod_id``.

    Parameters
    ----------
    prod_id
        StockCode of the product to find recommendations for.
    products : sequence, optional
        Product ids in the same order as the rows of ``correlations``.
        Defaults to the columns of the notebook-level ``ratings_utility_matrix``.
    correlations : array-like, optional
        Square product-to-product correlation matrix. Defaults to the
        notebook-level ``correlation_matrix``.
    threshold : float, default 0.70
        Only products whose correlation is strictly above this are returned.
    top_n : int, default 9
        Maximum number of recommendations.

    Returns
    -------
    pandas.DataFrame
        Single column ``'Recommendation'`` with up to ``top_n`` product ids,
        strongest correlation first. ``prod_id`` itself is never included.

    Raises
    ------
    ValueError
        If ``prod_id`` is not one of ``products``.
    """
    # Fall back to the notebook-level objects. The product list is read from
    # the utility matrix the correlations were computed from, not from the
    # global `X`, which other cells rebind to unrelated data.
    if products is None:
        products = ratings_utility_matrix.columns
    if correlations is None:
        correlations = correlation_matrix

    products = pd.Index(products)
    positions = np.flatnonzero(products == prod_id)
    if positions.size == 0:
        raise ValueError(f"{prod_id!r} is not in the list of known products")

    scores = pd.Series(np.asarray(correlations)[positions[0]], index=products)
    # Exclude the product itself by mask; a plain list.remove() would raise
    # when the product's own correlation is NaN and so never passed the
    # threshold. NaN compares False against the threshold and is dropped too.
    keep = (scores.index != prod_id) & (scores.to_numpy() > threshold)
    # Rank by correlation so the top_n are the best matches, not merely the
    # first ones in index order; mergesort keeps ties in a stable order.
    ranked = scores[keep].sort_values(ascending=False, kind='mergesort')
    return pd.DataFrame({'Recommendation': ranked.index[:top_n].tolist()})

In [None]:
# Example: recommendations for the product with StockCode 10125.
systemrecommendation(10125)