# **Installing Libraries**

In [None]:
!pip install --upgrade pytube moviepy pydub yt-dlp
!pip install Flask

In [None]:
!pip install pycaret
import pycaret

In [None]:
# Display the value returned by pycaret.utils.version() as this cell's output.
from pycaret.utils import version

pycaret_version = version()
pycaret_version

In [None]:
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score

# **Loading Dataset**

In [None]:
# Location of the weekly sales-transaction CSV (an absolute path under /content).
DATASET_PATH = '/content/Dataset on Sales Transaction weekly.csv'

# Read the CSV into the DataFrame used by every later cell.
data = pd.read_csv(DATASET_PATH)

## **Preprocessing the Dataset**

In [None]:
# Fill missing values with the per-column mean.
imputer = SimpleImputer(strategy='mean')

# fit_transform returns a plain array, so wrap it back into a DataFrame that
# carries the original column names. `data` is overwritten with the imputed frame.
imputed_values = imputer.fit_transform(data)
data = pd.DataFrame(imputed_values, columns=data.columns)

In [None]:
# Preview the first five rows of the imputed frame.
data.head(n=5)

# **Using Clustering**

In [None]:
# The star import brings PyCaret's clustering API (setup, create_model,
# plot_model, ...) into the notebook namespace; later cells call these names directly.
from pycaret.clustering import *

# Initialise a clustering experiment on the imputed frame with no optional
# arguments. The return value is kept, although the cells visible here do not use it.
kMeanClusteringParameters = setup(data=data)

# **K-Means Clustering**

In [None]:
# K-Means baseline: setup is called without any optional preprocessing arguments.
from pycaret.clustering import get_config  # explicit import of the extra helper used below

# session_id pins PyCaret's random seed so the scores are reproducible across runs.
setup(data, session_id=42, verbose=False)

# Feature matrix after PyCaret's preprocessing pipeline (PyCaret 3.x config key).
# The models returned by create_model are trained on this matrix, so the metrics
# are computed against it as well.
features = get_config('X_transformed')

# r1, r2, r3 hold [silhouette, calinski-harabasz, davies-bouldin] for 3, 4 and 5 clusters.
r1, r2, r3 = [], [], []
for num_clusters, scores in zip((3, 4, 5), (r1, r2, r3)):
    print(f"For Cluster = {num_clusters}")
    x = create_model('kmeans', num_clusters = num_clusters)
    # Use the labels of the model PyCaret just trained. Calling
    # x.fit_predict(data) would re-fit the estimator on the raw frame and
    # throw away the fit produced by create_model.
    labels = x.labels_
    scores.append(silhouette_score(features, labels))
    scores.append(calinski_harabasz_score(features, labels))
    scores.append(davies_bouldin_score(features, labels))

# After the loop `x` is the 5-cluster model; the plots below describe that model.
plot_model(x, plot = 'cluster')
plot_model(x, plot = 'elbow')
plot_model(x, plot = 'tsne')

In [None]:
# K-Means on z-score normalized features.
from pycaret.clustering import get_config  # explicit import of the extra helper used below

# session_id pins PyCaret's random seed so the scores are reproducible across runs.
setup(data = data, normalize = True, normalize_method = 'zscore',
      session_id=42, verbose=False)

# Feature matrix after PyCaret's preprocessing pipeline (PyCaret 3.x config key).
# Scoring against the raw `data` frame would ignore the normalization requested
# above, so the metrics are computed on the matrix the models are trained on.
features = get_config('X_transformed')

# r4, r5, r6 hold [silhouette, calinski-harabasz, davies-bouldin] for 3, 4 and 5 clusters.
r4, r5, r6 = [], [], []
print("Normalized\n")
for num_clusters, scores in zip((3, 4, 5), (r4, r5, r6)):
    print(f"For Cluster = {num_clusters}")
    x = create_model('kmeans', num_clusters = num_clusters)
    # Use the labels of the model PyCaret just trained. Calling
    # x.fit_predict(data) would re-fit the estimator on the un-normalized frame
    # and throw away the fit produced by create_model.
    labels = x.labels_
    scores.append(silhouette_score(features, labels))
    scores.append(calinski_harabasz_score(features, labels))
    scores.append(davies_bouldin_score(features, labels))

# After the loop `x` is the 5-cluster model; the plots below describe that model.
plot_model(x, plot = 'cluster')
plot_model(x, plot = 'elbow')
plot_model(x, plot = 'tsne')

In [None]:
# K-Means on Yeo-Johnson transformed features.
from pycaret.clustering import get_config  # explicit import of the extra helper used below

# session_id pins PyCaret's random seed so the scores are reproducible across runs.
setup(data = data, transformation = True, transformation_method = 'yeo-johnson',
      session_id=42, verbose=False)

# Feature matrix after PyCaret's preprocessing pipeline (PyCaret 3.x config key).
# Scoring against the raw `data` frame would ignore the transformation requested
# above, so the metrics are computed on the matrix the models are trained on.
features = get_config('X_transformed')

# r7, r8, r9 hold [silhouette, calinski-harabasz, davies-bouldin] for 3, 4 and 5 clusters.
r7, r8, r9 = [], [], []
print("Transformation\n")
for num_clusters, scores in zip((3, 4, 5), (r7, r8, r9)):
    print(f"For Cluster = {num_clusters}")
    x = create_model('kmeans', num_clusters = num_clusters)
    # Use the labels of the model PyCaret just trained. Calling
    # x.fit_predict(data) would re-fit the estimator on the untransformed frame
    # and throw away the fit produced by create_model.
    labels = x.labels_
    scores.append(silhouette_score(features, labels))
    scores.append(calinski_harabasz_score(features, labels))
    scores.append(davies_bouldin_score(features, labels))

# After the loop `x` is the 5-cluster model; the plots below describe that model.
plot_model(x, plot = 'cluster')
plot_model(x, plot = 'elbow')
plot_model(x, plot = 'tsne')


In [None]:
# K-Means on features projected with linear PCA.
from pycaret.clustering import get_config  # explicit import of the extra helper used below

# session_id pins PyCaret's random seed so the scores are reproducible across runs.
setup(data = data, pca = True, pca_method = 'linear',
      session_id=42, verbose=False)

# Feature matrix after PyCaret's preprocessing pipeline (PyCaret 3.x config key).
# Scoring against the raw `data` frame would ignore the PCA step requested
# above, so the metrics are computed on the matrix the models are trained on.
features = get_config('X_transformed')

# r10, r11, r12 hold [silhouette, calinski-harabasz, davies-bouldin] for 3, 4 and 5 clusters.
r10, r11, r12 = [], [], []
print("PCA\n")
for num_clusters, scores in zip((3, 4, 5), (r10, r11, r12)):
    print(f"For Cluster = {num_clusters}")
    x = create_model('kmeans', num_clusters = num_clusters)
    # Use the labels of the model PyCaret just trained. Calling
    # x.fit_predict(data) would re-fit the estimator on the un-projected frame
    # and throw away the fit produced by create_model.
    labels = x.labels_
    scores.append(silhouette_score(features, labels))
    scores.append(calinski_harabasz_score(features, labels))
    scores.append(davies_bouldin_score(features, labels))

# After the loop `x` is the 5-cluster model; the plots below describe that model.
plot_model(x, plot = 'cluster')
plot_model(x, plot = 'elbow')
plot_model(x, plot = 'tsne')

In [None]:
# K-Means on features that are z-score normalized and Yeo-Johnson transformed.
from pycaret.clustering import get_config  # explicit import of the extra helper used below

# session_id pins PyCaret's random seed so the scores are reproducible across runs.
setup(data = data, normalize = True, normalize_method = 'zscore',
      transformation = True, transformation_method = 'yeo-johnson',
      session_id=42, verbose=False)

# Feature matrix after PyCaret's preprocessing pipeline (PyCaret 3.x config key).
# Scoring against the raw `data` frame would ignore the preprocessing requested
# above, so the metrics are computed on the matrix the models are trained on.
features = get_config('X_transformed')

# r13, r14, r15 hold [silhouette, calinski-harabasz, davies-bouldin] for 3, 4 and 5 clusters.
r13, r14, r15 = [], [], []
print("Normalized and transformation\n")
for num_clusters, scores in zip((3, 4, 5), (r13, r14, r15)):
    print(f"For Cluster = {num_clusters}")
    x = create_model('kmeans', num_clusters = num_clusters)
    # Use the labels of the model PyCaret just trained. Calling
    # x.fit_predict(data) would re-fit the estimator on the unprocessed frame
    # and throw away the fit produced by create_model.
    labels = x.labels_
    scores.append(silhouette_score(features, labels))
    scores.append(calinski_harabasz_score(features, labels))
    scores.append(davies_bouldin_score(features, labels))

# After the loop `x` is the 5-cluster model; the plots below describe that model.
plot_model(x, plot = 'cluster')
plot_model(x, plot = 'elbow')
plot_model(x, plot = 'tsne')

In [None]:
# K-Means on features that are z-score normalized, Yeo-Johnson transformed and
# projected with linear PCA.
from pycaret.clustering import get_config  # explicit import of the extra helper used below

# session_id pins PyCaret's random seed so the scores are reproducible across runs.
setup(data = data, normalize = True, normalize_method = 'zscore',
      transformation = True, transformation_method = 'yeo-johnson',
      pca = True, pca_method = 'linear',
      session_id=42, verbose=False)

# Feature matrix after PyCaret's preprocessing pipeline (PyCaret 3.x config key).
# Scoring against the raw `data` frame would ignore the preprocessing requested
# above, so the metrics are computed on the matrix the models are trained on.
features = get_config('X_transformed')

# r16, r17, r18 hold [silhouette, calinski-harabasz, davies-bouldin] for 3, 4 and 5 clusters.
r16, r17, r18 = [], [], []
print("Normalized, transformation and PCA\n")
for num_clusters, scores in zip((3, 4, 5), (r16, r17, r18)):
    print(f"For Cluster = {num_clusters}")
    x = create_model('kmeans', num_clusters = num_clusters)
    # Use the labels of the model PyCaret just trained. Calling
    # x.fit_predict(data) would re-fit the estimator on the unprocessed frame
    # and throw away the fit produced by create_model.
    labels = x.labels_
    scores.append(silhouette_score(features, labels))
    scores.append(calinski_harabasz_score(features, labels))
    scores.append(davies_bouldin_score(features, labels))

# After the loop `x` is the 5-cluster model; the plots below describe that model.
plot_model(x, plot = 'cluster')
plot_model(x, plot = 'elbow')
plot_model(x, plot = 'tsne')


In [None]:
# Column-oriented summary of the K-Means runs: the first entry names the metric
# stored at each position, every other entry is one (preprocessing, cluster count)
# run holding its three scores in that same order.
result1 = {
    'Parameters': ['Silhouette', 'Calinski-Harabasz', 'Davies-Bouldins'],
    # setup called without optional preprocessing arguments
    'No Data Processing(c=3)': r1,
    'No Data Processing(c=4)': r2,
    'No Data Processing(c=5)': r3,
    # normalize=True (zscore)
    'Using Normalization(c=3)': r4,
    'Using Normalization(c=4)': r5,
    'Using Normalization(c=5)': r6,
    # transformation=True (yeo-johnson)
    'Using Transform(c=3)': r7,
    'Using Transform(c=4)': r8,
    'Using Transform(c=5)': r9,
    # pca=True (linear)
    'Using PCA(c=3)': r10,
    'Using PCA(c=4)': r11,
    'Using PCA(c=5)': r12,
    # transformation + normalization
    'Using T + N(c=3)': r13,
    'Using T + N(c=4)': r14,
    'Using T + N(c=5)': r15,
    # transformation + normalization + PCA
    'T + N + PCA(c=3)': r16,
    'T + N + PCA(c=4)': r17,
    'T + N + PCA(c=5)': r18,
}

In [None]:
# Turn the summary dict into a table: one column per dict key, one row per metric.
result1df = pd.DataFrame.from_dict(result1)

# **Hierarchical Clustering**