In [14]:
pip install umap-learn keras bokeh




In [15]:
# Import Necessary Libraries
import numpy as np
import pandas as pd
import os
from keras.applications.resnet50 import ResNet50, preprocess_input
from keras.preprocessing import image
import umap.umap_ as umap
from sklearn.cluster import DBSCAN
# Import Libraries for Image Clustering
import random as rd
from bokeh.plotting import figure, show
from bokeh.models import HoverTool, ColumnDataSource, ColorBar
from bokeh.transform import linear_cmap
from bokeh.palettes import all_palettes
from bokeh.palettes import inferno

In [16]:
# Load and Preprocess Images
def load_and_preprocess_image(image_path, target_size=(224, 224)):
    img = image.load_img(image_path, target_size=target_size)
    img_array = image.img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)
    return preprocess_input(img_array)

image_dir = "movies1"
image_paths = [os.path.join(image_dir, img) for img in os.listdir(image_dir)]
images = np.vstack([load_and_preprocess_image(img_path) for img_path in image_paths])

# Extract Features using ResNet50
model = ResNet50(weights='imagenet', include_top=False, pooling='avg')
features = model.predict(images)

# Reduce Dimensionality with UMAP
n_neighbors = 15
min_dist =0.1
n_neighbors_value = min(5, len(images) - 1)  # ensure it's less than the number of images
reducer = umap.UMAP(n_neighbors=n_neighbors_value, n_components=2, metric='cosine', min_dist=min_dist)
reduced_features = reducer.fit_transform(features)



In [17]:
reduced_features

array([[ 7.8462205, -2.0900578],
       [ 7.8216906, -2.1121154],
       [ 6.1089487, 10.537859 ],
       ...,
       [ 3.3462803, 16.05402  ],
       [ 3.3278825, 16.06726  ],
       [ 3.361187 , 16.08593  ]], dtype=float32)

In [18]:
# Cluster using DBSCAN
dbscan = DBSCAN(eps=0.5, min_samples=10, metric='euclidean')
clusters = dbscan.fit_predict(reduced_features)
n_clusters_ = len(set(clusters)) - (1 if -1 in clusters else 0)

In [19]:
n_clusters_

80

In [20]:
color_map = rd.choices(inferno(256), k=n_clusters_+1)
color_map

['#10092F',
 '#AF315B',
 '#290B54',
 '#F1EB6C',
 '#C43C4E',
 '#A72D5F',
 '#952666',
 '#FAFDA0',
 '#F37719',
 '#220B4C',
 '#B23259',
 '#170B3B',
 '#F68012',
 '#B43358',
 '#C23B4F',
 '#6D186E',
 '#2E0A5A',
 '#FABB21',
 '#2E0A5A',
 '#F4F78D',
 '#D14643',
 '#F4DB4B',
 '#370961',
 '#E55B30',
 '#02010E',
 '#932567',
 '#07051D',
 '#370961',
 '#FB9906',
 '#F5D745',
 '#FAFDA0',
 '#06041B',
 '#70196E',
 '#040314',
 '#F2F485',
 '#9E2963',
 '#63146E',
 '#C73E4C',
 '#5B116E',
 '#952666',
 '#992864',
 '#FBAA0E',
 '#520E6C',
 '#C83E4B',
 '#240B4E',
 '#A92E5E',
 '#430A68',
 '#0D0828',
 '#68166E',
 '#290B54',
 '#9E2963',
 '#B23259',
 '#140B36',
 '#120A32',
 '#902468',
 '#F1721D',
 '#390962',
 '#5D126E',
 '#F1721D',
 '#FA9306',
 '#D44841',
 '#2B0A56',
 '#FBB71C',
 '#520E6C',
 '#AC2F5C',
 '#F9C72F',
 '#F1721D',
 '#CB4049',
 '#CD4247',
 '#E35832',
 '#EE6C22',
 '#BE3852',
 '#F5F891',
 '#FBAA0E',
 '#FB9E07',
 '#F2E45D',
 '#EE6C22',
 '#F57C15',
 '#A92E5E',
 '#F9C52C',
 '#3E0966']

In [21]:
clusters

array([ 0,  0,  1, ..., 79, 79, 79], dtype=int64)

In [22]:
df=pd.DataFrame({
    'x': reduced_features[:, 0],
    'y': reduced_features[:, 1],
    'path': image_paths,
    'cluster': clusters,
    'color': [color_map[i] for i in clusters]
})
df

Unnamed: 0,x,y,path,cluster,color
0,7.846220,-2.090058,movies1\Gran Turismo 1.jpg,0,#10092F
1,7.821691,-2.112115,movies1\Gran Turismo 10.png,0,#10092F
2,6.108949,10.537859,movies1\Gran Turismo 100.png,1,#AF315B
3,6.129954,10.559471,movies1\Gran Turismo 101.png,1,#AF315B
4,6.091336,10.523120,movies1\Gran Turismo 102.png,1,#AF315B
...,...,...,...,...,...
1949,3.346959,16.049433,movies1\the_creator_V3_5.png,79,#F9C52C
1950,3.329957,16.070387,movies1\the_creator_V3_6.png,79,#F9C52C
1951,3.346280,16.054020,movies1\the_creator_V3_7.png,79,#F9C52C
1952,3.327883,16.067261,movies1\the_creator_V3_8.png,79,#F9C52C


In [23]:
source = ColumnDataSource(data=df)
hover = HoverTool(tooltips=[
    ("index", "$index"),
    ("(x,y)", "(@x, @y)"),
    ('path', '@path'),
    ('cluster', '@cluster')
])

plot = figure(width=600, height=600, tools=[hover], title="Movies Poster Clusters with Bokeh")

plot.circle('x', 'y', size=10, source=source, fill_color={"field":"color"})
show(plot)

In [24]:
!pip install ipywidgets matplotlib



In [25]:
from bokeh.plotting import figure, show, ColumnDataSource
from bokeh.models import HoverTool
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN
from ipywidgets import interact, fixed

# Bokeh plot
source = ColumnDataSource(data=df)
hover = HoverTool(tooltips=[
    ("index", "$index"),
    ("(x,y)", "(@x, @y)"),
    ('path', '@path'),
    ('cluster', '@cluster')
])


# DBSCAN Clustering
X = df[['x', 'y']].values  # Use 'x' and 'y' columns from your dataframe

def plot_dbscan(min_distance, n_neighbors):
    # Perform DBSCAN on the data
    db = DBSCAN(eps=min_distance, min_samples=n_neighbors)
    db.fit(X)
    
    # Create a scatter plot
    plt.scatter(X[:, 0], X[:, 1], c=db.labels_)
    plt.title("DBSCAN Clustering Movies Poster")
    plt.show()

# Create interactive plot
interact(plot_dbscan, min_distance=(0.1, 1.0), n_neighbors=(1, 100))

interactive(children=(FloatSlider(value=0.55, description='min_distance', max=1.0, min=0.1), IntSlider(value=5…

<function __main__.plot_dbscan(min_distance, n_neighbors)>