In [None]:
import os
import shutil
import numpy as np
import cv2
from sklearn.cluster import KMeans

# ✅ Step 1: Input & Output Paths
input_folder = "/kaggle/input/violence/Violence"
output_base = "/kaggle/working/Dataset_K"

# ✅ Step 2: Load and flatten images with OpenCV
# Each readable image becomes one row of `image_data`; `image_paths[i]` is the
# source file of row i, so cluster labels can later be mapped back to files.
image_data = []
image_paths = []
skipped_files = []  # files OpenCV could not decode or resize

# Sort the listing: os.listdir() returns entries in arbitrary order, and the row
# order influences KMeans' seeded initialisation, so a stable order keeps
# repeated runs reproducible.
for file in sorted(os.listdir(input_folder)):
    if not file.lower().endswith(('.jpg', '.jpeg', '.png')):
        continue

    path = os.path.join(input_folder, file)
    try:
        img = cv2.imread(path)
        # cv2.imread reports an unreadable file by returning None, not by raising.
        resized = cv2.resize(img, (224, 224)) if img is not None else None
    except cv2.error:
        # Only OpenCV failures are tolerated; anything else (for example
        # KeyboardInterrupt) is allowed to surface instead of being swallowed.
        resized = None

    if resized is None:
        skipped_files.append(path)
        continue

    normalized = resized / 255.0  # Scale pixel values to [0, 1]
    image_data.append(normalized.flatten())  # One flat feature vector per image
    image_paths.append(path)

if skipped_files:
    print(f"Skipped {len(skipped_files)} unreadable image(s).")

# Fail here with a clear message rather than later inside KMeans.
if not image_paths:
    raise ValueError(f"No readable images found in {input_folder}")

image_data = np.array(image_data)

# ✅ Step 3: KMeans Clustering and Folder Creation
# For every K in 2..10, cluster the flattened images and write the result as
# <output_base><K>/Cluster_<id>/ folders containing copies of the source images.
for k in range(2, 11):
    kmeans = KMeans(n_clusters=k, random_state=42)
    labels = kmeans.fit_predict(image_data)

    k_dir = f"{output_base}{k}"
    # Start from an empty directory: files left over from an earlier run would
    # otherwise remain, and one image could end up in several cluster folders.
    shutil.rmtree(k_dir, ignore_errors=True)
    os.makedirs(k_dir, exist_ok=True)

    # Create every cluster folder up front so each of the K folders exists
    # before any image is copied.
    for cluster_id in range(k):
        os.makedirs(os.path.join(k_dir, f"Cluster_{cluster_id}"), exist_ok=True)

    # labels[i] is the cluster assigned to image_paths[i].
    for src, label in zip(image_paths, labels):
        dst = os.path.join(k_dir, f"Cluster_{label}", os.path.basename(src))
        shutil.copy(src, dst)

print("✅ Clustering complete. Check /kaggle/working for output folders.")


In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.applications import DenseNet201
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing import image_dataset_from_directory
from sklearn.metrics import accuracy_score

# ✅ Configuration
base_path = "/kaggle/working"
img_size = (224, 224)
batch_size = 32
epochs = 3
# Keep only the K values whose clustered dataset folder exists. Skipping a K
# inside the loop would leave `accuracies` shorter than `k_values` and break
# the bar chart that plots one against the other.
k_values = [
    k for k in range(2, 11)
    if os.path.isdir(os.path.join(base_path, f"Dataset_K{k}"))
]
accuracies = []


def _densenet_inputs(images, labels):
    """Apply DenseNet's ImageNet preprocessing to a batch; labels pass through unchanged."""
    return tf.keras.applications.densenet.preprocess_input(images), labels


# ✅ Step-by-step over the available K values
for k in k_values:
    dataset_path = os.path.join(base_path, f"Dataset_K{k}")

    # Release the previous iteration's model state so the DenseNet201 copies
    # built in this loop do not accumulate in memory.
    tf.keras.backend.clear_session()

    # Load dataset: 80/20 train/validation split, integer labels taken from the
    # Cluster_<id> sub-folder names.
    train_ds, val_ds = image_dataset_from_directory(
        dataset_path,
        image_size=img_size,
        batch_size=batch_size,
        label_mode='int',
        shuffle=True,
        seed=42,
        validation_split=0.2,
        subset='both'
    )

    # The loader yields raw 0-255 pixels; the ImageNet weights expect inputs
    # passed through DenseNet's own preprocess_input.
    train_ds = train_ds.map(_densenet_inputs)
    val_ds = val_ds.map(_densenet_inputs)

    # ✅ Build DenseNet201 as a frozen feature extractor with a small trainable head
    base_model = DenseNet201(weights='imagenet', include_top=False, input_shape=img_size + (3,))
    base_model.trainable = False

    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(512, activation='relu')(x)
    x = Dropout(0.3)(x)
    output = Dense(k, activation='softmax')(x)  # K classes output

    model = Model(inputs=base_model.input, outputs=output)
    model.compile(optimizer=Adam(1e-4), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    # Train model
    model.fit(train_ds, validation_data=val_ds, epochs=epochs, verbose=0)

    # Evaluate accuracy on the validation split in one pass; evaluate() returns
    # [loss, accuracy] because 'accuracy' is the only compiled metric.
    _, acc = model.evaluate(val_ds, verbose=0)
    acc = float(acc)
    accuracies.append(acc)
    print(f"K={k}: Accuracy={acc:.4f}")

# ✅ Bar Chart
# One bar per K showing the validation accuracy recorded in `accuracies`.
fig, ax = plt.subplots(figsize=(10, 6))
bars = ax.bar(k_values, accuracies, color='skyblue', edgecolor='black')

ax.set_title("DenseNet201 Accuracy on K-Means Clustered Datasets (K=2 to 10)")
ax.set_xlabel("Number of Clusters (K)")
ax.set_ylabel("Accuracy")

# Fixed 0-1 accuracy axis with a tick every 0.1, and one x tick per K.
ax.set_ylim(0.0, 1.0)
ax.set_yticks(np.arange(0.0, 1.01, 0.10))
ax.set_xticks(k_values)
ax.grid(axis='y', linestyle='--', alpha=0.7)

# Annotate bars: write each accuracy just above the top edge of its bar.
for rect, score in zip(bars, accuracies):
    label_x = rect.get_x() + rect.get_width() / 2.0
    label_y = rect.get_height() + 0.01
    ax.text(label_x, label_y, f"{score:.4f}", ha='center', va='bottom', fontsize=10)

fig.tight_layout()
plt.show()


In [None]:
# Save the accuracy bar chart to disk.
# By the time this cell runs, plt.show() in the plotting cell has already
# rendered the chart, and notebook inline backends close the figure afterwards.
# A plain plt.savefig() here would therefore create a new empty figure and
# write a blank PNG. Recover the figure that owns the bar artists instead.
if len(bars) > 0:
    chart_figure = bars[0].figure
    chart_figure.savefig("/kaggle/working/densenet_kmeans_accuracy.png", dpi=300)
else:
    print("No bars were drawn; skipping save of densenet_kmeans_accuracy.png.")
