In [1]:
# STEP 1: Install torch dan torchvision versi yang kompatibel (CUDA 11.8)
!pip install torch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2 --index-url https://download.pytorch.org/whl/cu118

# STEP 2: Install PyTorch Geometric dan dependensinya (harus urut)
!pip install torch-scatter -f https://data.pyg.org/whl/torch-2.0.0+cu118.html
!pip install torch-sparse -f https://data.pyg.org/whl/torch-2.0.0+cu118.html
!pip install torch-geometric

# STEP 3: Install library tambahan yang dibutuhkan
!pip install timm pillow matplotlib h5py pandas networkx
!pip install numpy==1.26.4


Looking in indexes: https://download.pytorch.org/whl/cu118
Collecting torch==2.0.1
  Downloading https://download.pytorch.org/whl/cu118/torch-2.0.1%2Bcu118-cp311-cp311-linux_x86_64.whl (2267.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 GB[0m [31m407.6 kB/s[0m eta [36m0:00:00[0m
[?25hCollecting torchvision==0.15.2
  Downloading https://download.pytorch.org/whl/cu118/torchvision-0.15.2%2Bcu118-cp311-cp311-linux_x86_64.whl (6.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.1/6.1 MB[0m [31m70.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torchaudio==2.0.2
  Downloading https://download.pytorch.org/whl/cu118/torchaudio-2.0.2%2Bcu118-cp311-cp311-linux_x86_64.whl (4.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.4/4.4 MB[0m [31m72.4 MB/s[0m eta [36m0:00:00[0m
Collecting triton==2.0.0 (from torch==2.0.1)
  Downloading https://download.pytorch.org/whl/triton-2.0.0-1-cp311-cp311-manylinux2014_x86_64

In [5]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [1]:
import torch
import timm
import torchvision.transforms as transforms
from PIL import Image, ImageOps
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
import os
import h5py
import pandas as pd
import gc
from torch_geometric.data import Data, DataLoader

Disabling PyTorch because PyTorch >= 2.1 is required but found 2.0.1+cu118


In [None]:
def load_image(image_path, target_size=(288, 288)):
    image = Image.open(image_path).convert("RGB")
    original_size = image.size

    # Resize sambil mempertahankan rasio aspek
    image.thumbnail(target_size, Image.LANCZOS)

    # Hitung padding yang diperlukan
    delta_w = target_size[0] - image.size[0]
    delta_h = target_size[1] - image.size[1]

    # Hitung warna rata-rata dari gambar
    mean_color = tuple(np.array(image).reshape(-1, 3).mean(axis=0).astype(int))

    # Tambahkan padding dengan warna rata-rata
    padding = (delta_w // 2, delta_h // 2, delta_w - delta_w // 2, delta_h - delta_h // 2)
    image = ImageOps.expand(image, padding, fill=mean_color)

    return image, original_size

In [None]:
def create_feature_graph(W, H):
    import networkx as nx
    G = nx.Graph()
    for i in range(W * H):
        G.add_node(i, feature=None)  # Placeholder, karena kita isi fitur nanti manual

    for x in range(W):
        for y in range(H):
            node_idx = x * H + y
            neighbors = [(x+dx, y+dy) for dx, dy in [(-1,0), (1,0), (0,-1), (0,1)]]
            for nx_, ny_ in neighbors:
                if 0 <= nx_ < W and 0 <= ny_ < H:
                    neighbor_idx = nx_ * H + ny_
                    G.add_edge(node_idx, neighbor_idx)
    return G


In [None]:
from scipy.stats import skew  # untuk color moment ke-3

def compute_color_moments(patch):
    moments = []
    for c in range(3):  # R, G, B
        channel = patch[:, :, c].flatten()
        moments.append(np.mean(channel))
        moments.append(np.std(channel))
        moments.append(skew(channel))
    return moments  # total 9 dimensi (3 momen × 3 channel)

In [None]:
def extract_color_moments(pil_image, grid_size=(3, 3)):
    img_np = np.array(pil_image)  # Asumsi sudah resize + padding
    patch_h = img_np.shape[0] // grid_size[1]
    patch_w = img_np.shape[1] // grid_size[0]

    color_moments = []
    for i in range(grid_size[1]):
        for j in range(grid_size[0]):
            patch = img_np[i*patch_h:(i+1)*patch_h, j*patch_w:(j+1)*patch_w, :]
            cm = compute_color_moments(patch)
            color_moments.append(cm)
    return np.array(color_moments)  # shape: [num_patches, 6]

In [None]:
def save_graphs_with_color_moments(dataset_path,
                                   csv_filename="graph_datase_color_only.csv",
                                   log_filename="log_gagal_color_only.txt"):

    csv_path = os.path.join('/content/drive/MyDrive/Hasil_Ekstraksi', csv_filename)
    log_path = os.path.join('/content/drive/MyDrive/Hasil_Ekstraksi', log_filename)

    transform_tensor = transforms.Compose([transforms.ToTensor()])
    total_ok = 0
    total_fail = 0

    if os.path.exists(csv_path):
        os.remove(csv_path)
    if os.path.exists(log_path):
        os.remove(log_path)

    with open(log_path, 'w') as log_file:
        log_file.write("Log Gambar Gagal Diproses:\n")

        with torch.no_grad():
            for label, category in enumerate(["tidak_estetik", "estetik"]):
                category_path = os.path.join(dataset_path, category)
                for filename in sorted(os.listdir(category_path)):
                    if not filename.lower().endswith(('.jpg', '.jpeg', '.png')):
                        continue

                    try:
                        image_path = os.path.join(category_path, filename)
                        pil_image, original_size = load_image(image_path)
                        image_tensor = transform_tensor(pil_image).unsqueeze(0)

                        # Gunakan ukuran grid tetap (misal 3x3)
                        W, H = 3, 3  # Atau sesuai kebutuhanmu

                        color_moment_features = extract_color_moments(pil_image, grid_size=(W, H))

                        # Fitur posisi + rasio aspek
                        positions = []
                        rasio_aspek = original_size[0] / original_size[1]
                        for idx in range(H * W):
                            y = idx // W
                            x = idx % W
                            x_norm = x / (W - 1) if W > 1 else 0.0
                            y_norm = y / (H - 1) if H > 1 else 0.0
                            positions.append([x_norm, y_norm, rasio_aspek])

                        positions = np.array(positions)
                        combined = np.concatenate([color_moment_features, positions], axis=1)

                        # Dummy edge untuk grid (grid 3x3 = 9 node)
                        feature_graph = create_feature_graph(W, H)  # kamu harus punya fungsi ini
                        edge_index = torch.tensor(list(feature_graph.edges)).t().contiguous()

                        graph_data = Data(
                            x=torch.tensor(combined, dtype=torch.float),
                            edge_index=edge_index,
                            y=torch.tensor([label], dtype=torch.long)
                        )

                        df_temp = pd.DataFrame([[filename, category, W, H, original_size[0], original_size[1],
                                                 graph_data.x.tolist(),
                                                 graph_data.edge_index.numpy().tolist()]],
                                               columns=['Filename', 'Category', 'W', 'H',
                                                        'Original_Width', 'Original_Height',
                                                        'Node_Features', 'Edge_Index'])

                        mode = 'w' if total_ok == 0 else 'a'
                        df_temp.to_csv(csv_path, mode=mode, header=(total_ok == 0), index=False)
                        total_ok += 1

                        print(f"✔️ {filename} | Fitur: {combined.shape[1]}")

                        del graph_data, color_moment_features, combined
                        gc.collect()
                        torch.cuda.empty_cache()

                    except Exception as e:
                        print(f"⚠️ Error processing {filename}: {e}")
                        log_file.write(f"{filename} ({category}) - {e}\n")
                        total_fail += 1

    print(f"\n📁 Dataset CSV baru disimpan di: {csv_path}")
    print(f"✅ Total berhasil: {total_ok}")
    print(f"❌ Total gagal: {total_fail}")
    print(f"📝 Log error: {log_path}")


In [None]:
def save_graphs_with_color_moments(dataset_path,
                                   csv_filename="graph_dataset_color_only.csv",
                                   log_filename="log_gagal_color_only.txt"):

    csv_path = os.path.join('/content/drive/MyDrive/Hasil_Ekstraksi', csv_filename)
    log_path = os.path.join('/content/drive/MyDrive/Hasil_Ekstraksi', log_filename)

    transform_tensor = transforms.Compose([transforms.ToTensor()])
    total_ok = 0
    total_fail = 0

    if os.path.exists(csv_path):
        os.remove(csv_path)
    if os.path.exists(log_path):
        os.remove(log_path)

    with open(log_path, 'w') as log_file:
        log_file.write("Log Gambar Gagal Diproses:\n")

        with torch.no_grad():
            for label, category in enumerate(["tidak_estetik", "estetik"]):
                category_path = os.path.join(dataset_path, category)
                for filename in sorted(os.listdir(category_path)):
                    if not filename.lower().endswith(('.jpg', '.jpeg', '.png')):
                        continue

                    try:
                        image_path = os.path.join(category_path, filename)
                        pil_image, original_size = load_image(image_path)
                        image_tensor = transform_tensor(pil_image).unsqueeze(0)

                        # Ukuran grid 3x3
                        W, H = 3, 3

                        # Ambil hanya fitur color moment
                        color_moment_features = extract_color_moments(pil_image, grid_size=(W, H))

                        # Buat graph grid 3x3
                        feature_graph = create_feature_graph(W, H)  # fungsi ini harus sudah ada
                        edge_index = torch.tensor(list(feature_graph.edges)).t().contiguous()

                        # Siapkan data graph
                        graph_data = Data(
                            x=torch.tensor(color_moment_features, dtype=torch.float),
                            edge_index=edge_index,
                            y=torch.tensor([label], dtype=torch.long)
                        )

                        # Simpan ke CSV
                        df_temp = pd.DataFrame([[filename, category, W, H, original_size[0], original_size[1],
                                                 graph_data.x.tolist(),
                                                 graph_data.edge_index.numpy().tolist()]],
                                               columns=['Filename', 'Category', 'W', 'H',
                                                        'Original_Width', 'Original_Height',
                                                        'Node_Features', 'Edge_Index'])

                        mode = 'w' if total_ok == 0 else 'a'
                        df_temp.to_csv(csv_path, mode=mode, header=(total_ok == 0), index=False)
                        total_ok += 1

                        print(f"✔️ {filename} | Node features shape: {color_moment_features.shape}")

                        del graph_data, color_moment_features
                        gc.collect()
                        torch.cuda.empty_cache()

                    except Exception as e:
                        print(f"⚠️ Error processing {filename}: {e}")
                        log_file.write(f"{filename} ({category}) - {e}\n")
                        total_fail += 1

    print(f"\n📁 Dataset CSV disimpan di: {csv_path}")
    print(f"✅ Total berhasil: {total_ok}")
    print(f"❌ Total gagal: {total_fail}")
    print(f"📝 Log error: {log_path}")


In [None]:
def save_graphs_with_color_moments(dataset_path,
                                   csv_filename="graph_dataset_color_only.csv",
                                   log_filename="log_gagal_color_only.txt"):

    csv_path = os.path.join('/content/drive/MyDrive/Hasil_Ekstraksi', csv_filename)
    log_path = os.path.join('/content/drive/MyDrive/Hasil_Ekstraksi', log_filename)

    transform_tensor = transforms.Compose([transforms.ToTensor()])

    # Muat data yang sudah berhasil (jika ada)
    processed_filenames = set()
    if os.path.exists(csv_path):
        try:
            existing_df = pd.read_csv(csv_path)

            def is_valid_feature(val):
                try:
                    parsed = ast.literal_eval(val)
                    return isinstance(parsed, list) and len(parsed) > 0
                except:
                    return False

            existing_df['Valid'] = existing_df['Node_Features'].apply(is_valid_feature)
            processed_filenames = set(existing_df[existing_df['Valid']]['Filename'].tolist())

            failed_filenames = set(existing_df[~existing_df['Valid']]['Filename'].tolist())
            if failed_filenames:
                print(f"🔁 Akan mencoba ulang file gagal: {sorted(failed_filenames)}")

        except Exception as e:
            print(f"⚠️ Gagal membaca CSV lama: {e}. Mulai dari awal.")

    # Siapkan log error
    if os.path.exists(log_path):
        os.remove(log_path)
    with open(log_path, 'w') as log_file:
        log_file.write("Log Gambar Gagal Diproses:\n")

        total_ok, total_fail = 0, 0

        with torch.no_grad():
            for label, category in enumerate(["tidak_estetik", "estetik"]):
                category_path = os.path.join(dataset_path, category)
                for filename in sorted(os.listdir(category_path)):
                    if not filename.lower().endswith(('.jpg', '.jpeg', '.png')):
                        continue

                    if filename in processed_filenames:
                        continue  # Lewati yang sudah benar-benar berhasil

                    try:
                        image_path = os.path.join(category_path, filename)
                        pil_image, original_size = load_image(image_path)
                        image_tensor = transform_tensor(pil_image).unsqueeze(0)

                        W, H = 3, 3  # grid 3x3
                        color_moment_features = extract_color_moments(pil_image, grid_size=(W, H))

                        feature_graph = create_feature_graph(W, H)
                        edge_index = torch.tensor(list(feature_graph.edges)).t().contiguous()

                        graph_data = Data(
                            x=torch.tensor(color_moment_features, dtype=torch.float),
                            edge_index=edge_index,
                            y=torch.tensor([label], dtype=torch.long)
                        )

                        # Tambah ke CSV
                        df_temp = pd.DataFrame([[filename, category, W, H, original_size[0], original_size[1],
                                                 graph_data.x.tolist(),
                                                 graph_data.edge_index.numpy().tolist()]],
                                               columns=['Filename', 'Category', 'W', 'H',
                                                        'Original_Width', 'Original_Height',
                                                        'Node_Features', 'Edge_Index'])

                        mode = 'a' if os.path.exists(csv_path) else 'w'
                        df_temp.to_csv(csv_path, mode=mode, header=not os.path.exists(csv_path), index=False)

                        print(f"✔️ {filename} | Node features shape: {color_moment_features.shape}")
                        total_ok += 1

                        del graph_data, color_moment_features
                        gc.collect()
                        torch.cuda.empty_cache()

                    except Exception as e:
                        print(f"⚠️ Error processing {filename}: {e}")
                        log_file.write(f"{filename} ({category}) - {e}\n")
                        total_fail += 1

        print(f"\n📁 Dataset CSV disimpan di: {csv_path}")
        print(f"✅ Total berhasil: {total_ok}")
        print(f"❌ Total gagal: {total_fail}")
        print(f"📝 Log error: {log_path}")

In [None]:
# Jalankan pipeline
dataset_path = '/content/drive/MyDrive/images_dataset_fix'
save_graphs_with_color_moments(dataset_path)

🔁 Akan mencoba ulang file gagal: ['1524.jpg', '18650.jpg', '22236.jpg', '2663.jpg', '3660.jpg', '4873.jpg', '5406.jpg', '5847.jpg', '6737.jpg', '9981.jpg']


  moments.append(skew(channel))


✔️ 18650.jpg | Node features shape: (9, 9)
✔️ 22236.jpg | Node features shape: (9, 9)
✔️ 1524.jpg | Node features shape: (9, 9)
✔️ 2663.jpg | Node features shape: (9, 9)
✔️ 3660.jpg | Node features shape: (9, 9)
✔️ 4873.jpg | Node features shape: (9, 9)
✔️ 5406.jpg | Node features shape: (9, 9)
✔️ 5847.jpg | Node features shape: (9, 9)
✔️ 6737.jpg | Node features shape: (9, 9)
✔️ 9981.jpg | Node features shape: (9, 9)

📁 Dataset CSV disimpan di: /content/drive/MyDrive/Hasil_Ekstraksi/graph_dataset_color_only.csv
✅ Total berhasil: 10
❌ Total gagal: 0
📝 Log error: /content/drive/MyDrive/Hasil_Ekstraksi/log_gagal_color_only.txt


In [None]:
import pandas as pd
import ast

# Path ke CSV hasil ekstraksi
csv_path = "/content/drive/MyDrive/Hasil_Ekstraksi/graph_dataset_color_only.csv"

# Baca CSV
df = pd.read_csv(csv_path)

# Fungsi untuk mengecek apakah Node_Features valid (list dan tidak kosong)
def is_valid_node_feature(val):
    try:
        parsed = ast.literal_eval(val)
        return isinstance(parsed, list) and len(parsed) > 0
    except:
        return False

# Tambahkan kolom 'Valid'
df['Valid'] = df['Node_Features'].apply(is_valid_node_feature)

# Hitung total valid dan tidak valid
total_valid = df['Valid'].sum()
total_invalid = len(df) - total_valid

print(f"✅ Jumlah data dengan Node_Features valid: {total_valid}")
print(f"❌ Jumlah data gagal/tidak valid: {total_invalid}")
print(f"📊 Total baris di CSV: {len(df)}")


✅ Jumlah data dengan Node_Features valid: 23990
❌ Jumlah data gagal/tidak valid: 30
📊 Total baris di CSV: 24020


In [None]:
def reprocess_failed_graphs_from_list(dataset_path,
                                      failed_filenames,
                                      csv_filename="graph_datase_color_tata_letak.csv"):
    import pandas as pd
    import numpy as np
    import torch
    import os
    import gc
    from torch_geometric.data import Data
    from torchvision import transforms

    csv_path = os.path.join('/content/drive/MyDrive/Hasil_Ekstraksi', csv_filename)

    print(f"📛 Akan memproses ulang {len(failed_filenames)} file yang gagal sebelumnya.")

    # --- 1. Hapus entri lama dari CSV jika sudah ada ---
    if os.path.exists(csv_path):
        df = pd.read_csv(csv_path)
        df = df[~df["Filename"].isin(failed_filenames)]
        df.to_csv(csv_path, index=False)
        print("🧹 Entri lama yang gagal telah dibersihkan dari CSV.")

    # --- 2. Siapkan transformasi dan proses ulang hanya gambar yang gagal ---
    transform_tensor = transforms.Compose([transforms.ToTensor()])
    total_ok = 0
    total_fail = 0

    with torch.no_grad():
        for label, category in enumerate(["tidak_estetik", "estetik"]):
            category_path = os.path.join(dataset_path, category)
            for filename in sorted(os.listdir(category_path)):
                if filename not in failed_filenames:
                    continue
                if not filename.lower().endswith(('.jpg', '.jpeg', '.png')):
                    continue

                try:
                    image_path = os.path.join(category_path, filename)
                    pil_image, original_size = load_image(image_path)
                    image_tensor = transform_tensor(pil_image).unsqueeze(0)

                    W, H = 3, 3
                    color_moment_features = extract_color_moments(pil_image, grid_size=(W, H))
                    color_moment_features = np.nan_to_num(color_moment_features, nan=0.0, posinf=1e6, neginf=-1e6)

                    # Posisi + rasio aspek
                    positions = []
                    rasio_aspek = original_size[0] / original_size[1]
                    for idx in range(H * W):
                        y = idx // W
                        x = idx % W
                        x_norm = x / (W - 1) if W > 1 else 0.0
                        y_norm = y / (H - 1) if H > 1 else 0.0
                        positions.append([x_norm, y_norm, rasio_aspek])
                    positions = np.array(positions)

                    combined = np.concatenate([color_moment_features, positions], axis=1)

                    # Graph dan Data
                    feature_graph = create_feature_graph(W, H)
                    edge_index = torch.tensor(list(feature_graph.edges)).t().contiguous()
                    graph_data = Data(
                        x=torch.tensor(combined, dtype=torch.float),
                        edge_index=edge_index,
                        y=torch.tensor([label], dtype=torch.long)
                    )

                    df_temp = pd.DataFrame([[filename, category, W, H, original_size[0], original_size[1],
                                             graph_data.x.tolist(),
                                             graph_data.edge_index.numpy().tolist()]],
                                           columns=['Filename', 'Category', 'W', 'H',
                                                    'Original_Width', 'Original_Height',
                                                    'Node_Features', 'Edge_Index'])

                    df_temp.to_csv(csv_path, mode='a', header=not os.path.exists(csv_path), index=False)
                    print(f"✔️ {filename} diproses ulang dan ditambahkan ke CSV.")
                    total_ok += 1

                    del graph_data, combined
                    gc.collect()
                    torch.cuda.empty_cache()

                except Exception as e:
                    print(f"⚠️ Gagal memproses ulang {filename}: {e}")
                    total_fail += 1

    print(f"\n📦 Reprocess selesai. Sukses: {total_ok}, Gagal lagi: {total_fail}")


In [None]:
import pandas as pd
import ast

# Path ke file CSV kamu
csv_path = "/content/drive/MyDrive/Hasil_Ekstraksi/color_only/csv_color_only.csv"

# Daftar gambar target
target_ids = ['1', '41', '49', '58', '6', '80', '111', '136', '747', '201']
target_filenames = [f"{i}.jpg" for i in target_ids]

# Baca CSV
df = pd.read_csv(csv_path)

# Filter berdasarkan nama file
filtered_df = df[df['Filename'].isin(target_filenames)].copy()

# Fungsi untuk mencetak Node 0–2 dengan format sesuai permintaan
def print_first_3_nodes(row):
    try:
        node_features = ast.literal_eval(row['Node_Features'])
        print(f"\n📁 {row['Filename']}")
        for i in range(min(3, len(node_features))):
            print(f"Node {i} : {node_features[i]}")
    except Exception as e:
        print(f"❌ Error di {row['Filename']}: {e}")

# Jalankan fungsi untuk setiap baris
filtered_df.apply(print_first_3_nodes, axis=1)



📁 1.jpg
Node 0 : [186.61512756347656, 14.533888816833496, -3.4412951469421387, 167.55284118652344, 16.898588180541992, -0.5582130551338196, 159.79339599609375, 22.448741912841797, -0.6580144762992859]
Node 1 : [193.02734375, 7.811577320098877, 0.11124493926763535, 166.0377655029297, 37.342525482177734, -2.6271417140960693, 162.84060668945312, 35.506622314453125, -2.0490477085113525]
Node 2 : [194.60665893554688, 6.501161575317383, 0.779297947883606, 175.8079376220703, 16.364727020263672, 0.6768613457679749, 170.77615356445312, 17.75863265991211, 0.6945494413375854]

📁 111.jpg
Node 0 : [190.6194610595703, 42.467041015625, -0.6714568734169006, 155.77288818359375, 46.5402717590332, -0.3380005955696106, 80.43966674804688, 46.69710922241211, 0.5541598796844482]
Node 1 : [196.56793212890625, 26.705110549926758, -0.5855464935302734, 159.73968505859375, 30.452590942382812, -0.15674076974391937, 81.47797393798828, 31.234582901000977, 0.4375627636909485]
Node 2 : [159.41796875, 37.7646484375, -

Unnamed: 0,0
10910,
12134,
14400,
15122,
17441,
18328,
19327,
19548,
21181,
21770,


In [None]:
import pandas as pd
import ast
import os

# Path input dan output
csv_input_path = "/content/drive/MyDrive/Hasil_Ekstraksi/graph_datase_color_tata_letak.csv"
csv_output_dir = "/content/drive/MyDrive/Hasil_Ekstraksi/color_only"
csv_output_path = os.path.join(csv_output_dir, "csv_color_only.csv")

# Pastikan folder tujuan ada
os.makedirs(csv_output_dir, exist_ok=True)

# Baca CSV
df = pd.read_csv(csv_input_path)

# Fungsi untuk membersihkan Node_Features
def remove_last_3_per_node(s):
    try:
        features = ast.literal_eval(s)
        cleaned = [node[:-3] for node in features]  # Hapus 3 nilai terakhir dari setiap node
        return cleaned
    except Exception as e:
        print(f"❌ Error parsing Node_Features: {e}")
        return []

# Terapkan fungsi ke kolom Node_Features
df["Node_Features"] = df["Node_Features"].apply(remove_last_3_per_node)

# Simpan ke CSV baru
df.to_csv(csv_output_path, index=False)
print(f"✅ File disimpan di: {csv_output_path}")


✅ File disimpan di: /content/drive/MyDrive/Hasil_Ekstraksi/color_only/csv_color_only.csv


In [2]:
import pandas as pd
import ast
import os

# Path input dan output
csv_input_path = "/content/drive/MyDrive/Hasil_Ekstraksi/graph_datase_color_tata_letak.csv"
csv_output_dir = "/content/drive/MyDrive/Hasil_Ekstraksi/tataletak"
csv_output_path = os.path.join(csv_output_dir, "tata_letak_only.csv")

# Pastikan folder tujuan ada
os.makedirs(csv_output_dir, exist_ok=True)

# Baca CSV
df = pd.read_csv(csv_input_path)

# Fungsi untuk menyimpan 3 nilai terakhir dari setiap node
def keep_last_3_per_node(s):
    try:
        features = ast.literal_eval(s)
        cleaned = [node[-3:] for node in features]  # Ambil 3 nilai terakhir dari setiap node
        return cleaned
    except Exception as e:
        print(f"❌ Error parsing Node_Features: {e}")
        return []

# Terapkan fungsi ke kolom Node_Features
df["Node_Features"] = df["Node_Features"].apply(keep_last_3_per_node)

# Simpan ke CSV baru
df.to_csv(csv_output_path, index=False)
print(f"✅ File disimpan di: {csv_output_path}")


✅ File disimpan di: /content/drive/MyDrive/Hasil_Ekstraksi/tataletak/tata_letak_only.csv
