In [None]:
from pathlib import Path
import pandas as pd
import json
from tqdm import tqdm
import numpy as np
import csv

In [None]:

def check_dir(dir_path):
    """
    Ensure that a directory exists, creating it and any missing parents.

    Args:
        dir_path (str | os.PathLike): Path of the directory to check.

    Raises:
        FileExistsError: If the path exists but is not a directory.
        OSError: If the directory cannot be created (e.g. missing permissions).
    """
    # Coerce to Path so plain strings work as documented. ``exist_ok=True``
    # makes the call idempotent and removes the check-then-create race.
    Path(dir_path).mkdir(parents=True, exist_ok=True)

def load_json(json_path):
    """
    Read a UTF-8 encoded JSON file and return its parsed content.

    Args:
        json_path: Path of the JSON file; it is opened read-only.

    Returns:
        The parsed JSON value. Callers in this file treat it as a dictionary.

    """
    with open(json_path, mode='r', encoding='UTF-8') as json_file:
        raw_text = json_file.read()
    return json.loads(raw_text)
    
def save_json(save_path, data):
    """
    Serialize ``data`` as JSON and write it to ``save_path``.

    The file is written as UTF-8, non-ASCII characters are kept as-is and the
    output is indented by four spaces.

    Args:
        save_path: Path of the file to write; an existing file is overwritten.
        data: The JSON-serializable object to store.

    Raises:
        TypeError: If ``data`` contains a value JSON cannot represent. The
            target file is left untouched in that case.
    """
    # Serialize first: opening with 'w' truncates the file, so a failure
    # during serialization must happen before the file is opened.
    json_text = json.dumps(data, ensure_ascii=False, indent=4)
    with open(save_path, 'w', encoding='UTF-8') as file:
        file.write(json_text)



class Make_Extraction_CSV_Index():
    """
    Build one merged image-path -> caption index for a list of projects.

    For every project, ``<base_path>/<project>/caption/_caption_dict.json`` is
    read and the entries of its ``"caption_dict"`` are re-keyed by
    ``<base_path>/<project>/img/<image name>``. The merged mapping is written
    to ``<out_path>/ETHBJ_top100_extraction_projects.json``.
    """

    def __init__(self, len_per_project, project_name_list, base_path, out_path) -> None:
        """
        Args:
            len_per_project: Stored on the instance; no method of this class
                reads it.
            project_name_list: Names of the project folders below ``base_path``.
            base_path (str | os.PathLike): Root folder containing the projects.
            out_path (str | os.PathLike): Folder that receives the index file.
        """
        self.len_per_project = len_per_project
        self.project_name_list = project_name_list
        # Coerce so that plain string paths work as well as Path objects.
        self.base_path = Path(base_path)
        self.out_path = Path(out_path)

    def make_dataset_index(self):
        """Build and save the index (delegates to ``make_img_caption_dict``)."""
        self.make_img_caption_dict()

    def make_img_caption_dict(self):
        """
        Merge the caption dictionaries of all projects and save them as JSON.

        The saved document has three keys: ``"length"`` (holding the number of
        merged entries under ``"extraction_dict"``), ``"project_name_list"``
        and ``"extraction_dict"`` (image path -> caption).

        Raises:
            FileNotFoundError: If a project's caption file does not exist.
            KeyError: If a caption file has no ``"caption_dict"`` entry.
        """
        # Create the output folder up front so a missing folder cannot make
        # the save fail only after every project has already been read.
        self.out_path.mkdir(parents=True, exist_ok=True)

        # Collect the entries of all projects; later projects overwrite
        # earlier ones on identical image paths (dict.update semantics).
        extraction_dict = {}
        for project_name in tqdm(self.project_name_list):
            extraction_dict.update(self._load_project_captions(project_name))

        index_data = {
            "length": {"extraction_dict": len(extraction_dict)},
            "project_name_list": self.project_name_list,
            "extraction_dict": extraction_dict,
        }

        print("\n##########  saving……  ##########\n")
        self.save_json(self.out_path.joinpath("ETHBJ_top100_extraction_projects.json"), index_data)
        print("\n##########  img_caption_dict is saved successfully!  ##########\n")

    def _load_project_captions(self, project_name):
        """Return one project's captions keyed by the POSIX path of each image."""
        caption_path = self.base_path.joinpath(project_name, "caption", "_caption_dict.json")
        # Read strictly (no error swallowing): a missing or broken caption
        # file must stop the run instead of silently producing a partial index.
        with open(caption_path, 'r', encoding='UTF-8') as f:
            caption_dict = json.load(f).get("caption_dict")
        if caption_dict is None:
            raise KeyError(
                "'caption_dict' not found in {} (project: {})".format(caption_path, project_name)
            )

        img_dir = self.base_path.joinpath(project_name, "img")
        return {
            img_dir.joinpath(img_name).as_posix(): caption
            for img_name, caption in caption_dict.items()
        }

    def save_json(self, save_path, data):
        """
        Serialize ``data`` as JSON (UTF-8, 4-space indent) into ``save_path``.

        Args:
            save_path: Path of the file to write; an existing file is overwritten.
            data: The JSON-serializable object to store.

        """
        # Serialize before opening: 'w' truncates the file, so a serialization
        # error must not leave an empty or partial file behind.
        json_text = json.dumps(data, ensure_ascii=False, indent=4)
        with open(save_path, 'w', encoding='UTF-8') as file:
            file.write(json_text)

    def load_json(self, json_path):
        """
        Open a JSON file read-only and return its parsed content.

        This is a best-effort loader: any error is printed, not raised.

        Args:
            json_path: Path of the JSON file.

        Returns:
            The parsed JSON value, or ``None`` if the file could not be read
            or parsed.

        """
        try:
            with open(json_path, 'r', encoding='UTF-8') as f:
                return json.load(f)
        except Exception as e:
            print("Error loading json file: {}".format(json_path))
            print(e)
            return None


In [None]:


if __name__ == "__main__":

    # Projects whose caption dictionaries are merged into the index, in the
    # order in which they are processed.
    project_name_list = [
        "BoredApeYachtClub",
        "CRYPTOPUNKS",
        "MutantApeYachtClub",
        "Azuki",
        "CloneX",
        "Moonbirds",
        "Doodles",
        "BoredApeKennelClub",
        "Cool Cats",
        "Beanz",
        "PudgyPenguins",
        "Cryptoadz",
        "World Of Women",
        "CyberKongz",
        "0N1 Force",
        "MekaVerse",
        "HAPE PRIME",
        "mfers",
        "projectPXN",
        "Karafuru",
        "Invisible Friends",
        "FLUF",
        "Milady",
        "goblintown",
        "Phanta Bear",
        "CyberKongz VX",
        "KaijuKingz",
        "Prime Ape Planet",
        "Lazy Lions",
        "3Landers",
        "The Doge Pound",
        "DeadFellaz",
        "World Of Women Galaxy",
        "ALIENFRENS",
        "VOX Series 1",
        "Hashmasks",
        "Psychedelics Anonymous Genesis",
        "RENGA",
        "CoolmansUniverse",
        "Art Gobblers",
        "SupDucks",
        "Jungle Freaks",
        "Sneaky Vampire Syndicate",
        "SuperNormalbyZipcy",
        "Nakamigos",
        "Impostors Genesis",
        "Potatoz",
        "CryptoSkulls",
        "Moonbirds Oddities",
        "RumbleKongLeague",
        "MURI",
        "Galactic Apes",
        "Lives of Asuna",
        "My Pet Hooligan",
        "Murakami.Flowers",
        "Kiwami",
        "SHIBOSHIS",
        "Sappy Seals",
        "DEGEN TOONZ",
        "Killer GF",
        "CryptoMories",
        "Crypto Bull Society",
        "CryptoBatz by Ozzy Osbourne",
        "Quirkies",
        "Robotos",
        "Tubby Cats",
        "Chain Runners",
        "MutantCats",
        "Boss Beauties",
        "OnChainMonkey",
        "Rektguy",
        "Desperate ApeWives",
        "DigiDaigaku",
        "DeGods",
        "apekidsclub",
        "The Humanoids",
        "Sevens Token",
        "Akutars",
        "HypeBears",
        "KIA",
        "inbetweeners",
        "C-01 Official Collection",
        "Imaginary Ones",
        "ZombieClub Token",
        "Groupies",
        "Valhalla",
        "MOAR by Joan Cornella",
        "the littles NFT",
        "The Heart Project",
        "CryptoDads",
        "Chimpers",
        "Crypto Chicks",
        "WonderPals",
        "LilPudgys",
        "a KID called BEAST",
        "Akuma",
        "Cyber Snails",
        "Variant",
        "OKOKU",
        "Dodoor NFT",
        "Weirdo Ghost Gang",
    ]

    # ---- Machine-specific paths: adjust these two when running elsewhere ----
    # Root folder holding one sub-folder per project.
    source_path = Path("/data/sswang/data/NFT1000/")
    # Folder that receives the merged index JSON.
    target_path = Path("/data/sswang/NFT_search/NFT_Search_ETHBJ_2024/data/")
    # ---- End of machine-specific paths ----

    # Build the merged image -> caption index and write it to target_path.
    NFT1000_maker = Make_Extraction_CSV_Index(
        len_per_project=21,
        project_name_list=project_name_list,
        base_path=source_path,
        out_path=target_path,
    )
    NFT1000_maker.make_dataset_index()


In [None]:

# Input files and the folder that receives the CSV index.
base_path = "/data/sswang/NFT_search/NFT_Search_ETHBJ_2024/data"
extraction_img_caption_dict_path = "/data/sswang/NFT_search/NFT_Search_ETHBJ_2024/data/ETHBJ_top100_extraction_projects.json"
nft_name_address_dict_path = "/data/sswang/NFT_search/NFT_Search_ETHBJ_2024/data/ETHBJ_top100_name_address.json"

# Project name -> sequence whose first element is used as the contract address.
nft_name_address_dict = load_json(nft_name_address_dict_path)
# Image path -> caption mapping.
dict_info = load_json(extraction_img_caption_dict_path).get("extraction_dict")

# One entry per image, all columns kept in the same order as dict_info.
img_path_list = list(dict_info)
img_path_obj_list = list(map(Path, img_path_list))
# The project name is the third path component from the end
# (<project>/<folder>/<file>).
NFT_name_list = [img_path_obj.parts[-3] for img_path_obj in img_path_obj_list]
# The token ID is the integer after the last underscore of the file stem.
token_ID_list = [int(img_path_obj.stem.rsplit("_", 1)[-1]) for img_path_obj in img_path_obj_list]
# Every row gets chain ID 1.
chain_ID_list = [1] * len(img_path_list)

contract_address = [nft_name_address_dict.get(project)[0] for project in NFT_name_list]

# Lazily pair up the columns into CSV rows.
data_frame = zip(img_path_list, dict_info.values(), NFT_name_list,  token_ID_list, chain_ID_list, contract_address)
csv_path = Path(base_path) / "ETHBJ_top100_extraction_projects_index.csv"
with open(csv_path, "w", encoding="utf-8", newline='') as csv_file:
    index_writer = csv.writer(csv_file)
    index_writer.writerow(["filepath", "caption", "NFT_name", "token_ID", "chain_ID", "contract_address"])
    index_writer.writerows(data_frame)

# Split the master index table into one sub-table per project


In [None]:
top100_extraction_projects_index_path = Path("/data/sswang/NFT_search/NFT_Search_ETHBJ_2024/data/ETHBJ_top100_extraction_projects_index.csv")
# Read the master index CSV with pandas.
top100_index = pd.read_csv(top100_extraction_projects_index_path)

# Projects for which a per-project index file is written.
candidate_NFT_list = [
    "BoredApeYachtClub",
    "CRYPTOPUNKS",
    "MutantApeYachtClub",
    "Azuki",
    "CloneX",
    "Moonbirds",
    "Doodles",
    "BoredApeKennelClub",
    "Cool Cats",
    "Beanz",
    "PudgyPenguins",
    "Cryptoadz",
    "World Of Women",
    "CyberKongz",
    "0N1 Force",
    "MekaVerse",
    "HAPE PRIME",
    "mfers",
    "projectPXN",
    "Karafuru",
    "Invisible Friends",
    "FLUF",
    "Milady",
    "goblintown",
    "Phanta Bear",
    "CyberKongz VX",
    "KaijuKingz",
    "Prime Ape Planet",
    "Lazy Lions",
    "3Landers",
    "The Doge Pound",
    "DeadFellaz",
    "World Of Women Galaxy",
    "ALIENFRENS",
    "VOX Series 1",
    "Hashmasks",
    "Psychedelics Anonymous Genesis",
    "RENGA",
    "CoolmansUniverse",
    "Art Gobblers",
    "SupDucks",
    "Jungle Freaks",
    "Sneaky Vampire Syndicate",
    "SuperNormalbyZipcy",
    "Nakamigos",
    "Impostors Genesis",
    "Potatoz",
    "CryptoSkulls",
    "Moonbirds Oddities",
    "RumbleKongLeague",
    "MURI",
    "Galactic Apes",
    "Lives of Asuna",
    "My Pet Hooligan",
    "Murakami.Flowers",
    "Kiwami",
    "SHIBOSHIS",
    "Sappy Seals",
    "DEGEN TOONZ",
    "Killer GF",
    "CryptoMories",
    "Crypto Bull Society",
    "CryptoBatz by Ozzy Osbourne",
    "Quirkies",
    "Robotos",
    "Tubby Cats",
    "Chain Runners",
    "MutantCats",
    "Boss Beauties",
    "OnChainMonkey",
    "Rektguy",
    "Desperate ApeWives",
    "DigiDaigaku",
    "DeGods",
    "apekidsclub",
    "The Humanoids",
    "Sevens Token",
    "Akutars",
    "HypeBears",
    "KIA",
    "inbetweeners",
    "C-01 Official Collection",
    "Imaginary Ones",
    "ZombieClub Token",
    "Groupies",
    "Valhalla",
    "MOAR by Joan Cornella",
    "the littles NFT",
    "The Heart Project",
    "CryptoDads",
    "Chimpers",
    "Crypto Chicks",
    "WonderPals",
    "LilPudgys",
    "a KID called BEAST",
    "Akuma",
    "Cyber Snails",
    "Variant",
    "OKOKU",
    "Dodoor NFT",
    "Weirdo Ghost Gang",
]

# Root folder holding one sub-folder per project for the per-project indexes.
features_root = Path("/data/sswang/data/NFT1000_features_ETHBJ/")

for NFT_name in candidate_NFT_list:
    print("processing: ", NFT_name)

    # Keep only the rows whose NFT_name column equals this project. Boolean
    # indexing keeps the column labels, so they need not be reassigned.
    NFT_name_index = top100_index[top100_index['NFT_name'] == NFT_name]

    # DataFrame.to_csv does not create missing folders, so make sure the
    # project folder exists before writing into it.
    project_dir = features_root.joinpath(NFT_name)
    project_dir.mkdir(parents=True, exist_ok=True)

    # Save the per-project table as <project>/<project>_index.csv.
    csv_path = project_dir.joinpath(NFT_name + "_index.csv").as_posix()
    NFT_name_index.to_csv(csv_path, index=False)
    print("已完成：", NFT_name)
    