In [1]:
import pickle
import pandas as pd
from pprint import pprint

class CompatUnpickler(pickle.Unpickler):
    """Unpickler that resolves ``pandas.core.indexes.numeric`` globals to ``pd.Index``.

    Every global requested from the module ``pandas.core.indexes.numeric`` is
    answered with ``pd.Index``, regardless of the requested name. Lookups in
    any other module are delegated to ``pickle.Unpickler.find_class``.
    """

    # Module whose classes are treated as missing and replaced by pd.Index.
    _REDIRECTED_MODULE = "pandas.core.indexes.numeric"

    def find_class(self, module, name):
        """Return the object the pickle stream refers to as ``module.name``.

        Args:
            module: Dotted module path recorded in the pickle stream.
            name: Attribute name recorded in the pickle stream.

        Returns:
            ``pd.Index`` when ``module`` is the redirected module, otherwise
            whatever the standard unpickler resolves.
        """
        if module != self._REDIRECTED_MODULE:
            # Not a redirected module: use the regular resolution.
            return super().find_class(module, name)
        return pd.Index

# 1. Load the pickle using our CompatUnpickler.
# SECURITY: unpickling can execute arbitrary code. Only load this file if it
# comes from a trusted source.
with open("dbpedia_20210528.pkl", "rb") as f:
    data = CompatUnpickler(f).load()

# 2. Show the top-level type
print(f"Loaded object of type: {type(data)}\n")

# 3. Inspect based on type (only a small sample is printed in each case)
if isinstance(data, pd.DataFrame):
    print("DataFrame head:")
    print(data.head(), "\n")
elif isinstance(data, dict):
    print(f"Dict with {len(data)} keys. First 5 items:")
    for i, (k, v) in enumerate(data.items()):
        if i == 5:
            break
        print(f"Key: {k!r}  →  Value type: {type(v)}")
        pprint(v)
        print()
elif isinstance(data, list):
    print(f"List of length {len(data)}. First element:")
    if data:
        pprint(data[0])
    else:
        # An empty list has no first element; avoid an IndexError.
        print("(list is empty)")
else:
    print("Sample representation:")
    pprint(data)

# 4. Save to Excel.
# Only a DataFrame provides ``to_excel``; fail with an explicit message rather
# than an AttributeError when the pickle held a dict, list or something else.
# NOTE(review): list-valued cells (the ``domain`` column appears to hold lists)
# are presumably written as their string form - confirm downstream readers
# expect that.
output_path = "dbpedia_20210528.xlsx"
if not isinstance(data, pd.DataFrame):
    raise TypeError(
        f"Cannot export object of type {type(data).__name__} to Excel; "
        "expected a pandas DataFrame"
    )
data.to_excel(output_path, index=False)

Loaded object of type: <class 'pandas.core.frame.DataFrame'>

DataFrame head:
                 cleaned_label description  \
0                       a side        None   
1         abbeychurch blessing        None   
2  abbeychurch blessing charge        None   
3                 abbreviation        None   
4                able to grind        None   

                                 domain  \
0  [http://dbpedia.org/ontology/Single]   
1  [http://dbpedia.org/ontology/Cleric]   
2  [http://dbpedia.org/ontology/Cleric]   
3                                  None   
4    [http://dbpedia.org/ontology/Mill]   

                                                  id  \
0                  http://dbpedia.org/ontology/aSide   
1    http://dbpedia.org/ontology/abbeychurchBlessing   
2  http://dbpedia.org/ontology/abbeychurchBlessin...   
3           http://dbpedia.org/ontology/abbreviation   
4            http://dbpedia.org/ontology/ableToGrind   

                                       range supe

Obtaining DBpedia classes from domain

In [3]:
import pandas as pd

# Destination workbook for the grouped result.
output_filename = 'dbpedia_classes.xlsx'

# Load the input spreadsheet; this cell expects it to be named dbpedia.xlsx
# (replace the name with your actual file if it differs).
df = pd.read_excel('dbpedia.xlsx')

# For each distinct 'domain' value, gather its 'cleaned_label' entries into a
# single list, then turn the grouped index back into an ordinary column.
labels_by_domain = df.groupby('domain')['cleaned_label']
domain_groups = labels_by_domain.apply(list).reset_index()

# Write the grouped table to Excel without the row index.
domain_groups.to_excel(output_filename, index=False)
