In [4]:
# 1. Instalação e Importação
!pip install pandas rdflib
import pandas as pd
from rdflib import Graph, Literal, Namespace, RDF, RDFS, OWL, XSD, URIRef
import random

# 2. Setup: an empty graph plus the project namespace and its prefixes
g = Graph()
apt = Namespace("http://www.example.org/ontology/apartments#")

# Register the prefixes on the graph, in the same order as before.
for prefix, namespace in (("apt", apt), ("owl", OWL), ("rdfs", RDFS)):
    g.bind(prefix, namespace)

# 3. Load the ontology schema and both datasets.
# Any missing input file aborts the run: the rest of the script cannot work
# without `g` being populated and `df_apt` / `df_actor` being defined.
try:
    g.parse("apartments.ttl", format="turtle")
    print("Ontologia 'apartments.ttl' carregada com sucesso no grafo.")

    df_apt = pd.read_csv('apartments_processed.csv', sep=',')
    print(f"CSV de apartamentos carregado com {len(df_apt)} registros.")

    df_actor = pd.read_csv('adult.csv')
    print(f"CSV de atores carregado com {len(df_actor)} registros.")

except FileNotFoundError as e:
    print(f"ERRO: Arquivo não encontrado - {e}. Por favor, faça o upload de todos os arquivos necessários para o Colab.")
    # Raise SystemExit explicitly instead of calling `exit()`: that helper is
    # injected by the `site` module for interactive use and is not meant for
    # programs. A non-zero status also marks the run as failed, and chaining
    # keeps the original error visible.
    raise SystemExit(1) from e

# 4. Graph population
print("\n Iniciando a população do grafo de conhecimento...")

# 4.1 Create one Actor individual per row of the actors CSV.
# The sequential id assigned here is what the actor URIs are built from.
df_actor['id'] = range(len(df_actor))

# Text columns copied as plain literals after trimming surrounding whitespace.
text_properties = (
    (apt.hasProfession, 'occupation'),
    (apt.hasSex, 'sex'),
    (apt.hasRace, 'race'),
)

for _, person in df_actor.iterrows():
    person_uri = apt[f"actor_{person['id']}"]

    g.add((person_uri, RDF.type, apt.Actor))
    g.add((person_uri, apt.hasAge, Literal(person['age'], datatype=XSD.integer)))

    for predicate, column in text_properties:
        g.add((person_uri, predicate, Literal(person[column].strip())))

    # Income is modelled as a link to one of two resources, not as a literal.
    if person['income'].strip() == '>50K':
        income_bracket = apt.Over50K
    else:
        income_bracket = apt.UnderOrEqual50K
    g.add((person_uri, apt.hasIncomeLevel, income_bracket))

print(f"{len(df_actor)} instâncias de Atores adicionadas.")

# 4.2 Create Apartment and Transaction individuals
# Lookup table: amenity column name in the apartments CSV -> Amenity class
# that an individual is typed with when the column holds 1 for a row.
amenity_mapping = {
    'AC': apt.AirConditioning,
    'Alarm': apt.AlarmSystem,
    'Basketball': apt.BasketballCourt,
    'Cable or Satellite': apt.CableTV,
    'Clubhouse': apt.Clubhouse,
    'Dishwasher': apt.Dishwasher,
    'Doorman': apt.Doorman,
    'Elevator': apt.Elevator,
    'Fireplace': apt.Fireplace,
    'Garbage Disposal': apt.GarbageDisposal,
    'Gated': apt.GatedCommunity,
    'Golf': apt.GolfAccess,
    'Gym': apt.Gym,
    'Hot Tub': apt.HotTub,
    'Internet Access': apt.InternetAccess,
    'Parking': apt.Parking,
    'Patio/Deck': apt.Patio,
    'Playground': apt.Playground,
    'Pool': apt.Pool,
    'Refrigerator': apt.Refrigerator,
    'Storage': apt.Storage,
    'Washer Dryer': apt.WasherDryer,
    'Wood Floors': apt.WoodFloors,
    'Fitness Center': apt.FitnessCenter,
}

transaction_counter = 0

# Resolve once which amenity columns exist in the CSV and which URI slug each
# one gets. Both are identical for every row, so the membership test and the
# string clean-up do not need to be repeated per apartment.
available_amenities = [
    (
        col_name,
        amenity_class,
        col_name.lower().replace(' ', '').replace('/', '').replace('-', ''),
    )
    for col_name, amenity_class in amenity_mapping.items()
    if col_name in df_apt.columns
]


def _add_decimal(subject, predicate, value):
    """Add `value` as an xsd:decimal literal, skipping missing cells.

    A missing cell reaches this point as NaN, and "nan" is not a valid
    xsd:decimal lexical form, so no triple is written in that case.
    """
    if pd.notna(value):
        g.add((subject, predicate, Literal(value, datatype=XSD.decimal)))


def _as_flag(value):
    """Convert a CSV cell to bool, treating a missing cell as False.

    Plain `bool(NaN)` is True, which would wrongly mark a pet as allowed.
    """
    return bool(value) if pd.notna(value) else False


actor_count = len(df_actor)

# Every apartment row is processed (no .head() limit).
for index, apt_row in df_apt.iterrows():
    # The id is needed to build the URIs, so rows without one are skipped.
    if pd.isna(apt_row['id']):
        continue

    apartment_id = int(apt_row['id'])
    apartment_uri = apt[f"imovel_{apartment_id}"]
    location_uri = apt[f"local_{apartment_id}"]

    g.add((apartment_uri, RDF.type, apt.Apartment))
    _add_decimal(apartment_uri, apt.hasPrice, apt_row['price'])
    _add_decimal(apartment_uri, apt.hasBedrooms, apt_row.get('bedrooms', 0))
    g.add((apartment_uri, apt.allowsCats, Literal(_as_flag(apt_row.get('cats', 0)), datatype=XSD.boolean)))
    g.add((apartment_uri, apt.allowsDogs, Literal(_as_flag(apt_row.get('dogs', 0)), datatype=XSD.boolean)))

    # Each apartment gets its own Location individual.
    g.add((apartment_uri, apt.hasLocation, location_uri))
    g.add((location_uri, RDF.type, apt.Location))
    if 'cityname' in apt_row and pd.notna(apt_row['cityname']):
        g.add((location_uri, apt.hasCityName, Literal(apt_row['cityname'])))

    # One amenity individual per (apartment, amenity) pair flagged with 1.
    for col_name, amenity_class, clean_col_name in available_amenities:
        if apt_row[col_name] == 1:
            amenity_instance_uri = apt[f"{clean_col_name}_do_imovel_{apartment_id}"]
            g.add((amenity_instance_uri, RDF.type, amenity_class))
            g.add((apartment_uri, apt.hasAmenity, amenity_instance_uri))

    # The renter is drawn at random from the actor ids 0..len(df_actor)-1.
    # No seed is set in this block, so the pairing can differ between runs.
    random_actor_index = random.randint(0, actor_count - 1)
    renter_uri = apt[f"actor_{random_actor_index}"]

    transaction_counter += 1
    transaction_uri = apt[f"transacao_{transaction_counter}"]

    g.add((transaction_uri, RDF.type, apt.RentalTransaction))
    g.add((transaction_uri, apt.concernsApartment, apartment_uri))
    g.add((transaction_uri, apt.hasRenter, renter_uri))
    _add_decimal(transaction_uri, apt.hasTransactionValue, apt_row['price'])

# Report what was actually added: rows without an id were skipped, so
# len(df_apt) would overstate the count.
print(f"{transaction_counter} instâncias de Apartamentos e Transações adicionadas.")

# 5. Persist the final knowledge graph to disk in Turtle syntax
output_file = "knowledge_graph_final.ttl"
g.serialize(format='turtle', destination=output_file)
print(f"Grafo de conhecimento completo salvo no arquivo '{output_file}'.")

--- Ambiente preparado ---
Ontologia 'apartments.ttl' carregada com sucesso no grafo.
CSV de apartamentos carregado com 97935 registros.
CSV de atores carregado com 32561 registros.

--- Iniciando a população do grafo de conhecimento... ---
32561 instâncias de Atores adicionadas.
97935 instâncias de Apartamentos e Transações adicionadas.

PROCESSO CONCLUÍDO!
Grafo de conhecimento completo salvo no arquivo 'knowledge_graph_final.ttl'.
