In [141]:
import pandas as pd
from rdflib import BNode, Graph, Literal, Namespace, RDF, URIRef
from rdflib.namespace import FOAF, XSD

# Create Knowledge graph table

##### Mining and quarry

In [130]:
from rdflib import BNode, Namespace

# Quarry site records, plus their geocoded counterparts indexed by the
# original site id so a site's state can be looked up directly.
df_quarry = pd.read_csv(
    "original_cobalt_data/Site.csv",
    delimiter=",",
    index_col=0,
    encoding='cp1252',
)
df_geocoded_quarry = pd.read_csv(
    "geocoded_querydata-16-5-2023 14_02_28.csv",
    delimiter=",",
    index_col="original_Site_ID *",
)

# Fresh, empty graph; the cells below add their triples to this same object.
KGraph = Graph()

# Vocabulary for the quarry-specific predicates.
namespace_uri = "http://Quarry.org/"
namespace = Namespace(namespace_uri)
for term_name in ("Longitude", "Latitude", "State"):
    setattr(namespace, term_name, namespace.term(term_name))

for record in df_quarry.itertuples(index=False, name=None):
    # The unpacking demands exactly 17 values per row; only the 1st, 2nd,
    # 8th and 9th of them are used.
    (site_id, site_name, _, _, _, _, _, longitude, latitude,
     _, _, _, _, _, _, _, _) = record

    site = URIRef(value=site_id)
    site_triples = (
        (site, RDF.type, Literal("Quarry", datatype=XSD.Name)),
        (site, namespace.Longitude, Literal(longitude, datatype=XSD.float)),
        (site, namespace.Latitude, Literal(latitude, datatype=XSD.float)),
        (site, FOAF.givenName, Literal(site_name)),
    )
    for triple in site_triples:
        KGraph.add(triple)

    # Link the site to its state; spaces in the state name become underscores.
    state_name = df_geocoded_quarry.loc[site_id, "state"]
    state = URIRef(value=state_name.replace(" ", "_"))
    KGraph.add((site, namespace.State, state))

In [131]:
# Write the graph built so far to "KGraph.nt".
# NOTE(review): no `format=` argument is passed, so rdflib's default
# serialization format is used; nothing here derives it from the ".nt"
# suffix — confirm the file is really meant to hold N-Triples.
# NOTE(review): serializing may also bind auto-generated namespace prefixes on
# the graph as a side effect — verify before changing the format.
KGraph.serialize(destination="KGraph.nt")

<Graph identifier=Nc6f6fc621dbe4e12a809102f7c4ce838 (<class 'rdflib.graph.Graph'>)>

# transport nodes

In [132]:
# Per-state railroad figures. Every CSV row becomes one blank "transport" node
# that carries the figures and is linked from the state's node.
df_rail = pd.read_csv("railroad_data.csv")

namespace_uri = "http://Transport.org/"
namespace = Namespace(namespace_uri + "Railroad/")
namespace.Class_I = namespace.term("Class_I")
namespace.Total = namespace.term("Total")
namespace.Regional = namespace.term("Regional")
namespace.Linehaul = namespace.term("Linehaul")
namespace.st = namespace.term("Switching_and_terminal")
namespace.Canadian = namespace.term("Canadian")
namespace.Railroad = namespace.term("Railroad")
# Predicate recording where the figures come from. An RDF predicate has to be
# an IRI; a literal in that position cannot be serialized to valid output.
namespace.Source = namespace.term("Source")

for state, class1, Regional, linehaul, switch, canadian, total in df_rail.itertuples(index=False):
    # Same state naming as the other cells: spaces replaced by underscores.
    state = URIRef(value=state.replace(" ", "_"))
    transport = BNode()

    figures = (
        (namespace.Class_I, class1),
        (namespace.Regional, Regional),
        (namespace.Linehaul, linehaul),
        (namespace.st, switch),
        (namespace.Canadian, canadian),
        (namespace.Total, total),
    )
    for predicate, figure in figures:
        # NOTE(review): xsd:positiveInteger excludes 0 — if any of these columns
        # can contain 0, xsd:nonNegativeInteger would be the matching type.
        KGraph.add((transport, predicate, Literal(figure, datatype=XSD.positiveInteger)))

    KGraph.add((transport, namespace.Source, Literal("http://pubs.aar.org/pubstores/", datatype=XSD.string)))

    KGraph.add((state, namespace.Railroad, transport))

# Freight option per site, taken from the combined spreadsheet and indexed by
# the original site id.
df = pd.read_excel("Combined_dataset.xlsx")
df_waterways = df[["original_Site_ID", "cosest_option_for_fraight"]].set_index("original_Site_ID")

# Freight is placed directly under the base URI held in `namespace_uri`
# (assigned earlier in this cell), not under its "Railroad/" sub-namespace.
namespace = Namespace(namespace_uri)
namespace.Freight = namespace.term("Freight")

for site_id, freight in df_waterways.itertuples(index=True):
    site = URIRef(value=site_id)
    # NOTE(review): xsd:ENTITY is an XML-specific type; confirm it is the
    # intended datatype for these values.
    KGraph.add((site, namespace.Freight, Literal(freight, datatype=XSD.ENTITY)))




#### unemployment

In [133]:
# State-level unemployment rates. Every state listed in the spreadsheet is also
# typed as a "State" node.
df_unemployment = pd.read_excel("Unemployment_data.xlsx")

# Pin the namespace explicitly instead of reusing whatever value the last
# executed cell left in `namespace_uri`. "http://Transport.org/" is the value
# in effect when the notebook runs top to bottom, and it is the namespace the
# Freight predicate is created under.
namespace_uri = "http://Transport.org/"
namespace = Namespace(namespace_uri)
namespace.Unemployment_rate = namespace.term("Unemployment_rate")

# The unpacking expects three columns per row; the third one is not used.
for state, unemployment, _ in df_unemployment.itertuples(index=False):
    # Same state naming as the other cells: spaces replaced by underscores.
    state = URIRef(value=state.replace(" ", "_"))
    KGraph.add((state, RDF.type, Literal("State", datatype=XSD.Name)))
    KGraph.add((state, namespace.Unemployment_rate, Literal(unemployment, datatype=XSD.float)))

In [134]:
# Resource records per site. Each CSV row becomes a blank "resource" node
# (linked from the site) plus a blank "research" node holding its reference.
df_resources = pd.read_csv("original_cobalt_data/Resources.csv", parse_dates=["Last_Updt"])

# Vocabulary for the resource predicates.
namespace_uri = "http://miniral_resources.org/"
namespace = Namespace(namespace_uri)
namespace.Last_Updt = namespace.term("Last_Updt")
namespace.Mat_Amnt = namespace.term("Material_Amount")
namespace.Grade = namespace.term("Grade")
namespace.Reaserchers = namespace.term("Reaserchers")
namespace.Date = namespace.term("Date")
namespace.Refference_Detail = namespace.term("Refference_Detail")
namespace.Research = namespace.term("Research")
namespace.Minerals = namespace.term("Minerals")


for _, row in df_resources.iterrows():
    site = URIRef(value=row["Site_ID *"])
    resource = BNode()

    # `parse_dates` yields pandas Timestamps; xsd:date has no time-of-day part,
    # so reduce the value to a plain date before building the literal. Values
    # that were not parsed into a Timestamp are passed through untouched.
    last_updated = row["Last_Updt"]
    if isinstance(last_updated, pd.Timestamp):
        last_updated = last_updated.date()

    KGraph.add((resource, RDF.type, Literal(row["Material"], datatype=XSD.Name)))
    KGraph.add((resource, namespace.Last_Updt, Literal(last_updated, datatype=XSD.date)))
    # xsd:decimal replaces "xsd:numeric", which is not an XML Schema datatype.
    KGraph.add((resource, namespace.Mat_Amnt, Literal(row["Mat_Amnt"], datatype=XSD.decimal)))
    KGraph.add((resource, namespace.Grade, Literal(row["Grade"], datatype=XSD.float)))

    research = BNode()
    KGraph.add((research, namespace.Reaserchers, Literal(row["Ref_ID"], datatype=XSD.string)))
    # NOTE(review): the displayed rows show year-only values in Rsrc_Date;
    # if that holds for the whole column, xsd:gYear would fit better than xsd:date.
    KGraph.add((research, namespace.Date, Literal(row["Rsrc_Date"], datatype=XSD.date)))
    # xsd:string replaces "xsd:str", which is not an XML Schema datatype.
    KGraph.add((research, namespace.Refference_Detail, Literal(row["Ref_Detail"], datatype=XSD.string)))

    KGraph.add((resource, namespace.Research, research))
    KGraph.add((site, namespace.Minerals, resource))

df_resources

Unnamed: 0,OBJECTID *,Site_ID *,Ftr_ID *,Ftr_Name,Last_Updt,Material,Rsrc_Date,Mat_Type,Mat_Amnt,Mat_Units,...,COU_SI,CntSIComAm,CntSIComUt,CntSICom,Rsrc_Class,Rsrc_Descr,Rsrc_Code,Ref_Detail,Ref_ID,Remarks
0,1,AK00012,Mo00550,Ruby Creek Zone,2018-07-19,cobalt,2018,ore,124600000,tonnes,...,percent copper,20000.111,metric tons,Co,Inferred,Resource,CIM,"Page 2, Table 1",Trilogy Metals Inc. (2018),<Null>
1,2,AK00012,Mo00550,Ruby Creek Zone,2018-07-19,copper,2016,ore,40500000,tonnes,...,percent copper,414000.111,metric tons,Cu,Indicated,Resource,CIM,"Page 2, Table 2",Trilogy Metals Inc. (2018),<Null>
2,3,AK00012,Mo00550,Ruby Creek Zone,2018-07-19,copper,2016,ore,84100000,tonnes,...,percent copper,802000.111,metric tons,Cu,Inferred,Resource,CIM,"Page 2, Table 2",Trilogy Metals Inc. (2018),<Null>
3,4,AK00012,Mo00571,South Reef Zone,2018-07-19,cobalt,2018,ore,-57800000,tonnes,...,percent copper,14500.111,metric tons,Co,Inferred,Resource,CIM,"Page 2, Table 1",Trilogy Metals Inc. (2018),<Null>
4,5,AK00012,Mo00571,South Reef Zone,2018-07-19,copper,1961,ore,21230000,tons,...,percent copper,585000.111,metric tons,Cu,<Null>,Resource,<Null>,"Page 6-5, Table 6.2",Davis and Sim (2013),<Null>
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
287,288,PR00005,Mo00635,Maricao East Deposit,2018-07-19,chromium,1959,ore,-5600000,short tons,...,percent nickel,34000.111,metric tons,Cr,Inferred,Drill-Indicated Resource,<Null>,"Page 31, Table 2",Heidenreich and Reynolds (1959),<Null>
288,289,PR00005,Mo00635,Maricao East Deposit,2018-07-19,iron,1959,ore,-5600000,short tons,...,percent nickel,1500000.111,metric tons,Fe,Inferred,Drill-Indicated Resource,<Null>,"Page 31, Table 2",Heidenreich and Reynolds (1959),<Null>
289,290,PR00005,Mo00635,Maricao East Deposit,2018-07-19,nickel,1959,ore,-5600000,short tons,...,percent nickel,55000.111,metric tons,Ni,Inferred,Drill-Indicated Resource,<Null>,"Page 31, Table 2",Heidenreich and Reynolds (1959),<Null>
290,291,PR00005,Mo00635,Maricao East Deposit,2018-07-19,cobalt,1959,ore,-5600000,short tons,...,percent nickel,5600.111,metric tons,Co,Inferred,Drill-Indicated Resource,<Null>,"Page 31, Table 2",Heidenreich and Reynolds (1959),<Null>


In [135]:
# Write the graph, now including every triple added above, to "KGraph.nt"
# (the same destination as the earlier serialize call, so that file is overwritten).
# NOTE(review): no `format=` argument is passed, so rdflib's default
# serialization format is used; nothing here derives it from the ".nt"
# suffix — confirm the file is really meant to hold N-Triples.
# NOTE(review): serializing may also bind auto-generated namespace prefixes on
# the graph as a side effect — verify before changing the format.
KGraph.serialize(destination="KGraph.nt")

<Graph identifier=Nc6f6fc621dbe4e12a809102f7c4ce838 (<class 'rdflib.graph.Graph'>)>

# Query the Knowledge graph
1. employment based
2. transportation based

In [138]:
# Quarries located in a state whose unemployment rate is above 4.
# Selected columns: site (?a), unemployment rate (?c), Class_I figure (?C1),
# Total figure (?t) and freight option (?fr). ?name and ?lh are not selected,
# but a site/state without them still does not match.
# Every prefix is declared in the query itself so the result does not depend on
# which prefixes happen to be bound on the graph when the cell is run.
query_high_unemployment = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX quarry: <http://Quarry.org/>
PREFIX rail: <http://Transport.org/Railroad/>
PREFIX transport: <http://Transport.org/>

SELECT DISTINCT ?a  ?c ?C1 ?t ?fr
WHERE {
    ?a rdf:type 'Quarry'^^xsd:Name .
    ?a foaf:givenName ?name .
    ?a quarry:State ?state .

    ?state transport:Unemployment_rate ?c .

    ?state rail:Railroad ?rr .
    ?rr rail:Class_I ?C1 .
    ?rr rail:Linehaul ?lh .
    ?rr rail:Total ?t .

    ?a transport:Freight ?fr .

    FILTER (?c > 4).
}"""

qres = KGraph.query(query_high_unemployment)
for row in qres:
    print(f"{row} ")

(rdflib.term.URIRef('CA00070'), rdflib.term.Literal('4.4', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#float')), rdflib.term.Literal('2', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#positiveInteger')), rdflib.term.Literal('25', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#positiveInteger')), rdflib.term.Literal('Sea', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#ENTITY'))) 
(rdflib.term.URIRef('CA00071'), rdflib.term.Literal('4.4', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#float')), rdflib.term.Literal('2', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#positiveInteger')), rdflib.term.Literal('25', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#positiveInteger')), rdflib.term.Literal('Sea', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#ENTITY'))) 
(rdflib.term.URIRef('MI00001'), rdflib.term.Literal('4.1', datatype=rdflib.term.URIRef('http://www.w3.org/

In [139]:
# Quarries located in a state whose railroad Class_I figure is above 4.
# Selected columns: site (?a), unemployment rate (?c), Class_I figure (?C1),
# Total figure (?t) and freight option (?fr). ?name and ?lh are not selected,
# but a site/state without them still does not match.
# Every prefix is declared in the query itself so the result does not depend on
# which prefixes happen to be bound on the graph when the cell is run.
query_high_trafic = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX quarry: <http://Quarry.org/>
PREFIX rail: <http://Transport.org/Railroad/>
PREFIX transport: <http://Transport.org/>

SELECT DISTINCT ?a  ?c ?C1 ?t ?fr
WHERE {
    ?a rdf:type 'Quarry'^^xsd:Name .
    ?a foaf:givenName ?name .
    ?a quarry:State ?state .

    ?state transport:Unemployment_rate ?c .

    ?state rail:Railroad ?rr .
    ?rr rail:Class_I ?C1 .
    ?rr rail:Linehaul ?lh .
    ?rr rail:Total ?t .

    ?a transport:Freight ?fr .

    FILTER (?C1 > 4).
}"""

# Run the traffic query defined in this cell (the unemployment query from the
# previous cell was being executed here by mistake).
qres = KGraph.query(query_high_trafic)
for row in qres:
    print(f"{row} ")

(rdflib.term.URIRef('MO00001'), rdflib.term.Literal('2.5', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#float')), rdflib.term.Literal('6', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#positiveInteger')), rdflib.term.Literal('17', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#positiveInteger')), rdflib.term.Literal('Lake', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#ENTITY'))) 
(rdflib.term.URIRef('MO00002'), rdflib.term.Literal('2.5', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#float')), rdflib.term.Literal('6', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#positiveInteger')), rdflib.term.Literal('17', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#positiveInteger')), rdflib.term.Literal('Lake', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#ENTITY'))) 
(rdflib.term.URIRef('MO00013'), rdflib.term.Literal('2.5', datatype=rdflib.term.URIRef('http://www.w3.or