In [None]:
# (a) 
import csv
import datetime

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import rdflib
from rdflib import URIRef, Literal, Graph, RDF, Namespace, plugin
from rdflib import Graph, URIRef, BNode, Literal
from rdflib.parser import Parser
from rdflib.plugins import sparql
from rdflib.plugins.sparql import prepareQuery
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.preprocessing import OneHotEncoder
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Input, Dense, GlobalAveragePooling1D
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.models import Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import plot_model
import tf_geometric as tfg

In [None]:
# In the Seri data there are 10 readings a day, one every 2 hours. The predictions will be interpreted accordingly.
# Load RDF data (parsed as Turtle, despite the .rdf file extension)
g = rdflib.Graph()
g.parse("SeriKG.rdf", format="ttl")

# Define RDF namespaces
rdf = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
foo = Namespace("https://w3id.org/def/foo#")  # Replace with your ontology namespace
# This IRI must equal, character for character, the one used when SeriKG.rdf
# was written. The cell that builds the file declares `pos` with the https
# scheme; with "http://w3.org/..." the pos:long / pos:lat patterns below match
# nothing and the query returns zero rows.
pos = Namespace("https://w3.org/2003/01/geo/wgs84_pos#")

# Define a SPARQL query to extract features and labels.
# Each solution binds ?s, ?type, ?date, ?time, ?long and ?lat for one subject
# that carries all five properties.
query = prepareQuery(""" Select *

     WHERE {
   ?s      rdf:type      ?type ;
           foo:GMTDate   ?date;
           foo:GMTTime   ?time;
           pos:long      ?long ;
           pos:lat       ?lat;
    }
""", initNs={"rdf": rdf, "foo": foo, "pos": pos})

In [None]:
# Extract features and labels
def _to_number(value):
    """Turn the Python value of a SPARQL binding into a float.

    Dates become their proleptic Gregorian ordinal (whole days) and times
    become seconds since midnight, so every feature can be stored in a
    float32 array. Anything else is passed to ``float()``, which raises
    ``ValueError``/``TypeError`` if the value is not numeric.
    """
    if isinstance(value, datetime.date):
        return float(value.toordinal())
    if isinstance(value, datetime.time):
        return (value.hour * 3600.0 + value.minute * 60.0
                + value.second + value.microsecond / 1e6)
    return float(value)


features = []
labels = []

for row in g.query(query):
    longitude = _to_number(row.long.toPython())
    latitude = _to_number(row.lat.toPython())
    # For literals typed xsd:date / xsd:time, toPython() yields
    # datetime.date / datetime.time objects, which NumPy cannot cast to
    # float32 directly; convert them to plain numbers first.
    day_number = _to_number(row.date.toPython())
    seconds_of_day = _to_number(row.time.toPython())

    # Feature order: longitude, latitude, date, time
    features.append([longitude, latitude, day_number, seconds_of_day])

    # Use long and lat as labels, depending on your task.
    # NOTE(review): these targets are also the first two features, so a model
    # can reproduce them by copying its input — confirm this is intended.
    labels.append([longitude, latitude])

if not features:
    # An empty result usually means a namespace IRI in the query does not
    # match the data; fail here rather than with an obscure shape error later.
    raise ValueError("The SPARQL query returned no rows; check the namespace IRIs and the loaded graph.")

In [None]:
# Materialise both Python lists as float32 NumPy arrays (features first, then labels)
features_np, labels_np = (
    np.array(rows, dtype=np.float32) for rows in (features, labels)
)

In [None]:
# First cut: hold out 20% of all rows as the test set, keep 80% for fitting
X_train_temp, X_test, y_train_temp, y_test = train_test_split(
    features_np,
    labels_np,
    test_size=0.2,
    random_state=42,
)

# Second cut: a quarter of the remaining 80% becomes validation,
# i.e. 60% training / 20% validation of the full data set
X_train, X_val, y_train, y_val = train_test_split(
    X_train_temp,
    y_train_temp,
    test_size=0.25,
    random_state=42,
)

# Report how many rows ended up in each subset
for caption, subset in (
    ("Training set size:", X_train),
    ("Validation set size:", X_val),
    ("Test set size:", X_test),
):
    print(caption, len(subset))


In [None]:
# Create a deep learning model with TensorFlow/Keras:
# 4 input features -> 128 -> 64 -> 2 linear outputs (longitude, latitude)
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(4,)),  # Adjust the input shape based on your features
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(2, activation='linear')  # Use 'linear' activation for regression
])

# Compile the model.
# 'accuracy' is a classification metric and is not meaningful for continuous
# targets, so track mean absolute error instead. The explicit name fixes the
# keys under which the values appear in `history.history`.
model.compile(
    optimizer='adam',
    loss='mean_squared_error',  # Use mean squared error for regression
    metrics=[tf.keras.metrics.MeanAbsoluteError(name='mae')],
)

# Train the model on your regression data
history = model.fit(X_train, y_train, epochs=1000, batch_size=32, validation_data=(X_val, y_val))

# Access the per-epoch error values recorded during training
training_mae = history.history['mae']
validation_mae = history.history['val_mae']
epochs = range(1, len(training_mae) + 1)

# Backward-compatible aliases for the names this cell used to define.
# They now hold MAE values, not accuracies.
training_accuracy = training_mae
validation_accuracy = validation_mae

# Evaluate the model on the test data.
# With one compiled metric, evaluate() returns [loss, metric]; unpack it so
# that "Test loss" prints a single number rather than a list.
test_loss, test_mae = model.evaluate(X_test, y_test)
print(f'Test loss: {test_loss}')
print(f'Test MAE: {test_mae}')
model.summary()


In [None]:
# Plot the model architecture once. The call writes the diagram to model.png
# and, as the last expression of the cell, its return value is what the
# notebook displays. The previous second call rendered the same diagram again
# to the same default file name, so it has been folded into this one.
plot_model(model, to_file='model.png', show_shapes=True)

In [None]:
# Make predictions on the test data
predictions = model.predict(X_test)

# Element-wise prediction error over both outputs (longitude and latitude)
errors = predictions - y_test

# Calculate RMSE (pooled over every element of both output columns)
rmse = np.sqrt(np.mean(errors ** 2))

# Calculate MAPE.
# The percentage error is undefined where the true value is exactly 0, and a
# single such entry would turn the mean into inf/nan, so those entries are
# excluded. If no entry is non-zero, MAPE is reported as NaN.
nonzero = y_test != 0
if nonzero.any():
    mape = np.mean(np.abs(errors[nonzero] / y_test[nonzero])) * 100
else:
    mape = np.nan

print(f'Test RMSE: {rmse}')
print(f'Test MAPE: {mape}%')


In [None]:
# Predict on the held-out test rows
predictions = model.predict(X_test)

# Residual = actual minus predicted, element-wise
residuals = y_test - predictions

# Assemble the table column by column: output 0 is longitude, output 1 is latitude.
# Insertion order of the dict fixes the column order of the DataFrame.
residual_columns = {}
for axis_index, (actual_key, predicted_key, residual_key) in enumerate([
    ('Actual Longitude', 'Predicted Longitude', 'Longitude Residuals'),
    ('Actual Latitude', 'Predicted Latitude', 'Latitude Residuals'),
]):
    residual_columns[actual_key] = y_test[:, axis_index]
    residual_columns[predicted_key] = predictions[:, axis_index]
    residual_columns[residual_key] = residuals[:, axis_index]

residuals_df = pd.DataFrame(residual_columns)

# Show the tabulated residuals
print(residuals_df)

In [None]:
import matplotlib.pyplot as plt

# Draw both point clouds on one explicit Axes: latitude (column 1) on the
# x-axis, longitude (column 0) on the y-axis.
fig, ax = plt.subplots()

# Actual test targets, drawn in gray
ax.scatter(y_test[:, 1], y_test[:, 0], color='gray', label='Actual', marker='o', s=100)

# Model predictions, drawn in pink on top of the actual points
ax.scatter(predictions[:, 1], predictions[:, 0], color='pink', label='Predicted', marker='o', s=100)

# Axis labels and legend
ax.set_xlabel('Latitude')
ax.set_ylabel('Longitude')
ax.legend()

# Render the figure
plt.show()


In [None]:
# Build the knowledge graph file (SeriKG.rdf) from the raw CSV and the ontology.
# Set the paths to your CSV file and the ontology file
csv_file = "Seri.csv"  # Replace with the data source
ontology_file = "foo.ttl"  # Replace with the ontology

# Create an RDF graph
graph = Graph()

# Load the ontology into the graph
graph.parse(source=ontology_file, format="ttl")

# Set the namespaces for your ontology
foo = Namespace("https://w3id.org/def/foo#")
# W3C SOSA namespace; the IRI previously lacked the "www." host prefix.
sosa = Namespace("http://www.w3.org/ns/sosa/")
# NOTE(review): this is not the canonical W3C WGS84 IRI
# (http://www.w3.org/2003/01/geo/wgs84_pos#). It is left unchanged because any
# query over the generated file must use exactly the IRI written here.
pos = Namespace("https://w3.org/2003/01/geo/wgs84_pos#")
xsd = Namespace('http://www.w3.org/2001/XMLSchema#')

# Mapping from CSV column index to (predicate, XSD datatype).
# Column 0 is the subject identifier; column 7 is not mapped.
# Replace the predicates with the appropriate ones from your ontology.
column_mapping = [
    (foo.LocalDate, 1, xsd.date),
    (foo.LocalTime, 2, xsd.time),
    (foo.GMTDate, 3, xsd.date),
    (foo.GMTTime, 4, xsd.time),
    (pos.lat, 5, xsd.float),
    (pos.long, 6, xsd.float),
    (foo.Temperature, 8, xsd.double),
    (foo.Speed, 9, xsd.integer),
    (foo.Direction, 10, xsd.integer),
    (foo.Cov, 11, xsd.integer),
    (foo.HDOP, 12, xsd.integer),
    (foo.Distance, 13, xsd.float),
]
min_columns = max(index for _, index, _ in column_mapping) + 1

# Iterate over the CSV file and map the data to RDF triples.
# csv.reader handles quoted fields (e.g. values containing commas), which a
# plain split(',') would break apart; newline='' is what the csv module expects.
with open(csv_file, 'r', newline='') as file:
    reader = csv.reader(file)

    # The first row is always treated as a header; the default keeps an empty
    # file from raising StopIteration.
    next(reader, None)

    for raw_columns in reader:
        columns = [value.strip() for value in raw_columns]

        # Ignore blank lines (for example a trailing newline at end of file)
        if not any(columns):
            continue

        if len(columns) < min_columns:
            raise ValueError(
                f"{csv_file}, line {reader.line_num}: expected at least "
                f"{min_columns} columns, found {len(columns)}"
            )

        # Create subject URI from the identifier in the first column
        subject_uri = URIRef(foo + columns[0])

        # Add triples to the graph
        graph.add((subject_uri, RDF.type, sosa.Observation))  # Replace with the appropriate class from your ontology
        for predicate, index, datatype in column_mapping:
            graph.add((subject_uri, predicate, Literal(columns[index], datatype=datatype)))

# Save the resulting knowledge graph to a file (Turtle syntax)
output_file = "SeriKG.rdf"
graph.serialize(destination=output_file, format="ttl")
