In [None]:
!pip install rdflib
from urllib.parse import quote

import numpy as np
import pandas as pd
import tensorflow as tf
from rdflib import Graph, Namespace, Literal, RDF, URIRef, OWL
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer



In [None]:
# Load the dataset from the Excel workbook and, in the same chained step,
# replace every missing cell (NaN) with an empty string
df = pd.read_excel('/content/Medicine_description.xlsx').fillna('')

In [None]:
# Preprocess the data
# The vocabulary is still built from all three text columns, so `total_words`
# keeps covering every token that appears in the dataset.
vocabulary_text = df['Drug_Name'] + ' ' + df['Reason'] + ' ' + df['Description']
tokenizer = Tokenizer()
tokenizer.fit_on_texts(vocabulary_text)
total_words = len(tokenizer.word_index) + 1  # +1 because index 0 is reserved for padding

# The model inputs deliberately leave out Drug_Name: that column is the training
# target, so feeding it in as a feature leaks the label, and predict_medicine()
# only ever supplies disease text at inference time.
feature_text = df['Reason'] + ' ' + df['Description']
input_sequences = tokenizer.texts_to_sequences(feature_text)
max_sequence_len = max(len(seq) for seq in input_sequences)
input_sequences = pad_sequences(input_sequences, maxlen=max_sequence_len, padding='post')

In [None]:
# Convert string labels to integers using LabelEncoder.
# NOTE: this overwrites df['Drug_Name'] in place, so from here on the column holds
# integer class ids; use label_encoder.inverse_transform(...) to get names back.
label_encoder = LabelEncoder()
df['Drug_Name'] = label_encoder.fit_transform(df['Drug_Name'])

# Create the ontology
onto = Graph()
onto_namespace = Namespace("http://example.org/")

# Define ontology classes and properties.
# The terms come from rdflib's OWL namespace: URIRef("owl:Class") would create an
# IRI that is literally "owl:Class" instead of the OWL vocabulary term.
onto.add((onto_namespace.Medicine, RDF.type, OWL.Class))

# These properties are only ever given Literal values, which makes them datatype
# properties; object properties link one individual to another.
for property_name in ("hasName", "hasSymptoms", "hasCauses", "hasDisease"):
    onto.add((onto_namespace[property_name], RDF.type, OWL.DatatypeProperty))

<Graph identifier=Nacb463e3548c402cae8bdc8864e26de0 (<class 'rdflib.graph.Graph'>)>

In [None]:
# Populate ontology with data
# The LabelEncoder cell overwrote df['Drug_Name'] with integer codes. rdflib's
# Namespace[...] only appends string keys, so indexing it with those codes would
# give every medicine the same bare namespace URI. Recover the names first.
drug_names = df['Drug_Name']
if pd.api.types.is_numeric_dtype(drug_names):
    drug_names = label_encoder.inverse_transform(drug_names)

# zip over the columns instead of iterrows(): same rows, no per-row Series objects.
for drug_name, reason, description in zip(drug_names, df['Reason'], df['Description']):
    drug_name = str(drug_name)
    # Percent-encode the name so spaces and punctuation still yield a valid IRI.
    medicine_uri = onto_namespace[quote(drug_name, safe='')]
    onto.add((medicine_uri, RDF.type, onto_namespace.Medicine))
    onto.add((medicine_uri, onto_namespace.hasName, Literal(drug_name)))
    # TODO(review): hasSymptoms is filled from the drug name, exactly as the
    # original cell did; this looks like a copy-paste slip - confirm which column
    # (if any) actually holds symptoms.
    onto.add((medicine_uri, onto_namespace.hasSymptoms, Literal(drug_name)))
    onto.add((medicine_uri, onto_namespace.hasCauses, Literal(reason)))
    onto.add((medicine_uri, onto_namespace.hasDisease, Literal(description)))

In [None]:
# Define the deep learning model
# The target is the encoded drug name, so the softmax layer needs one unit per
# class known to the label encoder - not one per vocabulary word.
num_classes = len(label_encoder.classes_)

model = tf.keras.Sequential([
    # Explicit input spec replaces Embedding's deprecated `input_length` argument.
    tf.keras.Input(shape=(max_sequence_len,)),
    tf.keras.layers.Embedding(total_words, 100),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(num_classes, activation='softmax')
])

# Labels are integer class ids, hence the sparse variant of the loss.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Fit the network on the padded sequences, using the encoded drug ids as targets
training_labels = df['Drug_Name']
model.fit(x=input_sequences, y=training_labels, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7dcc449b5450>

In [None]:
# Write the ontology graph to disk using Turtle syntax
ontology_path = '/content/medicine_prescription.owl'
onto.serialize(destination=ontology_path, format='turtle')

<Graph identifier=Nacb463e3548c402cae8bdc8864e26de0 (<class 'rdflib.graph.Graph'>)>

In [None]:
# Convert string labels to integers using LabelEncoder - but only if that has not
# happened yet. Fitting a fresh encoder on a column that already holds integer
# codes replaces `classes_` with those integers, and inverse_transform then
# returns numbers instead of drug names.
if not pd.api.types.is_integer_dtype(df['Drug_Name']):
    label_encoder = LabelEncoder()
    df['Drug_Name'] = label_encoder.fit_transform(df['Drug_Name'])

# Define the deep learning model
# One softmax unit per drug class known to the label encoder; sizing the layer by
# vocabulary size lets the model emit class ids the encoder cannot decode.
num_classes = len(label_encoder.classes_)

model = tf.keras.Sequential([
    # Explicit input spec replaces Embedding's deprecated `input_length` argument.
    tf.keras.Input(shape=(max_sequence_len,)),
    tf.keras.layers.Embedding(total_words, 100),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(num_classes, activation='softmax')
])

# Labels are integer class ids, hence the sparse variant of the loss.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Run ten training epochs over the padded inputs and their drug-id labels
model.fit(
    x=input_sequences,
    y=df['Drug_Name'],
    epochs=10,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7dcc3e516650>

In [None]:
# Function to predict medicine for a given disease
def predict_medicine(Description):
    """Predict a medicine for a free-text disease description.

    Args:
        Description: Disease text. It is tokenized with the notebook's fitted
            `tokenizer` and padded to `max_sequence_len`, like the training data.

    Returns:
        The label that `label_encoder.inverse_transform` yields for the
        highest-scoring class.
    """
    # Preprocess the input disease
    input_text = tokenizer.texts_to_sequences([Description])
    padded_input = pad_sequences(input_text, maxlen=max_sequence_len, padding='post')

    # Make prediction (a single row of class scores)
    prediction = model.predict(padded_input)

    # Only class ids the encoder knows can be decoded. Restrict the argmax to that
    # range so an over-sized output layer cannot make inverse_transform raise.
    num_known_classes = len(label_encoder.classes_)
    predicted_class = int(np.argmax(prediction[0][:num_known_classes]))

    # Convert predicted class back to original medicine name using inverse transform
    predicted_medicine = label_encoder.inverse_transform([predicted_class])[0]

    return predicted_medicine

# Example usage: query the trained model with one sample disease
input_disease = "Acne"  # Replace with the actual disease input
predicted_medicine = predict_medicine(input_disease)
message = f"For the disease '{input_disease}', the predicted medicine is: {predicted_medicine}"
print(message)

For the disease 'Acne', the predicted medicine is: 16961
