<a href="https://colab.research.google.com/github/JaidenFlint/Project3_Group2/blob/Kavita/Project3drugsQ%26A2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [18]:
# Install the required libraries
!pip install requests beautifulsoup4



In [19]:
# Import necessary libraries
import requests
from bs4 import BeautifulSoup
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
import joblib

# Web Scraping Function
def scrape_drug_info(drug_name):
    url = f'https://www.drugs.com/{drug_name}.html'
    response = requests.get(url)

    if response.status_code != 200:
        return None

    soup = BeautifulSoup(response.text, 'html.parser')
    drug_info = {}

    # Find description
    description_div = soup.find("div", class_="content")
    if description_div:
        description_paragraphs = description_div.find_all("p")
        drug_info['description'] = " ".join([p.text.strip() for p in description_paragraphs])
    else:
        drug_info['description'] = "Description not found."

    # Find uses
    uses_section = soup.find(string="Uses")
    if uses_section:
        drug_info['uses'] = uses_section.find_next("p").text.strip()
    else:
        drug_info['uses'] = "Uses not found."

    # Find side effects
    side_effects_section = soup.find(string="Side effects")
    if side_effects_section:
        drug_info['side_effects'] = side_effects_section.find_next("p").text.strip()
    else:
        drug_info['side_effects'] = "Side effects not found."

    return drug_info

# Scrape multiple drugs
drugs = ['aspirin', 'ibuprofen', 'metformin']
drug_data = []

for drug in drugs:
    info = scrape_drug_info(drug)
    if info:
        drug_data.append(info)

# Convert to DataFrame
drug_df = pd.DataFrame(drug_data)

# Save DataFrame to CSV
drug_df.to_csv('drug_info.csv', index=False)

In [20]:
# Import necessary libraries for multi-label classification
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import LeaveOneOut
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import pandas as pd

# Simulated drug_df for demonstration purposes
data = {'description': ['pain relief', 'nausea treatment', 'pain and fever relief'],
        'uses': ['pain', 'nausea', 'pain']}
drug_df = pd.DataFrame(data)

# Prepare data for model training
X = drug_df['description']  # Features
y = drug_df['uses'].copy()  # Focus on one target variable for simplicity

# Encode labels (for simplicity, using string matching)
y = y.apply(lambda x: 1 if 'pain' in x.lower() else 0)  # Dummy encoding

# Check the unique values in the labels to identify any unexpected values
print("Unique values in 'uses':", y.unique())
print("Class distribution in 'uses':")
print(y.value_counts())  # Check the class distribution

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a pipeline for vectorization and model training
pipeline = Pipeline([
    ('tfidf', TfidfVectorizer()),
    ('classifier', MultinomialNB(class_prior=[0.33, 0.67]))  # Adjust class priors based on class distribution
])

# Initialize Leave-One-Out Cross-Validation
loo = LeaveOneOut()

# Fit the model
try:
    pipeline.fit(X_train, y_train)
except ValueError as e:
    print("Error while fitting the model:", e)

# Validate the model
y_pred = pipeline.predict(X_test)

# Print classification report
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Save the trained model
# joblib.dump(pipeline, 'best_drug_model.pkl')

Unique values in 'uses': [1 0]
Class distribution in 'uses':
uses
1    2
0    1
Name: count, dtype: int64
Classification Report:
              precision    recall  f1-score   support

           1       1.00      1.00      1.00         1

    accuracy                           1.00         1
   macro avg       1.00      1.00      1.00         1
weighted avg       1.00      1.00      1.00         1



In [21]:
# Hyperparameter tuning using Grid Search
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import GridSearchCV, LeaveOneOut
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import pandas as pd

# Simulated drug_df for demonstration purposes
data = {'description': ['pain relief', 'nausea treatment', 'pain and fever relief'],
        'uses': ['pain', 'nausea', 'pain']}
drug_df = pd.DataFrame(data)

# Prepare data for model training
X = drug_df['description']  # Features
y = drug_df['uses'].copy()  # Target variable

# Encode labels (for simplicity, using string matching)
y = y.apply(lambda x: 1 if 'pain' in x.lower() else 0)  # Dummy encoding

# Check the unique values in the labels to identify any unexpected values
print("Unique values in 'uses':", y.unique())
print("Class distribution in 'uses':")
print(y.value_counts())  # Check the class distribution

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a pipeline for vectorization and model training
pipeline = Pipeline([
    ('tfidf', TfidfVectorizer()),
    ('classifier', MultinomialNB())  # Use only one classifier
])

# Hyperparameter tuning using Grid Search
param_grid = {
    'classifier__alpha': [0.1, 0.5, 1.0, 1.5, 2.0]  # Hyperparameter to tune
}

# Initialize Leave-One-Out Cross-Validation
loo = LeaveOneOut()

# Initialize GridSearchCV with Leave-One-Out
grid_search = GridSearchCV(pipeline, param_grid, cv=loo, n_jobs=-1)

# Fit the model
try:
    grid_search.fit(X_train, y_train)
except ValueError as e:
    print("Error while fitting the model:", e)

# Check if the model fitted successfully
if hasattr(grid_search, 'best_estimator_'):
    # Best model
    best_model = grid_search.best_estimator_

    # Validate the model
    y_pred = best_model.predict(X_test)

    # Print classification report
    print("Classification Report:")
    print(classification_report(y_test, y_pred))
else:
    print("Model fitting failed; unable to retrieve best estimator.")

Unique values in 'uses': [1 0]
Class distribution in 'uses':
uses
1    2
0    1
Name: count, dtype: int64
Classification Report:
              precision    recall  f1-score   support

           1       1.00      1.00      1.00         1

    accuracy                           1.00         1
   macro avg       1.00      1.00      1.00         1
weighted avg       1.00      1.00      1.00         1



In [22]:
# Build text classification model using Support Vector Machines (SVM)
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder

# Simulated drug_df for demonstration purposes
data = {'description': ['pain relief', 'nausea treatment', 'pain and fever relief'],
        'uses': ['pain', 'nausea', 'pain']}
drug_df = pd.DataFrame(data)

# Prepare data for model training
X = drug_df['description']  # Features
y = drug_df['uses'].copy()  # Target variable

# Encode labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Vectorization using TF-IDF
tfidf_vectorizer = TfidfVectorizer()
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)

# Initialize Support Vector Machine model
model = SVC(kernel='linear', probability=True)

# Fit the model
model.fit(X_train_tfidf, y_train)

# Predict on test data
y_pred = model.predict(X_test_tfidf)

# Print classification report, specifying labels
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_, labels=range(len(label_encoder.classes_))))

Classification Report:
              precision    recall  f1-score   support

      nausea       0.00      0.00      0.00         0
        pain       1.00      1.00      1.00         1

    accuracy                           1.00         1
   macro avg       0.50      0.50      0.50         1
weighted avg       1.00      1.00      1.00         1



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [23]:
# Text classification model using a Convolutional Neural Network (CNN) with TensorFlow/Keras
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense, Dropout
from tensorflow.keras.regularizers import l2
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Simulated drug_df for demonstration purposes
data = {'description': ['pain relief', 'nausea treatment', 'pain and fever relief'],
        'uses': ['pain', 'nausea', 'pain']}
drug_df = pd.DataFrame(data)

# Prepare data for model training
X = drug_df['description']  # Features
y = drug_df['uses'].copy()  # Target variable

# Encode labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Tokenization
max_words = 1000  # Maximum number of words to consider
max_len = 20      # Maximum length of each input sequence

tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(X)
X_encoded = tokenizer.texts_to_sequences(X)
X_padded = pad_sequences(X_encoded, maxlen=max_len)

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_padded, y_encoded, test_size=0.2, random_state=42)

# Build the CNN model
model = Sequential()
model.add(Embedding(input_dim=max_words, output_dim=128))  # Removed input_length argument
model.add(Conv1D(filters=32, kernel_size=3, activation='relu'))
model.add(GlobalMaxPooling1D())
model.add(Dropout(0.5))  # Added dropout layer
model.add(Dense(16, activation='relu', kernel_regularizer=l2(0.01)))  # L2 regularization
model.add(Dropout(0.5))  # Added dropout layer
model.add(Dense(len(label_encoder.classes_), activation='softmax'))  # Output layer

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Fit the model
try:
    model.fit(X_train, y_train, epochs=10, batch_size=2, validation_split=0.2)
except Exception as e:
    print("Error while fitting the model:", e)

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss:.4f}, Test Accuracy: {accuracy:.4f}")

# Predict on new data
predictions = model.predict(X_test)
predicted_classes = np.argmax(predictions, axis=1)
print("Predicted classes:", label_encoder.inverse_transform(predicted_classes))

Epoch 1/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step - accuracy: 1.0000 - loss: 0.8694 - val_accuracy: 0.0000e+00 - val_loss: 0.9689
Epoch 2/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 292ms/step - accuracy: 1.0000 - loss: 0.7974 - val_accuracy: 0.0000e+00 - val_loss: 0.9725
Epoch 3/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 226ms/step - accuracy: 0.0000e+00 - loss: 0.9327 - val_accuracy: 0.0000e+00 - val_loss: 0.9769
Epoch 4/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 95ms/step - accuracy: 1.0000 - loss: 0.8408 - val_accuracy: 0.0000e+00 - val_loss: 0.9804
Epoch 5/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 89ms/step - accuracy: 1.0000 - loss: 0.7768 - val_accuracy: 0.0000e+00 - val_loss: 0.9836
Epoch 6/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 140ms/step - accuracy: 1.0000 - loss: 0.7670 - val_accuracy: 0.0000e+00 - val_loss: 0.9865
Epoch 7/10
[1m1/

In [24]:
# Using SVM
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder

# Expanded dataset for demonstration purposes
data = {
    'description': [
        'pain relief', 'nausea treatment', 'pain and fever relief',
        'nausea', 'headache relief', 'pain medication',
        'anti-nausea medication', 'nausea and vomiting',
        'pain management strategies', 'over-the-counter pain relief',
        'pain and nausea', 'migraines treatment', 'chronic pain relief',
        'post-operative nausea', 'acute nausea management'
    ],
    'uses': [
        'pain', 'nausea', 'pain', 'nausea', 'pain',
        'pain', 'nausea', 'nausea', 'pain', 'pain',
        'pain', 'nausea', 'pain', 'nausea', 'nausea'
    ]
}
drug_df = pd.DataFrame(data)

# Prepare data for model training
X = drug_df['description']  # Features
y = drug_df['uses'].copy()  # Target variable

# Encode labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split data into train and test sets with stratified sampling
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)

# Vectorization using TF-IDF
tfidf_vectorizer = TfidfVectorizer()
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)

# Initialize Support Vector Machine model
model = SVC(kernel='linear', probability=True)

# Fit the model
model.fit(X_train_tfidf, y_train)

# Predict on test data
y_pred = model.predict(X_test_tfidf)

# Print classification report, specifying zero_division parameter
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_, zero_division=0))

Classification Report:
              precision    recall  f1-score   support

      nausea       1.00      1.00      1.00         1
        pain       1.00      1.00      1.00         2

    accuracy                           1.00         3
   macro avg       1.00      1.00      1.00         3
weighted avg       1.00      1.00      1.00         3



In [25]:
# Cross-Validation
from sklearn.model_selection import cross_val_score

# Initialize Support Vector Machine model
model = SVC(kernel='linear', probability=True)

# Perform cross-validation
scores = cross_val_score(model, X_train_tfidf, y_train, cv=5, scoring='accuracy')

# Print cross-validation scores
print("Cross-validation scores:", scores)
print("Mean cross-validation score:", np.mean(scores))

Cross-validation scores: [1. 1. 1. 1. 1.]
Mean cross-validation score: 1.0


In [26]:
# Model using a simple neural network with TensorFlow/Keras
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import TensorBoard

# Expanded dataset for demonstration purposes
data = {
    'description': [
        'pain relief', 'nausea treatment', 'pain and fever relief',
        'nausea', 'headache relief', 'pain medication',
        'anti-nausea medication', 'nausea and vomiting',
        'pain management strategies', 'over-the-counter pain relief',
        'pain and nausea', 'migraines treatment', 'chronic pain relief',
        'post-operative nausea', 'acute nausea management'
    ],
    'uses': [
        'pain', 'nausea', 'pain', 'nausea', 'pain',
        'pain', 'nausea', 'nausea', 'pain', 'pain',
        'pain', 'nausea', 'pain', 'nausea', 'nausea'
    ]
}
drug_df = pd.DataFrame(data)

# Prepare data for model training
X = drug_df['description']  # Features
y = drug_df['uses'].copy()  # Target variable

# Encode labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded)

# Vectorization using TF-IDF
tfidf_vectorizer = TfidfVectorizer()
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)

# Create a simple neural network model
model = Sequential()
model.add(Dense(10, activation='relu', input_shape=(X_train_tfidf.shape[1],)))
model.add(Dense(2, activation='softmax'))  # 2 classes

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Set up TensorBoard callback
tensorboard_callback = TensorBoard(log_dir='./logs', histogram_freq=1)

# Train the model
model.fit(X_train_tfidf.toarray(), y_train, epochs=10, batch_size=5, validation_data=(X_test_tfidf.toarray(), y_test), callbacks=[tensorboard_callback])

# After training, you can run TensorBoard with the following command in the terminal:
# tensorboard --logdir=./logs

Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 192ms/step - accuracy: 0.6417 - loss: 0.6653 - val_accuracy: 0.6667 - val_loss: 0.6427
Epoch 2/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step - accuracy: 0.6833 - loss: 0.6634 - val_accuracy: 0.6667 - val_loss: 0.6371
Epoch 3/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 138ms/step - accuracy: 0.7083 - loss: 0.6495 - val_accuracy: 0.6667 - val_loss: 0.6314
Epoch 4/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 79ms/step - accuracy: 0.7000 - loss: 0.6772 - val_accuracy: 0.6667 - val_loss: 0.6261
Epoch 5/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step - accuracy: 0.7500 - loss: 0.6579 - val_accuracy: 0.6667 - val_loss: 0.6208
Epoch 6/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 87ms/step - accuracy: 0.7500 - loss: 0.6501 - val_accuracy: 0.6667 - val_loss: 0.6157
Epoch 7/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[3

<keras.src.callbacks.history.History at 0x7839c7488f10>

In [17]:
# Web Scraping and User Interaction
import requests
from bs4 import BeautifulSoup

def scrape_drug_info(drug_name):
    url = f'https://www.drugs.com/{drug_name}.html'
    response = requests.get(url)

    if response.status_code != 200:
        print(f"Failed to retrieve data for {drug_name}. Status code: {response.status_code}")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')

    drug_info = {}
    drug_info['name'] = drug_name.capitalize()

    # Find the description
    description_div = soup.find("div", class_="content")  # Adjust the selector as needed
    if description_div:
        description_paragraphs = description_div.find_all("p")
        drug_info['description'] = " ".join([p.text.strip() for p in description_paragraphs])
    else:
        drug_info['description'] = "Description not found."

    # Find uses
    uses_section = soup.find(string="Uses")
    if uses_section:
        drug_info['uses'] = uses_section.find_next("p").text.strip()
    else:
        drug_info['uses'] = "Uses not found."

    # Find side effects
    side_effects_section = soup.find(string="Side effects")
    if side_effects_section:
        drug_info['side_effects'] = side_effects_section.find_next("p").text.strip()
    else:
        drug_info['side_effects'] = "Side effects not found."

    return drug_info

def main():
    while True:
        drug_name = input("Enter the drug name (or type 'exit' to quit): ").strip().lower()
        if drug_name == 'exit':
            print("Exiting the program.")
            break

        info = scrape_drug_info(drug_name)
        if info:
            print(f"\nDrug Name: {info['name']}")
            print(f"Uses: {info['uses']}")
            print(f"Side Effects: {info['side_effects']}\n")
        else:
            print("No information found.\n")

if __name__ == "__main__":
    main()


Enter the drug name (or type 'exit' to quit): apixaban
Failed to retrieve data for apixaban. Status code: 404
No information found.

Enter the drug name (or type 'exit' to quit): addrall
Failed to retrieve data for addrall. Status code: 404
No information found.

Enter the drug name (or type 'exit' to quit): aspirin

Drug Name: Aspirin
Uses: Aspirin is a salicylate (sa-LIS-il-ate). It works by reducing substances in the body that cause pain, fever, and inflammation.
Side Effects: Aspirin is a salicylate (sa-LIS-il-ate). It works by reducing substances in the body that cause pain, fever, and inflammation.

Enter the drug name (or type 'exit' to quit): exit
Exiting the program.


In [30]:
# Install required libraries
!pip install gradio transformers langchain



In [39]:
# Integration with LangChain and Gradio
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Load pre-trained model and tokenizer from Hugging Face
model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Define labels, indications, and side effects for various medications
labels_info = {
    'pain': {
        'indications': "Used for alleviating various types of pain.",
        'side_effects': "Possible side effects include dizziness, nausea, and constipation."
    },
    'nausea': {
        'indications': "Used for treating nausea and vomiting.",
        'side_effects': "Possible side effects include drowsiness, dry mouth, and fatigue."
    },
    'anticoagulant': {
        'indications': "Used to prevent blood clots.",
        'side_effects': "Possible side effects include bleeding, bruising, and gastrointestinal issues."
    },
    'antiplatelet': {
        'indications': "Used to prevent blood clots by inhibiting platelet aggregation.",
        'side_effects': "Possible side effects include bleeding, gastrointestinal disturbances, and rash."
    },
    'benzodiazepine': {
        'indications': "Used to treat anxiety disorders and panic disorders.",
        'side_effects': "Possible side effects include drowsiness, dizziness, and fatigue."
    },
    'opioid': {
        'indications': "Used for managing severe pain.",
        'side_effects': "Possible side effects include drowsiness, constipation, and nausea."
    },
    'antiemetic': {
        'indications': "Used to prevent nausea and vomiting.",
        'side_effects': "Possible side effects include headache, dizziness, and constipation."
    },
    'calcium_channel_blocker': {
        'indications': "Used to treat high blood pressure and angina.",
        'side_effects': "Possible side effects include dizziness, flushing, and headache."
    }
}

# Function to classify input text and provide indications and side effects
def classify_text(text):
    # Simulate a more controlled output based on known medications
    text_lower = text.lower()
    if "acetaminophen" in text_lower:
        label = 'pain'
    elif "apixaban" in text_lower:
        label = 'anticoagulant'
    elif "clopidogrel" in text_lower:
        label = 'antiplatelet'
    elif "alprazolam" in text_lower:
        label = 'benzodiazepine'
    elif "norco" in text_lower:
        label = 'opioid'
    elif "zofran" in text_lower:
        label = 'antiemetic'
    elif "diltiazem" in text_lower:
        label = 'calcium_channel_blocker'
    elif "nausea" in text_lower:
        label = 'nausea'
    else:
        inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
        with torch.no_grad():
            outputs = model(**inputs)
        logits = outputs.logits
        predicted_class = logits.argmax(dim=-1).item()  # Get the index of the highest score
        label = list(labels_info.keys())[predicted_class]

    # Prepare output information
    indications = labels_info[label]['indications']
    side_effects = labels_info[label]['side_effects']

    return f"Classification: {label}\nIndications: {indications}\nSide Effects: {side_effects}"

# Set up Gradio interface
iface = gr.Interface(fn=classify_text,
                     inputs="text",
                     outputs="text",
                     title="Text Classification with Indications and Side Effects",
                     description="Input a text description, and the model will classify it, providing indications and side effects.")

# Launch the interface
iface.launch()

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://3868e1177251021e77.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


