In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from keras.models import load_model
import numpy as np

# Load your dataset
df = pd.read_csv('drugs.csv')

# Drop rows with any missing values
df.dropna(inplace=True)

# Ensure ratings are numeric and filter out invalid entries
df = df[pd.to_numeric(df['Rating'], errors='coerce').notnull()]
df.loc[:, 'Rating'] = df['Rating'].astype(float)

# Convert text data into TF-IDF features
vectorizer = TfidfVectorizer(max_features=5000)
X = vectorizer.fit_transform(df['Content']).toarray()

# Target variable
y = df['Rating'].values

# Transform ratings to categorical labels for classification
y = np.where(y >= 6, 1, 0)  # Example: ratings >= 6 are considered positive (1), others are negative (0)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the MLP model
model = Sequential()
model.add(Dense(128, input_dim=X_train.shape[1], activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))  # Output layer for binary classification

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2)

# Save the trained model
model.save('mlp_model.h5')

# Load the model (if needed)
# model = load_model('mlp_model.h5')

# Predict on the test set
y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype(int)

# Calculate metrics
conf_matrix = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Display metrics
print("Confusion Matrix:")
print(conf_matrix)
print("\nMetrics:")
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m5183/5183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 6ms/step - accuracy: 0.8079 - loss: 0.4149 - val_accuracy: 0.8510 - val_loss: 0.3391
Epoch 2/20
[1m5183/5183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 6ms/step - accuracy: 0.8826 - loss: 0.2829 - val_accuracy: 0.8671 - val_loss: 0.3195
Epoch 3/20
[1m5183/5183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 6ms/step - accuracy: 0.9424 - loss: 0.1579 - val_accuracy: 0.8715 - val_loss: 0.3366
Epoch 4/20
[1m5183/5183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 6ms/step - accuracy: 0.9755 - loss: 0.0746 - val_accuracy: 0.8715 - val_loss: 0.4522
Epoch 5/20
[1m5183/5183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 5ms/step - accuracy: 0.9890 - loss: 0.0320 - val_accuracy: 0.8707 - val_loss: 0.5595
Epoch 6/20
[1m5183/5183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 5ms/step - accuracy: 0.9943 - loss: 0.0170 - val_accuracy: 0.8694 - val_loss: 0.7090
Epoch 7/20



[1m1620/1620[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step
Confusion Matrix:
[[15290  3778]
 [ 2777 29984]]

Metrics:
Accuracy: 0.8735264041366803
Precision: 0.8880990462650317
Recall: 0.9152345776990934
F1-score: 0.9014626520150926
