In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from imblearn.over_sampling import SMOTE
import xgboost as xgb



In [None]:
# Load and clean data
DATA_PATH = "Finale.csv"  # Replace with actual path
data = pd.read_csv(DATA_PATH)

# pandas names a header-less column "Unnamed: <position>"; presumably this one
# is a row index written by an earlier to_csv call (verify against the file).
# errors="ignore" keeps the cell working when the CSV has no such column.
data = data.drop(columns=["Unnamed: 0"], errors="ignore")


In [None]:
# Merge classes for severity
# Each coarse class lists the raw labels folded into it; any label not listed
# here is left exactly as it is.
# NOTE(review): recent pandas versions treat the literal text "None" as a
# missing value in read_csv by default, so the 'None' alias may never match and
# those rows would be removed by the dropna below instead - confirm against
# the raw CSV and the read_csv options in use.
merge_groups = {
    'Mild': ['None', 'Very Mild'],
    'Moderate': ['significantly'],
    'Severe': ['Life-threatening', 'Risk', 'Grade'],
}
alias_to_class = {
    alias: target
    for target, aliases in merge_groups.items()
    for alias in aliases
}
merged_severity = data['Severity'].replace(alias_to_class)

# Discard rows that still have no severity label after merging.
data = data.assign(Severity=merged_severity).dropna(subset=['Severity'])


In [None]:
# Map severity levels to numerical values
severity_mapping = {'Uncertain': 0, 'Mild': 1, 'Moderate': 2, 'Severe': 3}

# Re-running this cell on an already-encoded column would map every integer
# code to NaN, so only encode while the column still holds text labels.
already_encoded = data['Severity'].isin(list(severity_mapping.values())).all()
if not already_encoded:
    severity_codes = data['Severity'].map(severity_mapping)

    # Series.map yields NaN for any label missing from the mapping. Fail here,
    # naming the offending labels, instead of letting NaN targets surface later
    # as an opaque error during resampling or model fitting.
    unmapped = severity_codes.isna()
    if unmapped.any():
        unknown_labels = sorted(data.loc[unmapped, 'Severity'].astype(str).unique())
        raise ValueError(
            "Severity labels with no entry in severity_mapping: "
            f"{unknown_labels}"
        )

    data['Severity'] = severity_codes.astype('int64')


In [None]:
# Feature extraction with TF-IDF
X = data['AdverseReaction']
y = data['Severity']
vectorizer = TfidfVectorizer()
X_vectorized = vectorizer.fit_transform(X)

In [None]:
# Apply SMOTE for balancing classes
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_vectorized, y)


In [None]:

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

In [None]:
# Define a dictionary to hold models
# Keys are the display names printed during evaluation; values are unfitted
# estimators. Every estimator that accepts a seed uses the same one so a
# fresh run reproduces the same numbers.
models = {
    'Logistic Regression': LogisticRegression(class_weight='balanced', max_iter=1000),
    'Random Forest': RandomForestClassifier(class_weight='balanced', n_estimators=100, random_state=42),
    'SVM': SVC(class_weight='balanced', kernel='linear', random_state=42),
    'XGBoost': xgb.XGBClassifier(
        objective='multi:softmax',
        # Derived from the label mapping so the class count cannot drift out of
        # sync if a severity level is added or removed.
        num_class=len(severity_mapping),
        eval_metric='mlogloss',
        use_label_encoder=False,
        random_state=42,
    ),
}


In [None]:
# Fit and evaluate each model
# Pin the report to the full set of class codes, with names ordered by code.
# Without explicit labels, classification_report infers the classes from
# y_test/y_pred and raises when that count differs from len(target_names),
# e.g. when one severity level never appears in the test split.
class_ids = sorted(severity_mapping.values())
class_names = sorted(severity_mapping, key=severity_mapping.get)

for model_name, model in models.items():
    print(f"\n--- {model_name} ---")
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(
        classification_report(
            y_test,
            y_pred,
            labels=class_ids,
            target_names=class_names,
            zero_division=0,  # report 0 (no warning) for classes never predicted
        )
    )
