In [None]:
from lazypredict.Supervised import LazyClassifier
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.utils import shuffle
from imblearn.over_sampling import SMOTE
import pandas as pd
import numpy as np

# Assumes `df` was created by an earlier cell and has a 'ReviewContent' column
# (already cleaned / text-preprocessed) and a 'Sentiment_encoded' label column.

# Shuffle the dataset.
# NOTE: this rebinds `df`; re-running this cell without reloading `df` permutes
# the already-shuffled frame again, which changes the split produced below.
df = shuffle(df, random_state=42)

texts = df['ReviewContent']
y = df['Sentiment_encoded']

# Split the raw text BEFORE vectorizing. Fitting TF-IDF on the full corpus would
# let the vocabulary and IDF weights be learned from the test reviews, leaking
# test information into the features and making the test scores optimistic.
texts_train, texts_test, y_train, y_test = train_test_split(
    texts, y, test_size=0.2, random_state=42
)

# Learn vocabulary/IDF from the training split only, then apply it to the test split.
# Dense arrays are needed because the features are wrapped in DataFrames below.
vectorizer = TfidfVectorizer(max_features=5000)
X_train = vectorizer.fit_transform(texts_train).toarray()
X_test = vectorizer.transform(texts_test).toarray()

# Full feature matrix, kept only so any later cell that references `X` still works.
# It is built with the train-fitted vectorizer and is not used for evaluation here.
X = vectorizer.transform(texts).toarray()

# Apply SMOTE to the training data only; the test set keeps its original class balance.
smote = SMOTE(random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)

# Convert back to DataFrames for LazyPredict, using the TF-IDF terms as column names.
feature_names = vectorizer.get_feature_names_out()
X_train_df = pd.DataFrame(X_train_smote, columns=feature_names)
X_test_df = pd.DataFrame(X_test, columns=feature_names)

# Initialize LazyClassifier; predictions=True makes fit() also return the predictions.
clf = LazyClassifier(predictions=True)

# Fit and evaluate all candidate models (train on the SMOTE-resampled data, score on the test split).
models_summary, predictions = clf.fit(X_train_df, X_test_df, y_train_smote, y_test)

print(models_summary)


 97%|█████████████████████████████████████████████████████████████████████████████▍  | 30/31 [1:24:09<00:37, 37.84s/it]

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.045867 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 94869
[LightGBM] [Info] Number of data points in the train set: 7472, number of used features: 2323
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294


100%|███████████████████████████████████████████████████████████████████████████████| 31/31 [1:24:21<00:00, 163.27s/it]

                               Accuracy  Balanced Accuracy ROC AUC  F1 Score  \
Model                                                                          
LGBMClassifier                     0.68               0.42    None      0.66   
NearestCentroid                    0.64               0.41    None      0.63   
XGBClassifier                      0.67               0.40    None      0.65   
ExtraTreesClassifier               0.68               0.40    None      0.65   
RandomForestClassifier             0.68               0.39    None      0.64   
LogisticRegression                 0.60               0.38    None      0.60   
LinearSVC                          0.57               0.38    None      0.59   
SGDClassifier                      0.65               0.38    None      0.63   
CalibratedClassifierCV             0.60               0.38    None      0.60   
PassiveAggressiveClassifier        0.58               0.38    None      0.59   
AdaBoostClassifier                 0.55 




In [None]:

# Build a fresh LazyClassifier that also hands back the predictions from fit().
clf = LazyClassifier(predictions=True)

# Run the benchmark on the frames/labels prepared by the previous cell:
# SMOTE-resampled training data, untouched test data.
models_summary, predictions = clf.fit(
    X_train=X_train_df,
    X_test=X_test_df,
    y_train=y_train_smote,
    y_test=y_test,
)

print(models_summary)
