In [None]:
!pip install pycaret > /dev/null 2>&1

In [None]:
!pip install --upgrade numpy==1.24.3 pandas==2.0.3

In [None]:
import pandas as pd

In [None]:
# Build the PyCaret classification experiment and pick the best baseline model.
# `pd` is expected to be imported by an earlier cell.
from pycaret.classification import *

data = pd.read_csv('realistic_high_accuracy_sentiment_dataset.csv')

# Experiment configuration:
#   - 'sentiment' is the label to predict
#   - 'review_text' is declared as a free-text feature
#   - 80% of the rows are used for training
#   - session_id fixes the random seed so reruns are comparable
clf = setup(
    data=data,
    target='sentiment',
    text_features=['review_text'],
    train_size=0.8,
    session_id=42,
)

# Train and cross-validate the candidate estimators; keep the top-ranked one.
best_model = compare_models()

In [None]:
# Interactive evaluation view for the selected model.
evaluate_model(best_model)

# Follow up with the ROC curve ('auc') and then the confusion matrix.
for plot_name in ('auc', 'confusion_matrix'):
    plot_model(best_model, plot=plot_name)

In [None]:
# Precision-Recall curve of the selected model.
plot_model(estimator=best_model, plot='pr')


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the review dataset from disk for exploratory plotting.
df = pd.read_csv('realistic_high_accuracy_sentiment_dataset.csv')

# Derive 'review_length': the number of characters in each review.
# `.str.len()` yields NaN for a missing review, whereas `apply(len)` raises
# TypeError as soon as it meets a NaN (a float has no length).
df['review_length'] = df['review_text'].str.len()

# Scatter plot: 'rating' vs 'review_length', coloured by sentiment.
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(x='rating', y='review_length', data=df, hue='sentiment', palette='coolwarm', ax=ax)
ax.set_title('Rating vs Review Length (Colored by Sentiment)')
ax.set_xlabel('Rating')
ax.set_ylabel('Review Length')
plt.show()


In [None]:
# Relationship between the 'rating' column and the target 'sentiment'.
# Uses `df`, `plt` and `sns` created by the previous plotting cell.
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(data=df, x='rating', y='sentiment', hue='sentiment', palette='coolwarm', ax=ax)
ax.set_title('Rating vs Sentiment')
ax.set_xlabel('Rating')
ax.set_ylabel('Sentiment')
plt.show()


In [None]:
# Render the standard diagnostic plots for the selected model, in order.
diagnostic_plots = (
    'auc',               # ROC curve
    'confusion_matrix',  # confusion matrix
    'pr',                # precision-recall curve
    'learning',          # learning curve
    'feature',           # feature importance
)
for plot_name in diagnostic_plots:
    plot_model(best_model, plot=plot_name)

In [None]:
# Learning curve first, then feature importance (useful for tabular datasets).
for plot_name in ('learning', 'feature'):
    plot_model(best_model, plot=plot_name)

In [None]:
import pandas as pd
from pycaret.classification import *
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import seaborn as sns

# Requires that setup(...) has already been run in this kernel session.

# Use the features *after* PyCaret's preprocessing pipeline. `get_config('X')`
# is the raw feature set, which still contains the free-text 'review_text'
# column; one-hot encoding that column creates one dummy per distinct review
# and makes the projection meaningless. The transformed set is what the models
# are actually trained on.
X = get_config('X_transformed')
# Target paired with the transformed features.
# NOTE(review): labels may be integer-encoded here rather than the original
# sentiment strings - confirm the legend reads as intended.
y = get_config('y_transformed')

# Reduce dimensions to 2D using PCA; the seed keeps the projection stable
# across reruns.
pca = PCA(n_components=2, random_state=42)
X_pca = pca.fit_transform(X)

# Create a scatter dataframe
scatter_df = pd.DataFrame(X_pca, columns=['PC1', 'PC2'])
scatter_df['Sentiment'] = y.values

# Plot scatter plot
plt.figure(figsize=(8, 6))
sns.scatterplot(data=scatter_df, x='PC1', y='PC2', hue='Sentiment', s=100, palette='Set2')
plt.title('2D PCA Scatter Plot of Sentiment Classes')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.grid(True)
plt.show()

In [None]:
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import seaborn as sns

# Requires `pd`, the PyCaret functions and a completed setup(...) from earlier
# cells.

# PCA needs a purely numeric matrix. `get_config('X')` is the raw feature set
# and still holds the string 'review_text' column, so fitting PCA on it fails.
# The preprocessed feature set is used instead.
X = get_config('X_transformed')
# Target paired with the transformed features.
# NOTE(review): labels may be integer-encoded - confirm the legend.
y = get_config('y_transformed')

# Project onto the first two principal components (seeded for repeatability).
pca = PCA(n_components=2, random_state=42)
X_pca = pca.fit_transform(X)

scatter_df = pd.DataFrame(X_pca, columns=['PC1', 'PC2'])
scatter_df['Label'] = y.values

plt.figure(figsize=(8, 6))
sns.scatterplot(data=scatter_df, x='PC1', y='PC2', hue='Label', palette='Set1', s=80)
plt.title('PCA Scatter Plot of Classes')
plt.grid(True)
plt.show()


In [None]:
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from pycaret.classification import *

# Requires that setup(...) has already been run in this kernel session.

# Take the feature matrix produced by PyCaret's preprocessing pipeline rather
# than one-hot encoding the raw features: the raw set contains the free-text
# 'review_text' column, and dummy-encoding it yields one indicator column per
# distinct review, which carries no usable structure for PCA.
X = get_config('X_transformed')
# Target paired with the transformed features.
# NOTE(review): labels may be integer-encoded - confirm the legend.
y = get_config('y_transformed')

# Project onto the first two principal components (seeded for repeatability).
pca = PCA(n_components=2, random_state=42)
X_pca = pca.fit_transform(X)

scatter_df = pd.DataFrame(X_pca, columns=['PC1', 'PC2'])
scatter_df['Label'] = y.values

plt.figure(figsize=(8, 6))
sns.scatterplot(data=scatter_df, x='PC1', y='PC2', hue='Label', palette='Set1', s=80)
plt.title('PCA Scatter Plot of Classes')
plt.grid(True)
plt.show()

In [None]:
# Save the core evaluation plots to image files (save=True).
# The ROC curve is requested with the identifier 'auc'; 'roc' is not a plot
# name that plot_model accepts and makes the call fail.
plot_model(best_model, plot='auc', save=True)               # ROC curve
plot_model(best_model, plot='confusion_matrix', save=True)  # Confusion matrix
plot_model(best_model, plot='pr', save=True)                # Precision-Recall curve
plot_model(best_model, plot='learning', save=True)          # Learning curve
plot_model(best_model, plot='error', save=True)             # Prediction error


In [None]:
# Save the full set of evaluation plots for the selected model, in this order.
plots_to_save = (
    'confusion_matrix',  # confusion matrix
    'pr',                # precision-recall curve
    'learning',          # learning curve
    'feature',           # feature importance
    'class_report',      # classification report
    'boundary',          # decision boundary
    'calibration',       # calibration curve
    'vc',                # validation curve
    'error',             # prediction error
    'manifold',          # manifold learning
    'parameter',         # model hyperparameters
    'lift',              # lift curve
    'gain',              # gain curve
    'ks',                # KS statistic
)
for plot_name in plots_to_save:
    plot_model(best_model, plot=plot_name, save=True)