In [1]:
import pandas as pd 
import numpy as np
import re
import string

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

from sklearn.metrics import confusion_matrix

### Data Facts and Import 

In [2]:
df_train = pd.read_csv('Dataset/SentimentAnalysisofTweetsthroughAltmetrics/train.csv')
df_test = pd.read_csv('Dataset/SentimentAnalysisofTweetsthroughAltmetrics/test.csv')

In [3]:
df_train.columns = ["Text", "Label"]
df_test.columns = ["Text", "Label"]

In [4]:
# remove the neutural.
df_train= df_train[df_train['Label'] != 0]
df_test= df_test[df_test['Label'] != 0]

In [5]:
df_train.shape

(470, 2)

In [5]:
df_train.head()

Unnamed: 0,Text,Label
0,good acronym copper nanotubes Definitely,-1
2,GlycemicIndex diet restricted energy effective...,1
3,higher fibre intake partic cereal fibre reduce...,1
4,next life going research copper nanotubes CuNTs,-1
6,Bean rich diet produces equivalent weight loss...,1


In [6]:
df_test.head()

Unnamed: 0,Text,Label
0,Yeah paper ebirdf,1
2,platform Bioinformatics paper advanced access ...,1
4,Duan naturally award Best Science Acronym year,-1
5,Everything Chinese turns swear word think karma,-1
6,dear difficulties finding scientific abbreviat...,-1


In [7]:
df_train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 470 entries, 0 to 730
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Text    470 non-null    object
 1   Label   470 non-null    int64 
dtypes: int64(1), object(1)
memory usage: 11.0+ KB


In [8]:
df_test.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 201 entries, 0 to 313
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Text    201 non-null    object
 1   Label   201 non-null    int64 
dtypes: int64(1), object(1)
memory usage: 4.7+ KB


In [9]:
df_train.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Label,470.0,0.029787,1.000621,-1.0,-1.0,1.0,1.0,1.0


In [10]:
df_test.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Label,201.0,0.014925,1.002385,-1.0,-1.0,1.0,1.0,1.0


### Data Cleaning / EDA

In [12]:
# ### Checking Missing values in the Data Set and printing the Percentage for Missing Values for Each Columns ###

# count = df_train.isnull().sum().sort_values(ascending=False)
# percentage = ((df_train.isnull().sum()/len(df_train)*100)).sort_values(ascending=False)
# missing_data = pd.concat([count, percentage], axis=1, keys=['Count','Percentage'])

# print('Count and percentage of missing values for the columns:')

# missing_data

In [13]:
# ### Checking for the Distribution of Default ###
# import matplotlib.pyplot as plt
# %matplotlib inline
# print('Percentage for default\n')
# print(round(df_train.Is_Response.value_counts(normalize=True)*100,2))
# round(df_train.Is_Response.value_counts(normalize=True)*100,2).plot(kind='bar')
# plt.title('Percentage Distributions by review type')
# plt.show()

In [14]:
#Removing columns
#df_train.drop(columns = ['User_ID', 'Browser_Used', 'Device_Used'], inplace = True)

In [15]:
# #This function converts to lower-case, removes square bracket, removes numbers and punctuation
# def text_clean_1(text):
#     text = text.lower()
#     text = re.sub('\[.*?\]', '', text)
#     text = re.sub('[%s]' % re.escape(string.punctuation), '', text)
#     text = re.sub('\w*\d\w*', '', text)
#     return text

# cleaned1 = lambda x: text_clean_1(x)

In [16]:
# # Apply first level cleaning

# # Let's take a look at the updated text
# df_train['cleaned_description'] = pd.DataFrame(df_train.Description.apply(cleaned1))
# df_train.head(10)

In [17]:
# # Apply a second round of cleaning
# def text_clean_2(text):
#     text = re.sub('[‘’“”…]', '', text)
#     text = re.sub('\n', '', text)
#     return text

# cleaned2 = lambda x: text_clean_2(x)

In [18]:
# # Let's take a look at the updated text
# df_train['cleaned_description_new'] = pd.DataFrame(df_train['cleaned_description'].apply(cleaned2))
# df_train.head(10)

### spliting the data.

In [11]:
#from sklearn.model_selection import train_test_split

#Independent_var = df_train.cleaned_description_new
#Dependent_var = df_train.Is_Response

#IV_train, IV_test, DV_train, DV_test = train_test_split(Independent_var, Dependent_var, test_size = 0.1, random_state = 225)

IV_train = df_train.Text
DV_train = df_train.Label
IV_test = df_test.Text
DV_test = df_test.Label


print('IV_train :', len(IV_train))
print('IV_test  :', len(IV_test))
print('DV_train :', len(DV_train))
print('DV_test  :', len(DV_test))


IV_train : 470
IV_test  : 201
DV_train : 470
DV_test  : 201


### Model training 

### Logistic Regression

In [12]:
from sklearn.linear_model import LogisticRegression

#TF-IDF
# Convert x_train to vector
tfidf_vectorizer = TfidfVectorizer(use_idf=True)
tfidf_train = tfidf_vectorizer.fit_transform(IV_train).toarray()
tfidf_test = tfidf_vectorizer.transform(IV_test).toarray()

model=LogisticRegression()
model.fit(tfidf_train, DV_train) 


In [13]:
y_pred = model.predict(tfidf_test)
y_prob = model.predict_proba(tfidf_test)[:,1]

In [14]:
from sklearn.metrics import accuracy_score, precision_score, recall_score

print("Accuracy : ", accuracy_score(y_pred, DV_test))
print("Precision : ", precision_score(y_pred, DV_test, average = 'weighted'))
print("Recall : ", recall_score(y_pred, DV_test, average = 'weighted'))
print("F1 Score : ", recall_score(y_pred, DV_test, average = 'weighted'))

Accuracy :  0.9303482587064676
Precision :  0.9305670104616549
Recall :  0.9303482587064676
F1 Score :  0.9303482587064676


In [15]:
!pip install transformers




[notice] A new release of pip available: 22.2.1 -> 22.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip





In [16]:
import shap
import transformers


tokenizer = transformers.AutoTokenizer.from_pretrained("nateraw/bert-base-uncased-emotion", use_fast=True)
model = transformers.AutoModelForSequenceClassification.from_pretrained("nateraw/bert-base-uncased-emotion")

#classifier = transformers.pipeline('text-classification', model = model, tokenizer=tokenizer, return_all_scores=True)
classifier = transformers.pipeline('sentiment-analysis', return_all_scores=True)



  from .autonotebook import tqdm as notebook_tqdm
No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
`return_all_scores` is now deprecated, use `top_k=1` if you want similar functionnality


In [17]:
classifier(df_train.Text[:3].tolist())

[[{'label': 'NEGATIVE', 'score': 0.0016647680895403028},
  {'label': 'POSITIVE', 'score': 0.9983353018760681}],
 [{'label': 'NEGATIVE', 'score': 0.996561586856842},
  {'label': 'POSITIVE', 'score': 0.003438386367633939}],
 [{'label': 'NEGATIVE', 'score': 0.35527631640434265},
  {'label': 'POSITIVE', 'score': 0.644723653793335}]]

In [18]:
#targets.tolist()
targets = df_train.Label.unique()

In [19]:
explainer = shap.Explainer(classifier , output_names= targets)

In [20]:
shap_values = explainer(df_test['Text'][:3])
shap.plots.text(shap_values)

Partition explainer: 4it [00:28, 14.41s/it]               


In [None]:
import shap

explainer = shap.LinearExplainer(model, tfidf_train, feature_dependence="independent")
shap_values = explainer.shap_values(tfidf_test)
X_test_array = pd.DataFrame(tfidf_test) #.toarray() 

In [None]:
#1

ind = 0
shap.force_plot(
    explainer.expected_value, shap_values[ind,:], X_test_array.iloc[ind,:],
    feature_names=tfidf_vectorizer.get_feature_names()
)

In [None]:
print("Positive" if DV_test[ind] else "Negative", "Review:")
print(IV_test[ind])

In [None]:
#2
ind = 2
shap.force_plot(explainer.expected_value, shap_values[ind,:], X_test_array.iloc[ind,:],
    feature_names=tfidf_vectorizer.get_feature_names()
)

In [None]:
print("Positive" if DV_test[ind] else "Negative", "Review:")
print(IV_test[ind])

In [None]:
#3
ind = 4
shap.force_plot(explainer.expected_value, shap_values[ind,:], X_test_array.iloc[ind,:],
    feature_names=tfidf_vectorizer.get_feature_names()
)

In [None]:
print("Positive" if DV_test[ind] else "Negative", "Review:")
print(IV_test[ind])

In [None]:
DV_test

In [None]:
shap.plots.text(shap_values[ind,:])

In [None]:
# explain the model's predictions on IMDB reviews
imdb_train = IV_train #nlp.load_dataset("imdb")["train"]
shap_values = explainer(imdb_train[:10])

In [None]:
import shap

# subtracting the samples
tfidf_train_sample = tfidf_train [0:100]
tfidf_test_sample = tfidf_test [0:20]

# creating the KernelExplainer using the logistic regression model and training sample
SHAP_explainer = shap.KernelExplainer(model.predict, tfidf_train_sample)
# calculating the shap values of the test sample using the explainer 
shap_values = SHAP_explainer.shap_values(tfidf_test_sample)

In [None]:
tfidf_train_sample

In [None]:
# converting the test samples to a dataframe 
# this is necessary for non-tabular data in order for the visualisations 
# to include feature value
colour_test = pd.DataFrame(tfidf_test_sample)

In [None]:
shap.summary_plot(shap_values, colour_test, feature_names=tfidf_vectorizer.get_feature_names())

In [None]:
colour_test.iloc[2:].tail()

In [None]:
colour_test.iloc[2,:].tail()

In [None]:
SHAP_explainer.expected_value

In [None]:
shap_values[1,:]

In [None]:
df_test

In [None]:
shap.initjs()

shap.force_plot(SHAP_explainer.expected_value, shap_values[1,:], colour_test.iloc[1,:], feature_names=tfidf_vectorizer.get_feature_names())

In [None]:
model.predict_proba

In [None]:
IV_test

In [None]:
# explainer1 = shap.Explainer(model.predict)
# shap_values1 = explainer1(tfidf_test[0:2])

import shap

feature_names = tfidf_vectorizer.get_feature_names_out()
explainer = shap.Explainer(model, tfidf_train, feature_names=feature_names)
shap_values = explainer(tfidf_test)
print(shap_values.values.shape) # (201, 1368)

In [None]:
IV_test.head(1)

In [None]:
ind = 2
print(df_test.iloc[ind])

shap.plots.force(shap_values[ind])
# shap.plots.waterfall(shap_values[ind,:,1])

In [None]:
shap.force_plot(explainer, shap_values[1,:], 
                colour_test.iloc[1,:], feature_names=tfidf_vectorizer.get_feature_names())

In [None]:
explainer = shap.Explainer(model.predict) 
shap_values = explainer("I love Burundian coffee! let's #Visit Burundi.")

  # visualize the first prediction's explanation for the POSITIVE output class
shap.plots.text(shap_values[0, :, "POSITIVE"])

In [None]:
# https://medium.com/nlplanet/two-minutes-nlp-explain-predictions-with-shap-values-2a0e34219177

import shap

feature_names = tfidf_vectorizer.get_feature_names_out()
explainer = shap.Explainer(model, tfidf_train, feature_names=feature_names)
shap_values = explainer(tfidf_test)
print(shap_values.values.shape) # (5000, 16438, 2)

In [None]:
shap.initjs()

ind = 1
#print(IV_test[ind])
# This amazing documentary gives us a glimpse into the lives of the brave
# women in Cameroun's judicial system-- policewomen, lawyers and judges. Despite
# tremendous difficulties-- lack of means, the desperate poverty of the people, multiple
# languages and multiple legal precedents depending on the region of the country and
# the religious/ethnic background of the plaintiffs and defendants-- these brave,
# strong women are making a difference. [...]

shap.plots.text(shap_values[ind,:,0])

In [None]:
feature_names = tfidf_vectorizer.get_feature_names_out()
feature_names

In [None]:
shap.plots.text(shap_values[3])

In [None]:
num_explanations = 20
shap_vals = explainer.shap_values(IV_test[:num_explanations])

feature_names = tfidf_vectorizer.get_feature_names_out()
shap.summary_plot(shap_vals, feature_names=feature_names, class_names=[1,-1])

# visualize the first prediction's explanation for the POSITIVE output class
#shap.plots.text(shap_values[:, "1"])

In [None]:
explainer = shap.Explainer(model.predict , output_names= [1,-1])

In [None]:
shap_values = explainer(tfidf_test[0:3])
shap.plots.text(shap_values)

### Support Vector Machine

In [None]:
from sklearn.svm import SVC
from sklearn import metrics
svc=SVC(probability=True) #Default hyperparameters

model = Pipeline([('vectorizer',tvec), ('classifier',svc)] )
model.fit(IV_train, DV_train)
predictions = model.predict(IV_test)


# svc.fit(X_train,y_train)
# y_pred=svc.predict(X_test)
# print('Accuracy Score:')
# print(metrics.accuracy_score(y_test,y_pred))

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score

print("Accuracy : ", accuracy_score(predictions, DV_test))
print("Precision : ", precision_score(predictions, DV_test, average = 'weighted'))
print("Recall : ", recall_score(predictions, DV_test, average = 'weighted'))
print("F1 Score : ", recall_score(predictions, DV_test, average = 'weighted'))

In [None]:
#import lime
#import lime.lime_text
from lime_text import LimeTextExplainer

# from lime.lime_text import LimeTextExplainer
explainer = LimeTextExplainer(class_names=[-1,1])

In [None]:
idx =64
df_train.iloc[idx]

In [None]:
idx =0
exp = explainer.explain_instance(df_train.cleaned_description_new[idx], model.predict_proba, num_features=10)
#print('True class: %s' % reviews_test.label[idx])
exp.show_in_notebook(text=True)
exp

### GaussianNB

In [None]:
# # Create the tf-idf vectorizer
# vectorizer = TfidfVectorizer(strip_accents='ascii')

# # First fit the vectorizer with our training set
# tfidf_train = vectorizer.fit_transform(IV_train)

# # Now we can fit our test data with the same vectorizer
# tfidf_test = vectorizer.transform(IV_test)

# # https://www.kaggle.com/code/barishasdemir/classification-with-naive-bayes

In [None]:
from sklearn.naive_bayes import GaussianNB

#TF-IDF
# Convert x_train to vector
tfidf_vectorizer = TfidfVectorizer()
tfidf_train = tfidf_vectorizer.fit_transform(IV_train).toarray()
tfidf_test = tfidf_vectorizer.transform(IV_test).toarray()

model=GaussianNB()
model.fit(tfidf_train, DV_train) 

In [None]:
#prediction

y_pred = model.predict(tfidf_test)
y_prob = model.predict_proba(tfidf_test)[:,1]

In [None]:
#import lime
#import lime.lime_text
from lime_text import LimeTextExplainer

# from lime.lime_text import LimeTextExplainer
explainer = LimeTextExplainer(class_names=[-1,1])

In [None]:
from sklearn.pipeline import make_pipeline

# Explaining the predictions and important features for predicting the label 1
c = make_pipeline(tfidf_vectorizer, model)

In [None]:
idx =0
IV_test.iloc[idx]

In [None]:
idx =0
explainer = LimeTextExplainer(class_names=[1,-1])
# classifier_fn is the probability function that takes a string and returns prediction probabilities.
# num_features is the max. number of features we want in the explanation(default is 10).
# labels=(1,) means we want the explanation for the label 1
exp = explainer.explain_instance(IV_test.iloc[idx], c.predict_proba, num_features=10)
exp.show_in_notebook()

### MultinomialNB

In [None]:
from sklearn.naive_bayes import MultinomialNB

#TF-IDF
# Convert x_train to vector
tfidf_vectorizer = TfidfVectorizer(use_idf=True)
tfidf_train = tfidf_vectorizer.fit_transform(IV_train) 
tfidf_test = tfidf_vectorizer.transform(IV_test)

model=MultinomialNB()
model.fit(tfidf_train, DV_train) 


In [None]:
#prediction

y_pred = model.predict(tfidf_test.todense())
y_prob = model.predict_proba(tfidf_test.todense())[:,1]

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score

print("Accuracy : ", accuracy_score(y_pred, DV_test))
print("Precision : ", precision_score(y_pred, DV_test, average = 'weighted'))
print("Recall : ", recall_score(y_pred, DV_test, average = 'weighted'))
print("F1 Score : ", recall_score(y_pred, DV_test, average = 'weighted'))

In [None]:
#import lime
#import lime.lime_text
from lime_text import LimeTextExplainer

# from lime.lime_text import LimeTextExplainer
explainer = LimeTextExplainer(class_names=[-1,1])

In [None]:
from sklearn.pipeline import make_pipeline

# Explaining the predictions and important features for predicting the label 1
c = make_pipeline(tfidf_vectorizer, model)

In [None]:
idx =0
explainer = LimeTextExplainer(class_names=[1,-1])
# classifier_fn is the probability function that takes a string and returns prediction probabilities.
# num_features is the max. number of features we want in the explanation(default is 10).
# labels=(1,) means we want the explanation for the label 1
exp = explainer.explain_instance(IV_test.iloc[15], c.predict_proba, num_features=10)
exp.show_in_notebook()

### KNN

In [None]:
from sklearn.neighbors import KNeighborsClassifier 

#TF-IDF
# Convert x_train to vector
tfidf_vectorizer = TfidfVectorizer(use_idf=True)
tfidf_train = tfidf_vectorizer.fit_transform(IV_train) 
tfidf_test = tfidf_vectorizer.transform(IV_test)

model=KNeighborsClassifier()
model.fit(tfidf_train, DV_train) 

In [None]:
#prediction

y_pred = model.predict(tfidf_test.todense())
y_prob = model.predict_proba(tfidf_test.todense())[:,1]

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score

print("Accuracy : ", accuracy_score(y_pred, DV_test))
print("Precision : ", precision_score(y_pred, DV_test, average = 'weighted'))
print("Recall : ", recall_score(y_pred, DV_test, average = 'weighted'))
print("F1 Score : ", recall_score(y_pred, DV_test, average = 'weighted'))

In [None]:
#import lime
#import lime.lime_text
from lime_text import LimeTextExplainer

# from lime.lime_text import LimeTextExplainer
explainer = LimeTextExplainer(class_names=[-1,1])

In [None]:
from sklearn.pipeline import make_pipeline

# Explaining the predictions and important features for predicting the label 1
c = make_pipeline(tfidf_vectorizer, model)

In [None]:
idx =0
explainer = LimeTextExplainer(class_names=[1,-1])
# classifier_fn is the probability function that takes a string and returns prediction probabilities.
# num_features is the max. number of features we want in the explanation(default is 10).
# labels=(1,) means we want the explanation for the label 1
exp = explainer.explain_instance(IV_test.iloc[15], c.predict_proba, num_features=10)
exp.show_in_notebook()

### Deceion Tree

In [None]:
from sklearn.tree import DecisionTreeClassifier

#TF-IDF
# Convert x_train to vector
tfidf_vectorizer = TfidfVectorizer(use_idf=True)
tfidf_train = tfidf_vectorizer.fit_transform(IV_train) 
tfidf_test = tfidf_vectorizer.transform(IV_test)

model=DecisionTreeClassifier(random_state = 1)
model.fit(tfidf_train, DV_train) 

In [None]:
#prediction

y_pred = model.predict(tfidf_test.todense())
y_prob = model.predict_proba(tfidf_test.todense())[:,1]

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score

print("Accuracy : ", accuracy_score(y_pred, DV_test))
print("Precision : ", precision_score(y_pred, DV_test, average = 'weighted'))
print("Recall : ", recall_score(y_pred, DV_test, average = 'weighted'))
print("F1 Score : ", recall_score(y_pred, DV_test, average = 'weighted'))

In [None]:
#import lime
#import lime.lime_text
from lime_text import LimeTextExplainer

# from lime.lime_text import LimeTextExplainer
explainer = LimeTextExplainer(class_names=[1 ,-1])

In [None]:
from sklearn.pipeline import make_pipeline

# Explaining the predictions and important features for predicting the label 1
c = make_pipeline(tfidf_vectorizer, model)

In [None]:
idx =0

explainer = LimeTextExplainer(class_names=[1,-1])
# classifier_fn is the probability function that takes a string and returns prediction probabilities.
# num_features is the max. number of features we want in the explanation(default is 10).
# labels=(1,) means we want the explanation for the label 1
exp = explainer.explain_instance(IV_test.iloc[15], c.predict_proba, num_features=10)
exp.show_in_notebook()

### Random Forest 

In [None]:
# https://www.kdnuggets.com/2022/01/explain-nlp-models-lime.html

from sklearn.ensemble import RandomForestClassifier

#TF-IDF
# Convert x_train to vector
tfidf_vectorizer = TfidfVectorizer(use_idf=True)
tfidf_train = tfidf_vectorizer.fit_transform(IV_train) 
tfidf_test = tfidf_vectorizer.transform(IV_test)

model=RandomForestClassifier(n_estimators = 100, random_state = 10)
model.fit(tfidf_train, DV_train) 

In [None]:
#prediction

y_pred = model.predict(tfidf_test.todense())
y_prob = model.predict_proba(tfidf_test.todense())[:,1]

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score

print("Accuracy : ", accuracy_score(y_pred, DV_test))
print("Precision : ", precision_score(y_pred, DV_test, average = 'weighted'))
print("Recall : ", recall_score(y_pred, DV_test, average = 'weighted'))
print("F1 Score : ", recall_score(y_pred, DV_test, average = 'weighted'))

In [None]:
#import lime
#import lime.lime_text
from lime_text import LimeTextExplainer

# from lime.lime_text import LimeTextExplainer
explainer = LimeTextExplainer(class_names=[0,1])

In [None]:
idx =64
df_train.iloc[idx]

In [None]:
from sklearn.pipeline import make_pipeline

# Explaining the predictions and important features for predicting the label 1
c = make_pipeline(tfidf_vectorizer, model)

In [None]:
c

In [None]:
IV_test.iloc[15]

In [None]:
idx =0

explainer = LimeTextExplainer(class_names=[1,-1])
# classifier_fn is the probability function that takes a string and returns prediction probabilities.
# num_features is the max. number of features we want in the explanation(default is 10).
# labels=(1,) means we want the explanation for the label 1
exp = explainer.explain_instance(IV_test.iloc[15], c.predict_proba, num_features=10)
exp.show_in_notebook()


### XGBOOST

In [None]:
!pip install xgboost


In [None]:
from xgboost import XGBClassifier

xgb = XGBClassifier(n_estimators=100)
training_start = time.perf_counter()
xgb.fit(X_train, y_train)
training_end = time.perf_counter()
prediction_start = time.perf_counter()
preds = xgb.predict(X_test)
prediction_end = time.perf_counter()
acc_xgb = (preds == y_test).sum().astype(float) / len(preds)*100
xgb_train_time = training_end-training_start
xgb_prediction_time = prediction_end-prediction_start
print("XGBoost's prediction accuracy is: %3.2f" % (acc_xgb))
print("Time consumed for training: %4.3f" % (xgb_train_time))
print("Time consumed for prediction: %6.5f seconds" % (xgb_prediction_time))

### Trying on new reviews 

In [None]:
example = ["I'm not happy"]
result = model.predict(example)

print(result)

### Apply LIME 

In [None]:
#import lime
#import lime.lime_text
from lime_text import LimeTextExplainer

# from lime.lime_text import LimeTextExplainer
explainer = LimeTextExplainer(class_names=[0,1])

In [None]:
idx =64
df_train.iloc[idx]

In [None]:
idx =0
exp = explainer.explain_instance(df_train.cleaned_description_new[idx], model.predict_proba, num_features=10)
#print('True class: %s' % reviews_test.label[idx])
exp.show_in_notebook(text=True)
exp

In [None]:
#df_train