### Importing Libraries

In [39]:
from sklearn import model_selection, preprocessing, linear_model, naive_bayes, metrics, svm
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
import pandas
import re
import string
import pickle

### Loading Data and HurtLex

In [40]:
# Read the labelled tweets and the lexicon from the working directory.
df = pandas.read_csv('data.csv')
lexicon = pandas.read_csv('New_Lexicon.csv')
# Drop missing lexicon entries: pandas reads a blank cell as NaN (a float), and
# a float on the left of `in <str>` raises TypeError in the substring test used
# later for the hate-word features.
hurt_words = set(lexicon['clean'].dropna())

### Vector Models
Start with converting text into feature vectors using Count Vectorizer and TF-IDF Vectorizer

Encode the labels using preprocessing.LabelEncoder

In [41]:
# Learn the label -> integer mapping, then encode every row with it.
# `encoder` is kept so predicted ids can be mapped back to label names.
encoder = preprocessing.LabelEncoder()
encoder.fit(df['sentiment'])
targets = encoder.transform(df['sentiment'])

Then split the data for training and testing. Use 70% for training and 30% for testing. Since the collected data comes from multiple resources, shuffle it so the order won't affect the classification. 

In [42]:
# 70% / 30% train/test split of the cleaned text (cast to unicode strings);
# random_state pins the shuffle so the split is repeatable.
train_x, test_x, train_y, test_y = model_selection.train_test_split(
    df['clean'].values.astype('U'),
    targets,
    test_size=0.3,
    random_state=1,
)

### Count Vectors

To extract Count Vectors, use the CountVectorizer class from sklearn.feature_extraction.text.

Note that the vectorizer is fitted on the training set only.

In [43]:
# Word-level count vectorizer; the \w{1,} pattern keeps one-character tokens.
# The vocabulary is learned from the training texts only.
count_vect = CountVectorizer(analyzer='word', token_pattern=r'\w{1,}').fit(train_x)

# Encode both splits with the vocabulary learned above.
xtrain_count = count_vect.transform(train_x)
xtest_count = count_vect.transform(test_x)

### TF-IDF Vectors

Same idea for TF-IDF Vectorizer. Use TfidfVectorizer for Word-Level, N-gram-Level, Char-Level.

In [44]:
# Three TF-IDF representations, each fitted on the training texts only and
# then applied to both splits.

# Word level: single tokens (one-character tokens included).
tfidf_vect = TfidfVectorizer(analyzer='word', token_pattern=r'\w{1,}').fit(train_x)
xtrain_tfidf = tfidf_vect.transform(train_x)
xtest_tfidf = tfidf_vect.transform(test_x)

# Word n-gram level: bigrams and trigrams of tokens.
tfidf_vect_ngram = TfidfVectorizer(
    analyzer='word',
    token_pattern=r'\w{1,}',
    ngram_range=(2,3),
).fit(train_x)
xtrain_tfidf_ngram = tfidf_vect_ngram.transform(train_x)
xtest_tfidf_ngram = tfidf_vect_ngram.transform(test_x)

# Character level: 2- and 3-character n-grams.
tfidf_vect_ngram_chars = TfidfVectorizer(analyzer='char', ngram_range=(2,3)).fit(train_x)
xtrain_tfidf_ngram_chars = tfidf_vect_ngram_chars.transform(train_x)
xtest_tfidf_ngram_chars = tfidf_vect_ngram_chars.transform(test_x)

### NLP Features

Start with NLP based feature model.

Since Arabic has its own characters, extend the existing punctuation list, which only contains the English punctuations, and add the Arabic punctuations.

In [45]:
# Extra punctuation characters (Arabic and typographic) to count in addition to
# the ASCII set provided by string.punctuation.
arabic_punctuations = '''`÷×؛<>_()*&^%][ـ،/:"؟.,'{}~¦+|!”…“–ـ'''
english_punctuations = string.punctuation
# One combined string; it is used for per-character membership tests, so
# characters that appear in both parts are harmless.
punctuations_list = arabic_punctuations + english_punctuations

The extracted features from the text are:

- char_count : number of characters in the tweet.
- word_count : number of words in the tweet.
- word_density : average word length in the tweet.
- punctuation_count : number of punctuations in the tweet.
- hashtag : number of hash tags in the tweet.
- hate_words_count : count of HurtLex words in the tweet.
- hate_word : a binary flag to indicate whether a tweet contains a word from HurtLex or not.

In [46]:
def _count_hurt_words(text):
    """Return how many lexicon entries occur as a substring of ``str(text)``."""
    text = str(text)  # `clean` is stringified so non-string cells do not crash the test
    return sum(1 for wrd in hurt_words if wrd in text)


# Surface features computed from the raw tweet text.
df['char_count'] = df['tweet'].apply(len)
df['word_count'] = df['tweet'].apply(lambda x: len(x.split()))
# NOTE(review): a tweet with no whitespace-separated tokens has word_count 0,
# which makes this ratio NaN/inf - confirm the data contains no such rows.
df['word_density'] = df['char_count'] / (df['word_count'])
df['punctuation_count'] = df['tweet'].apply(lambda x: sum(1 for ch in x if ch in punctuations_list))
df['hashtag'] = df['tweet'].apply(lambda x: len(re.findall(r"#(\w+)", x)))

# Lexicon features computed from the cleaned text. The lexicon is scanned once
# per row; the binary flag is derived from the count instead of rescanning.
df['hate_words_count'] = df['clean'].apply(_count_hurt_words)
df['hate_word'] = (df['hate_words_count'] > 0).astype('int64')

In [47]:
# Column names of the hand-crafted features, in the order they are fed to the models.
NLP_features = [
    'char_count',
    'word_count',
    'word_density',
    'punctuation_count',
    'hate_words_count',
    'hate_word',
    'hashtag',
]

Use 70% of the extracted features for training and 30% for testing.

In [48]:
# 70% / 30% train/test split of the hand-crafted feature columns, using the
# same test_size and random_state as the text split.
NLP_train_x, NLP_test_x, NLP_train_y, NLP_test_y = model_selection.train_test_split(
    df[NLP_features],
    targets,
    test_size=0.3,
    random_state=1,
)

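Define a train_test_model function. Pass the model, train_set, train_target, test_set and test_target as parameters; the optional final and model_name arguments additionally pickle the fitted model to a file. The function returns four metric scores computed on the test set: Accuracy, Precision, Recall, F1_Score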

In [49]:
def train_test_model(model, train_set, train_target, test_set, test_target, final=False, model_name=None):
    """Fit ``model`` on the training split and score it on the test split.

    Parameters
    ----------
    model : estimator exposing ``fit(X, y)`` and ``predict(X)``
    train_set, train_target : features and labels passed to ``model.fit``
    test_set, test_target : features passed to ``model.predict`` and the labels
        the predictions are scored against
    final : bool, default False
        When True, the fitted model is pickled to ``model_name``.
    model_name : str or None, default None
        Destination path of the pickle; required when ``final`` is True.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall, f1)``; precision, recall and F1 are
        macro-averaged over the classes.

    Raises
    ------
    ValueError
        If ``final`` is True but ``model_name`` is None.
    """
    # Fail before the (possibly slow) fit instead of after it.
    if final and model_name is None:
        raise ValueError("model_name is required when final=True")

    # fit the training dataset on the classifier
    model.fit(train_set, train_target)

    if final:
        # The context manager closes the file even if pickling raises.
        with open(model_name, 'wb') as handle:
            pickle.dump(model, handle)

    # predictions for the held-out rows
    test_predictions = model.predict(test_set)

    # evaluation numbers on the TEST set
    accuracy = metrics.accuracy_score(test_target, test_predictions, normalize=True)
    precision = metrics.precision_score(test_target, test_predictions, average='macro', zero_division='warn')
    recall = metrics.recall_score(test_target, test_predictions, average='macro', zero_division='warn')
    f1 = metrics.f1_score(test_target, test_predictions, average='macro', zero_division='warn')

    return accuracy, precision, recall, f1

## Results

## Naive Bayes

In [50]:
# Multinomial Naive Bayes on every feature representation. Each row is
# (label for the report, train features, train labels, test features, test labels).
nb_experiments = [
    ("Count Vectors", xtrain_count, train_y, xtest_count, test_y),
    ("WordLevel TF-IDF", xtrain_tfidf, train_y, xtest_tfidf, test_y),
    ("N-Gram Vectors", xtrain_tfidf_ngram, train_y, xtest_tfidf_ngram, test_y),
    ("CharLevel Vectors", xtrain_tfidf_ngram_chars, train_y, xtest_tfidf_ngram_chars, test_y),
    ("NLP Features", NLP_train_x, NLP_train_y, NLP_test_x, NLP_test_y),
]

for feature_label, fit_x, fit_y, eval_x, eval_y in nb_experiments:
    # A fresh estimator per run so no state carries over between feature sets.
    accuracy, precision, recall, f1 = train_test_model(naive_bayes.MultinomialNB(), fit_x, fit_y, eval_x, eval_y)
    print("NB, %s: accuracy : %.4f, precision : %.4f, recall : %.4f, f1_score : %.4f" % (feature_label, accuracy, precision, recall, f1))

NB, Count Vectors: accuracy : 0.7434, precision : 0.6853, recall : 0.5156, f1_score : 0.5526
NB, WordLevel TF-IDF: accuracy : 0.7105, precision : 0.8348, recall : 0.3981, f1_score : 0.3879
NB, N-Gram Vectors: accuracy : 0.7019, precision : 0.8498, recall : 0.3850, f1_score : 0.3660
NB, CharLevel Vectors: accuracy : 0.7032, precision : 0.7704, recall : 0.3916, f1_score : 0.3784
NB, NLP Features: accuracy : 0.6682, precision : 0.4451, recall : 0.3803, f1_score : 0.3688


## Logistic Regression

In [51]:
# Logistic Regression (liblinear solver, L2 penalty, C=5) on every feature
# representation. Each row is
# (label for the report, train features, train labels, test features, test labels).
lr_experiments = [
    ("Count Vectors", xtrain_count, train_y, xtest_count, test_y),
    ("WordLevel TF-IDF", xtrain_tfidf, train_y, xtest_tfidf, test_y),
    ("N-Gram Vectors", xtrain_tfidf_ngram, train_y, xtest_tfidf_ngram, test_y),
    ("CharLevel Vectors", xtrain_tfidf_ngram_chars, train_y, xtest_tfidf_ngram_chars, test_y),
    ("NLP Features", NLP_train_x, NLP_train_y, NLP_test_x, NLP_test_y),
]

for feature_label, fit_x, fit_y, eval_x, eval_y in lr_experiments:
    # A fresh estimator with identical hyper-parameters for every run.
    lr_model = linear_model.LogisticRegression(
        verbose=1,
        solver='liblinear',
        random_state=0,
        C=5,
        penalty='l2',
        max_iter=1000,
    )
    accuracy, precision, recall, f1 = train_test_model(lr_model, fit_x, fit_y, eval_x, eval_y)
    print("LR, %s: accuracy : %.4f, precision : %.4f, recall : %.4f, f1_score : %.4f" % (feature_label, accuracy, precision, recall, f1))

[LibLinear]LR, Count Vectors: accuracy : 0.7353, precision : 0.6474, recall : 0.5776, f1_score : 0.6023
[LibLinear]LR, WordLevel TF-IDF: accuracy : 0.7515, precision : 0.6854, recall : 0.5534, f1_score : 0.5913
[LibLinear]LR, N-Gram Vectors: accuracy : 0.7047, precision : 0.6530, recall : 0.4251, f1_score : 0.4357
[LibLinear]LR, CharLevel Vectors: accuracy : 0.7468, precision : 0.6669, recall : 0.5629, f1_score : 0.5956
[LibLinear]LR, NLP Features: accuracy : 0.6811, precision : 0.5743, recall : 0.3740, f1_score : 0.3525


## SVM

In [52]:
# Linear SVM on every feature representation. Each row is
# (label for the report, estimator, train features, train labels, test features, test labels).
# Only the NLP-feature run is configured with dual=False.
svm_experiments = [
    ("Count Vectors", svm.LinearSVC(), xtrain_count, train_y, xtest_count, test_y),
    ("WordLevel TF-IDF", svm.LinearSVC(), xtrain_tfidf, train_y, xtest_tfidf, test_y),
    ("N-Gram Vectors", svm.LinearSVC(), xtrain_tfidf_ngram, train_y, xtest_tfidf_ngram, test_y),
    ("CharLevel Vectors", svm.LinearSVC(), xtrain_tfidf_ngram_chars, train_y, xtest_tfidf_ngram_chars, test_y),
    ("NLP Features", svm.LinearSVC(dual=False), NLP_train_x, NLP_train_y, NLP_test_x, NLP_test_y),
]

for feature_label, svm_model, fit_x, fit_y, eval_x, eval_y in svm_experiments:
    accuracy, precision, recall, f1 = train_test_model(svm_model, fit_x, fit_y, eval_x, eval_y)
    print("SVM, %s: accuracy : %.4f, precision : %.4f, recall : %.4f, f1_score : %.4f" % (feature_label, accuracy, precision, recall, f1))

SVM, Count Vectors: accuracy : 0.7124, precision : 0.6146, recall : 0.5773, f1_score : 0.5911
SVM, WordLevel TF-IDF: accuracy : 0.7445, precision : 0.6655, recall : 0.5701, f1_score : 0.6024
SVM, N-Gram Vectors: accuracy : 0.7055, precision : 0.6456, recall : 0.4419, f1_score : 0.4612
SVM, CharLevel Vectors: accuracy : 0.7408, precision : 0.6577, recall : 0.5670, f1_score : 0.5973
SVM, NLP Features: accuracy : 0.6754, precision : 0.3972, recall : 0.3587, f1_score : 0.3260


  _warn_prf(average, modifier, msg_start, len(result))


# Combine All Feature Vectors

To combine our feature vectors, use the predictions of the previous vector models as features and add them to the NLP based features and re-train three new models.

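To get the predictions, fit each model on the whole dataset and predict on that same set, without a train/test split at this stage. Note that these are in-sample predictions: every row, including the rows that later fall into the test split of the combined features, was seen together with its label by the base models, so the scores reported below for the combined models are optimistic and do not estimate performance on unseen tweets.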

In [53]:
def model_feature(model, train_set, train_target, name, cv=None):
    """Fit ``model`` on all given rows, pickle it, and return one prediction per row.

    Parameters
    ----------
    model : estimator exposing ``fit(X, y)`` and ``predict(X)``
    train_set, train_target : features and labels the model is fitted on
    name : str
        Path of the file the fitted model is pickled to.
    cv : int, cross-validation splitter or None, default None
        ``None`` keeps the original behaviour: the returned predictions are
        made in-sample, by the model that was fitted on those very rows, so
        each prediction has already seen its row's label. Using such
        predictions as input features of a second model leaks labels into any
        evaluation done on a split of the same rows. Pass a value (e.g.
        ``cv=5``) to obtain out-of-fold predictions from
        ``model_selection.cross_val_predict`` instead.

    Returns
    -------
    array-like
        One prediction per row of ``train_set``.

    Notes
    -----
    Whatever ``cv`` is, the pickled model is fitted on all of ``train_set``.
    """
    if cv is None:
        # fit the training dataset on the classifier
        model.fit(train_set, train_target)
        # in-sample predictions (see the ``cv`` note in the docstring)
        train_predictions = model.predict(train_set)
    else:
        # Each row is predicted by a clone that never saw it during fitting.
        train_predictions = model_selection.cross_val_predict(model, train_set, train_target, cv=cv)
        # cross_val_predict fits clones only; fit the model that gets saved.
        model.fit(train_set, train_target)

    with open(name, 'wb') as handle:
        pickle.dump(model, handle, protocol=pickle.HIGHEST_PROTOCOL)
    return train_predictions

In [54]:
def save_sth(sth, name):
    """Pickle ``sth`` into the file at path ``name`` with the highest pickle protocol."""
    out_file = open(name, 'wb')
    try:
        pickle.dump(sth, out_file, protocol=pickle.HIGHEST_PROTOCOL)
    finally:
        # Always release the handle, whether or not pickling succeeded.
        out_file.close()

Compute the Count Vectors and TF-IDF Vectors again but this time, fit the vectorizers on the whole data.

In [55]:
# Refit the word-level count vectorizer, this time on every cleaned tweet
# rather than on the training split only.
all_clean_text = df['clean'].values.astype('U')
count_vect = CountVectorizer(analyzer='word', token_pattern=r'\w{1,}').fit(all_clean_text)

# Count matrix for the whole dataset.
CV_features = count_vect.transform(all_clean_text)

In [56]:
# All three TF-IDF vectorizers are refitted on the full cleaned corpus; the
# unicode cast is done once and shared.
all_clean_text = df['clean'].values.astype('U')

# word level tf-idf
tfidf_vect = TfidfVectorizer(analyzer='word', token_pattern=r'\w{1,}')
tfidf_vect.fit(all_clean_text)
tfidf_word_features = tfidf_vect.transform(all_clean_text)

# ngram level tf-idf
tfidf_vect_ngram = TfidfVectorizer(analyzer='word', token_pattern=r'\w{1,}', ngram_range=(2,3))
tfidf_vect_ngram.fit(all_clean_text)
tfidf_ngram_features = tfidf_vect_ngram.transform(all_clean_text)

# characters level tf-idf
# token_pattern is deliberately not passed: scikit-learn only uses it when
# analyzer == 'word' and warns if a custom pattern is given otherwise.
tfidf_vect_ngram_chars = TfidfVectorizer(analyzer='char', ngram_range=(2,3))
tfidf_vect_ngram_chars.fit(all_clean_text)
tfidf_char_features = tfidf_vect_ngram_chars.transform(all_clean_text)



In [57]:
# save_sth(count_vect, name='count_vect')
# save_sth(tfidf_vect, name='tfidf_vect')
# save_sth(tfidf_vect_ngram, name='tfidf_vect_ngram')
# save_sth(tfidf_vect_ngram_chars, name='tfidf_vect_ngram_chars')

In [58]:
# One Naive Bayes prediction column per text representation; the column name
# doubles as the file name the fitted model is pickled to.
nb_feature_inputs = [
    ('NB_CountVector_feature', CV_features),
    ('NB_tfidf_word_feature', tfidf_word_features),
    ('NB_tfidf_ngram_feature', tfidf_ngram_features),
    ('NB_tfidf_char_feature', tfidf_char_features),
]
for column_name, feature_matrix in nb_feature_inputs:
    df[column_name] = model_feature(naive_bayes.MultinomialNB(), feature_matrix, targets, name=column_name)

In [59]:
# One Logistic Regression prediction column per text representation; the
# column name doubles as the file name the fitted model is pickled to.
lr_feature_inputs = [
    ('LR_CountVector_feature', CV_features),
    ('LR_tfidf_word_feature', tfidf_word_features),
    ('LR_tfidf_ngram_feature', tfidf_ngram_features),
    ('LR_tfidf_char_feature', tfidf_char_features),
]
for column_name, feature_matrix in lr_feature_inputs:
    # A fresh estimator with identical hyper-parameters for every column.
    lr_base_model = linear_model.LogisticRegression(
        solver='liblinear',
        random_state=0,
        C=5,
        penalty='l2',
        max_iter=1000,
    )
    df[column_name] = model_feature(lr_base_model, feature_matrix, targets, name=column_name)

In [60]:
# One linear-SVM prediction column per text representation; the column name
# doubles as the file name the fitted model is pickled to.
svm_feature_inputs = [
    ('SVM_CountVector_feature', CV_features),
    ('SVM_tfidf_word_feature', tfidf_word_features),
    ('SVM_tfidf_ngram_feature', tfidf_ngram_features),
    ('SVM_tfidf_char_feature', tfidf_char_features),
]
for column_name, feature_matrix in svm_feature_inputs:
    df[column_name] = model_feature(svm.LinearSVC(), feature_matrix, targets, name=column_name)

In [61]:
# Hand-crafted columns first, then one prediction column per
# (base model, text representation) pair, grouped by model.
combined_features = [
    'char_count', 'word_count', 'word_density', 'punctuation_count',
    'hate_words_count', 'hate_word', 'hashtag',
] + [
    f'{model_prefix}_{representation}_feature'
    for model_prefix in ('NB', 'LR', 'SVM')
    for representation in ('CountVector', 'tfidf_word', 'tfidf_ngram', 'tfidf_char')
]

In [62]:
# 70% / 30% train/test split of the combined feature columns (test_size=0.3).
# This rebinds train_x / test_x / train_y / test_y from the earlier text split.
train_x, test_x, train_y, test_y = model_selection.train_test_split(
    df[combined_features],
    targets,
    test_size=0.3,
    random_state=1,
)


In [63]:
# Write the final feature matrices and label vectors to CSV files in the
# working directory; label arrays are wrapped in a one-column 'sentiment' frame.
csv_exports = [
    (train_x, 'train_x.csv'),
    (test_x, 'test_x.csv'),
    (pandas.DataFrame({'sentiment': train_y}), 'train_y.csv'),
    (pandas.DataFrame({'sentiment': test_y}), 'test_y.csv'),
]
for split_frame, csv_path in csv_exports:
    split_frame.to_csv(csv_path)

## Results

In [64]:
# Train, pickle (final=True) and score one model of each family on the
# combined features. The tag is both the report prefix and the pickle file name.
final_runs = [
    ("NB", naive_bayes.MultinomialNB()),
    ("LR", linear_model.LogisticRegression(verbose=1, solver='liblinear', random_state=0, C=5, penalty='l2', max_iter=1000)),
    ("SVM", svm.LinearSVC(dual=False)),
]

for model_tag, final_model in final_runs:
    accuracy, precision, recall, f1 = train_test_model(
        final_model, train_x, train_y, test_x, test_y, final=True, model_name=model_tag
    )
    print("%s, Combined Features: accuracy : %.4f, precision : %.4f, recall : %.4f, f1_score : %.4f" % (model_tag, accuracy, precision, recall, f1))

NB, Combined Features: accuracy : 0.9184, precision : 0.8073, recall : 0.8506, f1_score : 0.8252
[LibLinear]LR, Combined Features: accuracy : 0.9872, precision : 0.9780, recall : 0.9671, f1_score : 0.9725
SVM, Combined Features: accuracy : 0.9851, precision : 0.9774, recall : 0.9635, f1_score : 0.9703


Load Model

In [65]:
# Reload the pickled Logistic Regression model. The context manager closes the
# file handle once the model is loaded.
# NOTE: pickle.load can execute arbitrary code; only load files this notebook wrote.
with open("LR", 'rb') as model_file:
    trained_model = pickle.load(model_file)
# Reload the saved test split; this rebinds test_x / test_y to the CSV contents.
test_x = pandas.read_csv('test_x.csv')
test_y = pandas.read_csv('test_y.csv')

In [68]:
def test_final(model, test_x, test_y):
    """Score an already fitted ``model`` on ``test_x`` against ``test_y``.

    Returns the tuple ``(accuracy, precision, recall, f1)``; precision, recall
    and F1 are macro-averaged.
    """
    predicted = model.predict(test_x)

    # Keyword arguments shared by the three macro-averaged metrics.
    macro_options = dict(average='macro', zero_division='warn')

    return (
        metrics.accuracy_score(test_y, predicted, normalize=True),
        metrics.precision_score(test_y, predicted, **macro_options),
        metrics.recall_score(test_y, predicted, **macro_options),
        metrics.f1_score(test_y, predicted, **macro_options),
    )

In [69]:
# Score the reloaded model on the reloaded test split, selecting only the
# feature columns and the 'sentiment' label column from the CSV frames.
accuracy, precision, recall, f1 = test_final(
    trained_model,
    test_x[combined_features],
    test_y['sentiment'],
)
print("accuracy : %.4f, precision : %.4f, recall : %.4f, f1_score : %.4f" % (accuracy, precision, recall, f1))

accuracy : 0.9872, precision : 0.9780, recall : 0.9671, f1_score : 0.9725


In [73]:
import test_live
import pickle
import numpy as np
import random

In [3]:
# Load the pickled Logistic Regression model; the context manager closes the
# file handle once the model is loaded.
# NOTE: pickle.load can execute arbitrary code; only load files this notebook wrote.
with open("LR", 'rb') as model_file:
    best_model = pickle.load(model_file)



In [62]:
# Hand-written sample sentences; not referenced by any later cell visible here.
examples = ["هاللاعب معلم",
           "هالممثل محترف",
           "يا جحش",
           "يا بغل",
           "يا ثعلب"]

In [74]:
# Re-read the dataset from disk, replacing the in-memory frame bound to `df`.
df = pandas.read_csv('data.csv')
# Disabled: 70% / 30% train/test split of the cleaned text.
# train_x, test_x, train_y, test_y = model_selection.train_test_split(df['clean'].values.astype('U'), targets,test_size=0.3, random_state = 1)

In [86]:
# live_test = "هاللاعب فعلا حريف"
# Spot-check the live pipeline on random rows of the dataset and print every
# disagreement between the stored label and the prediction.
# NOTE(review): rows are drawn from the whole dataset, which includes rows the
# model was trained on, so this is not a held-out error estimate.
N_TRIALS = 1000          # number of rows drawn, with replacement
rng = random.Random(0)   # seeded so a rerun checks and prints the same rows
count = 0                # misclassified rows
evaluated = 0            # rows actually passed to the model
for i in range(N_TRIALS):
    choice = rng.randrange(len(df))
    # The draw is a position, so index positionally.
    sen = df['tweet'].iloc[choice]
    label = df['sentiment'].iloc[choice]
    if len(sen) > 0:
        evaluated += 1
        live_df = test_live.prepare_live(sen)
        pred = encoder.inverse_transform(best_model.predict(live_df))[0]
        if pred != label:
            count+=1
            print(f"sentence = {sen} \n")
            print(f"Actual = {label}, \t pred = {pred} \n")
# Error rate over the rows that were scored; skipped empty tweets are not
# counted as correct.
print (count / evaluated if evaluated else float('nan'))

sentence = يوم وسخ 

Actual = hateful, 	 pred = normal 

sentence = هو البدوي شارب بول البعير ليه بيوافق نسوانه تشوف اجسام و فخاد و حمامة رجال مفتولي العضلات و مش موافق الستات الرجال… @user 

Actual = normal, 	 pred = hateful 

sentence = لسلامة الحجاج اوقفوا مواكب الامراء أثناء الحج. موكب خالد الفيصل تسبب بمقتل واصابة الالاف بتدافع منى امام البوابات المغلقه 

Actual = normal, 	 pred = offensive 

sentence = مستهبل ومن لف لفيفها بصوت واحد الحق على جبران باسيل    

Actual = offensive, 	 pred = normal 

sentence = آه يا حمارويه 

Actual = offensive, 	 pred = normal 

sentence = جبران باسيل مهضوم كتير قال بدو يعمر سوريا و هوي ببلدو لبنان مش قادر يعمر مجرور 

Actual = offensive, 	 pred = normal 

sentence = لبيك اللهم لبيك ان الحمد والنعمت لك والملك لا شريك لك 

Actual = normal, 	 pred = offensive 

sentence = إلا خصني 

Actual = normal, 	 pred = hateful 

sentence = ما نايكن الا جبران باسيل عن جديد منا وجر 

Actual = offensive, 	 pred = normal 

sentence = الحريم اذا شافو واحد متزوج اربع 

In [None]:
# Classify one hand-written sentence: build its features, predict the class
# id, and map the id back to its label name.
sen = "يا بغل"
live_df = test_live.prepare_live(sen)
predicted_ids = best_model.predict(live_df)
print(encoder.inverse_transform(predicted_ids))