In [1]:
# import all the necessary libraries
import numpy as np
import pandas as pd
import re
import nltk
import spacy
import string

pd.options.mode.chained_assignment = None    #allowing you to modify a DataFrame derived from another one without receiving the warning. 

In [2]:
full_df = pd.read_csv("sample.csv")
print(full_df.head())

   tweet_id     author_id  inbound                      created_at  \
0    119237        105834     True  Wed Oct 11 06:55:44 +0000 2017   
1    119238  ChaseSupport    False  Wed Oct 11 13:25:49 +0000 2017   
2    119239        105835     True  Wed Oct 11 13:00:09 +0000 2017   
3    119240  VirginTrains    False  Tue Oct 10 15:16:08 +0000 2017   
4    119241        105836     True  Tue Oct 10 15:17:21 +0000 2017   

                                                text response_tweet_id  \
0  @AppleSupport causing the reply to be disregar...            119236   
1  @105835 Your business means a lot to us. Pleas...               NaN   
2  @76328 I really hope you all change but I'm su...            119238   
3  @105836 LiveChat is online at the moment - htt...            119241   
4  @VirginTrains see attached error message. I've...            119243   

   in_response_to_tweet_id  
0                      NaN  
1                 119239.0  
2                      NaN  
3                 

In [3]:
full_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 93 entries, 0 to 92
Data columns (total 7 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   tweet_id                 93 non-null     int64  
 1   author_id                93 non-null     object 
 2   inbound                  93 non-null     bool   
 3   created_at               93 non-null     object 
 4   text                     93 non-null     object 
 5   response_tweet_id        65 non-null     object 
 6   in_response_to_tweet_id  68 non-null     float64
dtypes: bool(1), float64(1), int64(1), object(4)
memory usage: 4.6+ KB


In [4]:
full_df.shape

(93, 7)

In [5]:
full_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 93 entries, 0 to 92
Data columns (total 7 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   tweet_id                 93 non-null     int64  
 1   author_id                93 non-null     object 
 2   inbound                  93 non-null     bool   
 3   created_at               93 non-null     object 
 4   text                     93 non-null     object 
 5   response_tweet_id        65 non-null     object 
 6   in_response_to_tweet_id  68 non-null     float64
dtypes: bool(1), float64(1), int64(1), object(4)
memory usage: 4.6+ KB


In [6]:
# Convert the Dtype of text column to string
df = full_df[["text"]]
df["text"] = df["text"].astype(str)
print(df.head())

                                                text
0  @AppleSupport causing the reply to be disregar...
1  @105835 Your business means a lot to us. Pleas...
2  @76328 I really hope you all change but I'm su...
3  @105836 LiveChat is online at the moment - htt...
4  @VirginTrains see attached error message. I've...


## Text Preprocessing
### Lower Casing

In [7]:
df['text_lower'] = df['text'].str.lower()
df.head()

Unnamed: 0,text,text_lower
0,@AppleSupport causing the reply to be disregar...,@applesupport causing the reply to be disregar...
1,@105835 Your business means a lot to us. Pleas...,@105835 your business means a lot to us. pleas...
2,@76328 I really hope you all change but I'm su...,@76328 i really hope you all change but i'm su...
3,@105836 LiveChat is online at the moment - htt...,@105836 livechat is online at the moment - htt...
4,@VirginTrains see attached error message. I've...,@virgintrains see attached error message. i've...


### Removal of the punctuation
Example of punctuation: **!"#$%&\'()*+,-./:;<=>?@[\\]^_{|}~`**

In [8]:
PUNCT_to_remove = string.punctuation

# custom function to remove punctuation
def remove_punctuation(text):
    return text.translate(str.maketrans("","",PUNCT_to_remove))

df["text_wo_punct"] = df["text_lower"].apply(lambda text: remove_punctuation(text))
print(df.head())

                                                text  \
0  @AppleSupport causing the reply to be disregar...   
1  @105835 Your business means a lot to us. Pleas...   
2  @76328 I really hope you all change but I'm su...   
3  @105836 LiveChat is online at the moment - htt...   
4  @VirginTrains see attached error message. I've...   

                                          text_lower  \
0  @applesupport causing the reply to be disregar...   
1  @105835 your business means a lot to us. pleas...   
2  @76328 i really hope you all change but i'm su...   
3  @105836 livechat is online at the moment - htt...   
4  @virgintrains see attached error message. i've...   

                                       text_wo_punct  
0  applesupport causing the reply to be disregard...  
1  105835 your business means a lot to us please ...  
2  76328 i really hope you all change but im sure...  
3  105836 livechat is online at the moment  https...  
4  virgintrains see attached error message ive tr..

### Remove stopwords

In [9]:
from nltk.corpus import stopwords
", ".join(stopwords.words('english'))

"i, me, my, myself, we, our, ours, ourselves, you, you're, you've, you'll, you'd, your, yours, yourself, yourselves, he, him, his, himself, she, she's, her, hers, herself, it, it's, its, itself, they, them, their, theirs, themselves, what, which, who, whom, this, that, that'll, these, those, am, is, are, was, were, be, been, being, have, has, had, having, do, does, did, doing, a, an, the, and, but, if, or, because, as, until, while, of, at, by, for, with, about, against, between, into, through, during, before, after, above, below, to, from, up, down, in, out, on, off, over, under, again, further, then, once, here, there, when, where, why, how, all, any, both, each, few, more, most, other, some, such, no, nor, not, only, own, same, so, than, too, very, s, t, can, will, just, don, don't, should, should've, now, d, ll, m, o, re, ve, y, ain, aren, aren't, couldn, couldn't, didn, didn't, doesn, doesn't, hadn, hadn't, hasn, hasn't, haven, haven't, isn, isn't, ma, mightn, mightn't, mustn, mus

In [10]:
STOPWORDS = set(stopwords.words('english'))

# Custom functions to remove stopwords
def remove_stopwords(text):
    return " ".join([word for word in str(text).split() if word not in STOPWORDS])

df["text_wo_sw"] = df["text_wo_punct"].apply(lambda text: remove_stopwords(text))
print(df.head())

                                                text  \
0  @AppleSupport causing the reply to be disregar...   
1  @105835 Your business means a lot to us. Pleas...   
2  @76328 I really hope you all change but I'm su...   
3  @105836 LiveChat is online at the moment - htt...   
4  @VirginTrains see attached error message. I've...   

                                          text_lower  \
0  @applesupport causing the reply to be disregar...   
1  @105835 your business means a lot to us. pleas...   
2  @76328 i really hope you all change but i'm su...   
3  @105836 livechat is online at the moment - htt...   
4  @virgintrains see attached error message. i've...   

                                       text_wo_punct  \
0  applesupport causing the reply to be disregard...   
1  105835 your business means a lot to us please ...   
2  76328 i really hope you all change but im sure...   
3  105836 livechat is online at the moment  https...   
4  virgintrains see attached error message ive

### Removal of Frequent words

In [11]:
# Let's find out the most common words in the corpus
from collections import Counter
cnt = Counter()
for text in df['text_wo_sw'].values:
    for word in text.split():
        cnt[word] += 1
        
cnt.most_common(10)

[('us', 25),
 ('dm', 19),
 ('help', 18),
 ('thanks', 13),
 ('httpstcogdrqu22ypt', 12),
 ('applesupport', 11),
 ('please', 11),
 ('phone', 9),
 ('hi', 9),
 ('ive', 8)]

In [12]:
FREQWORD = set([w for (w,wc) in cnt.most_common(10)])

# Custom functions to remove frequent words
def remove_stopwords(text):
    return " ".join([word for word in str(text).split() if word not in FREQWORD])

df["text_wo_FW"] = df["text_wo_sw"].apply(lambda text: remove_stopwords(text))
print(df.head())

                                                text  \
0  @AppleSupport causing the reply to be disregar...   
1  @105835 Your business means a lot to us. Pleas...   
2  @76328 I really hope you all change but I'm su...   
3  @105836 LiveChat is online at the moment - htt...   
4  @VirginTrains see attached error message. I've...   

                                          text_lower  \
0  @applesupport causing the reply to be disregar...   
1  @105835 your business means a lot to us. pleas...   
2  @76328 i really hope you all change but i'm su...   
3  @105836 livechat is online at the moment - htt...   
4  @virgintrains see attached error message. i've...   

                                       text_wo_punct  \
0  applesupport causing the reply to be disregard...   
1  105835 your business means a lot to us please ...   
2  76328 i really hope you all change but im sure...   
3  105836 livechat is online at the moment  https...   
4  virgintrains see attached error message ive

### Removal of Rare Words

In [13]:
# [:-n_rare_words-1] means to slice from the beginning of the list (0 index) up to the index that is n_rare_words items 
# The -1 is used because slicing in Python is exclusive on the upper bound, meaning the end index is not included in the slice.
# So, [:-n_rare_words-1] gets the list from the start up to the last n_rare_words items in the list.
# [::-1] The -1 step means to iterate the list in reverse order. So, [::-1] reverses the sliced list.

n_rare_words = 10 
RAREWORDS = set([w for (w,wc) in cnt.most_common()[:-n_rare_words-1:-1]])

# Custom functions to remove rare words
def remove_rarewords(text):
    return " ".join([word for word in str(text).split() if word not in RAREWORDS])

df["text_wo_RW"] = df["text_wo_FW"].apply(lambda text: remove_rarewords(text))
print(df.head())

                                                text  \
0  @AppleSupport causing the reply to be disregar...   
1  @105835 Your business means a lot to us. Pleas...   
2  @76328 I really hope you all change but I'm su...   
3  @105836 LiveChat is online at the moment - htt...   
4  @VirginTrains see attached error message. I've...   

                                          text_lower  \
0  @applesupport causing the reply to be disregar...   
1  @105835 your business means a lot to us. pleas...   
2  @76328 i really hope you all change but i'm su...   
3  @105836 livechat is online at the moment - htt...   
4  @virgintrains see attached error message. i've...   

                                       text_wo_punct  \
0  applesupport causing the reply to be disregard...   
1  105835 your business means a lot to us please ...   
2  76328 i really hope you all change but im sure...   
3  105836 livechat is online at the moment  https...   
4  virgintrains see attached error message ive

In [14]:
print(RAREWORDS)

{'httpstco9281okeebk', 'including', 'log', 'thin', 'browser', 'green', 'keen', 'lee', 'slowdown', 'line'}


### Stemming
Stemming is the text preprocessing normalization task concerned with bluntly removing word affixes (prefixes and suffixes).

In [15]:
from nltk.stem.porter import PorterStemmer

stemmer = PorterStemmer()

def stem_words(text):
    return " ".join([stemmer.stem(word) for word in text.split()])

df["text_stem"] = df["text_wo_RW"].apply(lambda text: stem_words(text))
df.head()

Unnamed: 0,text,text_lower,text_wo_punct,text_wo_sw,text_wo_FW,text_wo_RW,text_stem
0,@AppleSupport causing the reply to be disregar...,@applesupport causing the reply to be disregar...,applesupport causing the reply to be disregard...,applesupport causing reply disregarded tapped ...,causing reply disregarded tapped notification ...,causing reply disregarded tapped notification ...,caus repli disregard tap notif keyboard openedðŸ˜¡ðŸ˜¡ðŸ˜¡
1,@105835 Your business means a lot to us. Pleas...,@105835 your business means a lot to us. pleas...,105835 your business means a lot to us please ...,105835 business means lot us please dm name zi...,105835 business means lot name zip code additi...,105835 business means lot name zip code additi...,105835 busi mean lot name zip code addit detai...
2,@76328 I really hope you all change but I'm su...,@76328 i really hope you all change but i'm su...,76328 i really hope you all change but im sure...,76328 really hope change im sure wont dont,76328 really hope change im sure wont dont,76328 really hope change im sure wont dont,76328 realli hope chang im sure wont dont
3,@105836 LiveChat is online at the moment - htt...,@105836 livechat is online at the moment - htt...,105836 livechat is online at the moment https...,105836 livechat online moment httpstcosy94vtu8...,105836 livechat online moment httpstcosy94vtu8...,105836 livechat online moment httpstcosy94vtu8...,105836 livechat onlin moment httpstcosy94vtu8k...
4,@VirginTrains see attached error message. I've...,@virgintrains see attached error message. i've...,virgintrains see attached error message ive tr...,virgintrains see attached error message ive tr...,virgintrains see attached error message tried ...,virgintrains see attached error message tried ...,virgintrain see attach error messag tri leav v...


We can notice that words like cause, reply and notification has been chopped due to the stemming. This is not intended. Hence, we can use Lemmatization in such cases

In [16]:
# Remove the columns
df.drop(["text_lower","text_wo_punct","text_wo_sw", "text_wo_FW"], axis=1, inplace=True)

### Lemmatization

In [17]:
from nltk.corpus import wordnet
from collections import Counter
from nltk.stem import WordNetLemmatizer

def get_part_of_speech(text):
    probable_part_of_speech = wordnet.synsets(text)
    
    pos_counts = Counter()
    
    pos_counts['n'] = len([item for item in probable_part_of_speech if item.pos() == "n"])
    pos_counts['v'] = len([item for item in probable_part_of_speech if item.pos() == "v"])
    pos_counts['a'] = len([item for item in probable_part_of_speech if item.pos() == "a"])
    pos_counts['r'] = len([item for item in probable_part_of_speech if item.pos() == "r"])
    
    most_likely_part_of_speech =  pos_counts.most_common(1)[0][0]
    return most_likely_part_of_speech

In [18]:
lemmatizer = WordNetLemmatizer()

def lemmatize_words(text):
    return " ".join([lemmatizer.lemmatize(word, get_part_of_speech(word)) for word in text.split()])

df["text_lem"] = df["text_wo_RW"].apply(lambda text: lemmatize_words(text))
print(df.head())

                                                text  \
0  @AppleSupport causing the reply to be disregar...   
1  @105835 Your business means a lot to us. Pleas...   
2  @76328 I really hope you all change but I'm su...   
3  @105836 LiveChat is online at the moment - htt...   
4  @VirginTrains see attached error message. I've...   

                                          text_wo_RW  \
0  causing reply disregarded tapped notification ...   
1  105835 business means lot name zip code additi...   
2         76328 really hope change im sure wont dont   
3  105836 livechat online moment httpstcosy94vtu8...   
4  virgintrains see attached error message tried ...   

                                           text_stem  \
0  caus repli disregard tap notif keyboard openedðŸ˜¡ðŸ˜¡ðŸ˜¡   
1  105835 busi mean lot name zip code addit detai...   
2          76328 realli hope chang im sure wont dont   
3  105836 livechat onlin moment httpstcosy94vtu8k...   
4  virgintrain see attach error messa

In [19]:
print(df["text_lem"].iloc[0])

cause reply disregard tap notification keyboard openedðŸ˜¡ðŸ˜¡ðŸ˜¡


We can see that **`causing`** is cast to cause, **`tapped`** is cast to tap, **`means`** is cast to mean, **`notification`** is remain as notification.

We can notice that there is emoji in the text. Hence, let's remove emoji from the text

### Removal of Emojis

In [20]:
## Referal: https://gist.github.com/slowkow/7a7f61f495e3dbb7e3d767f97bd7304b

def remove_emoji(string):
    emoji_pattern = re.compile("["
                           u"\U0001F600-\U0001F64F"  # emoticons
                           u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                           u"\U0001F680-\U0001F6FF"  # transport & map symbols
                           u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                           u"\U00002702-\U000027B0"
                           u"\U000024C2-\U0001F251"
                           "]+", flags=re.UNICODE)
    return emoji_pattern.sub(r'', string)

In [21]:
remove_emoji("game is on ðŸ”¥ðŸ”¥")

'game is on '

### Removal of Emoticons
From Grammarist.com, emoticon is built from keyboard characters that when put together in a certain way represent a facial expression, an emoji is an actual image.

:-) is an emoticon

ðŸ˜€ is an emoji

In [22]:
from emoticons import EMOTICONS

In [23]:
def remove_emoticons(text):
    emoticon_pattern = re.compile(u'(' + u'|'.join(k for k in EMOTICONS) + u')')
    return emoticon_pattern.sub(r'', text)

remove_emoticons("Hello :-)")

'Hello '

Instead of removing the Emoji and Emoticons, which might lose some valuable information when doing sentimental analysis.
Hence, there is another better option which is to convert Emoji and Emoticons to words

### Conversion of Emoticon to Words

In [24]:
def convert_emoticons(text):
    for emot in EMOTICONS:
        text = re.sub(u'('+emot+')', "_".join(EMOTICONS[emot].replace(",","").split()), text)
    return text

In [25]:
text = "Hello :-) :-)"
convert_emoticons(text)

'Hello Happy_face_smiley Happy_face_smiley'

In [26]:
from emoji import UNICODE_EMO

In [27]:
df["text_no_emoticon"] = df["text_lem"].apply(lambda text: convert_emoticons(text))
print(df.head())

                                                text  \
0  @AppleSupport causing the reply to be disregar...   
1  @105835 Your business means a lot to us. Pleas...   
2  @76328 I really hope you all change but I'm su...   
3  @105836 LiveChat is online at the moment - htt...   
4  @VirginTrains see attached error message. I've...   

                                          text_wo_RW  \
0  causing reply disregarded tapped notification ...   
1  105835 business means lot name zip code additi...   
2         76328 really hope change im sure wont dont   
3  105836 livechat online moment httpstcosy94vtu8...   
4  virgintrains see attached error message tried ...   

                                           text_stem  \
0  caus repli disregard tap notif keyboard openedðŸ˜¡ðŸ˜¡ðŸ˜¡   
1  105835 busi mean lot name zip code addit detai...   
2          76328 realli hope chang im sure wont dont   
3  105836 livechat onlin moment httpstcosy94vtu8k...   
4  virgintrain see attach error messa

### Conversion of Emoji to Words

In [28]:
def convert_emojis(text):
    for emot in UNICODE_EMO:
        text = re.sub(r'('+emot+')', "_".join(UNICODE_EMO[emot].replace(",","").replace(":","").split()), text)
    return text

text = "game is on ðŸ”¥"
convert_emojis(text)

'game is on fire'

In [29]:
df["text_no_emoji"] = df["text_no_emoticon"].apply(lambda text: convert_emojis(text))
print(df.head())

                                                text  \
0  @AppleSupport causing the reply to be disregar...   
1  @105835 Your business means a lot to us. Pleas...   
2  @76328 I really hope you all change but I'm su...   
3  @105836 LiveChat is online at the moment - htt...   
4  @VirginTrains see attached error message. I've...   

                                          text_wo_RW  \
0  causing reply disregarded tapped notification ...   
1  105835 business means lot name zip code additi...   
2         76328 really hope change im sure wont dont   
3  105836 livechat online moment httpstcosy94vtu8...   
4  virgintrains see attached error message tried ...   

                                           text_stem  \
0  caus repli disregard tap notif keyboard openedðŸ˜¡ðŸ˜¡ðŸ˜¡   
1  105835 busi mean lot name zip code addit detai...   
2          76328 realli hope chang im sure wont dont   
3  105836 livechat onlin moment httpstcosy94vtu8k...   
4  virgintrain see attach error messa

In [30]:
print(df["text_no_emoji"].iloc[0])

cause reply disregard tap notification keyboard openedpouting_facepouting_facepouting_face


Emoji is being replaced with words

In [31]:
# remove previous columns
df.drop(["text_wo_RW","text_stem","text_lem","text_no_emoticon"], axis=1, inplace=True)
print(df.head())

                                                text  \
0  @AppleSupport causing the reply to be disregar...   
1  @105835 Your business means a lot to us. Pleas...   
2  @76328 I really hope you all change but I'm su...   
3  @105836 LiveChat is online at the moment - htt...   
4  @VirginTrains see attached error message. I've...   

                                       text_no_emoji  
0  cause reply disregard tap notification keyboar...  
1  105835 business mean lot name zip code additio...  
2         76328 really hope change im sure wont dont  
3  105836 livechat online moment httpstcosy94vtu8...  
4  virgintrains see attach error message try leav...  


### Removal of URLs

In [32]:
def remove_urls(text):
    url_pattern = re.compile(r'https?://\S+|www.\S+')
    return url_pattern.sub(r'',text)

In [33]:
# Example
text = "Driverless AI NLP blog post on https://www.h2o.ai/blog/detecting-sarcasm-is-difficult-but-ai-may-have-an-answer/"
remove_urls(text)

'Driverless AI NLP blog post on '

In [34]:
# Example
text = "Driverless AI NLP blog post on www.kaggle.com/"
remove_urls(text)

'Driverless AI NLP blog post on '

### Chat Words Conversion
People do use a lot of abbreviated words in chat and so it might be helpful to expand those words for our analysis purposes.

In [35]:
from chatwords import chat_words_str

In [36]:
chat_words_map_dict = {}
chat_words_list = []

for line in chat_words_str.split("\n"):
    if line.strip():   # line.strip() method is used to remove leading and trailing whitespaces from each line 
        cw, cw_expanded = line.split("=")
        chat_words_list.append(cw)
        chat_words_map_dict[cw] = cw_expanded
chat_words_list = set(chat_words_list)

def chat_words_conversion(text):
    new_text = []
    for w in text.split():
        if w.upper() in chat_words_list:
            new_text.append(chat_words_map_dict[w.upper()])
        else:
            new_text.append(w)
    return " ".join(new_text)

chat_words_conversion("one minute BRB")

'one minute Be Right Back'

In [37]:
chat_words_conversion("GN CYA")

'Good Night See You'

In [38]:
df["text_word_conversion"] = df["text_no_emoji"].apply(lambda text: chat_words_conversion(text))
print(df.head())

                                                text  \
0  @AppleSupport causing the reply to be disregar...   
1  @105835 Your business means a lot to us. Pleas...   
2  @76328 I really hope you all change but I'm su...   
3  @105836 LiveChat is online at the moment - htt...   
4  @VirginTrains see attached error message. I've...   

                                       text_no_emoji  \
0  cause reply disregard tap notification keyboar...   
1  105835 business mean lot name zip code additio...   
2         76328 really hope change im sure wont dont   
3  105836 livechat online moment httpstcosy94vtu8...   
4  virgintrains see attach error message try leav...   

                                text_word_conversion  
0  cause reply disregard tap notification keyboar...  
1  105835 business mean lot name zip code additio...  
2         76328 really hope change im sure wont dont  
3  105836 livechat online moment httpstcosy94vtu8...  
4  virgintrains see attach error message try leav..

### Spelling Correction

In [39]:
from spellchecker import SpellChecker

spell = SpellChecker()

def correctspelling(text):
    corrected_text = []
    misspelled_words = spell.unknown(text.split())
    for word in text.split():
        if word in misspelled_words:
            corrected_text.append(spell.correction(word))
        else:
            corrected_text.append(word)
    return " ".join(corrected_text)

text = "speling correcton"
correctspelling(text)

'spelling correction'

In [40]:
correctspelling("thnks for the cke")

'thanks for the cake'

In [41]:
df["text_final"] = df["text_word_conversion"].apply(lambda text: chat_words_conversion(text))
print(df.head())

                                                text  \
0  @AppleSupport causing the reply to be disregar...   
1  @105835 Your business means a lot to us. Pleas...   
2  @76328 I really hope you all change but I'm su...   
3  @105836 LiveChat is online at the moment - htt...   
4  @VirginTrains see attached error message. I've...   

                                       text_no_emoji  \
0  cause reply disregard tap notification keyboar...   
1  105835 business mean lot name zip code additio...   
2         76328 really hope change im sure wont dont   
3  105836 livechat online moment httpstcosy94vtu8...   
4  virgintrains see attach error message try leav...   

                                text_word_conversion  \
0  cause reply disregard tap notification keyboar...   
1  105835 business mean lot name zip code additio...   
2         76328 really hope change im sure wont dont   
3  105836 livechat online moment httpstcosy94vtu8...   
4  virgintrains see attach error message try l

In [42]:
df.drop(["text_no_emoji","text_word_conversion"],axis =1, inplace=True)
df

Unnamed: 0,text,text_final
0,@AppleSupport causing the reply to be disregar...,cause reply disregard tap notification keyboar...
1,@105835 Your business means a lot to us. Pleas...,105835 business mean lot name zip code additio...
2,@76328 I really hope you all change but I'm su...,76328 really hope change im sure wont dont
3,@105836 LiveChat is online at the moment - htt...,105836 livechat online moment httpstcosy94vtu8...
4,@VirginTrains see attached error message. I've...,virgintrains see attach error message try leav...
...,...,...
88,@105860 I wish Amazon had an option of where I...,105860 wish amazon option get ship up store av...
89,They reschedule my shit for tomorrow https://t...,reschedule shit tomorrow httpstcorsvzct982t
90,"@105861 Hey Sara, sorry to hear of the issues ...",105861 hey sara sorry hear issue ask lay speed...
91,@Tesco bit of both - finding the layout cumber...,tesco bit find layout cumbersome remove item f...


In [43]:
df.to_csv("final.csv")