In [None]:
## load libraries
import os           ## path handling for the input files
import re           ## regular expression library
import string       ## string constants such as string.punctuation

import nltk         ## nlp library: stopwords, sentence tokenizer, lemmatizer
import pandas as pd ## data manipulation
## spell checking: the pyspellchecker package exposes the SpellChecker class
## from its `spellchecker` module (a bare `import SpellChecker` is not callable)
from spellchecker import SpellChecker

In [None]:
from nltk.stem import WordNetLemmatizer
from collections import Counter
# Word-frequency counter; it is filled later, in the rare-word removal step.
cnt = Counter()
# Shared spell-checker instance used by correct_spellings().
spell = SpellChecker()

In [None]:
## load stopwords
from nltk.corpus import stopwords
# Start from NLTK's English list, then add the extra stopwords stored in the
# first column of the headerless CSV.
STOPWORDS = stopwords.words('english')
sw = pd.read_csv("../data/stopwords_extra.csv", header=None)
STOPWORDS.extend(sw[0])

In [None]:
## Read the input data
# pandas.read_excel has no `encoding` keyword (passing one raises TypeError on
# current pandas); an .xlsx workbook declares its own encoding.
data = pd.read_excel(os.path.join('../data/', 'Twitter_dataset_0425.xlsx'))

# lower-case the raw tweets into the working column that every later step rewrites
data["text"] = data["Tweet_text"].str.lower()

In [None]:
## remove stopwords
def remove_stopwords(text):
    """
    description :- drop every whitespace-separated token that is listed in STOPWORDS
    input  :- a sentence (any other value is converted with str() first)
    output :- the remaining tokens joined by single spaces
    """
    kept_tokens = []
    for token in str(text).split():
        if token in STOPWORDS:
            continue
        kept_tokens.append(token)
    return " ".join(kept_tokens)
data["text"] = data["text"].apply(remove_stopwords)

In [None]:
## remove the b' / b" prefix characters
def remove_b(x):
    """
    description :- custom function to strip a leading b' or b" from a sentence
                   (presumably the prefix left behind when a bytes value was
                   turned into text -- confirm against the data source)
    input  :- a sentence
    output :- the sentence without a leading b' / b"; everything else is unchanged
    """
    # Only the first two characters are treated as the prefix: replacing every
    # b' in the text would also eat the "b'" inside words like "bob's".
    if x.startswith(("b'", 'b"')):
        return x[2:]
    return x
data['text'] = data['text'].apply(remove_b)

In [None]:
## remove RT
def remove_RT_other(x):
    """
    description :- custom function to drop a leading "RT @user:"-style prefix
    input  :- a sentence
    output :- the text after the first ":" when a ":" occurs within the first
              20 characters, otherwise the sentence unchanged
    """
    if ":" in x[:20]:
        # Cut at the first colon only. Splitting on every colon and re-joining
        # with spaces would turn "https://..." into "https //...", which the
        # URL pattern "https?://" can no longer match.
        x = x.split(":", 1)[1]
    return x
data['text'] = data['text'].apply(remove_RT_other)

In [None]:
## remove # and @
def remove_hash_at(x):
    """
    description :- custom function to drop hashtags and mentions
    input  :- a sentence (converted with str() first)
    output :- the sentence without any space-separated token that contains # or @
    """
    tokens = str(x).split(" ")
    return " ".join(tok for tok in tokens if "#" not in tok and "@" not in tok)
data['text'] = data['text'].apply(remove_hash_at)

In [None]:
def remove_urls(text):
    """
    description :- custom function to remove URLs
    input  :- a sentence
    output :- the sentence with every http(s):// or www. link deleted
    """
    return re.sub(r'https?://\S+|www\.\S+', r'', text)
data['text'] = data['text'].apply(remove_urls)

In [None]:
# Contraction -> expansion lookup used by expand_contractions().
# Most keys are lower-case; the "I..." forms are listed in both casings.
CONTRACTION_MAP = {"ain't": "is not", "aren't": "are not","can't": "cannot",
                   "can't've": "cannot have", "'cause": "because", "could've": "could have",
                   "couldn't": "could not", "couldn't've": "could not have","didn't": "did not",
                   "doesn't": "does not", "don't": "do not", "hadn't": "had not",
                   "hadn't've": "had not have", "hasn't": "has not", "haven't": "have not",
                   "he'd": "he would", "he'd've": "he would have", "he'll": "he will",
                   "he'll've": "he will have", "he's": "he is", "how'd": "how did",
                   "how'd'y": "how do you", "how'll": "how will", "how's": "how is",
                   "I'd": "I would", "I'd've": "I would have", "I'll": "I will",
                   "I'll've": "I will have","I'm": "I am", "I've": "I have",
                   "i'd": "i would", "i'd've": "i would have", "i'll": "i will",
                   "i'll've": "i will have","i'm": "i am", "i've": "i have",
                   "isn't": "is not", "it'd": "it would", "it'd've": "it would have",
                   "it'll": "it will", "it'll've": "it will have","it's": "it is",
                   "let's": "let us", "ma'am": "madam", "mayn't": "may not",
                   "might've": "might have","mightn't": "might not","mightn't've": "might not have",
                   "must've": "must have", "mustn't": "must not", "mustn't've": "must not have",
                   "needn't": "need not", "needn't've": "need not have","o'clock": "of the clock",
                   "oughtn't": "ought not", "oughtn't've": "ought not have", "shan't": "shall not",
                   "sha'n't": "shall not", "shan't've": "shall not have", "she'd": "she would",
                   "she'd've": "she would have", "she'll": "she will", "she'll've": "she will have",
                   "she's": "she is", "should've": "should have", "shouldn't": "should not",
                   "shouldn't've": "should not have", "so've": "so have","so's": "so as",
                   "this's": "this is",
                   "that'd": "that would", "that'd've": "that would have","that's": "that is",
                   "there'd": "there would", "there'd've": "there would have","there's": "there is",
                   "they'd": "they would", "they'd've": "they would have", "they'll": "they will",
                   "they'll've": "they will have", "they're": "they are", "they've": "they have",
                   "to've": "to have", "wasn't": "was not", "we'd": "we would",
                   "we'd've": "we would have", "we'll": "we will", "we'll've": "we will have",
                   "we're": "we are", "we've": "we have", "weren't": "were not",
                   "what'll": "what will", "what'll've": "what will have", "what're": "what are",
                   "what's": "what is", "what've": "what have", "when's": "when is",
                   "when've": "when have", "where'd": "where did", "where's": "where is",
                   "where've": "where have", "who'll": "who will", "who'll've": "who will have",
                   "who's": "who is", "who've": "who have", "why's": "why is",
                   "why've": "why have", "will've": "will have", "won't": "will not",
                   "won't've": "will not have", "would've": "would have", "wouldn't": "would not",
                   "wouldn't've": "would not have", "y'all": "you all", "y'all'd": "you all would",
                   "y'all'd've": "you all would have","y'all're": "you all are","y'all've": "you all have",
                   "you'd": "you would", "you'd've": "you would have", "you'll": "you will",
                   "you'll've": "you will have", "you're": "you are", "you've": "you have" }


def expand_contractions(sentence, contraction_mapping):
    """
    description :- custom function to replace contractions in sentence
    input  :- a sentence, and a dict mapping each contraction to its expansion
    output :- the sentence with every known contraction expanded; matching is
              case-insensitive and the first character of each match is kept,
              so the casing of the original word start survives
    """
    if not contraction_mapping:
        # An empty alternation would match the empty string at every position.
        return sentence

    # Regex alternation takes the first alternative that matches, so longer
    # contractions must be tried before their own prefixes
    # ("can't've" before "can't"). Keys are escaped because they are literals.
    ordered_keys = sorted(contraction_mapping, key=len, reverse=True)
    contractions_pattern = re.compile('({})'.format('|'.join(re.escape(key) for key in ordered_keys)),
                                      flags=re.IGNORECASE | re.DOTALL)
    # Fallback lookup for matches whose key is stored in a different casing.
    lowercase_lookup = {key.lower(): value for key, value in contraction_mapping.items()}

    def expand_match(contraction):
        match = contraction.group(0)
        expanded_contraction = contraction_mapping.get(match)
        if expanded_contraction is None:
            expanded_contraction = contraction_mapping.get(match.lower())
        if expanded_contraction is None:
            expanded_contraction = lowercase_lookup.get(match.lower())
        if expanded_contraction is None:
            # Nothing to substitute: leave the matched text untouched.
            return match
        return match[0] + expanded_contraction[1:]

    return contractions_pattern.sub(expand_match, sentence)
# Expand sentence by sentence, then glue the sentences back with single spaces.
data['text'] = data['text'].apply(
    lambda x: " ".join(
        expand_contractions(sentence, CONTRACTION_MAP) for sentence in nltk.sent_tokenize(x)
    )
)

In [None]:
#remove punctuation
PUNCT_TO_REMOVE = string.punctuation
def remove_punctuation(text):
    """Return str(text) with every character listed in PUNCT_TO_REMOVE deleted."""
    return "".join(char for char in str(text) if char not in PUNCT_TO_REMOVE)
data["text"] = data["text"].apply(remove_punctuation)

In [None]:
#lemmatization
lemmatizer = WordNetLemmatizer()
def lemmatize_words(text):
    """
    description :- lemmatize every whitespace-separated word with the shared lemmatizer
    input  :- a sentence
    output :- the lemmatized words joined by single spaces
    """
    lemmas = map(lemmatizer.lemmatize, text.split())
    return " ".join(lemmas)
data["text"] = data["text"].apply(lemmatize_words)

In [None]:
### removing the least frequently occurring (rare) words
# Tally how often each token occurs across the whole corpus.
for text in data["text"].values:
    cnt.update(text.split())
n_rare_words = 10
# most_common() is ordered by descending count, so the reversed tail slice
# picks the n_rare_words least frequent tokens.
RAREWORDS = {rare_word for rare_word, _count in cnt.most_common()[:-n_rare_words-1:-1]}
def remove_rarewords(text):
    """
    description :- drop every whitespace-separated token that is listed in RAREWORDS
    input  :- a sentence (any other value is converted with str() first)
    output :- the remaining tokens joined by single spaces
    """
    frequent_tokens = filter(lambda token: token not in RAREWORDS, str(text).split())
    return " ".join(frequent_tokens)

data["text"] = data["text"].apply(remove_rarewords)

In [None]:
def remove_emoji(string):
    """
    description :- custom function to remove emojis
    input  :- a sentence
    output :- the sentence with every run of characters from the ranges below deleted
    """
    emoji_ranges = (
        u"\U0001F600-\U0001F64F"  # emoticons
        u"\U0001F300-\U0001F5FF"  # symbols & pictographs
        u"\U0001F680-\U0001F6FF"  # transport & map symbols
        u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
        u"\U00002702-\U000027B0"
        u"\U000024C2-\U0001F251"  # very wide: every code point from U+24C2 up to U+1F251
    )
    return re.sub("[" + emoji_ranges + "]+", r'', string, flags=re.UNICODE)
data["text"] = data["text"].apply(remove_emoji)

# Emoticon lookup: each key is the source of a regular expression (written as a
# raw string, with regex metacharacters escaped so they match literally) and
# each value is a human-readable description. remove_emoticons() uses the keys.
EMOTICONS = {
    r":‑\)":"Happy face or smiley",
    r":\)":"Happy face or smiley",
    r":-\]":"Happy face or smiley",
    r":\]":"Happy face or smiley",
    r":-3":"Happy face smiley",
    r":3":"Happy face smiley",
    r":->":"Happy face smiley",
    r":>":"Happy face smiley",
    r"8-\)":"Happy face smiley",
    r":o\)":"Happy face smiley",
    r":-\}":"Happy face smiley",
    r":\}":"Happy face smiley",
    r":-\)":"Happy face smiley",
    r":c\)":"Happy face smiley",
    r":\^\)":"Happy face smiley",
    r"=\]":"Happy face smiley",
    r"=\)":"Happy face smiley",
    r":‑D":"Laughing, big grin or laugh with glasses",
    r":D":"Laughing, big grin or laugh with glasses",
    r"8‑D":"Laughing, big grin or laugh with glasses",
    r"8D":"Laughing, big grin or laugh with glasses",
    r"X‑D":"Laughing, big grin or laugh with glasses",
    r"XD":"Laughing, big grin or laugh with glasses",
    r"=D":"Laughing, big grin or laugh with glasses",
    r"=3":"Laughing, big grin or laugh with glasses",
    r"B\^D":"Laughing, big grin or laugh with glasses",
    r":-\)\)":"Very happy",
    r":‑\(":"Frown, sad, andry or pouting",
    r":-\(":"Frown, sad, andry or pouting",
    r":\(":"Frown, sad, andry or pouting",
    r":‑c":"Frown, sad, andry or pouting",
    r":c":"Frown, sad, andry or pouting",
    r":‑<":"Frown, sad, andry or pouting",
    r":<":"Frown, sad, andry or pouting",
    r":‑\[":"Frown, sad, andry or pouting",
    r":\[":"Frown, sad, andry or pouting",
    r":-\|\|":"Frown, sad, andry or pouting",
    r">:\[":"Frown, sad, andry or pouting",
    r":\{":"Frown, sad, andry or pouting",
    r":@":"Frown, sad, andry or pouting",
    r">:\(":"Frown, sad, andry or pouting",
    r":'‑\(":"Crying",
    r":'\(":"Crying",
    r":'‑\)":"Tears of happiness",
    r":'\)":"Tears of happiness",
    r"D‑':":"Horror",
    r"D:<":"Disgust",
    r"D:":"Sadness",
    r"D8":"Great dismay",
    r"D;":"Great dismay",
    r"D=":"Great dismay",
    r"DX":"Great dismay",
    r":‑O":"Surprise",
    r":O":"Surprise",
    r":‑o":"Surprise",
    r":o":"Surprise",
    r":-0":"Shock",
    r"8‑0":"Yawn",
    r">:O":"Yawn",
    r":-\*":"Kiss",
    r":\*":"Kiss",
    r":X":"Kiss",
    r";‑\)":"Wink or smirk",
    r";\)":"Wink or smirk",
    r"\*-\)":"Wink or smirk",
    r"\*\)":"Wink or smirk",
    r";‑\]":"Wink or smirk",
    r";\]":"Wink or smirk",
    r";\^\)":"Wink or smirk",
    r":‑,":"Wink or smirk",
    r";D":"Wink or smirk",
    r":‑P":"Tongue sticking out, cheeky, playful or blowing a raspberry",
    r":P":"Tongue sticking out, cheeky, playful or blowing a raspberry",
    r"X‑P":"Tongue sticking out, cheeky, playful or blowing a raspberry",
    r"XP":"Tongue sticking out, cheeky, playful or blowing a raspberry",
    r":‑Þ":"Tongue sticking out, cheeky, playful or blowing a raspberry",
    r":Þ":"Tongue sticking out, cheeky, playful or blowing a raspberry",
    r":b":"Tongue sticking out, cheeky, playful or blowing a raspberry",
    r"d:":"Tongue sticking out, cheeky, playful or blowing a raspberry",
    r"=p":"Tongue sticking out, cheeky, playful or blowing a raspberry",
    r">:P":"Tongue sticking out, cheeky, playful or blowing a raspberry",
    r":‑/":"Skeptical, annoyed, undecided, uneasy or hesitant",
    r":/":"Skeptical, annoyed, undecided, uneasy or hesitant",
    r":-[.]":"Skeptical, annoyed, undecided, uneasy or hesitant",
    r">:[(\\\)]":"Skeptical, annoyed, undecided, uneasy or hesitant",
    r">:/":"Skeptical, annoyed, undecided, uneasy or hesitant",
    r":[(\\\)]":"Skeptical, annoyed, undecided, uneasy or hesitant",
    r"=/":"Skeptical, annoyed, undecided, uneasy or hesitant",
    r"=[(\\\)]":"Skeptical, annoyed, undecided, uneasy or hesitant",
    r":L":"Skeptical, annoyed, undecided, uneasy or hesitant",
    r"=L":"Skeptical, annoyed, undecided, uneasy or hesitant",
    r":S":"Skeptical, annoyed, undecided, uneasy or hesitant",
    r":‑\|":"Straight face",
    r":\|":"Straight face",
    r":\$":"Embarrassed or blushing",  # "$" escaped: unescaped it is the end-of-text anchor
    r":‑x":"Sealed lips or wearing braces or tongue-tied",
    r":x":"Sealed lips or wearing braces or tongue-tied",
    r":‑#":"Sealed lips or wearing braces or tongue-tied",
    r":#":"Sealed lips or wearing braces or tongue-tied",
    r":‑&":"Sealed lips or wearing braces or tongue-tied",
    r":&":"Sealed lips or wearing braces or tongue-tied",
    r"O:‑\)":"Angel, saint or innocent",
    r"O:\)":"Angel, saint or innocent",
    r"0:‑3":"Angel, saint or innocent",
    r"0:3":"Angel, saint or innocent",
    r"0:‑\)":"Angel, saint or innocent",
    r"0:\)":"Angel, saint or innocent",
    r":‑b":"Tongue sticking out, cheeky, playful or blowing a raspberry",
    r"0;\^\)":"Angel, saint or innocent",
    r">:‑\)":"Evil or devilish",
    r">:\)":"Evil or devilish",
    r"\}:‑\)":"Evil or devilish",
    r"\}:\)":"Evil or devilish",
    r"3:‑\)":"Evil or devilish",
    r"3:\)":"Evil or devilish",
    r">;\)":"Evil or devilish",
    r"\|;‑\)":"Cool",
    r"\|‑O":"Bored",
    r":‑J":"Tongue-in-cheek",
    r"#‑\)":"Party all night",
    r"%‑\)":"Drunk or confused",
    r"%\)":"Drunk or confused",
    r":-###\.\.":"Being sick",  # dots escaped: unescaped they match any character
    r":###\.\.":"Being sick",
    r"<:‑\|":"Dump",
    r"\(>_<\)":"Troubled",
    r"\(>_<\)>":"Troubled",
    r"\(';'\)":"Baby",
    r"\(\^\^>``":"Nervous or Embarrassed or Troubled or Shy or Sweat drop",
    r"\(\^_\^;\)":"Nervous or Embarrassed or Troubled or Shy or Sweat drop",
    r"\(-_-;\)":"Nervous or Embarrassed or Troubled or Shy or Sweat drop",
    r"\(~_~;\) \(・\.・;\)":"Nervous or Embarrassed or Troubled or Shy or Sweat drop",
    r"\(-_-\)zzz":"Sleeping",
    r"\(\^_-\)":"Wink",
    r"\(\(\+_\+\)\)":"Confused",
    r"\(\+o\+\)":"Confused",
    r"\(o\|o\)":"Ultraman",
    r"\^_\^":"Joyful",
    r"\(\^_\^\)/":"Joyful",
    r"\(\^O\^\)／":"Joyful",
    r"\(\^o\^\)／":"Joyful",
    r"\(__\)":"Kowtow as a sign of respect, or dogeza for apology",
    r"_\(\._\.\)_":"Kowtow as a sign of respect, or dogeza for apology",
    r"<\(_ _\)>":"Kowtow as a sign of respect, or dogeza for apology",
    r"<m\(__\)m>":"Kowtow as a sign of respect, or dogeza for apology",
    r"m\(__\)m":"Kowtow as a sign of respect, or dogeza for apology",
    r"m\(_ _\)m":"Kowtow as a sign of respect, or dogeza for apology",
    r"\('_'\)":"Sad or Crying",
    r"\(/_;\)":"Sad or Crying",
    r"\(T_T\) \(;_;\)":"Sad or Crying",
    r"\(;_;":"Sad of Crying",
    r"\(;_:\)":"Sad or Crying",
    r"\(;O;\)":"Sad or Crying",
    r"\(:_;\)":"Sad or Crying",
    r"\(ToT\)":"Sad or Crying",
    r";_;":"Sad or Crying",
    r";-;":"Sad or Crying",
    r";n;":"Sad or Crying",
    r";;":"Sad or Crying",
    r"Q\.Q":"Sad or Crying",
    r"T\.T":"Sad or Crying",
    r"QQ":"Sad or Crying",
    r"Q_Q":"Sad or Crying",
    r"\(-\.-\)":"Shame",
    r"\(-_-\)":"Shame",
    r"\(一一\)":"Shame",
    r"\(；一_一\)":"Shame",
    r"\(=_=\)":"Tired",
    r"\(=\^\·\^=\)":"cat",
    r"\(=\^\·\·\^=\)":"cat",
    r"=_\^=":"cat",
    r"\(\.\.\)":"Looking down",
    r"\(\._\.\)":"Looking down",
    r"\^m\^":"Giggling with hand covering mouth",
    r"\(\・\・\?":"Confusion",
    # "?" escaped: unescaped, this pattern matched every bare ")"
    r"\(\?_\?\)":"Confusion",
    r">\^_\^<":"Normal Laugh",
    r"<\^!\^>":"Normal Laugh",
    r"\^/\^":"Normal Laugh",
    r"\（\*\^_\^\*）" :"Normal Laugh",
    r"\(\^<\^\) \(\^\.\^\)":"Normal Laugh",
    r"\(\^\.\^\)":"Normal Laugh",
    r"\(\^_\^\.\)":"Normal Laugh",
    r"\(\^_\^\)":"Normal Laugh",
    r"\(\^\^\)":"Normal Laugh",
    r"\(\^J\^\)":"Normal Laugh",
    r"\(\*\^\.\^\*\)":"Normal Laugh",
    r"\(\^—\^\）":"Normal Laugh",
    r"\(#\^\.\^#\)":"Normal Laugh",
    r"\（\^—\^\）":"Waving",
    r"\(;_;\)/~~~":"Waving",
    r"\(\^\.\^\)/~~~":"Waving",
    r"\(-_-\)/~~~ \(\$\·\·\)/~~~":"Waving",
    r"\(T_T\)/~~~":"Waving",
    r"\(ToT\)/~~~":"Waving",
    r"\(\*\^0\^\*\)":"Excited",
    r"\(\*_\*\)":"Amazed",
    r"\(\*_\*;":"Amazed",
    r"\(\+_\+\) \(@_@\)":"Amazed",
    r"\(\*\^\^\)v":"Laughing,Cheerful",
    r"\(\^_\^\)v":"Laughing,Cheerful",
    r"\(\(d[-_-]b\)\)":"Headphones,Listening to music",
    r'\(-"-\)':"Worried",
    r"\(ーー;\)":"Worried",
    r"\(\^0_0\^\)":"Eyeglasses",
    r"\(\＾ｖ\＾\)":"Happy",
    r"\(\＾ｕ\＾\)":"Happy",
    r"\(\^\)o\(\^\)":"Happy",
    r"\(\^O\^\)":"Happy",
    r"\(\^o\^\)":"Happy",
    r"\)\^o\^\(":"Happy",
    r":O o_O":"Surprised",
    r"o_0":"Surprised",
    r"o\.O":"Surpised",
    r"\(o\.o\)":"Surprised",
    r"oO":"Surprised",
    r"\(\*￣m￣\)":"Dissatisfied",
    r"\(‘A`\)":"Snubbed or Deflated"
}

# Built once for the whole corpus. Alternation takes the first alternative that
# matches, so the longest patterns go first: ":-))" must be tried before ":-)",
# otherwise a stray ")" is left behind.
_EMOTICON_PATTERN = re.compile(
    u'(' + u'|'.join(sorted(EMOTICONS, key=len, reverse=True)) + u')'
)

def remove_emoticons(text):
    """
    description :- custom function to remove emoticons
    input  :- a sentence
    output :- the sentence with every emoticon listed in EMOTICONS deleted
    """
    return _EMOTICON_PATTERN.sub(r'', text)
data["text"] = data["text"].apply(remove_emoticons)

In [None]:
## strip of spaces in the beginning and ending
# Only leading and trailing whitespace is trimmed; inner spacing is left as is.
data['text'] = data['text'].str.strip()

In [None]:
### Removal of utf special characters
# 127 codes: 'xe2x80x81' .. 'xe2x80xbf' followed by 'xe2x81x80' .. 'xe2x81xbf'.
# NOTE(review): these look like the UTF-8 byte escapes \xe2\x80\x81 .. \xe2\x81\xbf
# (U+2001 .. U+207F) after their backslashes were stripped -- confirm.
utf_special_characs = (
    ['xe2x80x{:02x}'.format(low_byte) for low_byte in range(0x81, 0xc0)]
    + ['xe2x81x{:02x}'.format(low_byte) for low_byte in range(0x80, 0xc0)]
)

def remove_special_chars(x):
    """
    description :- custom function to remove utf special chars
    input  :- a sentence
    output :- the sentence with every code from utf_special_characs deleted; a
              space-separated token that still matches "x..x..*" afterwards
              (an escape that is not in the list) is dropped entirely
    """
    leftover_escape = re.compile("x..x..*")
    kept_tokens = []
    for token in x.split(" "):
        # The codes are plain alphanumeric text, so a literal replace is enough.
        for code in utf_special_characs:
            token = token.replace(code, "")
        if leftover_escape.search(token) is None:
            kept_tokens.append(token)
    return " ".join(kept_tokens)
data['text'] = data['text'].apply(remove_special_chars)

def only_chars(x):
    """ keep only the letters a-z / A-Z; every run of other characters becomes one space """
    non_letters = re.compile('[^a-zA-Z]+')
    return non_letters.sub(' ', x)
# Reduce every tweet to letters separated by single spaces.
data['text'] = data['text'].apply(only_chars)

In [None]:
def correct_spellings(text):
    """
    description :- custom function to correct words which are misspelled
    input  :- a sentence
    output :- the sentence with every word the spell checker does not know
              replaced by its suggested correction; a word without any
              suggestion is kept as it is
    side effect :- prints and increments the global progress counter `ctr`
    """
    global ctr
    words = text.split()
    misspelled_words = spell.unknown(words)
    corrected_text = []
    for word in words:
        if word in misspelled_words:
            # correction() can return None when it has no candidate; joining a
            # None would raise TypeError, so fall back to the original word.
            corrected_text.append(spell.correction(word) or word)
        else:
            corrected_text.append(word)
    print("done for :-", ctr)
    ctr += 1
    return " ".join(corrected_text)
# Progress counter read and advanced by correct_spellings().
ctr = 0
data['text'] =  data['text'].apply(lambda x: correct_spellings(x))

### saving to disk
# Relative path, so the file lands in the current working directory;
# index=False leaves the DataFrame index out of the CSV.
data.to_csv("cleaned_dataset_team14.csv", index=False)