# Importing the tweets.csv dataset

In [1]:
import pandas as pd
import numpy as np
df = pd.read_csv('tweets.csv')

In [89]:
from sklearn.preprocessing import StandardScaler

# Initialize the StandardScaler
scaler = StandardScaler()

# Apply StandardScaler to the 'target' column
df['target'] = scaler.fit_transform(df[['target']])
df['target'].max()

0.9988606490598608

# Basic Pre-Processing of text

In [3]:
# lowercasing
df['text'] = df['text'].str.lower()
df.head(2)

Unnamed: 0,target,id,date,flag,user,text
0,0,2200003196,Tue Jun 16 18:18:12 PDT 2009,NO_QUERY,LaLaLindsey0609,@chrishasboobs ahhh i hope your ok!!!
1,0,1467998485,Mon Apr 06 23:11:14 PDT 2009,NO_QUERY,sexygrneyes,"@misstoriblack cool , i have no tweet apps fo..."


In [4]:
# removing html tags
import re
def remove_html(text):
  pattern = re.compile('<.*?>')
  return pattern.sub(r'', text)

df['text'] = df['text'].apply(remove_html)
df.head(2)

Unnamed: 0,target,id,date,flag,user,text
0,0,2200003196,Tue Jun 16 18:18:12 PDT 2009,NO_QUERY,LaLaLindsey0609,@chrishasboobs ahhh i hope your ok!!!
1,0,1467998485,Mon Apr 06 23:11:14 PDT 2009,NO_QUERY,sexygrneyes,"@misstoriblack cool , i have no tweet apps fo..."


In [5]:
# removing URLs
def remove_url(text):
  pattern = re.compile(r'https?://\S+|www\.\S+')
  return pattern.sub(r'', text)

df['text'] = df['text'].apply(remove_url)
df.head(2)

Unnamed: 0,target,id,date,flag,user,text
0,0,2200003196,Tue Jun 16 18:18:12 PDT 2009,NO_QUERY,LaLaLindsey0609,@chrishasboobs ahhh i hope your ok!!!
1,0,1467998485,Mon Apr 06 23:11:14 PDT 2009,NO_QUERY,sexygrneyes,"@misstoriblack cool , i have no tweet apps fo..."


In [6]:
# removing punctuation
import string
punc = string.punctuation
def remove_punc(text):
  return text.translate(str.maketrans('', '', punc))

df['text'] = df['text'].apply(remove_punc)
df.head(2)

Unnamed: 0,target,id,date,flag,user,text
0,0,2200003196,Tue Jun 16 18:18:12 PDT 2009,NO_QUERY,LaLaLindsey0609,chrishasboobs ahhh i hope your ok
1,0,1467998485,Mon Apr 06 23:11:14 PDT 2009,NO_QUERY,sexygrneyes,misstoriblack cool i have no tweet apps for ...


In [7]:
# handling chat words
chat_words = {
    "AFAIK": "As Far As I Know",
    "AFK": "Away From Keyboard",
    "ASAP": "As Soon As Possible",
    "ATK": "At The Keyboard",
    "ATM": "At The Moment",
    "A3": "Anytime, Anywhere, Anyplace",
    "BAK": "Back At Keyboard",
    "BBL": "Be Back Later",
    "BBS": "Be Back Soon",
    "BFN": "Bye For Now",
    "B4N": "Bye For Now",
    "BRB": "Be Right Back",
    "BRT": "Be Right There",
    "BTW": "By The Way",
    "B4": "Before",
    "B4N": "Bye For Now",
    "CU": "See You",
    "CUL8R": "See You Later",
    "CYA": "See You",
    "FAQ": "Frequently Asked Questions",
    "FC": "Fingers Crossed",
    "FWIW": "For What It's Worth",
    "FYI": "For Your Information",
    "GAL": "Get A Life",
    "GG": "Good Game",
    "GN": "Good Night",
    "GMTA": "Great Minds Think Alike",
    "GR8": "Great!",
    "G9": "Genius",
    "IC": "I See",
    "ICQ": "I Seek you (also a chat program)",
    "ILU": "ILU: I Love You",
    "IMHO": "In My Honest/Humble Opinion",
    "IMO": "In My Opinion",
    "IOW": "In Other Words",
    "IRL": "In Real Life",
    "KISS": "Keep It Simple, Stupid",
    "LDR": "Long Distance Relationship",
    "LMAO": "Laugh My A.. Off",
    "LOL": "Laughing Out Loud",
    "LTNS": "Long Time No See",
    "L8R": "Later",
    "MTE": "My Thoughts Exactly",
    "M8": "Mate",
    "NRN": "No Reply Necessary",
    "OIC": "Oh I See",
    "PITA": "Pain In The A..",
    "PRT": "Party",
    "PRW": "Parents Are Watching",
    "QPSA?": "Que Pasa?",
    "ROFL": "Rolling On The Floor Laughing",
    "ROFLOL": "Rolling On The Floor Laughing Out Loud",
    "ROTFLMAO": "Rolling On The Floor Laughing My A.. Off",
    "SK8": "Skate",
    "STATS": "Your sex and age",
    "ASL": "Age, Sex, Location",
    "THX": "Thank You",
    "TTFN": "Ta-Ta For Now!",
    "TTYL": "Talk To You Later",
    "U": "You",
    "U2": "You Too",
    "U4E": "Yours For Ever",
    "WB": "Welcome Back",
    "WTF": "What The F...",
    "WTG": "Way To Go!",
    "WUF": "Where Are You From?",
    "W8": "Wait...",
    "7K": "Sick:-D Laugher",
    "TFW": "That feeling when",
    "MFW": "My face when",
    "MRW": "My reaction when",
    "IFYP": "I feel your pain",
    "TNTL": "Trying not to laugh",
    "JK": "Just kidding",
    "IDC": "I don't care",
    "ILY": "I love you",
    "IMU": "I miss you",
    "ADIH": "Another day in hell",
    "ZZZ": "Sleeping, bored, tired",
    "WYWH": "Wish you were here",
    "TIME": "Tears in my eyes",
    "BAE": "Before anyone else",
    "FIMH": "Forever in my heart",
    "BSAAW": "Big smile and a wink",
    "BWL": "Bursting with laughter",
    "BFF": "Best friends forever",
    "CSL": "Can't stop laughing"
}

# Function
def chat_conversion(text):
    new_text = []
    for i in text.split():
        if i.upper() in chat_words:
            new_text.append(chat_words[i.upper()])
        else:
            new_text.append(i)
    return " ".join(new_text)

df['text'] = df['text'].apply(chat_conversion)
df.head(2)

Unnamed: 0,target,id,date,flag,user,text
0,0,2200003196,Tue Jun 16 18:18:12 PDT 2009,NO_QUERY,LaLaLindsey0609,chrishasboobs ahhh i hope your ok
1,0,1467998485,Mon Apr 06 23:11:14 PDT 2009,NO_QUERY,sexygrneyes,misstoriblack cool i have no tweet apps for my...


In [8]:
# decontraction of words
import re

# Contractions dictionary
contractions = {
    "ain't": "am not", "aren't": "are not", "can't": "can not", "can't've": "can not have", "'cause": "because",
    "could've": "could have", "couldn't": "could not", "couldn't've": "could not have", "didn't": "did not",
    "doesn't": "does not", "don't": "do not", "hadn't": "had not", "hadn't've": "had not have", "hasn't": "has not",
    "haven't": "have not", "he'd": "he would", "he'd've": "he would have", "he'll": "he will", "he'll've": "he will have",
    "he's": "he is", "how'd": "how did", "how'd'y": "how do you", "how'll": "how will", "how's": "how is", "i'd": "i would",
    "i'd've": "i would have", "i'll": "i will", "i'll've": "i will have", "i'm": "i am", "i've": "i have", "isn't": "is not",
    "it'd": "it would", "it'd've": "it would have", "it'll": "it will", "it'll've": "it will have", "it's": "it is",
    "let's": "let us", "ma'am": "madam", "mayn't": "may not", "might've": "might have", "mightn't": "might not",
    "mightn't've": "might not have", "must've": "must have", "mustn't": "must not", "mustn't've": "must not have",
    "needn't": "need not", "needn't've": "need not have", "o'clock": "of the clock", "oughtn't": "ought not",
    "oughtn't've": "ought not have", "shan't": "shall not", "sha'n't": "shall not", "shan't've": "shall not have",
    "she'd": "she would", "she'd've": "she would have", "she'll": "she will", "she'll've": "she will have", "she's": "she is",
    "should've": "should have", "shouldn't": "should not", "shouldn't've": "should not have", "so've": "so have",
    "so's": "so as", "that'd": "that would", "that'd've": "that would have", "that's": "that is", "there'd": "there would",
    "there'd've": "there would have", "there's": "there is", "they'd": "they would", "they'd've": "they would have",
    "they'll": "they will", "they'll've": "they will have", "they're": "they are", "they've": "they have", "to've": "to have",
    "wasn't": "was not", "we'd": "we would", "we'd've": "we would have", "we'll": "we will", "we'll've": "we will have",
    "we're": "we are", "we've": "we have", "weren't": "were not", "what'll": "what will", "what'll've": "what will have",
    "what're": "what are", "what's": "what is", "what've": "what have", "when's": "when is", "when've": "when have",
    "where'd": "where did", "where's": "where is", "where've": "where have", "who'll": "who will", "who'll've": "who will have",
    "who's": "who is", "who've": "who have", "why's": "why is", "why've": "why have", "will've": "will have", "won't": "will not",
    "won't've": "will not have", "would've": "would have", "wouldn't": "would not", "wouldn't've": "would not have",
    "y'all": "you all", "y'all'd": "you all would", "y'all'd've": "you all would have", "y'all're": "you all are",
    "y'all've": "you all have", "you'd": "you would", "you'd've": "you would have", "you'll": "you will", "you'll've": "you will have",
    "you're": "you are", "you've": "you have"
}

def decontract_text(text):
    """
    Decontracts common English contractions in a given text.
    """
    if not isinstance(text, str):
        return text

    # Split the text into words
    words = text.split()
    # Replace each contracted form in the words with the expanded form
    expanded_words = [contractions[word.lower()] if word.lower() in contractions else word for word in words]

    # Join words back into a sentence
    text = ' '.join(expanded_words)

    # Handle remaining common contractions in case insensitive manner
    text = re.sub(r"\b've\b", " have", text, flags=re.IGNORECASE)
    text = re.sub(r"\bn't\b", " not", text, flags=re.IGNORECASE)
    text = re.sub(r"\b're\b", " are", text, flags=re.IGNORECASE)
    text = re.sub(r"\b'll\b", " will", text, flags=re.IGNORECASE)

    return text

df['text'] = df['text'].apply(decontract_text)

In [9]:
!pip install symspellpy

from symspellpy import SymSpell, Verbosity

# Initialize SymSpell with default values
sym_spell = SymSpell(max_dictionary_edit_distance=2, prefix_length=7)

# Load a dictionary for spell correction
dictionary_path = "/content/en-80k.txt"
sym_spell.load_dictionary(dictionary_path, term_index=0, count_index=1)

# Define a function for spell correction using SymSpell
def correct_spelling(text):
    corrected_words = []
    for word in text.split():
        # Get the best suggestion for each word
        suggestion = sym_spell.lookup(word, Verbosity.CLOSEST, max_edit_distance=2)
        corrected_words.append(suggestion[0].term if suggestion else word)  # Append the corrected term or original
    return " ".join(corrected_words)

# Apply the spell correction function to the DataFrame
df['text'] = df['text'].apply(correct_spelling)

df.head(2)

Defaulting to user installation because normal site-packages is not writeable
Collecting symspellpy
  Downloading symspellpy-6.7.8-py3-none-any.whl.metadata (3.9 kB)
Collecting editdistpy>=0.1.3 (from symspellpy)
  Downloading editdistpy-0.1.5-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.9 kB)
Downloading symspellpy-6.7.8-py3-none-any.whl (2.6 MB)
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.6/2.6 MB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m MB/s[0m eta [36m0:00:01[0m
[?25hDownloading editdistpy-0.1.5-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (144 kB)
Installing collected packages: editdistpy, symspellpy
Successfully installed editdistpy-0.1.5 symspellpy-6.7.8

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;

2024-11-08 17:29:13,173: E symspellpy.symspellpy] Dictionary file not found at /content/en-80k.txt.


Unnamed: 0,target,id,date,flag,user,text
0,0,2200003196,Tue Jun 16 18:18:12 PDT 2009,NO_QUERY,LaLaLindsey0609,chrishasboobs ahhh i hope your ok
1,0,1467998485,Mon Apr 06 23:11:14 PDT 2009,NO_QUERY,sexygrneyes,misstoriblack cool i have no tweet apps for my...


In [10]:
import nltk
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /home/mridul/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [11]:
# removing StopWords
from nltk.corpus import stopwords
stopword = stopwords.words('english')

# Function
def remove_stopwords(text):
    new_text = []

    for word in text.split():
        if word in stopword:
            new_text.append('')
        else:
            new_text.append(word)
    x = new_text[:]
    new_text.clear()
    return " ".join(x)

df['text'] = df['text'].apply(remove_stopwords)
df.head(2)

Unnamed: 0,target,id,date,flag,user,text
0,0,2200003196,Tue Jun 16 18:18:12 PDT 2009,NO_QUERY,LaLaLindsey0609,chrishasboobs ahhh hope ok
1,0,1467998485,Mon Apr 06 23:11:14 PDT 2009,NO_QUERY,sexygrneyes,misstoriblack cool tweet apps razr 2


In [12]:
# Handling Emojis
!pip install emoji
import emoji

df['text'] = df['text'].apply(lambda x: emoji.demojize(x))
df.head(2)

Defaulting to user installation because normal site-packages is not writeable

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3 -m pip install --upgrade pip[0m


Unnamed: 0,target,id,date,flag,user,text
0,0,2200003196,Tue Jun 16 18:18:12 PDT 2009,NO_QUERY,LaLaLindsey0609,chrishasboobs ahhh hope ok
1,0,1467998485,Mon Apr 06 23:11:14 PDT 2009,NO_QUERY,sexygrneyes,misstoriblack cool tweet apps razr 2


In [13]:
import nltk
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to /home/mridul/nltk_data...


True

In [14]:
# Import WordNetLemmatizer from NLTK library
from nltk.stem import WordNetLemmatizer

# Initialize lemmatizer
lemmatizer = WordNetLemmatizer()

# Function to lemmatize words
def lemmatize_words(text):
    return " ".join([lemmatizer.lemmatize(word) for word in text.split()])

# Apply the lemmatization function to the 'text' column
df['text'] = df['text'].apply(lemmatize_words)
df.head(2)

Unnamed: 0,target,id,date,flag,user,text
0,0,2200003196,Tue Jun 16 18:18:12 PDT 2009,NO_QUERY,LaLaLindsey0609,chrishasboobs ahhh hope ok
1,0,1467998485,Mon Apr 06 23:11:14 PDT 2009,NO_QUERY,sexygrneyes,misstoriblack cool tweet apps razr 2


In [22]:
import re

# Step 1: Extract mentions
# Function to extract mentions from tweet text
def extract_mentions(tweet):
    return re.findall(r'@(\w+)', tweet)

# Apply the function to extract mentions into a new column
df['mentions'] = df['text'].apply(extract_mentions)

In [23]:
df_mention = df[df['mentions'].apply(len) > 0]

In [24]:
df_mention.head()

Unnamed: 0,target,id,date,flag,user,text,mentions


In [20]:
df_mention['target'].describe()

count    0.0
mean     NaN
std      NaN
min      NaN
25%      NaN
50%      NaN
75%      NaN
max      NaN
Name: target, dtype: float64

In [None]:
sample_mean = df_mention['target'].mean()
sample_std = df_mention['target'].std()

In [None]:
val = (sample_mean - 0.5) / (sample_std / np.sqrt(len(df_mention)))
val

# Word Hypothesis Testing

## Intelligent(+ve sentiment accepted)

In [227]:
count = df['text'].str.lower().str.count(r'\bintelligent\b').sum()
count

8

In [228]:
filtered_df = df[df['text'].str.contains(r'\bintelligent\b', case=False, na=False)]
filtered_df.head()

Unnamed: 0,target,id,date,flag,user,text,mentions
3673,0.998861,1558291733,Sun Apr 19 07:55:02 PDT 2009,NO_QUERY,Ellsbeth,paulawhite started reading scifi 8th gr h libr...,[]
17195,0.998861,1573088861,Mon Apr 20 23:40:36 PDT 2009,NO_QUERY,korkykathman,kellyann18 dont silly everyone intelligent way,[]
59101,0.998861,1881811712,Fri May 22 05:36:54 PDT 2009,NO_QUERY,WahooPooh,flick follower diverse intelligent hilarious s...,[]
60267,-1.001141,1826210676,Sun May 17 07:57:50 PDT 2009,NO_QUERY,VictoriaBush,missjahan im intelligent like thatahahaha fric...,[]
70212,-1.001141,2243540897,Fri Jun 19 13:34:51 PDT 2009,NO_QUERY,litrock,baiting crazy guy work difficult refuse see po...,[]


In [229]:
sample_mean = filtered_df['target'].mean()
sample_standard_deviation = filtered_df['target'].std()
test_char = ((sample_mean - 1)/sample_standard_deviation)*(np.sqrt(count))
test_char

-2.0525034506325652

### Confidence Interval for Mean:

In [230]:
t = 3.499
mean_interval_min = sample_mean - t*(sample_standard_deviation)/(np.sqrt(count))
mean_interval_max = sample_mean + t*(sample_standard_deviation)/(np.sqrt(count))
Confidence_Interval = (mean_interval_min, mean_interval_max)
print(f"Confidence Interval = {Confidence_Interval}")

Confidence Interval = (-1.0316435535797932, 1.5293638769985647)


## Allergic(-ve sentiment accepted)

In [231]:
count = df['text'].str.lower().str.count(r'\ballergic\b').sum()
count

37

In [232]:
filtered_df = df[df['text'].str.contains(r'\ballergic\b', case=False, na=False)]
filtered_df.head()

Unnamed: 0,target,id,date,flag,user,text,mentions
32,-1.001141,2181196503,Mon Jun 15 11:29:59 PDT 2009,NO_QUERY,grskell3,im allergic reaction silver earings,[]
892,-1.001141,2006900117,Tue Jun 02 11:51:02 PDT 2009,NO_QUERY,JasonTankerley,know ive said million time buti really wish wa...,[]
3045,-1.001141,2186452810,Mon Jun 15 18:51:40 PDT 2009,NO_QUERY,knight_jonathan,querlvox adult allergic close campus ill try B...,[]
16431,-1.001141,1770838938,Mon May 11 22:23:36 PDT 2009,NO_QUERY,marynunez,jonathanuniza im allergic seafood lovvvve seafood,[]
17341,-1.001141,2228675483,Thu Jun 18 14:44:37 PDT 2009,NO_QUERY,leidygee,que claro way school allergic reaction,[]


In [233]:
sample_mean = filtered_df['target'].mean()
sample_standard_deviation = filtered_df['target'].std()
test_char = ((sample_mean + 1)/sample_standard_deviation)*(np.sqrt(count))
test_char

2.1096174003329065

### Confidence Interval for Mean:

In [234]:
t = 2.704
mean_interval_min = sample_mean - t*(sample_standard_deviation)/(np.sqrt(count))
mean_interval_max = sample_mean + t*(sample_standard_deviation)/(np.sqrt(count))
Confidence_Interval = (mean_interval_min, mean_interval_max)
print(f"Confidence Interval = {Confidence_Interval}")

Confidence Interval = (-1.0622895531304988, -0.49554701470758755)


## False(-ve sentiment accepted)

In [235]:
count = df['text'].str.lower().str.count(r'\bfalse\b').sum()
count

9

In [236]:
filtered_df = df[df['text'].str.contains(r'\bfalse\b', case=False, na=False)]
filtered_df.head()

Unnamed: 0,target,id,date,flag,user,text,mentions
3502,-1.001141,2004492253,Tue Jun 02 08:17:07 PDT 2009,NO_QUERY,thanate7,craftster sweets4ever false advertising suitca...,[]
18124,0.998861,1753921652,Sun May 10 04:08:21 PDT 2009,NO_QUERY,red_fairy,thejayson thats false,[]
42359,-1.001141,2061412838,Sat Jun 06 20:48:59 PDT 2009,NO_QUERY,katekillgour,hating look nice outside get flippin freezing ...,[]
47530,0.998861,1932529678,Tue May 26 21:39:20 PDT 2009,NO_QUERY,dannigyrl,dynamicdiva42 personally lash thing dont wear ...,[]
63128,-1.001141,2066772610,Sun Jun 07 11:08:36 PDT 2009,NO_QUERY,one_by_one,stupid trip bletchley park tomorrow apprentice...,[]


In [237]:
sample_mean = filtered_df['target'].mean()
sample_standard_deviation = filtered_df['target'].std()
test_char = ((sample_mean + 1)/sample_standard_deviation)*(np.sqrt(count))
test_char

1.5079777661312845

### Confidence Interval for Mean:

In [238]:
t = 3.355
mean_interval_min = sample_mean - t*(sample_standard_deviation)/(np.sqrt(count))
mean_interval_max = sample_mean + t*(sample_standard_deviation)/(np.sqrt(count))
Confidence_Interval = (mean_interval_min, mean_interval_max)
print(f"Confidence Interval = {Confidence_Interval}")

Confidence Interval = (-1.542973852471591, 0.4295820178782306)


## Admit(Neutral sentiment accepted)

In [239]:
count = df['text'].str.lower().str.count(r'\badmit\b').sum()
count

40

In [240]:
filtered_df = df[df['text'].str.contains(r'\badmit\b', case=False, na=False)]
filtered_df.head()

Unnamed: 0,target,id,date,flag,user,text,mentions
33,-1.001141,1999945855,Mon Jun 01 21:06:49 PDT 2009,NO_QUERY,nailbiter2,rocdoogie ill admit maybe ill feel better tomo...,[]
2609,0.998861,2067143151,Sun Jun 07 11:49:32 PDT 2009,NO_QUERY,NatalieBabess_x,valberg1 problem theyll miss really dont wanna...,[]
3753,-1.001141,2213527696,Wed Jun 17 15:29:43 PDT 2009,NO_QUERY,CanarsieFlossy,get started mid term admit im little nervous,[]
4010,0.998861,1968749501,Fri May 29 22:07:33 PDT 2009,NO_QUERY,TankaBar_Linda,rosevillerockln pretty darn good must admit wi...,[]
8393,-1.001141,1759771797,Sun May 10 19:56:32 PDT 2009,NO_QUERY,tracyfisher67,ok admit watching apprentice first watched fin...,[]


In [241]:
sample_mean = filtered_df['target'].mean()
sample_standard_deviation = filtered_df['target'].std()
test_char = ((sample_mean + 0)/sample_standard_deviation)*(np.sqrt(count))
test_char

2.7177727848368787

### Confidence Interval for Mean:

In [242]:
t = 2.704
mean_interval_min = sample_mean - t*(sample_standard_deviation)/(np.sqrt(count))
mean_interval_max = sample_mean + t*(sample_standard_deviation)/(np.sqrt(count))
Confidence_Interval = (mean_interval_min, mean_interval_max)
print(f"Confidence Interval = {Confidence_Interval}")

Confidence Interval = (0.002021293523987555, 0.7956992248349739)


## Calm(+ve sentiment rejected)

In [243]:
count = df['text'].str.lower().str.count(r'\bcalm\b').sum()
count

35

In [244]:
filtered_df = df[df['text'].str.contains(r'\bcalm\b', case=False, na=False)]
filtered_df.head()

Unnamed: 0,target,id,date,flag,user,text,mentions
15,0.998861,1834598659,Mon May 18 03:32:38 PDT 2009,NO_QUERY,andylackie,wizely Laughing Out Loud calm got 30day loan o...,[]
3665,0.998861,1565432946,Mon Apr 20 06:44:46 PDT 2009,NO_QUERY,michelle_dunlap,aeremaee good luck stay calm professional youl...,[]
3922,-1.001141,1980415360,Sun May 31 06:40:38 PDT 2009,NO_QUERY,creativedbq,prefer calm sunday bell amp bird crude obnoxio...,[]
6330,0.998861,1974364432,Sat May 30 12:59:10 PDT 2009,NO_QUERY,nikk29,enjoying much deserved day calm summer school ...,[]
6333,0.998861,1973655012,Sat May 30 11:34:28 PDT 2009,NO_QUERY,Ellyn_Carnall,joe calm joe breathe breathe,[]


In [245]:
sample_mean = filtered_df['target'].mean()
sample_standard_deviation = filtered_df['target'].std()
test_char = ((sample_mean - 1)/sample_standard_deviation)*(np.sqrt(count))
test_char

-4.218782809863097

## Ghost(-ve sentiment rejected)

In [246]:
count = df['text'].str.lower().str.count(r'\bghost\b').sum()
count

36

In [247]:
filtered_df = df[df['text'].str.contains(r'\bghost\b', case=False, na=False)]
filtered_df.head()

Unnamed: 0,target,id,date,flag,user,text,mentions
985,0.998861,1684793209,Sat May 02 22:03:57 PDT 2009,NO_QUERY,workmanxx2012,ksbong scrabble fun pac man make nervous play ...,[]
2104,-1.001141,1967169014,Fri May 29 19:05:48 PDT 2009,NO_QUERY,heydecember,chinesei cant believe last ep ghost whisperer ...,[]
2479,0.998861,2062755622,Sun Jun 07 00:11:38 PDT 2009,NO_QUERY,Barbie1270,wow im youtube check ouut type ghost picture s...,[]
8117,-1.001141,1997593646,Mon Jun 01 17:07:13 PDT 2009,NO_QUERY,Hawser24,mattdavelewis awwwwwwww im watchin ghost chann...,[]
8416,0.998861,1694687191,Mon May 04 03:43:20 PDT 2009,NO_QUERY,alex_bettylou,waiting last movie finish test screening ghost...,[]


In [248]:
sample_mean = filtered_df['target'].mean()
sample_standard_deviation = filtered_df['target'].std()
test_char = ((sample_mean + 1)/sample_standard_deviation)*(np.sqrt(count))
test_char

5.284712401794339

## Heat(Neutral sentiment rejected)

In [249]:
count = df['text'].str.lower().str.count(r'\bheat\b').sum()
count

87

In [250]:
filtered_df = df[df['text'].str.contains(r'\bheat\b', case=False, na=False)]
filtered_df.head()

Unnamed: 0,target,id,date,flag,user,text,mentions
2744,0.998861,1969289746,Fri May 29 23:34:34 PDT 2009,NO_QUERY,PinkBerryGirl,paulharriott awww sound like lot work hope sta...,[]
3435,-1.001141,1983306628,Sun May 31 12:54:19 PDT 2009,NO_QUERY,SarahLx,bad heat rash body sucksss buttt im going watc...,[]
4191,-1.001141,2017608705,Wed Jun 03 08:40:04 PDT 2009,NO_QUERY,levkaizer,would rather heat massive loud air con unit ri...,[]
4392,-1.001141,1694485846,Mon May 04 02:50:42 PDT 2009,NO_QUERY,WayneMcNicol,propper gutted heat went last night,[]
4452,-1.001141,2063382158,Sun Jun 07 02:26:00 PDT 2009,NO_QUERY,lovelylaura1982,liking rain cold wheres heat sunshine gone,[]


In [251]:
sample_mean = filtered_df['target'].mean()
sample_standard_deviation = filtered_df['target'].std()
test_char = ((sample_mean + 0)/sample_standard_deviation)*(np.sqrt(count))
test_char

-6.165997788441167