In [1]:
%pip install pandas

Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
import numpy as np
from tqdm import tqdm

In [3]:
train = pd.read_csv("../../../data/real_or_not_nlp/train.csv")

In [4]:
train.head()

Unnamed: 0,id,keyword,location,text,target
0,1,,,Our Deeds are the Reason of this #earthquake M...,1
1,4,,,Forest fire near La Ronge Sask. Canada,1
2,5,,,All residents asked to 'shelter in place' are ...,1
3,6,,,"13,000 people receive #wildfires evacuation or...",1
4,7,,,Just got sent this photo from Ruby #Alaska as ...,1


In [5]:
train.describe()

Unnamed: 0,id,target
count,7613.0,7613.0
mean,5441.934848,0.42966
std,3137.11609,0.49506
min,1.0,0.0
25%,2734.0,0.0
50%,5408.0,0.0
75%,8146.0,1.0
max,10873.0,1.0


In [6]:
train.keyword.unique()

array([nan, 'ablaze', 'accident', 'aftershock', 'airplane%20accident',
       'ambulance', 'annihilated', 'annihilation', 'apocalypse',
       'armageddon', 'army', 'arson', 'arsonist', 'attack', 'attacked',
       'avalanche', 'battle', 'bioterror', 'bioterrorism', 'blaze',
       'blazing', 'bleeding', 'blew%20up', 'blight', 'blizzard', 'blood',
       'bloody', 'blown%20up', 'body%20bag', 'body%20bagging',
       'body%20bags', 'bomb', 'bombed', 'bombing', 'bridge%20collapse',
       'buildings%20burning', 'buildings%20on%20fire', 'burned',
       'burning', 'burning%20buildings', 'bush%20fires', 'casualties',
       'casualty', 'catastrophe', 'catastrophic', 'chemical%20emergency',
       'cliff%20fall', 'collapse', 'collapsed', 'collide', 'collided',
       'collision', 'crash', 'crashed', 'crush', 'crushed', 'curfew',
       'cyclone', 'damage', 'danger', 'dead', 'death', 'deaths', 'debris',
       'deluge', 'deluged', 'demolish', 'demolished', 'demolition',
       'derail', 'der

In [7]:
from collections import Counter

In [8]:
Counter(train.keyword)

Counter({nan: 61,
         'ablaze': 36,
         'accident': 35,
         'aftershock': 34,
         'airplane%20accident': 35,
         'ambulance': 38,
         'annihilated': 34,
         'annihilation': 29,
         'apocalypse': 32,
         'armageddon': 42,
         'army': 34,
         'arson': 32,
         'arsonist': 34,
         'attack': 36,
         'attacked': 35,
         'avalanche': 30,
         'battle': 26,
         'bioterror': 37,
         'bioterrorism': 30,
         'blaze': 38,
         'blazing': 34,
         'bleeding': 35,
         'blew%20up': 33,
         'blight': 32,
         'blizzard': 37,
         'blood': 35,
         'bloody': 35,
         'blown%20up': 33,
         'body%20bag': 33,
         'body%20bagging': 33,
         'body%20bags': 41,
         'bomb': 34,
         'bombed': 38,
         'bombing': 29,
         'bridge%20collapse': 35,
         'buildings%20burning': 35,
         'buildings%20on%20fire': 33,
         'burned': 33,
         'bu

In [9]:
train.location.unique()

array([nan, 'Birmingham', 'Est. September 2012 - Bristol', ...,
       'Vancouver, Canada', 'London ', 'Lincoln'], dtype=object)

In [10]:
Counter(train.target)

Counter({1: 3271, 0: 4342})

In [11]:
train.text

0       Our Deeds are the Reason of this #earthquake M...
1                  Forest fire near La Ronge Sask. Canada
2       All residents asked to 'shelter in place' are ...
3       13,000 people receive #wildfires evacuation or...
4       Just got sent this photo from Ruby #Alaska as ...
                              ...                        
7608    Two giant cranes holding a bridge collapse int...
7609    @aria_ahrary @TheTawniest The out of control w...
7610    M1.94 [01:04 UTC]?5km S of Volcano Hawaii. htt...
7611    Police investigating after an e-bike collided ...
7612    The Latest: More Homes Razed by Northern Calif...
Name: text, Length: 7613, dtype: object

In [12]:
import re
from string import punctuation
from nltk.corpus import stopwords

from nltk.stem import PorterStemmer
from nltk.tokenize import TweetTokenizer

english_stopwords = stopwords.words('english')
stemmer = PorterStemmer()

def transform_tweet(text):
    text = re.sub("#", "", text)
    text = re.sub("\@[a-zA-Z_-]+", "", text)
    text = re.sub("^\s+", "", text)
    text = re.sub("\s+$", "", text)
    text = re.sub(f'[{punctuation}]', "", text)
    
    tokenizer = TweetTokenizer()
    words = tokenizer.tokenize(text)
    words = [word for word in words if word not in english_stopwords]
    words = [stemmer.stem(word) for word in words]
    
    return words

In [13]:
transform_tweet(train.iloc[10].text)

['three', 'peopl', 'die', 'heat', 'wave', 'far']

In [14]:
transformed_text = [transform_tweet(text) for text in train.text]

In [15]:
from tqdm import tqdm

In [16]:
freqs = {}
for index, sentence in tqdm(train.iterrows()):
    for word in transformed_text[index]:
        if sentence.target == 1:
            freqs[(word, 1.0)] = freqs.get((word, 1.0), 0) + 1
        else:
            freqs[(word, 0.0)] = freqs.get((word, 1.0), 0) + 1

7613it [00:01, 3885.98it/s]


In [17]:
freqs

{('our', 1.0): 16,
 ('deed', 1.0): 1,
 ('reason', 1.0): 8,
 ('earthquak', 1.0): 47,
 ('may', 1.0): 50,
 ('allah', 1.0): 6,
 ('forgiv', 1.0): 1,
 ('us', 1.0): 14,
 ('forest', 1.0): 50,
 ('fire', 1.0): 270,
 ('near', 1.0): 49,
 ('La', 1.0): 11,
 ('rong', 1.0): 1,
 ('sask', 1.0): 1,
 ('canada', 1.0): 10,
 ('all', 1.0): 22,
 ('resid', 1.0): 9,
 ('ask', 1.0): 5,
 ('shelter', 1.0): 6,
 ('place', 1.0): 17,
 ('notifi', 1.0): 1,
 ('offic', 1.0): 41,
 ('No', 1.0): 34,
 ('evacu', 1.0): 101,
 ('order', 1.0): 24,
 ('expect', 1.0): 22,
 ('13000', 1.0): 4,
 ('peopl', 1.0): 106,
 ('receiv', 1.0): 4,
 ('wildfir', 1.0): 80,
 ('california', 1.0): 115,
 ('just', 1.0): 19,
 ('got', 1.0): 29,
 ('sent', 1.0): 4,
 ('photo', 1.0): 28,
 ('rubi', 1.0): 1,
 ('alaska', 1.0): 5,
 ('smoke', 1.0): 14,
 ('pour', 1.0): 2,
 ('school', 1.0): 32,
 ('rockyfir', 1.0): 4,
 ('updat', 1.0): 36,
 ('hwi', 1.0): 10,
 ('20', 1.0): 14,
 ('close', 1.0): 29,
 ('direct', 1.0): 6,
 ('due', 1.0): 24,
 ('lake', 1.0): 9,
 ('counti', 1.0):

In [18]:
# EVALUATION

In [19]:
test = pd.read_csv("../../../data/real_or_not_nlp/test.csv")

In [20]:
test.head()

Unnamed: 0,id,keyword,location,text
0,0,,,Just happened a terrible car crash
1,2,,,"Heard about #earthquake is different cities, s..."
2,3,,,"there is a forest fire at spot pond, geese are..."
3,9,,,Apocalypse lighting. #Spokane #wildfires
4,11,,,Typhoon Soudelor kills 28 in China and Taiwan


In [21]:
test.describe()

Unnamed: 0,id
count,3263.0
mean,5427.152927
std,3146.427221
min,0.0
25%,2683.0
50%,5500.0
75%,8176.0
max,10875.0


In [22]:
# for _, row in test.iterrows():
#     transformed_tweet = transform_tweet(row.text)
#     print(transformed_tweet)

In [23]:
logprior = np.log(sum(train.target == 1)) - np.log(sum(train.target == 0))
V = len(train)
N_pos = sum(train.target == 1)
N_neg = sum(train.target == 0)

In [24]:
print(logprior)
print(V)
print(N_pos)
print(N_neg)

-0.28323932289985443
7613
3271
4342


In [25]:
def evaluate_tweet(transformed_tweet):
    result = logprior
    
    for word in transformed_tweet:
        freq_pos = freqs.get((word, 1.0), 0.0)
        freq_neg = freqs.get((word, 0.0), 0.0)
        
        result += (np.log((freq_pos + 1.0) / float(N_pos + V)) - np.log((freq_neg + 1.0) / float(N_neg + V)))
        
    return result

In [26]:
#evaluate_tweet(transform_tweet(test.iloc[2]['text']))
results = []
for index, tweet in tqdm(test.iterrows()):
    print(tweet.text)
    result = evaluate_tweet(transform_tweet(tweet.text))
    print(result)
    results.append(int(result >= 0))
    
test['target'] = results

176it [00:00, 754.19it/s]

Just happened a terrible car crash
0.20508546118490756
Heard about #earthquake is different cities, stay safe everyone.
0.1524611543602763
there is a forest fire at spot pond, geese are fleeing across the street, I cannot save them all
3.70929625257574
Apocalypse lighting. #Spokane #wildfires
0.8935533806385108
Typhoon Soudelor kills 28 in China and Taiwan
3.5172427461571463
We're shaking...It's an earthquake
0.08533939405764013
They'd probably still show more life than Arsenal did yesterday, eh? EH?
-0.012019979800116154
Hey! How are you?
-0.35257286591116976
What a nice hat?
-0.5902753077406651
Fuck off!
-0.12039067142361404
No I don't like cold!
0.09965696607298336
NOOOOOOOOO! Don't do that!
-0.11494584877837877
No don't tell me that!
0.10027078821517676
What if?!
-0.2183710797838181
Awesome!
-0.5948486510187312
Birmingham Wholesale Market is ablaze BBC News - Fire breaks out at Birmingham's Wholesale Market http://t.co/irWqCEZWEU
1.1880857563936265
@sunkxssedharry will you wear sho

383it [00:00, 876.33it/s]


RT @TheAdvocateMag: Owner of Chicago-Area Gay Bar Admits to Arson Scheme http://t.co/wHTMwtgROJ #p2 #LGBT
-1.6022010001811626
#LGBTQ News ?? Owner of Chicago-Area Gay Bar Admits to Arson Scheme: Frank Elliott pleaded... http://t.co/sGb9vNWqUx Via @TheAdvocateMag
-1.61686671518844
Mourning notices for stabbing arson victims stir Û÷politics of griefÛª in Israel: Posters for Shira Banki and A... http://t.co/6o92wDfcLu
1.010708171104024
Contra Costa Co. authorities arrest suspected serial arsonist: Authorities believe thatÛ_ http://t.co/bzCmzM7bi5 | http://t.co/LBQldyKgdp
0.4390418781855816
Owner of Chicago-Area Gay Bar Admits to Arson Scheme: Frank Elliott pleaded guilty to hiring an arsonist to to... http://t.co/L82mrYxfNK
-2.592985482777163
@local_arsonist @Cloudy_goldrush Man what ???? they be on some other shit
-0.14431792709070912
mo the way she says 'carry' https://t.co/vQzRUTHRNU
-0.2501971897283317
Arsonist Sets NYC Vegetarian Restaurant on Fire: Police #NewYork - http://t.co/

571it [00:00, 908.56it/s]


1.0409228311818506
The Yankees are the reason I'm on blood pressure medicine
-1.089190342767124
Why can't gay men donate blood? http://t.co/v2Etl8P9eQ http://t.co/NLnyzeljbw
0.14928615430759073
My new mashup of 'Bad Blood x Where Are U Now' comes out tomorrow!!! Make sure t... (Vine by @Shaylen_Carroll) https://t.co/zB6GZJLgkl
-0.6123637602802985
1860 BLOOD'S PENNY POST Philly OCTAGONAL CDS 1Ìâå¢ Black 15L18 Grid Cancel + #26! http://t.co/rpV4MQSCQA http://t.co/JrjEiBUGsE
3.0873205488448834
seeing more blood and surgery than a regular GP behind a desk and they say dentists aren't  real doctors .... Ok
-1.364750059065818
Sometimes blood ain't no thicker than water and sometimes family will bring you down quicker than strangers ???????
-1.4249428543535787
Name: Chizu
Gender: Male
Age: 10
Hair: Red
Eyes: Pink
Dere Type: Pasokon
Blood Type: Type O
http://t.co/cOyPF9ACTd
0.8505402629891652
The whole of New Zealand is shouting 'Bloody Marvellous'! John Campbell to join Radio NZ http://t.co/

758it [00:00, 920.29it/s]

-0.6443875745012955
WoW Legion ÛÒ Slouching Towards The Broken Isles: Warlords of Draenor wasnÛªt close enough to The Burning Crusad... http://t.co/RKpmoMQMUi
-0.17681519772027698
@TheVenskus you led a killer workout today @TheSweatShoppe my core is burning. Now #gymtime #worktime #auditiontime #roccotime #noBS
-0.5817439015223016
Peddle to the floorboard.. End up in a four door. Burning up a backroad song... Park it and we pile outÛ_ Baby watch your step nowÛ_
0.10233707324043717
Burning bridges is my forte!
0.001162507800192003
RT: A real burn book entry about CA: why the fuck is this place always BURNING
1.093234615711217
fear the fever Can you feel it now? I feel the fire Burning belo-ooow It's gonna trick ya Swallow you who-ooo-ole #MTVHottest Justin Bieber
-2.17571214287461
the Burning Legion has returned
0.22632830449517094
The Arab autocracies: Burning down their house http://t.co/xcjRamGQ22 via @TheEconomist
0.16130040610944585
@TarekFatah you are burning in enemity of Pak

848it [00:00, 873.43it/s]

8.997604601597025
Motorcyclist bicyclist injured in Denver collision on Broadway: At least two people were taken to a localÛ_ http://t.co/PMv8ZDFnmr
9.859513361480694
Traffic Collision - Ambulance Enroute: Florin Rd at Franklin Blvd South Sac http://t.co/dYEl9nMQ0A
5.585845685799279
The Denver Post - Motorcyclist bicyclist injured in Denver collision on Broadway http://t.co/yjrIi5mHii
11.078781350143466
that collision daaamn
-0.024068798939133274
Marin Sr37 / Sr121 **Trfc Collision-Unkn Inj** http://t.co/yqJVEVhSzx
2.9640183024709925
Riverside I15 N Sr91 E Con / I15 N Sr91 W Con **Trfc Collision-Unkn Inj** http://t.co/QqMLKvgPQk
3.3171796215960967
Techerit: Anti Collision Rear- #gadget #technology http://t.co/v3a5ZQaRFg
-0.6395891153450499
Anti Collision Rear- #innovation #gadgets http://t.co/YXD4c4XlGo
-1.346549368220746
Mom fights to recover in hospital from fiery car crash while kids miss her at home - http://t.co/0UH26R2zfX
1.1755116001644517
No citation for Billings police office

1021it [00:01, 799.63it/s]

0.1846082897048733
@tim55081 @BootlegAlbano @ShaunKing I know. He profits off the deaths of the people he claims to be fighting for. He makes me sick.
1.6421717380012901
@mohammedzismail because there was more deaths by regular gunsmissilesbombs and what not warfare than the A-bombs used in A war.
1.8206094396778418
Infections driving up neonatal deaths ÛÒ Health Minister #Guyana http://t.co/ImugNXrEBN
-0.045782912682521104
Bigamist and his Û÷firstÛª wife are charged in the deaths of his Û÷secondÛª pregnant wife her child 8 her mother her nephew 1 and their uÛ_
-0.01624385190284805
MSHA stepping up enforcement after mining deaths: http://t.co/4HM34lrUER via @elkodaily
0.14935746285078366
Bigamist and his 'first' wife are charged in the deaths of his 'second' pregnant wife her child 8 her mothe... http://t.co/RH15gGSgQG
-0.9750322300607905
Is a wing part enough to solve the MH370 mystery?: http://t.co/ys2bveKlxK
7.033402173224754
Debris Found in Indian Ocean could be Malaysia Flig

1216it [00:01, 882.52it/s]

1.1328528054065603
And the 10 horns which u saw on the beast these will hate the harlet make her desolate and naked eat her flesh and burn her with fire
-0.5136484058461606
@WilliamTCooper TY for the follow Go To http://t.co/l9MB2j5pXg BRUTALLY ABUSED+DESOLATE&amp;LOST + HER LOVELY MUM DIES..Is it Murder? Pls RT
-1.20852443991466
@mtnredneck It does when you are searching on a desolate part of the beach with no cell service and a mile away from your car.
3.0816185092923947
Why are you feeling desolate? Take the quiz: http://t.co/j4lM2ovoOs http://t.co/banrVjoTlf
0.09744862273843413
@peregrinekiwi @boymonster I recall CP2020 Australia was a blighted desolate corporate
0.29746077897017953
Now Playing Desolation Wilderness by Kodak To Graph
-1.0132966008725415
desolation #bored
-0.463252543046595
I liked a @YouTube video from @iglxenix http://t.co/dcxjIJtyYJ Desolation PvP: Beacon Blast Bombardment 3 (Minecraft Xbox One)
0.16334975042040378
Obama 2016? - The Abomination of Desolation' htt

1419it [00:01, 890.22it/s]

-0.8133774675431935
drown me in clementines
-0.09552776292127785
When I genuinely like someone I REALLYYY like them and I have to hold back or else I'll drown them with affection and attention ??
1.0210218541432439
Fuck around and drown ???? https://t.co/fr5z9WklMZ
0.19787403469359077
absolute drown your Wagyu steak in heaping piles of moist Smuckers Grape Jam
-2.1172702452467513
Drown me you make my heart beat like the rain.
0.16024351126729108
@Vanquiishher it can legit drown
-0.5009928710294433
Yeah I'm definitely just gonna go drown myself in the pool bc it's just too damn hot inside
0.4618302240746539
@GraysonDolan I'll fall and drown so I think I'll pass
0.13885574879353735
Hundreds feared drowned as migrant boat capsizes off Libya: Hundreds of migrants are feared to have drowned af... http://t.co/bF3OhacB1r
12.375380013582896
100s of migrants feared drowned after 700 squeeze onto boat in Mediterranean.  http://t.co/lsyPtk18se
8.593257764212257
Hundreds feared drowned as migrant 

1611it [00:01, 921.17it/s]


-0.10547296286797359
happy Justin makes my heart explode
-0.8401468256507849
Vanessa was about to explode! This is what she wanted to say to Shelli. Their alliance will survive. #BB17 #BBLF http://t.co/rypGKScHng
-2.2333211278768106
My ears are gonna explode smh
-0.5265256893704606
Some guys explode ??
-0.04946264676833945
@magicallester I will die. I'm actually being serious. My heart will beat so fast it will fly out off my chest &amp; explode
0.9846524291708705
Facebook Influence 2.0: Simple strategy to explode your Facebook page and create moreÛ_ http://t.co/rWPRtMIbHl
-1.5831977217639173
im sooooooo full my stomach is going to explode
-0.518612339181896
I need a follow before I explode @GraysonDolan
0.11630269088986189
I feel like I'm going to explode with excitement! Wonder begins within the hourÛ_Û_ https://t.co/zDZJ5kRbzr
0.3588174674988944
Twitter will explode...light the match @realmandyrain
-0.2893540553837699
The fact checking machine must have exploded today following 

1815it [00:01, 970.83it/s]


-2.475289715548305
2pcs 18W CREE Led Work Light  Offroad Lamp Car Truck Boat Mining 4WD FLOOD BEAM - Full reaÛ_ http://t.co/yo9q6WxweU http://t.co/n581wQqyAS
-1.865523889977231
How did I know as soon as I walked out of class that Calgary would flood again today
0.6926670122080196
2pcs 18W CREE Led Work Light  Offroad Lamp Car Truck Boat Mining 4WD FLOOD BEAM - Full reaÛ_ http://t.co/Yrd3nPC9V0 http://t.co/AnRd0VIfwK
-1.1078381882797146
12' 72W CREE LED Work Light Bar Alloy Spot Flood Combo Diving Offroad 4WD Boat - Full readÛ_ http://t.co/8Mk9TD4RRL http://t.co/W20rH3Ai9J
-2.36804606323685
Myanmar Flood Victims Need More Relief Aid and Food: Regions struggle with flood aftermath and dwindling suppl... http://t.co/O5adXdNnII
9.340522825381983
Flash Flood Watch in effect through 7:00am Thursday morning/12:00pm Thursday afternoon.
For: Perry Wayne Cape... http://t.co/fs7vro5seS
4.902484141493358
12' 72W CREE LED Work Light Bar Alloy Spot Flood Combo Diving Offroad 4WD Boat - Full read

2017it [00:02, 984.71it/s]


0.4425985896015945
#hot  Funtenna: hijacking computers to send data as sound waves [Black Hat 2015] http://t.co/XU9u3NNxpK #prebreak #best
0.7406620850525449
#hot  Funtenna: hijacking computers to send data as sound waves [Black Hat 2015] http://t.co/wG1rt0kB4g #prebreak #best
0.7406620850525449
Who is Tomislav Salopek the Islamic State's Most Recent Hostage? http://t.co/puT3LgsDnf
-1.1854397789348425
Chris Brown's aunft was 'held hostage in a closet by armed intruders at his LA home while sin http://t.co/i1Fhb8QSZ7 http://t.co/7C5BG48BRJ
0.0596104650437681
Islamic State threatens to kill another hostage: @DailyGleaner @TJProvincial @TJGreaterSJ http://t.co/dfPLiqjMk2
3.492310751837535
Ya kisses hold me hostage &amp; I don't wanna stop it. I only wanna give it to you
0.3793156434618963
Pakistani Terrorist Was Captured By Villagers He Took Hostage - NDTV http://t.co/C5X10JAkGE
9.671170597848622
Oh god Diane is following them to the hostage exchange in a taxi. 
#RandomActsOfRomance
3.26

2222it [00:02, 982.92it/s]

1.8070804363571034
Sooooo shooting up movie theaters the new mass murderer wave??
3.519001183315198
@AParra210 So you are stereotyping the people. Are you a mass murderer like today's shooter?
1.0452045338722602
So wait he really not gonna tell his wife that he is a mass murderer now and a cheat #derailed @itv2...I'm done with this film ????
0.5094301031701036
This is worth a re-read: 'A MASS-MURDERER' By Stuart Syvret http://t.co/C9UAyjrXt9 via @gojam_i_am
-0.030908134195183123
A Tomb Raider / Uncharted game where Lara Croft / Nathan Drake is finally on the run for being a ruthless mass murderer in the recent games.
-0.6444979323418725
I am disgusted by @libertarianism and @CatoInstitute for celebrating a mass murderer of innocents today.
-0.5950269635533165
Mass murderer had sex without touching each other or taking their clothes off because he was so fanatical on hygiene http://t.co/RfewQRGzbs
-0.6166476984867746
Sounds pretty sensible for a mass murderer... https://t.co/qZZxFYSIU1


2413it [00:02, 899.52it/s]

3.718474627886211
This beautifully rapturous facade that we've poured life into has itself given birth to a deplorable fate our total obliteration.
1.010507998480822
Refugio oil spill may have been costlier bigger than projected: A Plains All American PipelineÛ_ http://t.co/sryaFqj9gZ #globetrottingwino
13.874510947764842
Refugio oil spill may have been costlier bigger than projected - http://t.co/6aUOT9vaIS  http://t.co/j2SyewbHE2
9.599770566825773
#breaking #LA Refugio oil spill may have been costlier bigger than projected http://t.co/5ueCmcv2Pk
10.385627145489988
National Briefing | West: California: Spring Oil Spill Estimate Grows: Documents released on Wednesday disclos... http://t.co/PsYWDxCCmj
5.500427276394338
Got the gas and the splash like an oil spill.
1.3514658344715933
PODCAST: Oil spill anniversary http://t.co/wVdAVXTDaq
-0.13632805339308351
'California: Spring Oil Spill Estimate Grows ' by THE ASSOCIATED PRESS via NYT http://t.co/gPKkHhBRIy
5.009197928273707
Refugio oil

2505it [00:02, 870.24it/s]

-1.524002309952242
@wcvh01 @1233newcastle @aaronkearneyaus @LesMcKeownUK  PS I notice you are a vet?? I rescue kitties ?? .thank you for your service ??????????
-0.40373912450489957
Officials rescue 367 migrants off Libya; 25 bodies found - Fox News http://t.co/9CAignPR6S #NoAgenda #MvDint
8.52823438368962
kayaking about killed us so mom and grandma came to the rescue.
-0.5213587158745838
8' MTech Assisted Open RESCUE Pocket Knife - NEW BLUE MT-A801BL zix http://t.co/51n2rZEBis http://t.co/P4lNbjDo0x
-0.8300889301394525
Beauty Deals : http://t.co/eUd317Eptp #4552 Lot of 50Mixed Colors 7.5' Scissors FirstAid Princess Care Rescue TrÛ_ http://t.co/mAHkV79SmW
-0.860560707346961
If a picture is worth a thousand words what would this one say??

How a Lexington veterinarian's passion for... http://t.co/Y8dY39OhAL
0.04952735192345159
Revel8ion Media recently went out to support the 'Rocky Fire' rescue efforts. This is what we filmed.
#RockyFire  http://t.co/Haxz4XLs8d
3.1775980693166885
I don

2702it [00:02, 856.88it/s]

1.7629530777766558
Sinkhole Selfies: You Wont Believe What's In The Brooklyn Sinkhole!: 
        Sinkhole Selfies: You Wont Belie... http://t.co/OFEbaKatNh
-0.17192611829058135
Large sinkhole swallows entire pond in Lowndes County Georgia http://t.co/20Gi4Gyud5
6.628535773050863
A sinkhole grows ... in Brooklyn? http://t.co/UiqKDdVwKz http://t.co/aa8FwtOHyJ
0.5821574710344102
Giant Sinkhole Swallows NYC Intersection http://t.co/ozIZdsDWP4 via @NewsBeatSocial
2.146852891068402
A sinkhole grows in Brooklyn: six-meter crater swallows street http://t.co/LTLh53aRN4
1.9115287713447149
The sinkhole that ate Brooklyn http://t.co/28r2IgxmIE
-0.20805687076940638
Watch This Giant Sinkhole Form on New York City Street http://t.co/BEnWu5IARa
0.8644134892942654
Sinking a little slower everyday ?? @ Muh Pond https://t.co/KuA48GdREL
2.0029368572948316
If you're lost and alone
Or you're sinking like a stone.
Carry on
May your past be the sound
Of your feet upon the ground
-0.8033848955262437
@andreajma

2888it [00:03, 886.12it/s]

2.4410195881937393
@samfbiddle If Gawker so badly wants to commit suicide why don't you just volunteer your offices to test the new F35's bombing capabilities.
1.9900221810427645
#ISIS claims credit for suicide bombing 'with 6 tons of explosives' at the central command of #Syria(n) T4 airbase http://t.co/QOaU2QLJ3B
6.06256483682212
IF SUICIDE BOMBING WASTHE SMARTEST THING2 DO FOR ALLAH/GODJESUS/THE HOLY PROPHET MUHAMMAD COULD HAVE KILLEDSOMEBODY? http://t.co/tGfWuVVHxj
0.5127509735779974
Kurdish Militants Target Turkish Military In Suicide Attack - Huffington Post http://t.co/03bJm4ORoW
6.259253077578653
@PieroCastellano @nipped suicide bombing civilians in major TR cities yet?
0.7039476292196651
Remembering Sgt.-Maj. Roni Ghanem 28 of Maghar murdered by Hamas terrorists in the suicide bombing of Egged bus No. 361 13 years ago today
7.916655638030183
Turkish troops killed in Kurdish militant 'suicide attack' http://t.co/q1eIXnjjpt
6.810159597110334
@mehdirhasan percentages of young Mus

3064it [00:03, 828.99it/s]

14.701799278513867
Rly tragedy in MP: Some live to recount horror: ÛÏWhen I saw coaches of my train plunging into water I c... http://t.co/AWWg8kNMZg Over.
15.981278724160894
Rly tragedy in MP: Some live to recount horror: ÛÏWhen I saw coaches of my train plunging into water I called ... http://t.co/HR4GNyGSiC
14.008652097953922
@LauraE303B @SheilaGunnReid A war we'll never win? There r 2500 Yezidi women trapped as slaves.We know where they are. Doing nothing is evil
0.5026196006497088
Think of it as a reality TV show I tell myself to quell the panic rising in my throat. Introvert: Trapped in Tokyo. The 150-square-foot
1.9269332216522574
Been signed off work for a week as i have a trapped nerve and can not move.! Life is a struggle right now.! ????
1.9262019866015931
Confused Westie Believes Dogs Are Trapped In The Computer
http://t.co/aWZoNRYc9y http://t.co/HwJ7GwAVvB
-0.12130495484229664
(#TeambrianMundial) Hollywood Movie About Trapped Miners Released in Chile: 'The 33' Holly... h

3234it [00:03, 824.62it/s]

0.3334565927340343
My new sounds: War Zone https://t.co/hNXRfqRk3P on #SoundCloud
-0.9245325893245964
Sammy is here in this war zone. Jamal spoke to me on the phone. Now my wife is next to speak to me ...what else can&gt; http://t.co/2CppfprxoG
0.12851557930723345
@thelovatoagent omg i feel like i am in a war zone
0.3037953423769828
I think it's time for a cup of tea and a scone before I tackle the war zone that is is my bedroom. I wonder how many condom packets Ill find
0.023981320592729727
Saipan looks like a war zone though
3.4660459962961765
T Shirts $10 male or female get wit me. 2 days until its game changin time. War Zone single will beÛ_ https://t.co/Z0poYR096J
1.238308550220757
Um OK '@dandre_5: Sundays during football seasonfrom about 9 am - 11 pm women shouldn't even log onshit be a complete war zone'
-1.7995071365393693
Just trying to find my my peace in a Zone of war
0.33841442582468595
Twitter is going to be a war zone today  https://t.co/1QHNqCZvod
0.2805611569875923
Th

3263it [00:03, 896.42it/s]


MEG issues Hazardous Weather Outlook (HWO) http://t.co/3X6RBQJHn3
4.09808843302343
#CityofCalgary has activated its Municipal Emergency Plan. #yycstorm
3.72167801881311





In [28]:
sum(test.target == 1)

2125

In [30]:
sum(test.target == 0)

1138

In [31]:
test

Unnamed: 0,id,keyword,location,text,target
0,0,,,Just happened a terrible car crash,1
1,2,,,"Heard about #earthquake is different cities, s...",1
2,3,,,"there is a forest fire at spot pond, geese are...",1
3,9,,,Apocalypse lighting. #Spokane #wildfires,1
4,11,,,Typhoon Soudelor kills 28 in China and Taiwan,1
...,...,...,...,...,...
3258,10861,,,EARTHQUAKE SAFETY LOS ANGELES ÛÒ SAFETY FASTE...,1
3259,10865,,,Storm in RI worse than last hurricane. My city...,1
3260,10868,,,Green Line derailment in Chicago http://t.co/U...,1
3261,10874,,,MEG issues Hazardous Weather Outlook (HWO) htt...,1


In [32]:
test.to_csv('submission_naive_bayes1.csv', columns=['id', 'target'], index=False)