In [1]:
import pandas as pd
import numpy as np
import json
import tweepy
import config
from sklearn.preprocessing import MinMaxScaler

In [40]:
df = pd.read_csv('all_hurricanes_tweets_train.csv')
df = df.drop_duplicates(subset=['text', 'class_label'], keep=False)  # drop duplicates in dataset
df['date'] =  pd.to_datetime(df['created_at'])
len(df)

13279

In [41]:
df1 = pd.read_csv('hurricane/hurricane_matthew_2016_train.tsv', delimiter='\t')
df1['hurricane'] = 'matthew'
df1['fatalities'] = 603
df1['damage(billion USD)'] = 16.47
df1.head()

Unnamed: 0,tweet_id,tweet_text,class_label,hurricane,fatalities,damage(billion USD)
0,783409770493571076,Horrifying. My heart breaks for Haiti. Thinkin...,sympathy_and_support,matthew,603,16.47
1,783683862018818049,Our thoughts and prayers are with those suffer...,sympathy_and_support,matthew,603,16.47
2,784696725285908481,#BreakingNews Hurricane Matthew kills over 800...,injured_or_dead_people,matthew,603,16.47
3,783549594416377856,Southern Haiti effectively cut off after the b...,other_relevant_information,matthew,603,16.47
4,783398732926779392,"Hurricane Matthew hits Haiti, aims at US East ...",other_relevant_information,matthew,603,16.47


In [42]:
df2 = pd.read_csv('hurricane/hurricane_irma_2017_train.tsv', delimiter='\t')
df2['hurricane'] = 'irma'
df2['fatalities'] = 52
df2['damage(billion USD)'] = 77.16
df2.head()

Unnamed: 0,tweet_id,tweet_text,class_label,hurricane,fatalities,damage(billion USD)
0,906347001712926721,An inside look at what its like inside a shelt...,displaced_people_and_evacuations,irma,52,77.16
1,908729925905068032,Our parent company @Realogy is matching $75K f...,rescue_volunteering_or_donation_effort,irma,52,77.16
2,907080161513889792,UNICEF is working to bring safe water &amp; sa...,other_relevant_information,irma,52,77.16
3,906964492193595392,Hurricane Irma ripping entire roofs off buildi...,infrastructure_and_utility_damage,irma,52,77.16
4,907227218593685506,Cosmic Kamer on @richardbranson as he reveals ...,infrastructure_and_utility_damage,irma,52,77.16


In [43]:
df3 = pd.read_csv('hurricane/hurricane_maria_2017_train.tsv', delimiter='\t')
df3['hurricane'] = 'maria'
df3['fatalities'] = 2982
df3['damage(billion USD)'] = 90
df3.head()

Unnamed: 0,tweet_id,tweet_text,class_label,hurricane,fatalities,damage(billion USD)
0,914134332226330625,San Juan: Trump lashes out with good reason. #...,other_relevant_information,maria,2982,90
1,910783670134476800,Hurricane Maria Live Updates: Catastrophic Flo...,caution_and_advice,maria,2982,90
2,912134938727780355,"Getting food to the island is, obviously, crit...",rescue_volunteering_or_donation_effort,maria,2982,90
3,910669838842056704,My heart breaks for the families in Puerto Ric...,sympathy_and_support,maria,2982,90
4,912287091026997248,"#B-FAST sending medical, reconstruction &amp; ...",rescue_volunteering_or_donation_effort,maria,2982,90


In [44]:
df4 = pd.read_csv('hurricane/hurricane_harvey_2017_train.tsv', delimiter='\t')
df4['hurricane'] = 'harvey'
df4['fatalities'] = 107
df4['damage(billion USD)'] = 125
df4.head()

Unnamed: 0,tweet_id,tweet_text,class_label,hurricane,fatalities,damage(billion USD)
0,903388763073720321,"Hurricane Harvey killed at least 38 people, bu...",injured_or_dead_people,harvey,107,125
1,901136425348759552,Harvey upped to Category 2 hurricane with 110+...,other_relevant_information,harvey,107,125
2,902853707607535616,A huge shoutout @TexasGuard for all the work y...,rescue_volunteering_or_donation_effort,harvey,107,125
3,902859729701044224,Our thoughts and prayers are with the people h...,sympathy_and_support,harvey,107,125
4,901405975856414720,Homes destroyed on Broadway St in Rockport. Pe...,infrastructure_and_utility_damage,harvey,107,125


In [48]:
df_h = pd.concat([df1, df2, df3, df4], ignore_index=True)
df_h = df_h.rename(columns={'tweet_text' : 'text'})
df_h

Unnamed: 0,tweet_id,text,class_label,hurricane,fatalities,damage(billion USD)
0,783409770493571076,Horrifying. My heart breaks for Haiti. Thinkin...,sympathy_and_support,matthew,603,16.47
1,783683862018818049,Our thoughts and prayers are with those suffer...,sympathy_and_support,matthew,603,16.47
2,784696725285908481,#BreakingNews Hurricane Matthew kills over 800...,injured_or_dead_people,matthew,603,16.47
3,783549594416377856,Southern Haiti effectively cut off after the b...,other_relevant_information,matthew,603,16.47
4,783398732926779392,"Hurricane Matthew hits Haiti, aims at US East ...",other_relevant_information,matthew,603,16.47
...,...,...,...,...,...,...
19203,902606726146482179,NEW: Officials confirm a family of 6 died afte...,injured_or_dead_people,harvey,107,125.00
19204,902657415895592960,"If youre asking, How can I help? in response t...",rescue_volunteering_or_donation_effort,harvey,107,125.00
19205,901835308290850817,RT @albyselkie: Astounding: local news station...,other_relevant_information,harvey,107,125.00
19206,901764351996153856,@juliettekayyem Hurricanes are supposed to blo...,caution_and_advice,harvey,107,125.00


In [56]:
df_train = df_h.merge(df, on = ['tweet_id', 'class_label'],  how = 'inner')
len(df_train)

13279

In [57]:
df_mat = df_train[df_train['hurricane'] == 'matthew']
scaler = MinMaxScaler(feature_range=(0, 100))
df_mat['norm_date'] = scaler.fit_transform(df_mat['date'].values.reshape(-1,1))

df_irma = df_train[df_train['hurricane'] == 'irma']
df_irma['norm_date'] = scaler.fit_transform(df_irma['date'].values.reshape(-1,1))

df_maria = df_train[df_train['hurricane'] == 'maria']
df_maria['norm_date'] = scaler.fit_transform(df_maria['date'].values.reshape(-1,1))

df_harvey = df_train[df_train['hurricane'] == 'harvey']
df_harvey['norm_date'] = scaler.fit_transform(df_harvey['date'].values.reshape(-1,1))

df_new_train = pd.concat([df_mat, df_irma, df_maria, df_harvey], ignore_index=True)
df_new_train.head(100)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_mat['norm_date'] = scaler.fit_transform(df_mat['date'].values.reshape(-1,1))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_irma['norm_date'] = scaler.fit_transform(df_irma['date'].values.reshape(-1,1))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_maria['norm_date'] = scaler.fit_transfor

Unnamed: 0.1,tweet_id,text_x,class_label,hurricane,fatalities,damage(billion USD),Unnamed: 0,text_y,author_id,created_at,...,in_reply_to_user_id,attachments,lang,possibly_sensitive,public_metrics,referenced_tweets,reply_settings,source,date,norm_date
0,783409770493571076,Horrifying. My heart breaks for Haiti. Thinkin...,sympathy_and_support,matthew,603,16.47,0,Horrifying. My heart breaks for Haiti. Thinkin...,30316960,2016-10-04 20:53:38+00:00,...,,,en,,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",[<ReferencedTweet id=783408890222444548 type=q...,everyone,TweetDeck,2016-10-04 20:53:38+00:00,2.052895
1,784696725285908481,#BreakingNews Hurricane Matthew kills over 800...,injured_or_dead_people,matthew,603,16.47,1,#BreakingNews Hurricane Matthew kills over 800...,2182497116,2016-10-08 10:07:32+00:00,...,,,en,,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",,everyone,dlvr.it,2016-10-08 10:07:32+00:00,99.741796
2,783549594416377856,Southern Haiti effectively cut off after the b...,other_relevant_information,matthew,603,16.47,2,Southern Haiti effectively cut off after the b...,97883743,2016-10-05 06:09:15+00:00,...,,,en,,"{'retweet_count': 1, 'reply_count': 0, 'like_c...",[<ReferencedTweet id=783548240650244097 type=q...,everyone,Twitter for Windows Phone,2016-10-05 06:09:15+00:00,12.666631
3,783398732926779392,"Hurricane Matthew hits Haiti, aims at US East ...",other_relevant_information,matthew,603,16.47,3,"Hurricane Matthew hits Haiti, aims at US East ...",232417464,2016-10-04 20:09:47+00:00,...,,,en,,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",,everyone,Twitter for Android,2016-10-04 20:09:47+00:00,1.215245
4,783683291782213632,Our thoughts and prayers are today for the peo...,sympathy_and_support,matthew,603,16.47,4,Our thoughts and prayers are today for the peo...,262805991,2016-10-05 15:00:31+00:00,...,,,en,,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",,everyone,Hootsuite,2016-10-05 15:00:31+00:00,22.815217
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,783786213610254336,"US is sending 150-200 troops, 9 helos to help ...",rescue_volunteering_or_donation_effort,matthew,603,16.47,95,"US is sending 150-200 troops, 9 helos to help ...",394759287,2016-10-05 21:49:30+00:00,...,,,en,,"{'retweet_count': 1, 'reply_count': 0, 'like_c...",,everyone,TweetDeck,2016-10-05 21:49:30+00:00,30.627871
96,783446920454778880,"Laura Sewell, CARE-Haiti, says 60,000+ pregnan...",other_relevant_information,matthew,603,16.47,96,"Laura Sewell, CARE-Haiti, says 60,000+ pregnan...",1230617006,2016-10-04 23:21:16+00:00,...,,,en,,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",,everyone,Twitter Web App,2016-10-04 23:21:16+00:00,4.873079
97,783997568241971202,Local Red Cross assists in Hurricane Matthew r...,rescue_volunteering_or_donation_effort,matthew,603,16.47,97,Local Red Cross assists in Hurricane Matthew r...,3044618818,2016-10-06 11:49:20+00:00,...,,,en,,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",,everyone,dlvr.it,2016-10-06 11:49:20+00:00,46.670890
98,783832083445710849,Praying for the sweet sweet children of Haiti ...,sympathy_and_support,matthew,603,16.47,98,RT @HaleyNikole__: Praying for the sweet sweet...,107557007,2016-10-06 00:51:46+00:00,...,,,en,,"{'retweet_count': 1, 'reply_count': 0, 'like_c...",[<ReferencedTweet id=783819814859075585 type=r...,everyone,Twitter for iPhone,2016-10-06 00:51:46+00:00,34.109643


In [59]:
df_new_train = df_new_train.rename(columns={'text_x' : 'text'})
df_new_train = df_new_train.drop(columns=['text_y'])


df_new_train.to_csv('train_hurricane_with_name.csv')