# Importing the libraries

In [2]:
from sklearn import model_selection, preprocessing, linear_model, naive_bayes, metrics, svm
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
import numpy as np
import pandas as pd

import re
import string

import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer

import tweepy
import json

import pyrebase

# Reading the dataset

In [3]:
df = pd.read_csv('/home/poobalan/College/Sem 6/mini project/Disaster-Mitigation-Support-System/DatasetFinal2.csv')

In [4]:
df.shape

(225, 9)

In [5]:
df.columns

Index(['date', 'tweetid', 'username', 'followers_count', 'tweet',
       'tweet_created', 'Place', 'Category', 'Unnamed: 8'],
      dtype='object')

In [6]:
df = df.drop(['Unnamed: 8'],axis=1)

In [7]:
df.head()

Unnamed: 0,date,tweetid,username,followers_count,tweet,tweet_created,Place,Category
0,2021-04-23 13:53,1385486062793883652,Senju Hashirama,0,Volunteers for rescue operations in #Chennaifl...,2021-04-23 06:49:53,Coimbatore,general
1,2021-04-23 13:53,1385485726951755777,Senju Hashirama,0,Rescue volunteers for chennai flood are doing ...,2021-04-23 06:48:33,Chennai,general
2,2021-04-23 13:54,1385485008995950595,kavinsabapathy,0,our volunteers are in full swing in helping th...,2021-04-23 06:45:41,Chennai,volunteer
3,2021-04-23 13:54,1385484338163175426,kavinsabapathy,0,helping others is the best thing in life. So w...,2021-04-23 06:43:01,Cuddalore,volunteer
4,2021-04-23 13:54,1385484705626169346,kavinsabapathy,0,we are happy to announce that we are there to ...,2021-04-23 06:44:29,Chennai,volunteer


# Splitting the train and test data

In [8]:
train_x,test_x,train_y,test_y = model_selection.train_test_split(df['tweet'],df['Category'],test_size=0.25,random_state=0)

In [9]:
train_x

71     #chennaiflood we are for the people helping or...
22     #ChennaiFlood #Resucuevolunteer As of now we  ...
204    I am kesav from guindy Got jammed in here with...
45     #chennaiflood Days of sufferings The people of...
199    Happy to see the people of Chennai helping eac...
                             ...                        
67     #chennaiflood please donate the things to the ...
192    We group of peoples came here to work from Har...
117    Ppl are drowning and the government is just wa...
47     #rescuevolunteer Those who are willing to resc...
172                Can anyone pls help me..#ChennaiFlood
Name: tweet, Length: 168, dtype: object

In [10]:
train_y

71     volunteer
22     volunteer
204         Help
45     volunteer
199         Help
         ...    
67       general
192         Help
117      general
47     volunteer
172      general
Name: Category, Length: 168, dtype: object

# Label encoding the train test data

In [11]:
encoder = preprocessing.LabelEncoder()
train_y = encoder.fit_transform(train_y)
test_y = encoder.fit_transform(test_y)

In [12]:
train_y

array([2, 2, 0, 2, 0, 2, 1, 1, 2, 2, 1, 2, 0, 2, 2, 0, 1, 2, 2, 2, 2, 1,
       0, 2, 1, 1, 2, 0, 1, 2, 1, 0, 2, 1, 0, 0, 0, 1, 2, 1, 2, 2, 0, 2,
       0, 2, 0, 1, 0, 0, 2, 2, 2, 2, 0, 0, 2, 1, 2, 1, 2, 0, 0, 0, 0, 0,
       1, 2, 1, 2, 0, 0, 1, 1, 1, 2, 2, 2, 2, 2, 0, 2, 1, 1, 1, 1, 1, 2,
       2, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 2, 2, 2, 1, 2, 0, 1, 2, 0,
       1, 2, 2, 1, 2, 0, 0, 2, 2, 2, 0, 0, 0, 2, 1, 2, 1, 2, 1, 0, 0, 0,
       2, 1, 0, 1, 0, 0, 1, 2, 1, 0, 2, 0, 2, 0, 1, 1, 0, 0, 2, 0, 2, 0,
       1, 1, 2, 2, 2, 0, 1, 1, 0, 1, 0, 1, 2, 1])

In [13]:
print(encoder.classes_)

['Help' 'general' 'volunteer']


In [14]:
enc_name_mapping = dict(zip(encoder.transform(encoder.classes_),encoder.classes_))
print(enc_name_mapping)

{0: 'Help', 1: 'general', 2: 'volunteer'}


# Vectorizing

In [15]:
count_vect = CountVectorizer(analyzer='word', token_pattern=r'\w{1,}')
count_vect.fit(train_x)

# transform the training and validation data using count vectorizer object
xtrain_count =  count_vect.transform(train_x)
xvalid_count =  count_vect.transform(test_x)

In [16]:
print(xtrain_count)

  (0, 42)	1
  (0, 46)	1
  (0, 97)	1
  (0, 159)	1
  (0, 212)	1
  (0, 221)	1
  (0, 283)	2
  (0, 287)	1
  (0, 315)	1
  (0, 320)	1
  (0, 404)	1
  (0, 415)	1
  (0, 449)	1
  (0, 464)	2
  (0, 620)	2
  (0, 641)	1
  (0, 658)	1
  (0, 669)	1
  (0, 690)	1
  (1, 33)	1
  (1, 38)	1
  (1, 46)	2
  (1, 50)	2
  (1, 83)	1
  (1, 86)	1
  :	:
  (165, 632)	1
  (165, 641)	1
  (165, 687)	1
  (166, 46)	1
  (166, 56)	1
  (166, 96)	1
  (166, 97)	1
  (166, 112)	1
  (166, 334)	1
  (166, 464)	1
  (166, 514)	1
  (166, 515)	1
  (166, 620)	1
  (166, 631)	1
  (166, 641)	1
  (166, 669)	1
  (166, 703)	1
  (166, 707)	1
  (166, 710)	1
  (167, 43)	1
  (167, 87)	1
  (167, 97)	1
  (167, 283)	1
  (167, 384)	1
  (167, 475)	1


# Training model

In [17]:
def train_model(classifier, feature_vector_train, label, feature_vector_valid, is_neural_net=False):
    # fit the training dataset on the classifier
    classifier.fit(feature_vector_train, label)
    
    # predict the labels on validation dataset
    predictions = classifier.predict(feature_vector_valid)
    
    if is_neural_net:
        predictions = predictions.argmax(axis=-1)
    
    return metrics.accuracy_score(predictions, test_y)

# Various Models Accuracy 

In [18]:
accuracy = train_model(naive_bayes.MultinomialNB(), xtrain_count, train_y, xvalid_count)
print ("NB, Count Vectors: ", accuracy)

NB, Count Vectors:  0.5263157894736842


In [19]:
accuracy = train_model(svm.SVC(), xtrain_count, train_y, xvalid_count)
print ("SVM, N-Gram Vectors: ", accuracy)

SVM, N-Gram Vectors:  0.5087719298245614


In [20]:
accuracy = train_model(linear_model.LogisticRegression(), xtrain_count, train_y, xvalid_count)
print ("Linear Regression, Count Vectors: ", accuracy)

Linear Regression, Count Vectors:  0.5263157894736842


# Data Preprocessing

In [21]:
def datapreprocessing(textdata):
    #1.conerting the text into lower case
    textdata=textdata.lower()
    textdata = re.sub(r"http\S+", "", textdata)
    #2.removing the numbers
    result = re.sub(r'\d+', '', textdata)

    entity_prefixes = ['@','#']
    for separator in  string.punctuation:
        if separator not in entity_prefixes :
            result = result.replace(separator,' ')
    words = []
    for word in result.split():
        word = word.strip()
        if word:
            if word[0] not in entity_prefixes:
                words.append(word)
    result=(' '.join(words)) 

    emoji_pattern = re.compile("["
        u"\U0001F600-\U0001F64F"  # emoticons
        u"\U0001F300-\U0001F5FF"  # symbols & pictographs
        u"\U0001F680-\U0001F6FF"  # transport & map symbols
        u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
        u"\U00002500-\U00002BEF"  # chinese char
        u"\U00002702-\U000027B0"
        u"\U00002702-\U000027B0"
        u"\U000024C2-\U0001F251"
        u"\U0001f926-\U0001f937"
        u"\U00010000-\U0010ffff"
        u"\u2640-\u2642" 
        u"\u2600-\u2B55"
        u"\u200d"
        u"\u23cf"
        u"\u23e9"
        u"\u231a"
        u"\ufe0f"  # dingbats
        u"\u3030"
                           "]+", flags=re.UNICODE)
    
    result=emoji_pattern.sub(r'', result)
    #3.removing the punctuations
  
    punc = '!()-[]{};:\'\",<>/?@#$%^&*_~\'0123456789+.,' 
    for ele in result:  
        if ele in punc:  
            result = result.replace(ele, "")
    
    
    #4.removing whitespaces
    result=result.strip()
    
    #5.removing stopwords
    stop_words = set(stopwords.words('english'))
    tokens = word_tokenize(result)
    result = [i for i in tokens if not i in stop_words]

    #6 stemming
    word_stemmer=PorterStemmer()
    for i in range(len(result)):
        result[i]=word_stemmer.stem(result[i])
    
    print(result)
    # 7 lematization
    lematizer=WordNetLemmatizer()
    for i in range(len(result)):
        result[i]=lematizer.lemmatize(result[i])

    return result

In [22]:
data=pd.DataFrame(df,columns=['tweet'])
data.head()

Unnamed: 0,tweet
0,Volunteers for rescue operations in #Chennaifl...
1,Rescue volunteers for chennai flood are doing ...
2,our volunteers are in full swing in helping th...
3,helping others is the best thing in life. So w...
4,we are happy to announce that we are there to ...


In [23]:
for i in range(df['tweet'].count()):
    df['tweet'][i] = datapreprocessing(df['tweet'][i])
    
df.head()

['volunt', 'rescu', 'oper', 'need']
['rescu', 'volunt', 'chennai', 'flood', 'god', 'workheroeswithoutcap']
['volunt', 'full', 'swing', 'help', 'needi', 'pleas', 'kindli', 'inform', 'us', 'ur', 'need', 'help', 'u']
['help', 'other', 'best', 'thing', 'life', 'took', 'initi', 'suppli', 'food', 'cloth', 'contact', 'us']
['happi', 'announc', 'help', 'u', 'disast', 'situat', 'pl', 'contact', 'us', 'info']
['readi', 'volunt', 'suppli', 'food', 'poepl', 'chennai', 'contact', 'us', 'info']
['would', 'like', 'extend', 'help', 'hand', 'peopl', 'chennai', 'pl', 'contact', 'us', 'case', 'utmost', 'emerg', 'alway']
['readi', 'volunt', 'help', 'peopl', 'chennai', 'overcom', 'flood', 'pl', 'contact', 'us']
['calam', 'make', 'thing', 'wors', 'young', 'volunt', 'set', 'thing', 'right', 'need', 'medicin', 'medic', 'aid', 'feel', 'free', 'contact', 'get', 'soon', 'love']
['heart', 'weep', 'see', 'peopl', 'suffer', 'huge', 'sinc', 'get', 'know', 'thing', 'will', 'help', 'financi', 'ask', 'help']
['regard',

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tweet'][i] = datapreprocessing(df['tweet'][i])


Unnamed: 0,date,tweetid,username,followers_count,tweet,tweet_created,Place,Category
0,2021-04-23 13:53,1385486062793883652,Senju Hashirama,0,"[volunt, rescu, oper, need]",2021-04-23 06:49:53,Coimbatore,general
1,2021-04-23 13:53,1385485726951755777,Senju Hashirama,0,"[rescu, volunt, chennai, flood, god, workheroe...",2021-04-23 06:48:33,Chennai,general
2,2021-04-23 13:54,1385485008995950595,kavinsabapathy,0,"[volunt, full, swing, help, needi, plea, kindl...",2021-04-23 06:45:41,Chennai,volunteer
3,2021-04-23 13:54,1385484338163175426,kavinsabapathy,0,"[help, other, best, thing, life, took, initi, ...",2021-04-23 06:43:01,Cuddalore,volunteer
4,2021-04-23 13:54,1385484705626169346,kavinsabapathy,0,"[happi, announc, help, u, disast, situat, pl, ...",2021-04-23 06:44:29,Chennai,volunteer


# Vectorizing the data

In [24]:
count_vect = CountVectorizer(analyzer='word', token_pattern=r'\w{1,}')
count_vect.fit(train_x)

# transform the training and validation data using count vectorizer object
xtrain_count =  count_vect.transform(train_x)
xvalid_count =  count_vect.transform(test_x)

# Accuracy after preprocessing

In [25]:
accuracy = train_model(naive_bayes.MultinomialNB(), xtrain_count, train_y, xvalid_count)
print ("NB, Count Vectors: ", accuracy)

NB, Count Vectors:  0.5263157894736842


In [26]:
accuracy = train_model(svm.SVC(), xtrain_count, train_y, xvalid_count)
print ("SVM, N-Gram Vectors: ", accuracy)

SVM, N-Gram Vectors:  0.5087719298245614


In [27]:
accuracy = train_model(linear_model.LogisticRegression(), xtrain_count, train_y, xvalid_count)
print ("Linear Regression, Count Vectors: ", accuracy)

Linear Regression, Count Vectors:  0.5263157894736842


# Checking User input

In [28]:
def input_predict(classifier, feature_vector_train, label, feature_vector_valid):
    # fit the training dataset on the classifier
    classifier.fit(feature_vector_train, label)
    # predict the labels on validation dataset
    predictions = classifier.predict(feature_vector_valid)    
    return predictions

In [29]:
new_input = ['I need some 10 foods packets and 20 waterbottles people are suffering please help us ']
#new_input = datapreprocessing(new_input)
new_input_count = count_vect.transform(new_input)
print(new_input_count)

  (0, 1)	1
  (0, 38)	1
  (0, 46)	1
  (0, 219)	1
  (0, 283)	1
  (0, 312)	1
  (0, 415)	1
  (0, 464)	1
  (0, 472)	1
  (0, 566)	1
  (0, 597)	1
  (0, 669)	1


In [30]:
result = input_predict(naive_bayes.MultinomialNB(),xtrain_count,train_y,new_input_count)
print(result)

[2]


In [31]:
result = input_predict(linear_model.LogisticRegression(),xtrain_count,train_y,new_input_count)
print(result)

[0]


In [32]:
result = input_predict(svm.SVC(),xtrain_count,train_y,new_input_count)
print(result)

[0]


# Initializing the tweepy object

In [33]:
CONSUMER_KEY = "tNsobFwICnhvijyXWNC7selyC"
CONSUMER_SECRET = "nOBFm8D9qUS5A7JlqjLU7yOTXFLIkwLvYYae2fGB7JF0NKHKkf"
ACCESS_KEY = "1355541876741300227-dIdVt9dimhdibKGGdTidCoxTz8sJSR"
ACCESS_SECRET = "soyCrMxlRHcFASlyzbrT5yCNsFJ20YzjQM5zoRn8jY1eY"

In [34]:
auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_KEY, ACCESS_SECRET)
api = tweepy.API(auth)

# Initializing the firebase object

In [35]:
config = {
    "apiKey": "AIzaSyAysywWbD4O4UPpCQrci50kaLHVQbrJm7Y",
    "authDomain": "dmss-308701.firebaseapp.com",
    "databaseURL": "https://dmss-308701-default-rtdb.firebaseio.com",
    "projectId": "dmss-308701",
    "storageBucket": "dmss-308701.appspot.com",
    "messagingSenderId": "245597012331",
    "appId": "1:245597012331:web:0da20d1dc511e17a074c2e",
    "measurementId": "G-HJ72TBGN7T"
}

firebase = pyrebase.initialize_app(config)

db=firebase.database()

# Extracting tweets live from twitter and classifying it and updating the database

In [39]:
for tweet in tweepy.Cursor(api.search, q="#ChennaiFloods", count=1, lang="en", since="2021-04-25").items():  

    if tweet.id_str not in list(db.child("Tweetids").child().get().val()):
        new_input = [tweet.text]
#         new_input = datapreprocessing(new_input)
        new_input_count = count_vect.transform(new_input)
        predict = input_predict(linear_model.LogisticRegression(),xtrain_count,train_y,new_input_count)
        
        if predict[0] == 0:
            textdata="Thanks for reaching for help visit here https://disaster-mitigation-support-system.netlify.app/ and contact the volunteer nearby you Be Safe"
        if predict[0]== 1:
            textdata="Thanks for sharing the general information.Be Safe"
        if predict[0] == 2:
            textdata="Thanks for reaching to help others visit here https://disaster-mitigation-support-system.netlify.app/ and register as our volunter and help the people in need.Be Safe"
        
        api.update_status(textdata, in_reply_to_status_id=tweet.id,auto_populate_reply_metadata=True)
        db.child("Tweetids").child(tweet.id).set(tweet.id)
        
        from geopy.geocoders import Nominatim
        
        geolocator = Nominatim(user_agent="DMSS")
        loc=tweet.user.location
        location = geolocator.geocode(loc)
        data={"location":loc,"latitude":location.latitude,"longitude":location.longitude}
        
        if loc not in list(db.child("Locations").child().get().val()):
            db.child("Locations").child(loc).set(data)
        else:
            print("Already Marked on the Map")
    else:
        print("Already replied to Tweet with the details")
        
    

Already Marked on the Map
Already replied to Tweet with the details
