In [14]:
import pandas as pd
import numpy as np
import os
from subprocess import call
from tqdm import tqdm
import datetime
import pymongo
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pickle

In [2]:
df = pd.read_csv("./india_info.csv")
india_info = df[['state', 'city']]

In [3]:
india_info

Unnamed: 0,state,city
0,Maharashtra,Mumbai
1,Delhi,Delhi
2,Karnataka,Bangalore
3,Telangana,Hyderabad
4,Gujarat,Ahmedabad
...,...,...
304,Bihar,Buxar
305,Assam,Tezpur
306,Bihar,Jehanabad
307,Bihar,Aurangabad


In [4]:
bash_command_list = []

for i, row in india_info.iterrows():
    state, city = row['state'], row['city']
    temp = "twint -s 'Unlock1 AND {}' --limit 1 -o 'output/{}--{}.csv' --csv --since '2020-7-1 00:00:00'".format(city, state, city)
    bash_command_list.append(temp)

In [5]:
file1 = open("bash_command_list.txt","w")
file1.writelines(bash_command_list) 
file1.close()

In [6]:
bash_command_list[0]

"twint -s 'Unlock1 AND Mumbai' --limit 1 -o 'output/Maharashtra--Mumbai.csv' --csv --since '2020-7-1 00:00:00'"

In [30]:
for bash_command in bash_command_list[:5]:
    call(bash_command, shell=True)

In [7]:
scrapped_data = os.listdir("./output/")

In [8]:
scrapped_data

['Delhi--Delhi.csv',
 'Gujarat--Ahmedabad.csv',
 'Karnataka--Bangalore.csv',
 'Maharashtra--Mumbai.csv',
 'Telangana--Hyderabad.csv']

In [9]:
output_df = pd.DataFrame()

for file in scrapped_data:
    file_df = pd.read_csv('output/{}'.format(file))
    
    temp = file.split(".")[0].split("--")
    state, city = temp[0], temp[1]
    
    file_df['state'] = state
    file_df['city'] = city
    
    output_df = output_df.append(file_df)

In [10]:
output_df = output_df.reset_index()

In [11]:
output_df.columns

Index(['index', 'id', 'conversation_id', 'created_at', 'date', 'time',
       'timezone', 'user_id', 'username', 'name', 'place', 'tweet', 'mentions',
       'urls', 'photos', 'replies_count', 'retweets_count', 'likes_count',
       'hashtags', 'cashtags', 'link', 'retweet', 'quote_url', 'video', 'near',
       'geo', 'source', 'user_rt_id', 'user_rt', 'retweet_id', 'reply_to',
       'retweet_date', 'translate', 'trans_src', 'trans_dest', 'state',
       'city'],
      dtype='object')

In [12]:
df_to_work_with = output_df[['tweet', 'link', 'date', 'time', 'state', 'city']]

In [15]:
df_to_work_with

Unnamed: 0,tweet,link,date,time,state,city
0,That’s when d role of govts. B’comes v. releva...,https://twitter.com/DrLahariya/status/12814632...,2020-07-10,11:10:16,Delhi,Delhi
1,- Delhi trying to chase numbers - #politics in...,https://twitter.com/sarungss/status/1281128386...,2020-07-09,12:59:44,Delhi,Delhi
2,सरकार कुछ समय के लिए ही आपकी मदद कर सकती है। ब...,https://twitter.com/im_delhi/status/1280171474...,2020-07-06,21:37:18,Delhi,Delhi
3,Even Delhi has a software to find out where be...,https://twitter.com/praveennayak/status/127936...,2020-07-04,15:58:58,Delhi,Delhi
4,"ਜਾਣੋ, ਕੋਰੋਨਾ ਨੂੰ ਲੈ ਕੇ ਕੀ ਹੈ ਭਾਰਤ ਦੀ ਸਥਿਤੀ ?\n...",https://twitter.com/ptcnews/status/12793229729...,2020-07-04,13:25:40,Delhi,Delhi
5,India has recorded over 6.25 lakh cases of #co...,https://twitter.com/moneycontrolcom/status/127...,2020-07-03,10:09:26,Delhi,Delhi
6,#Delhi #Coronavirus Update (Data of 2 July 202...,https://twitter.com/BugleInstant/status/127870...,2020-07-02,20:28:14,Delhi,Delhi
7,AUTO DRIVER/ऑटो ड्राईवर ने COVID-19 में ऑटो/AU...,https://twitter.com/laatsaabnews/status/127863...,2020-07-02,16:09:06,Delhi,Delhi
8,#Delhi #coronavirus Update (Data of 1 July 202...,https://twitter.com/BugleInstant/status/127835...,2020-07-01,21:20:47,Delhi,Delhi
9,Just thinking 🤔 #1stJuly #Delhi #lockdown #Unl...,https://twitter.com/shree14667178/status/12783...,2020-07-01,20:50:17,Delhi,Delhi


In [16]:
model = load_model("./model/model_v6.h5")
tokenizer = pickle.load(open("./model/tokenizer_v6.pickle", "rb"))

In [17]:
score_list = ["anger", "happiness", "neutral", "sadness", "worry"]

In [19]:
for i, row in df_to_work_with.iterrows():
    sequences = tokenizer.texts_to_sequences([row['tweet']])
    new_processed = pad_sequences(sequences, padding="post", maxlen=30)
    score = model.predict(new_processed)
    print(score_list[np.argmax(score)])

neutral
worry
neutral
worry
neutral
anger
worry
worry
worry
neutral
worry
neutral
neutral
neutral
neutral
sadness
neutral
worry
worry
neutral
neutral
neutral
worry
worry
worry
worry
worry
worry
anger
worry
worry
worry
sadness
worry
sadness
neutral
neutral
neutral
neutral
neutral
happiness
neutral
happiness
anger
neutral
neutral
neutral
neutral
neutral
neutral
neutral


In [20]:
def analyze(tweet):
    sequences = tokenizer.texts_to_sequences([row['tweet']])
    new_processed = pad_sequences(sequences, padding="post", maxlen=30)
    return(model.predict(new_processed))

In [21]:
analyze('this is so sad')

array([[0.00141264, 0.16456127, 0.6715156 , 0.0526654 , 0.10984511]],
      dtype=float32)

In [22]:
analyze('this is so sad this is so sad this is so sad this is so sad')

array([[0.00141264, 0.16456127, 0.6715156 , 0.0526654 , 0.10984511]],
      dtype=float32)

In [29]:
output = pd.DataFrame()

for index, row in tqdm(df_to_work_with[['date', 'time', 'tweet', 'link', 'state', 'city']].iterrows()):

    if(index % 20 == 0):
        print("At Index: {}".format(index))

    date = row['date']
    time = row['time']
    
    date_time = date + ' ' + time
    date_time = datetime.datetime.strptime(date_time, '%Y-%m-%d %H:%M:%S')
    
    tweet = row['tweet']
    link = row['link']
    state = row['state']
    city = row['city']

    score = analyze(tweet)
    
    anger = score[0][0]
    happiness = score[0][1]
    neutral = score[0][2]
    sadness = score[0][3]
    worry = score[0][4]
    
    result = {'date_time': date_time, 'tweet': tweet, 'link': link, 'state': state, 'city': city,
             'anger': anger, 'happiness': happiness, 'neutral': neutral, 'sadness': sadness, 'worry': worry }
    
    output = output.append(result, ignore_index=True)

1it [00:00,  7.94it/s]

At Index: 0


22it [00:01, 11.32it/s]

At Index: 20


42it [00:03, 12.04it/s]

At Index: 40


51it [00:04, 11.03it/s]


In [30]:
output[['date_time', 'tweet', 'link', 'state', 'city', 'anger', 'happiness', 'neutral' , 'sadness', 'worry']].to_csv('{}-backup.csv'.format(datetime.date.today()))
output[['date_time', 'tweet', 'link', 'state', 'city', 'anger', 'happiness', 'neutral' , 'sadness', 'worry']]

Unnamed: 0,date_time,tweet,link,state,city,anger,happiness,neutral,sadness,worry
0,2020-07-10 11:10:16,That’s when d role of govts. B’comes v. releva...,https://twitter.com/DrLahariya/status/12814632...,Delhi,Delhi,0.026598,0.168588,0.36816,0.114622,0.322033
1,2020-07-09 12:59:44,- Delhi trying to chase numbers - #politics in...,https://twitter.com/sarungss/status/1281128386...,Delhi,Delhi,0.043682,0.080206,0.218414,0.290058,0.36764
2,2020-07-06 21:37:18,सरकार कुछ समय के लिए ही आपकी मदद कर सकती है। ब...,https://twitter.com/im_delhi/status/1280171474...,Delhi,Delhi,0.044808,0.144106,0.384185,0.163461,0.263441
3,2020-07-04 15:58:58,Even Delhi has a software to find out where be...,https://twitter.com/praveennayak/status/127936...,Delhi,Delhi,0.114904,0.071018,0.235845,0.248292,0.329941
4,2020-07-04 13:25:40,"ਜਾਣੋ, ਕੋਰੋਨਾ ਨੂੰ ਲੈ ਕੇ ਕੀ ਹੈ ਭਾਰਤ ਦੀ ਸਥਿਤੀ ?\n...",https://twitter.com/ptcnews/status/12793229729...,Delhi,Delhi,0.088762,0.096447,0.381141,0.09444,0.33921
5,2020-07-03 10:09:26,India has recorded over 6.25 lakh cases of #co...,https://twitter.com/moneycontrolcom/status/127...,Delhi,Delhi,0.830461,0.00632,0.013225,0.041317,0.108677
6,2020-07-02 20:28:14,#Delhi #Coronavirus Update (Data of 2 July 202...,https://twitter.com/BugleInstant/status/127870...,Delhi,Delhi,0.052666,0.036132,0.218386,0.184205,0.508611
7,2020-07-02 16:09:06,AUTO DRIVER/ऑटो ड्राईवर ने COVID-19 में ऑटो/AU...,https://twitter.com/laatsaabnews/status/127863...,Delhi,Delhi,0.03955,0.050978,0.39097,0.065751,0.452751
8,2020-07-01 21:20:47,#Delhi #coronavirus Update (Data of 1 July 202...,https://twitter.com/BugleInstant/status/127835...,Delhi,Delhi,0.050735,0.043553,0.232353,0.190447,0.482912
9,2020-07-01 20:50:17,Just thinking 🤔 #1stJuly #Delhi #lockdown #Unl...,https://twitter.com/shree14667178/status/12783...,Delhi,Delhi,0.029798,0.12303,0.445056,0.163829,0.238288


In [75]:
mongo_url = os.environ.get("mongo_url")
myclient = pymongo.MongoClient(os.getenv("covidian_server_internal_mongo_url"))
mydb = myclient["covidian"]

# Collection: Sentiments

In [28]:
mycol = mydb["sentiments"]

In [31]:
df_for_sentiments = output[['date_time', 'tweet', 'link', 'state', 'city', 'anger', 'happiness', 'neutral' , 'sadness', 'worry']]

In [38]:
for i, row in df_for_sentiments[:1].iterrows():
    mycol.insert_one(dict(row))

# Collection: Sentiments_City

In [48]:
mycol = mydb["sentiments_city"]
df_for_sentiments_city = output[['date_time', 'tweet', 'link', 'state', 'city', 'anger', 'happiness', 'neutral' , 'sadness', 'worry']].groupby(['state', 'city'], as_index=False).mean()

In [50]:
df_for_sentiments_city

Unnamed: 0,state,city,anger,happiness,neutral,sadness,worry
0,Delhi,Delhi,0.122905,0.089006,0.286426,0.169298,0.332365
1,Gujarat,Ahmedabad,0.028146,0.102312,0.381432,0.196105,0.292006
2,Karnataka,Bangalore,0.065285,0.106214,0.261186,0.295185,0.272129
3,Maharashtra,Mumbai,0.08214,0.107188,0.264237,0.206801,0.339634
4,Telangana,Hyderabad,0.032825,0.229237,0.468903,0.106881,0.162153


In [51]:
for i, row in df_for_sentiments_city[:1].iterrows():
    mycol.insert_one(dict(row))

# Collection: Sentiments_State

In [53]:
mycol = mydb["sentiments_state"]
df_for_sentiments_state = output[['date_time', 'tweet', 'link', 'state', 'city', 'anger', 'happiness', 'neutral' , 'sadness', 'worry']].groupby(['state'], as_index=False).mean()

In [54]:
df_for_sentiments_state

Unnamed: 0,state,anger,happiness,neutral,sadness,worry
0,Delhi,0.122905,0.089006,0.286426,0.169298,0.332365
1,Gujarat,0.028146,0.102312,0.381432,0.196105,0.292006
2,Karnataka,0.065285,0.106214,0.261186,0.295185,0.272129
3,Maharashtra,0.08214,0.107188,0.264237,0.206801,0.339634
4,Telangana,0.032825,0.229237,0.468903,0.106881,0.162153


In [55]:
for i, row in df_for_sentiments_state[:1].iterrows():
    mycol.insert_one(dict(row))

# Collection: Sentiments_Country

In [57]:
mycol = mydb["sentiments_country"]
df_for_sentiments_country = output[['date_time', 'tweet', 'link', 'state', 'city', 'anger', 'happiness', 'neutral' , 'sadness', 'worry']].mean()

In [66]:
sentiments_country = dict()
sentiments_country['country'] = 'India'
sentiments_country.update(df_for_sentiments_country.to_dict())

In [67]:
sentiments_country

{'country': 'India',
 'anger': 0.0751771296958859,
 'happiness': 0.138653963804245,
 'neutral': 0.33177240061409335,
 'sadness': 0.17357787387628182,
 'worry': 0.2808186382639642}

In [68]:
mycol = mydb["sentiments_country"]
mycol.insert_one(sentiments_country)

<pymongo.results.InsertOneResult at 0x64f91ddc0>

In [69]:
from dotenv import load_dotenv
load_dotenv()

True

In [74]:
print(os.getenv("covidian_client_external_port"))

5000
