Minds Research Lab Programming Challenge

Author: Kanish Shah (MS in Computer Science, University of Southern California)

Email ID: kanishni@usc.edu

Goal: To demonstrate your ability to design a solution to a
problem and implement this solution in Python using software engineering best practices.

In [75]:
#Importing necessary libraries
import json
from pandas import json_normalize
import pandas as pd
from langdetect import detect
import langid
from tqdm import tqdm

# Read the exported chat history. The context manager closes the handle as soon
# as parsing finishes, and the explicit UTF-8 encoding keeps emoji and other
# non-ASCII text from being decoded with a platform-dependent default.
with open('result.json', encoding='utf-8') as file:      #Reading json file
    extracted_data = json.load(file)   #Loading json file
# Empty frame that later cells fill with the filtered messages.
df = pd.DataFrame()


In [45]:
#This function converts any emojis to their actual meaning of text
import re
from emot.emo_unicode import UNICODE_EMOJI,EMOTICONS_EMO

def emoji_conversion(messages, emoji_map=None):
    """Replace every emoji found in ``messages`` with a text name.

    Each description in the mapping has its commas and colons removed and its
    whitespace-separated words joined with underscores before it is
    substituted for the emoji.

    Args:
        messages: The string to convert.
        emoji_map: Optional mapping of emoji string -> description. When
            omitted, the ``UNICODE_EMOJI`` table imported from ``emot`` is used.

    Returns:
        The converted string. Text without any mapped emoji is returned
        unchanged.
    """
    if emoji_map is None:
        emoji_map = UNICODE_EMOJI
    # Replace the longest keys first so that a multi-character emoji sequence
    # is substituted as a whole instead of being broken up by a shorter key
    # that happens to be a substring of it.
    for emot in sorted(emoji_map, key=len, reverse=True):
        name = "_".join(emoji_map[emot].replace(",", "").replace(":", "").split())
        messages = messages.replace(emot, name)
    return messages
# Quick sanity check: run the converter on a sample sentence containing two emojis;
# the bare call on the last line makes the notebook display the converted string.
msg = "Damn!! Man 😂. It was superb and very fun 😎"
emoji_conversion(msg)

'Damn!! Man face_with_tears_of_joy. It was superb and very fun smiling_face_with_sunglasses'

In [66]:
#Pre-processing the data and using tqdm for showing the performance
# Rows are collected in a plain list and turned into a DataFrame once at the
# end: DataFrame.append copied the whole frame on every call and no longer
# exists in pandas 2.x. Rebuilding df from scratch also means re-running this
# cell does not duplicate rows.
records = []
for message in tqdm(extracted_data['messages']):     #Implementing tqdm as per instructed
    raw_text = message["text"]
    if isinstance(raw_text, list):
        # A list mixes plain strings with dict entries; only the plain strings
        # are kept and concatenated, dict entries are skipped.
        str_new = ''.join(part for part in raw_text if not isinstance(part, dict))
    else:
        str_new = raw_text

    str_new = str_new.lower().strip()    #Converting msgs to lower case for uniformity
    if 'doge' not in str_new and 'shib' not in str_new:
        continue
    # Language is detected on the text before emojis are turned into words.
    language = langid.classify(str_new)
    if language[0] != 'en':
        continue
    records.append({
        # Characters 8-9 of the date string (assumes ISO "YYYY-MM-DD..." dates,
        # i.e. the day of the month -- TODO confirm against the export format).
        'Number of days': message["date"][8:10],
        'text': emoji_conversion(str_new),    #Handling emojis in the kept messages
    })

# Explicit columns keep the frame's schema stable even when nothing matched.
df = pd.DataFrame(records, columns=['Number of days', 'text'])


100%|██████████| 47231/47231 [00:43<00:00, 1097.75it/s]


In [67]:
# Display the DataFrame filled by the pre-processing loop ("Number of days" and "text" columns).
df

Unnamed: 0,Number of days,text
0,01,doge is going cray
1,01,sell target of doge
2,01,doge
3,01,"anyway, is doge a good crypto for long term in..."
4,01,who else going to mars with dogebeaming_face_w...
...,...,...
2695,14,i want to buy shiba again.
2696,14,hello friends. we came together to ask for co...
2697,14,new coin shib is good or not
2698,14,bhai scope nhi hai and not even on par with do...


In [68]:
#Applying TextBlob NLP model for finding out sentiment polarity
from textblob import TextBlob
import numpy as np
# Polarity score from TextBlob for every message; str() guards against
# non-string cells.
df["score"] = df["text"].apply(lambda i: TextBlob(str(i)).sentiment.polarity)
# Label each row by the sign of its score. The fallback label is passed
# explicitly as a string: np.select's implicit default is the integer 0, which
# recent NumPy releases refuse to combine with string choices. A score of
# exactly 0 falls through to the default and is labelled 'neu'.
df["sentiment"] = np.select([df["score"] > 0, df["score"] < 0],
                            ['pos', 'neg'], default='neu')

In [69]:
# Display the DataFrame again, now including the "score" and "sentiment" columns.
df

Unnamed: 0,Number of days,text,score,sentiment
0,01,doge is going cray,0.000000,neu
1,01,sell target of doge,0.000000,neu
2,01,doge,0.000000,neu
3,01,"anyway, is doge a good crypto for long term in...",0.325000,pos
4,01,who else going to mars with dogebeaming_face_w...,0.000000,neu
...,...,...,...,...
2695,14,i want to buy shiba again.,0.000000,neu
2696,14,hello friends. we came together to ask for co...,0.215000,pos
2697,14,new coin shib is good or not,0.418182,pos
2698,14,bhai scope nhi hai and not even on par with do...,0.500000,pos


In [70]:
#Converting dataframe to csv for understanding the final cleaned data
# No index argument is passed, so pandas' default of also writing the row index applies.
df.to_csv('sentiment_extracted_data.csv')

In [73]:
#Using plotly, plot the number of messages in each sentiment class for every day
import plotly.express as py
# Count the rows in every (day, sentiment) group; after count() the "text"
# column holds the number of messages in that group rather than message text.
sentiment_df = df.groupby(["Number of days", "sentiment"]).count().reset_index()
# Columns are referenced by name, and the y axis is relabelled so the figure
# says what it actually shows (a count) instead of the raw column name "text".
fig = py.bar(
    sentiment_df,
    x="Number of days",
    y="text",
    color="sentiment",
    labels={"text": "Number of messages"},
    title="Sentiment Predictions for different days",
)
fig.show()

------------------------------End---------------------------------------------------------------

**Thank You**