In [25]:
import dask.dataframe as dd
from nltk.corpus import stopwords
import emoji
from textblob import TextBlob
from sklearn.feature_extraction.text import TfidfVectorizer
import re
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix,f1_score

In [82]:
# Only the review text and its star rating are used downstream, so skip
# every other column while parsing.
needed_columns = ['review_description', 'rating']

# assume_missing=True makes Dask read integer columns without an explicit
# dtype as floats, so a missing value in a later partition cannot break the
# inferred schema.
ddf = dd.read_csv(
    'Data/Ola Customer Reviews.csv',
    usecols=needed_columns,
    assume_missing=True,
)

In [83]:
# The frame is lazy: .compute() is what actually evaluates the row count.
ddf.shape[0].compute()  # number of rows

357698

In [84]:
# Peek at the first rows to confirm the two selected columns loaded as expected.
ddf.head()

Unnamed: 0,review_description,rating
0,"The map in Ola is so messed up, i have to pay ...",1.0
1,Deepak Kumar.... 🙏🙏🙏🙏🙏],5.0
2,Such aa irresponsible app more then I waiting ...,1.0
3,Worst,1.0
4,Too much expensive .. try UBer... They are pro...,1.0


In [85]:
# Discard every row in which either selected column is missing.
ddf = ddf.dropna(how='any')

In [86]:
# Per-language cache of stopword sets, filled on first use so the NLTK
# corpus is read once instead of once per token.
_STOPWORD_CACHE = {}


def _stopword_set(language='english'):
    """Return NLTK's stopword list for ``language`` as a cached frozenset."""
    cached = _STOPWORD_CACHE.get(language)
    if cached is None:
        cached = _STOPWORD_CACHE[language] = frozenset(stopwords.words(language))
    return cached


def preprocess(text):
    """Normalise a single review string for sentiment analysis.

    Steps, in order:
      1. lower-case and strip surrounding whitespace;
      2. collect every character that is a key of ``emoji.EMOJI_DATA``;
      3. delete all characters that are neither word characters nor whitespace;
      4. drop English stopwords, except 'but' and 'than', which are kept.

    Parameters
    ----------
    text : str
        Raw review text.

    Returns
    -------
    str
        The remaining words joined by single spaces, followed by a space and
        the collected emojis when any were found. Empty parts are omitted, so
        the result never carries a leading or trailing space.
    """
    text = text.lower().strip()

    # Emojis must be collected before the punctuation regex runs, because that
    # regex would otherwise delete them along with the punctuation.
    extracted_emojis = ''.join(char for char in text if char in emoji.EMOJI_DATA)

    # Stopwords that are deliberately retained.
    important_words = {'but', 'than'}

    text = re.sub(r'[^\w\s]', '', text)

    # Hoisted out of the comprehension: one cached set lookup per word rather
    # than rebuilding and scanning the stopword list for every token.
    stop_words = _stopword_set('english')
    kept_words = ' '.join(
        word for word in text.split()
        if word not in stop_words or word in important_words
    )

    # Join only the non-empty parts to avoid a dangling separator.
    return ' '.join(part for part in (kept_words, extracted_emojis) if part)

In [87]:
# Clean every review lazily. meta declares the output's name and dtype so
# Dask does not have to infer them from a sample call to preprocess.
ddf = ddf.assign(
    review_description=ddf['review_description'].apply(
        preprocess,
        meta=('review_description', 'str'),
    )
)

In [88]:
# Spot-check the cleaned review text.
ddf.head()

Unnamed: 0,review_description,rating
0,map ola messed pay rs100 extra map incorrect a...,1.0
1,deepak kumar 🙏🙏🙏🙏🙏,5.0
2,aa irresponsible app waiting 1 hour waste app ...,1.0
3,worst,1.0
4,much expensive try uber providing cheap rides ...,1.0


In [89]:
def analyze_sentiment(text, positive_threshold=0.5, negative_threshold=0.0):
    """Classify a review as 'positive', 'negative' or 'neutral'.

    The label comes from TextBlob's polarity score for ``text`` (documented
    by TextBlob as a float in [-1.0, 1.0]).

    Parameters
    ----------
    text : str
        Review text to score.
    positive_threshold : float, default 0.5
        Polarity must be strictly greater than this to count as 'positive'.
    negative_threshold : float, default 0.0
        Polarity must be strictly less than this to count as 'negative'.

    Returns
    -------
    str
        'positive', 'negative' or 'neutral'.

    Notes
    -----
    The defaults are intentionally kept asymmetric: any polarity below zero
    is 'negative', while mildly positive scores (0 to 0.5 inclusive) are
    'neutral'. The positive check runs first, so it wins if the two
    thresholds are ever configured to overlap.
    """
    polarity = TextBlob(text).sentiment.polarity

    if polarity > positive_threshold:
        return 'positive'
    if polarity < negative_threshold:
        return 'negative'
    return 'neutral'
    

In [90]:
# Label every cleaned review lazily. meta names the output 'sentiment' so the
# declared schema matches the column the result is assigned to.
ddf['sentiment'] = ddf['review_description'].apply(analyze_sentiment, meta=('sentiment', 'str'))

In [91]:
# Inspect the derived sentiment labels next to the original ratings.
ddf.head(15)

Unnamed: 0,review_description,rating,sentiment
0,map ola messed pay rs100 extra map incorrect a...,1.0,neutral
1,deepak kumar 🙏🙏🙏🙏🙏,5.0,neutral
2,aa irresponsible app waiting 1 hour waste app ...,1.0,negative
3,worst,1.0,negative
4,much expensive try uber providing cheap rides ...,1.0,neutral
5,ola drivers demanding extra money ride auto be...,2.0,neutral
6,pathetic service option contact service repres...,1.0,negative
7,proper support ola simply asking us check onli...,1.0,neutral
8,fraud app everytime estimated fare final fare ...,1.0,negative
9,driver doesnt pick phone charging cancellation...,1.0,negative
