## Generating mock review data where a mentor gives a review to a mentee

In [1]:
import sys
import os
# We appended the path with the root folder to access modules in sibling directories.
sys.path.append(os.path.abspath('../'))

import pandas as pd
import random as r
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

from data_generators.data_options import *

In [2]:
# Read the raw reviews and discard the 'Id' and 'Label' columns in one step.
df = pd.read_csv('review.csv').drop(columns=['Id', 'Label'])
df

Unnamed: 0,Review
0,good and interesting
1,"This class is very helpful to me. Currently, I..."
2,like!Prof and TAs are helpful and the discussi...
3,Easy to follow and includes a lot basic and im...
4,Really nice teacher!I could got the point eazl...
...,...
596,Good content but the Chinese translation is ba...
597,"Not difficult to understand,interesting,helpfu..."
598,very useful course. And thank for professor's ...
599,An interesting course and thank you for transl...


## Adding mock mentee and mentor IDs to the data frame

In [3]:
# One mock mentor ID per row: the letter "O" followed by a random integer.
# The integer is used directly rather than through hash(): for ints, hash()
# only reduces the value modulo a platform-dependent prime, which leaves it
# unchanged on 64-bit CPython but would shrink the ID space on 32-bit builds.
df['mentor_id'] = [f"O{r.randint(2000000, 70000000000000000)}" for _ in range(df.shape[0])]

In [4]:
# One mock mentee ID per row: the letter "E" followed by a random integer.
df['mentee_id'] = [f"E{r.randint(1000000, 70000000000000)}" for _ in range(len(df))]

In [5]:
# Display the frame to inspect the columns added so far.
df

Unnamed: 0,Review,mentor_id,mentee_id
0,good and interesting,O41984876155545732,E55577062079585
1,"This class is very helpful to me. Currently, I...",O6546692836197236,E54699983255974
2,like!Prof and TAs are helpful and the discussi...,O16893362738472578,E25563363815050
3,Easy to follow and includes a lot basic and im...,O16242555097262361,E56026179245734
4,Really nice teacher!I could got the point eazl...,O2529716609252145,E66288469510473
...,...,...,...
596,Good content but the Chinese translation is ba...,O18321689877786694,E12607468562613
597,"Not difficult to understand,interesting,helpfu...",O19897097203109266,E25255368502180
598,very useful course. And thank for professor's ...,O30417679071323234,E13012798326763
599,An interesting course and thank you for transl...,O16026662156057096,E44773836242141


In [6]:
# Single shared analyzer instance, reused for every review that gets scored.
vader = SentimentIntensityAnalyzer()

def one_vader_score(text):
    """Score ``text`` with the shared VADER analyzer and return its compound score."""
    polarity = vader.polarity_scores(text)
    return polarity['compound']

In [7]:
# Score every review text and store the compound score alongside it.
df["compound_score"] = df['Review'].map(one_vader_score)

In [8]:
def is_positive(number, threshold=0.05):
    """
    Flag a sentiment score as positive (1) or not positive (0).

    Args:
        number: Sentiment score to classify; in this notebook it is the
            compound score stored in the ``compound_score`` column.
        threshold: Smallest score that still counts as positive. Defaults
            to 0.05, the cutoff this notebook has always used.

    Returns:
        int: 1 when ``number >= threshold``; 0 for neutral or negative
        scores (and for NaN, since any comparison with NaN is False).
    """
    return 1 if number >= threshold else 0

In [9]:
# Turn each compound score into a 1/0 positive flag.
df['positive'] = df['compound_score'].map(is_positive)

In [10]:
# Preview the first five rows of the frame.
df.head()

Unnamed: 0,Review,mentor_id,mentee_id,compound_score,positive
0,good and interesting,O41984876155545732,E55577062079585,0.6808,1
1,"This class is very helpful to me. Currently, I...",O6546692836197236,E54699983255974,0.4754,1
2,like!Prof and TAs are helpful and the discussi...,O16893362738472578,E25563363815050,0.8843,1
3,Easy to follow and includes a lot basic and im...,O16242555097262361,E56026179245734,0.5719,1
4,Really nice teacher!I could got the point eazl...,O2529716609252145,E66288469510473,0.3266,1


In [11]:
# Count how many reviews carry each value of the 'positive' flag.
df['positive'].value_counts()

1    523
0     78
Name: positive, dtype: int64

In [12]:
# Create both name columns up front, filled with a placeholder string.
for placeholder_column in ('first', 'last'):
    df[placeholder_column] = 'dummy'

In [13]:
# Ignore the SettingWithCopyWarning
# (left in place because other cells may still depend on this global setting)
pd.options.mode.chained_assignment = None

# Draw one (first, last) name pair per row, calling the generators in the same
# order a row-by-row loop would, then assign each column in a single step.
# Writing through `df['first'].iloc[i] = ...` is chained assignment: pandas may
# apply the write to a temporary copy, so the frame is not guaranteed to be
# updated (and with Copy-on-Write enabled it never is).
mentor_names = [(random_first_name(), choice(last_names)) for _ in range(df.shape[0])]
df['first'] = [first_name for first_name, _ in mentor_names]
df['last'] = [last_name for _, last_name in mentor_names]
df

Unnamed: 0,Review,mentor_id,mentee_id,compound_score,positive,first,last
0,good and interesting,O41984876155545732,E55577062079585,0.6808,1,Charli,Taylor
1,"This class is very helpful to me. Currently, I...",O6546692836197236,E54699983255974,0.4754,1,Dustin,Sanders
2,like!Prof and TAs are helpful and the discussi...,O16893362738472578,E25563363815050,0.8843,1,Alessandra,Rogers
3,Easy to follow and includes a lot basic and im...,O16242555097262361,E56026179245734,0.5719,1,Reign,Taylor
4,Really nice teacher!I could got the point eazl...,O2529716609252145,E66288469510473,0.3266,1,Lainey,Watson
...,...,...,...,...,...,...,...
596,Good content but the Chinese translation is ba...,O18321689877786694,E12607468562613,-0.5859,0,Tommy,Cooper
597,"Not difficult to understand,interesting,helpfu...",O19897097203109266,E25255368502180,0.2755,1,Colter,Martinez
598,very useful course. And thank for professor's ...,O30417679071323234,E13012798326763,0.8070,1,Allison,Wilson
599,An interesting course and thank you for transl...,O16026662156057096,E44773836242141,0.6369,1,Andrea,Ruiz


## Run VADER analysis on the user review

In [14]:
def run_vader_sentiment_anaylsis_on_review(review):
    """
    Return 1 for a positive review from a mentee, otherwise 0.

    The review text is scored with ``one_vader_score`` and the resulting
    compound score is classified with ``is_positive``, so a single review
    gets the same label as the rows of the data frame built in this notebook.

    Args:
        review: Text of the review to classify.

    Returns:
        int: 1 if the review's compound score counts as positive, else 0.
    """
    return is_positive(one_vader_score(review))

In [15]:
# Preview the first five rows of the frame.
df.head()

Unnamed: 0,Review,mentor_id,mentee_id,compound_score,positive,first,last
0,good and interesting,O41984876155545732,E55577062079585,0.6808,1,Charli,Taylor
1,"This class is very helpful to me. Currently, I...",O6546692836197236,E54699983255974,0.4754,1,Dustin,Sanders
2,like!Prof and TAs are helpful and the discussi...,O16893362738472578,E25563363815050,0.8843,1,Alessandra,Rogers
3,Easy to follow and includes a lot basic and im...,O16242555097262361,E56026179245734,0.5719,1,Reign,Taylor
4,Really nice teacher!I could got the point eazl...,O2529716609252145,E66288469510473,0.3266,1,Lainey,Watson


In [16]:
# The generic name columns hold mentor names; give them explicit labels.
mentor_name_columns = {
    'first': 'first_name_mentor',
    'last': 'last_name_mentor',
}
df.rename(columns=mentor_name_columns, inplace=True)

In [17]:
# Draw one (first, last) name pair per row, calling the generators in the same
# order a row-by-row loop would, then assign each mentee column in one step.
# Whole-column assignment replaces the earlier placeholder-then-overwrite
# pattern, whose `df[col].iloc[i] = ...` writes are chained assignment and may
# land on a temporary copy instead of the frame.
mentee_names = [(random_first_name(), choice(last_names)) for _ in range(df.shape[0])]
df['first_name_mentee'] = [first_name for first_name, _ in mentee_names]
df['last_name_mentee'] = [last_name for _, last_name in mentee_names]
df

Unnamed: 0,Review,mentor_id,mentee_id,compound_score,positive,first_name_mentor,last_name_mentor,first_name_mentee,last_name_mentee
0,good and interesting,O41984876155545732,E55577062079585,0.6808,1,Charli,Taylor,Colson,Hall
1,"This class is very helpful to me. Currently, I...",O6546692836197236,E54699983255974,0.4754,1,Dustin,Sanders,Truett,Garcia
2,like!Prof and TAs are helpful and the discussi...,O16893362738472578,E25563363815050,0.8843,1,Alessandra,Rogers,Ariyah,Sanchez
3,Easy to follow and includes a lot basic and im...,O16242555097262361,E56026179245734,0.5719,1,Reign,Taylor,Haisley,Jimenez
4,Really nice teacher!I could got the point eazl...,O2529716609252145,E66288469510473,0.3266,1,Lainey,Watson,Zhuri,Evans
...,...,...,...,...,...,...,...,...,...
596,Good content but the Chinese translation is ba...,O18321689877786694,E12607468562613,-0.5859,0,Tommy,Cooper,Andrea,Kelly
597,"Not difficult to understand,interesting,helpfu...",O19897097203109266,E25255368502180,0.2755,1,Colter,Martinez,Nathalie,Bennet
598,very useful course. And thank for professor's ...,O30417679071323234,E13012798326763,0.8070,1,Allison,Wilson,Xavier,Lee
599,An interesting course and thank you for transl...,O16026662156057096,E44773836242141,0.6369,1,Andrea,Ruiz,Persephone,Mitchell


In [18]:
# Write the finished frame to disk as UTF-8 CSV, leaving out the index.
df.to_csv('reviews_with_sentiment.csv', index=False, encoding='utf-8')