## Generating mock review data where a mentor gives a review to a mentee

In [1]:
import sys
import os
# Append the parent directory to sys.path so that modules in sibling directories can be imported.
sys.path.append(os.path.abspath('../'))

import pandas as pd
import random as r
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

from data_generators.data_options import *

In [2]:
# Load the raw reviews and discard the Id and Label columns in one chained step.
df = pd.read_csv('../data_generators/review.csv').drop(columns=['Id', 'Label'])

df

Unnamed: 0,Review
0,good and interesting
1,"This class is very helpful to me. Currently, I..."
2,like!Prof and TAs are helpful and the discussi...
3,Easy to follow and includes a lot basic and im...
4,Really nice teacher!I could got the point eazl...
...,...
596,Good content but the Chinese translation is ba...
597,"Not difficult to understand,interesting,helpfu..."
598,very useful course. And thank for professor's ...
599,An interesting course and thank you for transl...


## Adding mock mentee and mentor IDs to the data frame

In [3]:
# Mock mentor IDs: the letter "O" followed by a random integer, one per row.
# The earlier hash() wrapper was removed: hashing a Python int of this size
# returns the same int on 64-bit CPython (and a platform-dependent value
# elsewhere), so it added nothing and only obscured how the ID is built.
df['mentor_id'] = [f"O{r.randint(2000000, 70000000000000000)}" for _ in range(df.shape[0])]

In [4]:
# Mock mentee IDs: the letter "E" followed by a random integer, one per row.
df['mentee_id'] = [f"E{r.randint(1000000, 70000000000000)}" for _ in range(len(df))]

In [5]:
# Display the frame to check the new mentor_id and mentee_id columns.
df

Unnamed: 0,Review,mentor_id,mentee_id
0,good and interesting,O44503340950231362,E28298780577335
1,"This class is very helpful to me. Currently, I...",O23977281253334163,E32632766128800
2,like!Prof and TAs are helpful and the discussi...,O45128710456502674,E5935827629469
3,Easy to follow and includes a lot basic and im...,O14234770968590402,E9667247467388
4,Really nice teacher!I could got the point eazl...,O44438918637394939,E33313715789517
...,...,...,...
596,Good content but the Chinese translation is ba...,O49809001659291221,E55904571669504
597,"Not difficult to understand,interesting,helpfu...",O5628543595822208,E11532801367124
598,very useful course. And thank for professor's ...,O47433682692219883,E11192196511583
599,An interesting course and thank you for transl...,O36281502326934516,E45770662725270


In [6]:
# A single analyzer instance is created once and reused for every review.
vader = SentimentIntensityAnalyzer()

def one_vader_score(text):
    """Score `text` with the VADER analyzer and return only its compound value."""
    scores = vader.polarity_scores(text)
    return scores['compound']

In [7]:
# Score every review text and keep the result in a new compound_score column.
df["compound_score"] = df['Review'].map(one_vader_score)

In [8]:
def is_positive(number):
    '''
    Flag a compound score as positive.

    Gives 1 when the score is at least 0.05, and 0 for anything lower
    (neutral or negative reviews).
    '''
    return 1 if number >= 0.05 else 0

In [9]:
# Turn each compound score into a 1/0 positive flag.
df['positive'] = df['compound_score'].map(is_positive)

In [10]:
# Preview the first rows, now including compound_score and the positive flag.
df.head()

Unnamed: 0,Review,mentor_id,mentee_id,compound_score,positive
0,good and interesting,O44503340950231362,E28298780577335,0.6808,1
1,"This class is very helpful to me. Currently, I...",O23977281253334163,E32632766128800,0.4754,1
2,like!Prof and TAs are helpful and the discussi...,O45128710456502674,E5935827629469,0.8843,1
3,Easy to follow and includes a lot basic and im...,O14234770968590402,E9667247467388,0.5719,1
4,Really nice teacher!I could got the point eazl...,O44438918637394939,E33313715789517,0.3266,1


In [11]:
# Count how many reviews were flagged positive (1) versus not positive (0).
df['positive'].value_counts()

1    523
0     78
Name: positive, dtype: int64

In [12]:
# Create the two name columns with a placeholder value; they are filled in
# with generated names in the following cell.
df['first'] = df['last'] = 'dummy'

In [13]:
# This notebook-wide pandas option is left exactly as before so global state
# is unchanged; the assignment below does not rely on it and it can be removed
# once no cell in the notebook uses chained assignment.
pd.options.mode.chained_assignment = None

# Draw one (first, last) name pair per row, in the same per-row order as
# before, then assign each column in a single step. The previous
# `df['first'].iloc[i] = ...` form is chained assignment: pandas warns about
# it, and with copy-on-write enabled the value never reaches `df`.
mentor_names = [(random_first_name(), choice(last_names)) for _ in range(df.shape[0])]
df['first'] = [first_name for first_name, _ in mentor_names]
df['last'] = [last_name for _, last_name in mentor_names]
df

Unnamed: 0,Review,mentor_id,mentee_id,compound_score,positive,first,last
0,good and interesting,O44503340950231362,E28298780577335,0.6808,1,Kadence,Wood
1,"This class is very helpful to me. Currently, I...",O23977281253334163,E32632766128800,0.4754,1,Wayne,Collins
2,like!Prof and TAs are helpful and the discussi...,O45128710456502674,E5935827629469,0.8843,1,Sydney,Scott
3,Easy to follow and includes a lot basic and im...,O14234770968590402,E9667247467388,0.5719,1,Mira,Mitchell
4,Really nice teacher!I could got the point eazl...,O44438918637394939,E33313715789517,0.3266,1,Francisco,Green
...,...,...,...,...,...,...,...
596,Good content but the Chinese translation is ba...,O49809001659291221,E55904571669504,-0.5859,0,Blaze,Flores
597,"Not difficult to understand,interesting,helpfu...",O5628543595822208,E11532801367124,0.2755,1,Angel,Patel
598,very useful course. And thank for professor's ...,O47433682692219883,E11192196511583,0.8070,1,Catherine,Clark
599,An interesting course and thank you for transl...,O36281502326934516,E45770662725270,0.6369,1,Roy,Miller


# Run VADER analysis on the user review

In [14]:
def run_vader_sentiment_anaylsis_on_review(review):
    """
    Classify a single review as positive or not using VADER.

    `review` is the review text to score. Returns 1 when its VADER
    compound score is at least 0.05, otherwise 0 (neutral or negative).

    Previously this function only defined an inner helper and fell
    through, so it always returned None.
    """
    # Uses the notebook-level `vader` analyzer created in an earlier cell.
    compound_score = vader.polarity_scores(review)['compound']
    # Same 0.05 cut-off as the notebook's is_positive() helper.
    return 1 if compound_score >= 0.05 else 0

In [15]:
# Preview the first rows with the generated first/last name columns.
df.head()

Unnamed: 0,Review,mentor_id,mentee_id,compound_score,positive,first,last
0,good and interesting,O44503340950231362,E28298780577335,0.6808,1,Kadence,Wood
1,"This class is very helpful to me. Currently, I...",O23977281253334163,E32632766128800,0.4754,1,Wayne,Collins
2,like!Prof and TAs are helpful and the discussi...,O45128710456502674,E5935827629469,0.8843,1,Sydney,Scott
3,Easy to follow and includes a lot basic and im...,O14234770968590402,E9667247467388,0.5719,1,Mira,Mitchell
4,Really nice teacher!I could got the point eazl...,O44438918637394939,E33313715789517,0.3266,1,Francisco,Green


In [16]:
# Relabel the generic name columns so it is explicit that they hold mentor names.
mentor_column_names = {'first': 'first_name_mentor', 'last': 'last_name_mentor'}
df = df.rename(columns=mentor_column_names)

In [17]:
# Draw one (first, last) name pair per row for the mentee and assign each
# column in a single step. This replaces the 'dummy' placeholder plus the
# row-by-row `df[col].iloc[i] = ...` writes, which are chained assignment:
# pandas warns about them, and with copy-on-write enabled the values never
# reach `df`.
mentee_names = [(random_first_name(), choice(last_names)) for _ in range(df.shape[0])]
df['first_name_mentee'] = [first_name for first_name, _ in mentee_names]
df['last_name_mentee'] = [last_name for _, last_name in mentee_names]
df

Unnamed: 0,Review,mentor_id,mentee_id,compound_score,positive,first_name_mentor,last_name_mentor,first_name_mentee,last_name_mentee
0,good and interesting,O44503340950231362,E28298780577335,0.6808,1,Kadence,Wood,Heath,White
1,"This class is very helpful to me. Currently, I...",O23977281253334163,E32632766128800,0.4754,1,Wayne,Collins,Abner,Miller
2,like!Prof and TAs are helpful and the discussi...,O45128710456502674,E5935827629469,0.8843,1,Sydney,Scott,Juliet,Campbell
3,Easy to follow and includes a lot basic and im...,O14234770968590402,E9667247467388,0.5719,1,Mira,Mitchell,Josie,Ruiz
4,Really nice teacher!I could got the point eazl...,O44438918637394939,E33313715789517,0.3266,1,Francisco,Green,Marlowe,Ramirez
...,...,...,...,...,...,...,...,...,...
596,Good content but the Chinese translation is ba...,O49809001659291221,E55904571669504,-0.5859,0,Blaze,Flores,Vincenzo,Ruiz
597,"Not difficult to understand,interesting,helpfu...",O5628543595822208,E11532801367124,0.2755,1,Angel,Patel,Isaiah,Parker
598,very useful course. And thank for professor's ...,O47433682692219883,E11192196511583,0.8070,1,Catherine,Clark,Laylah,Harris
599,An interesting course and thank you for transl...,O36281502326934516,E45770662725270,0.6369,1,Roy,Miller,Abel,Young


In [18]:
# Write the finished frame to disk as UTF-8 CSV, leaving out the row index.
output_path = 'reviews_with_sentiment.csv'
df.to_csv(output_path, index=False, encoding='utf-8')