## Generating mock review data in which a mentor reviews a mentee

In [1]:
import sys
import os
# Add the repository root to sys.path so that modules in sibling directories can be imported.
sys.path.append(os.path.abspath('../'))

import pandas as pd
import random as r
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

from data_generators.data_options import *

In [2]:
# Read the raw reviews and discard the 'Id' and 'Label' columns in one step.
df = pd.read_csv('../data_generators/review.csv').drop(columns=['Id', 'Label'])
df

Unnamed: 0,Review
0,good and interesting
1,"This class is very helpful to me. Currently, I..."
2,like!Prof and TAs are helpful and the discussi...
3,Easy to follow and includes a lot basic and im...
4,Really nice teacher!I could got the point eazl...
...,...
596,Good content but the Chinese translation is ba...
597,"Not difficult to understand,interesting,helpfu..."
598,very useful course. And thank for professor's ...
599,An interesting course and thank you for transl...


## Adding mock mentor and mentee IDs to the data frame

In [3]:
# Mentor IDs: the letter "O" followed by a random integer, one per row.
# The drawn integer is used directly. Wrapping it in hash() added nothing:
# for an int of this size hash() returns the int itself on 64-bit CPython,
# and on other builds it would only make the IDs platform-dependent.
df['mentor_id'] = ["O" + str(r.randint(2000000, 70000000000000000)) for _ in range(len(df))]

In [4]:
# Mentee IDs: the letter "E" followed by a random integer, one per row.
df['mentee_id'] = [f"E{r.randint(1000000, 70000000000000)}" for _ in range(len(df))]

In [5]:
# Display the frame to inspect the newly added mentor_id and mentee_id columns.
df

Unnamed: 0,Review,mentor_id,mentee_id
0,good and interesting,O7974805342017122,E62334911376169
1,"This class is very helpful to me. Currently, I...",O59416778240949068,E27773356675966
2,like!Prof and TAs are helpful and the discussi...,O49606449880069531,E30779409900267
3,Easy to follow and includes a lot basic and im...,O7750754913692512,E31875519906553
4,Really nice teacher!I could got the point eazl...,O4603721631244430,E65598346207813
...,...,...,...
596,Good content but the Chinese translation is ba...,O39672243842765488,E64097709554826
597,"Not difficult to understand,interesting,helpfu...",O8497217320592901,E26802833025707
598,very useful course. And thank for professor's ...,O47509504387053970,E23533619460288
599,An interesting course and thank you for transl...,O11350401248967897,E14185317102093


In [6]:
# A single analyzer instance, created once and reused for every review.
vader = SentimentIntensityAnalyzer()


def one_vader_score(text):
    """Run VADER on ``text`` and return only the 'compound' entry of its scores."""
    scores = vader.polarity_scores(text)
    return scores['compound']

In [7]:
# Score every review text and keep the result alongside it.
df["compound_score"] = df["Review"].map(one_vader_score)

In [8]:
def is_positive(number, threshold=0.05):
    '''
    Flag a compound sentiment score as positive or not.

    Parameters
    ----------
    number : float
        The compound score to classify.
    threshold : float, optional
        Smallest score that counts as positive (inclusive). Defaults to
        0.05, the cutoff that was previously hard-coded here and the
        conventional VADER boundary for positive sentiment.

    Returns
    -------
    int
        1 when ``number >= threshold``; 0 for neutral or negative scores.
        A NaN score also yields 0, because the comparison is false.
    '''
    return 1 if number >= threshold else 0

In [9]:
# Turn each compound score into a 1/0 positive flag.
df['positive'] = df['compound_score'].map(is_positive)

In [10]:
# Preview the first rows, now including the compound_score and positive columns.
df.head()

Unnamed: 0,Review,mentor_id,mentee_id,compound_score,positive
0,good and interesting,O7974805342017122,E62334911376169,0.6808,1
1,"This class is very helpful to me. Currently, I...",O59416778240949068,E27773356675966,0.4754,1
2,like!Prof and TAs are helpful and the discussi...,O49606449880069531,E30779409900267,0.8843,1
3,Easy to follow and includes a lot basic and im...,O7750754913692512,E31875519906553,0.5719,1
4,Really nice teacher!I could got the point eazl...,O4603721631244430,E65598346207813,0.3266,1


In [11]:
# Count how many reviews were flagged positive (1) versus not positive (0).
df['positive'].value_counts()

1    523
0     78
Name: positive, dtype: int64

In [12]:
# Create both name columns, filled with a placeholder string on every row.
df['first'] = df['last'] = 'dummy'

In [13]:
# Draw one (first, last) name pair per row, then assign each column in a
# single statement. Whole-column assignment replaces the previous
# df['first'].iloc[i] = ... pattern: that is chained indexing, which is not
# guaranteed to write back to the frame (under pandas Copy-on-Write it never
# does). With no chained assignment left in this cell, the
# SettingWithCopyWarning no longer has to be silenced here.
mentor_names = [(random_first_name(), choice(last_names)) for _ in range(len(df))]
df['first'] = [first for first, _ in mentor_names]
df['last'] = [last for _, last in mentor_names]
df

Unnamed: 0,Review,mentor_id,mentee_id,compound_score,positive,first,last
0,good and interesting,O7974805342017122,E62334911376169,0.6808,1,Allyson,Flores
1,"This class is very helpful to me. Currently, I...",O59416778240949068,E27773356675966,0.4754,1,Kayson,Roberts
2,like!Prof and TAs are helpful and the discussi...,O49606449880069531,E30779409900267,0.8843,1,Samuel,Campbell
3,Easy to follow and includes a lot basic and im...,O7750754913692512,E31875519906553,0.5719,1,Damon,Stewart
4,Really nice teacher!I could got the point eazl...,O4603721631244430,E65598346207813,0.3266,1,Cecilia,Flores
...,...,...,...,...,...,...,...
596,Good content but the Chinese translation is ba...,O39672243842765488,E64097709554826,-0.5859,0,Angie,Watson
597,"Not difficult to understand,interesting,helpfu...",O8497217320592901,E26802833025707,0.2755,1,Tessa,Moore
598,very useful course. And thank for professor's ...,O47509504387053970,E23533619460288,0.8070,1,Mae,Wright
599,An interesting course and thank you for transl...,O11350401248967897,E14185317102093,0.6369,1,Ellison,Bennet


In [14]:
# Label the generated name columns as belonging to the mentor.
df = df.rename(columns={'first': 'first_name_mentor', 'last': 'last_name_mentor'})

In [15]:
# Draw one (first, last) name pair per row for the mentee and assign each
# column in a single statement. This replaces the 'dummy' placeholder plus
# per-row df[col].iloc[i] = ... writes: that chained indexing is not
# guaranteed to modify the frame (under pandas Copy-on-Write it never does).
mentee_names = [(random_first_name(), choice(last_names)) for _ in range(len(df))]
df['first_name_mentee'] = [first for first, _ in mentee_names]
df['last_name_mentee'] = [last for _, last in mentee_names]
df

Unnamed: 0,Review,mentor_id,mentee_id,compound_score,positive,first_name_mentor,last_name_mentor,first_name_mentee,last_name_mentee
0,good and interesting,O7974805342017122,E62334911376169,0.6808,1,Allyson,Flores,Remy,Walker
1,"This class is very helpful to me. Currently, I...",O59416778240949068,E27773356675966,0.4754,1,Kayson,Roberts,Maliyah,Campbell
2,like!Prof and TAs are helpful and the discussi...,O49606449880069531,E30779409900267,0.8843,1,Samuel,Campbell,Mae,Brown
3,Easy to follow and includes a lot basic and im...,O7750754913692512,E31875519906553,0.5719,1,Damon,Stewart,Elliot,Watson
4,Really nice teacher!I could got the point eazl...,O4603721631244430,E65598346207813,0.3266,1,Cecilia,Flores,Asa,Ramirez
...,...,...,...,...,...,...,...,...,...
596,Good content but the Chinese translation is ba...,O39672243842765488,E64097709554826,-0.5859,0,Angie,Watson,Kira,Cox
597,"Not difficult to understand,interesting,helpfu...",O8497217320592901,E26802833025707,0.2755,1,Tessa,Moore,Nylah,James
598,very useful course. And thank for professor's ...,O47509504387053970,E23533619460288,0.8070,1,Mae,Wright,Jon,Flores
599,An interesting course and thank you for transl...,O11350401248967897,E14185317102093,0.6369,1,Ellison,Bennet,Cecelia,Ramirez


In [16]:
# Write the finished frame to disk as UTF-8 CSV, leaving out the index.
df.to_csv('reviews_with_sentiment.csv', index=False, encoding='utf-8')