## Generating mock review data where a mentor gives a review to a mentee

## Generating mock review data

In [25]:
import pandas as pd
import random as r

In [26]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

vader = SentimentIntensityAnalyzer()

# compound score of the sentiment
def vader_score(text: list) -> list:
    """Score every entry of ``text`` with VADER and collect the compound values.

    Args:
        text: Items to analyse; each one is passed to ``vader.polarity_scores``.

    Returns:
        A list holding the ``"compound"`` entry of each result, in input order.
    """
    compound_scores = []
    for entry in text:
        polarity = vader.polarity_scores(entry)
        compound_scores.append(polarity["compound"])
    return compound_scores

In [27]:
# Load the review data from 'review.csv' (relative path, resolved against the
# current working directory) into the working frame `df`.
df = pd.read_csv('review.csv')

In [28]:
# Remove the 'Id' and 'Label' columns; all other columns are kept unchanged.
# `df` is rebound instead of mutated with inplace=True, and errors='ignore'
# skips labels that are no longer present, so re-running this cell does not
# raise KeyError once the columns have already been dropped.
df = df.drop(columns=['Id', 'Label'], errors='ignore')
df

Unnamed: 0,Review
0,good and interesting
1,"This class is very helpful to me. Currently, I..."
2,like!Prof and TAs are helpful and the discussi...
3,Easy to follow and includes a lot basic and im...
4,Really nice teacher!I could got the point eazl...
...,...
596,Good content but the Chinese translation is ba...
597,"Not difficult to understand,interesting,helpfu..."
598,very useful course. And thank for professor's ...
599,An interesting course and thank you for transl...


## Adding mock mentee and mentor IDs to the data frame

In [29]:
# Mock mentor IDs: the letter "O" followed by a random integer, one per row.
# A dedicated, seeded generator makes the IDs identical on every run of this
# cell and leaves the global `random` state untouched.
# The former hash() wrapper is dropped: CPython hashes a non-negative int below
# sys.hash_info.modulus (2**61 - 1 on 64-bit builds) to itself, so wrapping
# these values in hash() did not change them.
MENTOR_ID_SEED = 42
mentor_rng = r.Random(MENTOR_ID_SEED)
df['mentor_id'] = ["O" + str(mentor_rng.randint(2000000, 70000000000000000)) for _ in range(len(df))]

In [30]:
# Mock mentee IDs: the letter "E" followed by a random integer, one per row.
# A dedicated, seeded generator makes the IDs identical on every run of this
# cell and leaves the global `random` state untouched.
MENTEE_ID_SEED = 43
mentee_rng = r.Random(MENTEE_ID_SEED)
df['mentee_id'] = ["E" + str(mentee_rng.randint(1000000, 70000000000000)) for _ in range(len(df))]

In [31]:
# Display the frame to confirm the 'mentor_id' and 'mentee_id' columns were added.
df

Unnamed: 0,Review,mentor_id,mentee_id
0,good and interesting,O39355616041357391,E47787056121618
1,"This class is very helpful to me. Currently, I...",O63711415249884942,E6346282026941
2,like!Prof and TAs are helpful and the discussi...,O16815631995085741,E64575036208164
3,Easy to follow and includes a lot basic and im...,O20184069259210825,E58600201785405
4,Really nice teacher!I could got the point eazl...,O34181235347619286,E26707882022356
...,...,...,...
596,Good content but the Chinese translation is ba...,O10389185951347790,E52381453386515
597,"Not difficult to understand,interesting,helpfu...",O61229146869824254,E64169106164949
598,very useful course. And thank for professor's ...,O64067248228440244,E20782132835034
599,An interesting course and thank you for transl...,O11742605999752129,E3410031542639


In [32]:
def one_vader_score(text):
    """Return the VADER compound score for a single piece of text.

    ``text`` is handed to ``vader.polarity_scores`` and the value stored
    under the ``'compound'`` key of the result is returned.
    """
    polarity = vader.polarity_scores(text)
    return polarity['compound']

In [33]:
# Preview only: score every review with one_vader_score and display the
# resulting Series. Nothing is stored on `df` by this cell.
df['Review'].apply(one_vader_score)


0      0.6808
1      0.4754
2      0.8843
3      0.5719
4      0.3266
        ...  
596   -0.5859
597    0.2755
598    0.8070
599    0.6369
600    0.3084
Name: Review, Length: 601, dtype: float64

In [34]:
# Score each review with one_vader_score, then store the resulting Series
# on the frame as the 'compound_score' column.
review_scores = df['Review'].apply(one_vader_score)
df["compound_score"] = review_scores

In [35]:
# Show the first rows to check the new 'compound_score' column.
df.head()

Unnamed: 0,Review,mentor_id,mentee_id,compound_score
0,good and interesting,O39355616041357391,E47787056121618,0.6808
1,"This class is very helpful to me. Currently, I...",O63711415249884942,E6346282026941,0.4754
2,like!Prof and TAs are helpful and the discussi...,O16815631995085741,E64575036208164,0.8843
3,Easy to follow and includes a lot basic and im...,O20184069259210825,E58600201785405,0.5719
4,Really nice teacher!I could got the point eazl...,O34181235347619286,E26707882022356,0.3266


In [36]:
def is_positive(number, threshold=0.05):
    """Flag a score as positive (1) or not positive (0).

    Args:
        number: Score to classify.
        threshold: Smallest score still counted as positive. Defaults to
            0.05, the cut-off this function previously hard-coded, so
            existing single-argument calls behave exactly as before.

    Returns:
        1 if ``number >= threshold``, otherwise 0. Every value below the
        threshold maps to 0, and so does NaN, because NaN compares False
        against any number.
    """
    return 1 if number >= threshold else 0

In [37]:
# Run is_positive over every compound score, then store the resulting
# 1/0 flags on the frame as the 'positive' column.
positive_flags = df['compound_score'].apply(is_positive)
df['positive'] = positive_flags

In [38]:
# Show the first rows to check the new 'positive' column.
df.head()

Unnamed: 0,Review,mentor_id,mentee_id,compound_score,positive
0,good and interesting,O39355616041357391,E47787056121618,0.6808,1
1,"This class is very helpful to me. Currently, I...",O63711415249884942,E6346282026941,0.4754,1
2,like!Prof and TAs are helpful and the discussi...,O16815631995085741,E64575036208164,0.8843,1
3,Easy to follow and includes a lot basic and im...,O20184069259210825,E58600201785405,0.5719,1
4,Really nice teacher!I could got the point eazl...,O34181235347619286,E26707882022356,0.3266,1


In [39]:
# Count how many rows carry each value of the 'positive' flag (1 vs 0).
df['positive'].value_counts()

1    523
0     78
Name: positive, dtype: int64

In [40]:
# Write the frame, including the ID, score and flag columns added above,
# to 'reviews_with_sentiment.csv' as UTF-8.
# NOTE(review): to_csv writes the row index by default, so the file gets an
# unnamed leading column; pass index=False if consumers should not see it — confirm.
df.to_csv('reviews_with_sentiment.csv', encoding = 'utf-8')