In [1]:
# import dependencies
import pandas as pd

In [2]:
# Load the Vine review table from the resources folder and preview the first rows
vine_df = pd.read_csv(filepath_or_buffer="resources/vine_table.csv")
vine_df.head(5)

Unnamed: 0,review_id,star_rating,helpful_votes,total_votes,vine,verified_purchase
0,R9CO86UUJCAW5,3.0,0.0,0.0,N,Y
1,R3PR8X6QGVJ8B1,5.0,0.0,0.0,N,Y
2,R39BO2819ABUPF,4.0,0.0,0.0,N,Y
3,R3ADL7V6EGGEEP,4.0,0.0,0.0,N,Y
4,R1OXYPBPLVRMI5,5.0,0.0,0.0,N,Y


In [3]:
# Keep only the reviews with at least 20 total votes, then drop every row
# that still has a missing value in any column
filter_vine_df = vine_df.loc[vine_df["total_votes"].ge(20)].dropna()
filter_vine_df.head(5)

Unnamed: 0,review_id,star_rating,helpful_votes,total_votes,vine,verified_purchase
45,R11IBSD5E6HPSD,1.0,29.0,31.0,N,Y
180,R2KVWAYBPWK1OV,5.0,20.0,22.0,N,N
209,R1FLOE9E4ODIGR,5.0,34.0,38.0,N,Y
239,R6XTEZCSCUJ4J,5.0,20.0,23.0,N,Y
283,R2WVV9LQKIVMOX,5.0,23.0,23.0,N,N


In [4]:
# Keep only the reviews where helpful_votes make up at least 50% of total_votes
clean_vine_df = filter_vine_df.loc[
    filter_vine_df["helpful_votes"].div(filter_vine_df["total_votes"]).ge(0.5)
]
clean_vine_df.head(5)

Unnamed: 0,review_id,star_rating,helpful_votes,total_votes,vine,verified_purchase
45,R11IBSD5E6HPSD,1.0,29.0,31.0,N,Y
180,R2KVWAYBPWK1OV,5.0,20.0,22.0,N,N
209,R1FLOE9E4ODIGR,5.0,34.0,38.0,N,Y
239,R6XTEZCSCUJ4J,5.0,20.0,23.0,N,Y
283,R2WVV9LQKIVMOX,5.0,23.0,23.0,N,N


In [5]:
# Subset of the cleaned reviews that belong to the Vine program (paid): vine == "Y"
vine_review_df = clean_vine_df.loc[clean_vine_df["vine"].eq("Y")]
vine_review_df.head(n=10)

Unnamed: 0,review_id,star_rating,helpful_votes,total_votes,vine,verified_purchase
19527,RPJ7C4HP3BHXN,5.0,25.0,29.0,Y,N
21361,R3SB08XK0M7993,4.0,44.0,49.0,Y,N
33098,R26X1DACMCN3YF,3.0,16.0,21.0,Y,N
38418,R17HKKDE9ZRVOO,4.0,31.0,35.0,Y,N
39255,R1R7GNSCMO8U7I,4.0,24.0,28.0,Y,N
40555,R2T83WIPG3IX5D,4.0,72.0,76.0,Y,N
44926,RD006IWFCUA5A,5.0,122.0,133.0,Y,N
64738,REOQCBUVUPXP1,4.0,23.0,27.0,Y,N
65596,R2336R80ZEOKTX,5.0,63.0,63.0,Y,N
65750,RC1ZXE5GVL9TA,4.0,20.0,23.0,Y,N


In [6]:
# Count the Vine (paid) reviews via the number of non-null review_id values
total_reviews = vine_review_df["review_id"].count()
print("There are", total_reviews, "vine reviews.")

There are 21 vine reviews.


In [7]:
# Count the 5-star reviews among the Vine reviews.
# A boolean mask is used instead of groupby(...).count()[...].loc[5.0] so the
# result is 0, rather than a KeyError, when no review in the subset has a
# 5-star rating; it also avoids aggregating every column just to read one.
star_number = vine_review_df.loc[vine_review_df["star_rating"] == 5.0, "review_id"].count()
print("Out of the vine reviews,", star_number, "gave 5-stars")

Out of the vine reviews, 10 gave 5-stars


In [8]:
# Share of the Vine reviews rated 5 stars, as a percentage rounded to two decimals
star_percent = ((star_number / total_reviews) * 100).round(2)
print(star_percent, "% of the vine reviews had a 5-star rating")

47.62 % of the vine reviews had a 5-star rating


In [9]:
# Subset of the cleaned reviews that are NOT part of the Vine program (unpaid): vine == "N"
nonvine_review_df = clean_vine_df.loc[clean_vine_df["vine"].eq("N")]
nonvine_review_df.head(n=10)

Unnamed: 0,review_id,star_rating,helpful_votes,total_votes,vine,verified_purchase
45,R11IBSD5E6HPSD,1.0,29.0,31.0,N,Y
180,R2KVWAYBPWK1OV,5.0,20.0,22.0,N,N
209,R1FLOE9E4ODIGR,5.0,34.0,38.0,N,Y
239,R6XTEZCSCUJ4J,5.0,20.0,23.0,N,Y
283,R2WVV9LQKIVMOX,5.0,23.0,23.0,N,N
292,RYIU6GIB4TYT2,5.0,30.0,30.0,N,N
343,RNXGKYVCJDDL7,5.0,28.0,28.0,N,Y
354,R1E8V7J0K61754,5.0,18.0,20.0,N,Y
504,RK52L3B5733BH,5.0,22.0,22.0,N,N
544,R213PW7RSI6Z7N,5.0,48.0,50.0,N,Y


In [10]:
# Count the non-Vine (unpaid) reviews via the number of non-null review_id values
total_reg_reviews = nonvine_review_df["review_id"].count()
print("There are", total_reg_reviews, "non-Vine reviews.")

There are 6690 non-Vine reviews.


In [11]:
# Count the 5-star reviews among the non-Vine reviews.
# A boolean mask is used instead of groupby(...).count()[...].loc[5.0] so the
# result is 0, rather than a KeyError, when no review in the subset has a
# 5-star rating; it also avoids aggregating every column just to read one.
reg_star_number = nonvine_review_df.loc[nonvine_review_df["star_rating"] == 5.0, "review_id"].count()
print("Out of the non-Vine reviews,", reg_star_number, "gave 5-stars")

Out of the non-Vine reviews, 3448 gave 5-stars


In [12]:
# Share of the non-Vine reviews rated 5 stars, as a percentage rounded to two decimals
reg_star_percentage = ((reg_star_number / total_reg_reviews) * 100).round(2)
print(reg_star_percentage, "% of the non-vine reviews had a 5-star rating")

51.54 % of the non-vine reviews had a 5-star rating


In [13]:
# Write both review subsets to CSV files; index=False keeps the DataFrame index
# out of the written files
nonvine_review_df.to_csv(path_or_buf="resources/non_vine_reviews.csv", index=False)
vine_review_df.to_csv(path_or_buf="resources/vine_reviews.csv", index=False)