In [1]:
# Import dependencies
import pandas as pd
import numpy as np
import os

In [2]:
# Load the Vine review table (Tables/vine_table.csv) into a DataFrame
vine_data = os.path.join("Tables", "vine_table.csv")
df = pd.read_csv(filepath_or_buffer=vine_data)
# Show up to the first twenty rows as this cell's output
df.head(n=20)

Unnamed: 0,review_id,star_rating,helpful_votes,total_votes,vine,verified_purchase
0,R203HPW78Z7N4K,5,0,0,N,Y
1,R2EAIGVLEALSP3,5,1,1,N,Y
2,R1K1CD73HHLILA,5,0,0,N,Y
3,R2KZBMOFRMYOPO,5,1,1,N,Y
4,R6BIZOZY6UD01,5,0,0,N,Y
5,R1MCXZFNF8E7Y0,1,0,0,N,Y
6,R3EMB3E3ODR6BW,5,2,2,N,Y
7,RJTONVTTOPJ5S,5,0,0,N,Y
8,R21U5QZ2CQECUM,4,0,0,N,Y
9,RL2BBC51H89DH,4,0,0,N,Y


In [3]:
# Keep only the reviews that received 20 or more total votes, ordered by review ID
has_enough_votes = df["total_votes"].ge(20)
popular_df = df[has_enough_votes].sort_values(by="review_id")
popular_df.head(n=20)

Unnamed: 0,review_id,star_rating,helpful_votes,total_votes,vine,verified_purchase
76037,R100212BMGLGI0,1,18,21,N,N
45827,R1002U1ZIY5BUT,5,44,46,N,N
44623,R100YVAS0FCQ9C,5,36,37,N,N
79894,R101W5V3SZ8LFQ,1,19,25,N,Y
38378,R1028R2IEONQZL,5,10,54,N,N
47393,R10328L5NKZHYU,2,24,27,N,Y
30827,R104QJT5QCZKZ8,1,38,50,N,N
94719,R1055S1327MCXB,5,45,48,N,N
73860,R1057TGFKBGGHS,5,23,23,N,N
93275,R1058IT10ZEUI7,5,69,72,N,N


In [4]:
# Keep the popular reviews whose helpful votes are 50% or more of their total votes
helpful_ratio = popular_df["helpful_votes"].div(popular_df["total_votes"])
helpful_df = popular_df[helpful_ratio.ge(0.5)].sort_values(by="review_id")
helpful_df.head(n=20)

Unnamed: 0,review_id,star_rating,helpful_votes,total_votes,vine,verified_purchase
76037,R100212BMGLGI0,1,18,21,N,N
45827,R1002U1ZIY5BUT,5,44,46,N,N
44623,R100YVAS0FCQ9C,5,36,37,N,N
79894,R101W5V3SZ8LFQ,1,19,25,N,Y
47393,R10328L5NKZHYU,2,24,27,N,Y
30827,R104QJT5QCZKZ8,1,38,50,N,N
94719,R1055S1327MCXB,5,45,48,N,N
73860,R1057TGFKBGGHS,5,23,23,N,N
93275,R1058IT10ZEUI7,5,69,72,N,N
89325,R1058JBPKY772B,1,22,25,N,N


In [5]:
# Vine (paid) reviews: helpful reviews whose "vine" flag is "Y", ordered by review ID
is_vine = helpful_df["vine"].eq("Y")
vine_df = helpful_df[is_vine].sort_values(by="review_id")
vine_df.head(n=20)

Unnamed: 0,review_id,star_rating,helpful_votes,total_votes,vine,verified_purchase
60213,R15NQCQUNOSI4U,4,19,22,Y,N
84147,R15VXK8I0FPU0A,4,22,23,Y,N
85877,R19OUHMKNXU3DC,4,76,78,Y,N
62875,R1B3OGO36CJKDA,5,15,20,Y,Y
86005,R1CXC7HG447Y0A,4,65,69,Y,N
26403,R1IDQ0G1S7J9TN,5,178,196,Y,N
36014,R1L2R8OHO171YG,5,66,68,Y,N
83514,R1SYUVX7GZZ469,4,46,51,Y,N
10115,R20O97P5V9CZ21,3,21,27,Y,N
83368,R214D7Q3CO5W5J,5,51,52,Y,N


In [6]:
# Non-Vine (unpaid) reviews: helpful reviews whose "vine" flag is "N", ordered by review ID
is_nonvine = helpful_df["vine"].eq("N")
nonvine_df = helpful_df[is_nonvine].sort_values(by="review_id")
nonvine_df.head(n=20)

Unnamed: 0,review_id,star_rating,helpful_votes,total_votes,vine,verified_purchase
76037,R100212BMGLGI0,1,18,21,N,N
45827,R1002U1ZIY5BUT,5,44,46,N,N
44623,R100YVAS0FCQ9C,5,36,37,N,N
79894,R101W5V3SZ8LFQ,1,19,25,N,Y
47393,R10328L5NKZHYU,2,24,27,N,Y
30827,R104QJT5QCZKZ8,1,38,50,N,N
94719,R1055S1327MCXB,5,45,48,N,N
73860,R1057TGFKBGGHS,5,23,23,N,N
93275,R1058IT10ZEUI7,5,69,72,N,N
89325,R1058JBPKY772B,1,22,25,N,N


In [7]:
# Number of non-null review IDs in the helpful set and in each Vine / non-Vine subset
total_count, vine_count, nonvine_count = (
    frame["review_id"].count() for frame in (helpful_df, vine_df, nonvine_df)
)

# Same tallies restricted to rows with a star rating of exactly 5
five_count, five_vine, five_nonvine = (
    frame.loc[frame["star_rating"] == 5, "review_id"].count()
    for frame in (helpful_df, vine_df, nonvine_df)
)

In [8]:
# Express each 5-star tally as a percentage of the review count of its own group
star_percent, vine_percent, nonvine_percent = (
    five_star / group_size * 100
    for five_star, group_size in (
        (five_count, total_count),
        (five_vine, vine_count),
        (five_nonvine, nonvine_count),
    )
)

In [9]:
# Report the counts and 5-star percentages, one sentence per output line
print(
    f"There are {total_count:,} reviews that are considered helpful.",
    f"{vine_count:,} reviews come from Vine members; and {nonvine_count:,} reviews come from non-Vine members.",
    f"{five_count:,} are 5-Star reviews, comprising {star_percent:.3f}% of all the helpful reviews.",
    f"{five_vine:,} Vine reviews have a 5-Star rating, forming {vine_percent:.3f}% of the Vine reviews.",
    f"{five_nonvine:,} non-Vine reviews have a 5-Star rating, forming {nonvine_percent:.3f}% of the non-Vine reviews.",
    sep="\n",
)

There are 4,992 reviews that are considered helpful.
35 reviews come from Vine members; and 4,957 reviews come from non-Vine members.
1,981 are 5-Star reviews, comprising 39.683% of all the helpful reviews.
18 Vine reviews have a 5-Star rating, forming 51.429% of the Vine reviews.
1,963 non-Vine reviews have a 5-Star rating, forming 39.601% of the non-Vine reviews.
