In [17]:
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Load the product catalogue and its customer reviews; both CSV paths are
# relative, so they are resolved against the current working directory.
ice_cream_products, ice_cream_reviews = (
    pd.read_csv(csv_name)
    for csv_name in ('combined_products.csv', 'combined_reviews.csv')
)

# Getting a first look at both datasets: head/tail previews, summary statistics, shape, dtypes, and missing-value counts

In [18]:
# Preview the first five rows of the products table.
ice_cream_products.head()

Unnamed: 0,brand,key,name,subhead,description,rating,rating_count,ingredients
0,bj,0_bj,Salted Caramel Core,Sweet Cream Ice Cream with Blonde Brownies & a...,Find your way to the ultimate ice cream experi...,3.7,208,"CREAM, SKIM MILK, LIQUID SUGAR (SUGAR, WATER),..."
1,bj,1_bj,Netflix & Chilll'd™,Peanut Butter Ice Cream with Sweet & Salty Pre...,There’s something for everyone to watch on Net...,4.0,127,"CREAM, SKIM MILK, LIQUID SUGAR (SUGAR, WATER),..."
2,bj,2_bj,Chip Happens,A Cold Mess of Chocolate Ice Cream with Fudge ...,Sometimes “chip” happens and everything’s a me...,4.7,130,"CREAM, LIQUID SUGAR (SUGAR, WATER), SKIM MILK,..."
3,bj,3_bj,Cannoli,Mascarpone Ice Cream with Fudge-Covered Pastry...,As a Limited Batch that captured the rapture o...,3.6,70,"CREAM, SKIM MILK, LIQUID SUGAR (SUGAR, WATER),..."
4,bj,4_bj,Gimme S’more!™,Toasted Marshmallow Ice Cream with Chocolate C...,It’s a gimme: there’s always room for s’more. ...,4.5,281,"CREAM, SKIM MILK, WATER, LIQUID SUGAR (SUGAR, ..."


In [19]:
# Preview the last five rows of the products table.
ice_cream_products.tail()

Unnamed: 0,brand,key,name,subhead,description,rating,rating_count,ingredients
236,breyers,64_breyers,CINNABON®,,Calling all cinnamon roll lovers! We teamed up...,4.0,28,"MILK, CORN SYRUP, SUGAR, BROWN SUGAR, SOYBEAN ..."
237,breyers,65_breyers,CarbSmart™ Caramel Swirl Bar,,Watching your grams of sugar or carbs? Try our...,4.7,18,"MILK, WATER, CARAMEL SWIRL, SUGAR, WATER, CORN..."
238,breyers,66_breyers,Layered Dessert S'mores,,Calling all S'mores lovers! Make sure you don'...,2.5,31,"MILK, CORN SYRUP, SUGAR, WHEAT FLOUR, BUTTER, ..."
239,breyers,67_breyers,Layered Dessert Peach Cobbler,,Love peach cobbler topped with Breyers®? Then ...,3.2,38,"MILK, CORN SYRUP, ENRICHED WHEAT FLOUR, WHEAT ..."
240,breyers,68_breyers,Layered Dessert Brownie Cheesecake,,Love brownie cheesecake? What about Breyers®? ...,2.8,25,"MILK, CORN SYRUP, SUGAR, ENRICHED WHEAT FLOUR,..."


In [20]:
# Summary statistics for the numeric product columns (rating, rating_count).
ice_cream_products.describe()

Unnamed: 0,rating,rating_count
count,241.0,241.0
mean,4.223237,90.016598
std,0.620383,111.349449
min,1.2,2.0
25%,4.0,28.0
50%,4.4,59.0
75%,4.7,112.0
max,5.0,983.0


In [21]:
# (rows, columns) of the products table.
ice_cream_products.shape

(241, 8)

In [22]:
# Column dtypes and non-null counts for the products table.
ice_cream_products.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 241 entries, 0 to 240
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   brand         241 non-null    object 
 1   key           241 non-null    object 
 2   name          241 non-null    object 
 3   subhead       57 non-null     object 
 4   description   237 non-null    object 
 5   rating        241 non-null    float64
 6   rating_count  241 non-null    int64  
 7   ingredients   241 non-null    object 
dtypes: float64(1), int64(1), object(6)
memory usage: 15.2+ KB


In [23]:
# Number of missing values in each products column.
ice_cream_products.isnull().sum()

brand             0
key               0
name              0
subhead         184
description       4
rating            0
rating_count      0
ingredients       0
dtype: int64

# `subhead` is mostly missing (184 of 241 rows), especially toward the end of the products table, and four columns in reviews (`taste`, `ingredients`, `texture`, `likes`) are empty in most rows and may not be relevant.

# Alternatively, Subhead can be renamed to "motto/slogan" or something thereabouts. That would be a little less confusing.

# Though even with renaming "subhead", it doesn't change the fact that it's about three-quarters empty (184 of 241 rows are missing). So it only applies to certain products. It could probably be dropped.

In [24]:
# Preview the first five rows of the reviews table.
ice_cream_reviews.head()

Unnamed: 0,brand,key,author,date,stars,title,helpful_yes,helpful_no,text,taste,ingredients,texture,likes
0,bj,0_bj,Ilovebennjerry,2017-04-15,3,Not enough brownies!,10.0,3.0,"Super good, don't get me wrong. But I came for...",,,,
1,bj,0_bj,Sweettooth909,2020-01-05,5,I’m OBSESSED with this pint!,3.0,0.0,I decided to try it out although I’m not a hug...,,,,
2,bj,0_bj,LaTanga71,2018-04-26,3,My favorite...More Caramel Please,5.0,2.0,My caramel core begins to disappear about half...,,,,
3,bj,0_bj,chicago220,2018-01-14,5,Obsessed!!!,24.0,1.0,Why are people complaining about the blonde br...,,,,
4,bj,0_bj,Kassidyk,2020-07-24,1,Worst Ice Cream Ever!,1.0,5.0,This ice cream is worst ice cream I’ve ever ta...,,,,


In [25]:
# Preview the last five rows of the reviews table.
ice_cream_reviews.tail()

Unnamed: 0,brand,key,author,date,stars,title,helpful_yes,helpful_no,text,taste,ingredients,texture,likes
21669,breyers,68_breyers,Randi,2020-04-22,1,Terrible missing the chocolate ice cream,0.0,0.0,There was no chocolate ice cream in this at al...,,,,
21670,breyers,68_breyers,Bethie,2020-09-08,1,Terrible,0.0,0.0,This ice cream has no flavor at all. No one in...,,,,
21671,breyers,68_breyers,Nshaw1994,2020-09-09,5,Flavor of the week!,0.0,0.0,Absolutely love this flavor! The only thing th...,,,,
21672,breyers,68_breyers,PamelaG,2020-06-01,5,Love this flavor!!,0.0,0.0,Brilliant combo - love the cheesecake and brow...,,,,
21673,breyers,68_breyers,Fanchon,2020-08-22,5,Great,0.0,0.0,Has a delicious taste with all natural ingredi...,,,,


In [26]:
# Summary statistics for the numeric review columns
# (stars, helpful_yes, helpful_no, taste, ingredients, texture).
ice_cream_reviews.describe()

Unnamed: 0,stars,helpful_yes,helpful_no,taste,ingredients,texture
count,21674.0,21674.0,21674.0,4265.0,4265.0,4265.0
mean,4.223955,1.364815,0.443019,4.234701,4.323798,4.284642
std,1.384121,8.234123,2.568762,1.401392,1.298888,1.363423
min,1.0,0.0,0.0,1.0,1.0,1.0
25%,4.0,0.0,0.0,4.0,4.0,4.0
50%,5.0,0.0,0.0,5.0,5.0,5.0
75%,5.0,1.0,0.0,5.0,5.0,5.0
max,5.0,421.0,121.0,5.0,5.0,5.0


In [27]:
# (rows, columns) of the reviews table.
ice_cream_reviews.shape

(21674, 13)

In [28]:
# Column dtypes and non-null counts for the reviews table.
ice_cream_reviews.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21674 entries, 0 to 21673
Data columns (total 13 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   brand        21674 non-null  object 
 1   key          21674 non-null  object 
 2   author       20874 non-null  object 
 3   date         21674 non-null  object 
 4   stars        21674 non-null  int64  
 5   title        16275 non-null  object 
 6   helpful_yes  21674 non-null  float64
 7   helpful_no   21674 non-null  float64
 8   text         21674 non-null  object 
 9   taste        4265 non-null   float64
 10  ingredients  4265 non-null   float64
 11  texture      4265 non-null   float64
 12  likes        2295 non-null   object 
dtypes: float64(5), int64(1), object(7)
memory usage: 2.1+ MB


In [29]:
# Number of missing values in each reviews column.
ice_cream_reviews.isnull().sum()

brand              0
key                0
author           800
date               0
stars              0
title           5399
helpful_yes        0
helpful_no         0
text               0
taste          17409
ingredients    17409
texture        17409
likes          19379
dtype: int64

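# SQL queries (or a pandas merge) would really help with getting information from these datasets. The two tables could be joined on "key" (which already embeds the brand, e.g. "0_bj") and "brand", which would attach each product's ingredients list to its reviews. Note that the "ingredients" column in ice_cream_reviews is a numeric 1-5 score (float64), not an ingredient list, so it cannot be filled in from the ice_cream_products text.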

# That being said, taste, ingredients, texture, and likes are all missing from a majority of the reviews (~80% for the first three, and ~90% for likes). They could be dropped without too much consequence, and I'm unsure they'll (soft)serve a purpose with so much of them missing.