# Requirements
1. Filter the wine ratings dataset by a distinct field.
2. Save the filtered subset to a new, formatted JSON file.
3. Create a demo project to illustrate steps 1 and 2 above.
4. Record a demo video to document the project.

In [None]:
# Bring in required modules
import pandas as pd # Used to process datasets as dataframes
# Load the raw ratings; index_col=0 makes the CSV's first column the row index
df = pd.read_csv(
    "wine-ratings-small.csv",
    index_col=0,
)
# Preview the first five rows to confirm the file was parsed as expected
df.head(n=5)

In [None]:
# Print a summary of the raw dataframe (columns, non-null counts, dtypes)
# to spot columns with missing data before filtering
df.info()

In [None]:
# Remove the 'grape' column, which the original analysis found to contain only nulls.
# errors="ignore" keeps this cell safe to re-run: once the column is gone, a
# second drop would otherwise raise KeyError. Rebinding df (instead of
# inplace=True) leaves any other reference to the original frame untouched.
df = df.drop(columns=["grape"], errors="ignore")
# Confirm removal: 'grape' should no longer be listed among the columns
df.info()

In [None]:
# Build a boolean mask that is True where the tasting notes contain "sweet".
# The match is case-insensitive and is a substring match (so "sweetness" also
# counts); rows with missing notes are treated as non-matches via na=False.
mentions_sweet = df["notes"].str.contains("sweet", case=False, na=False)
# Keep only the matching rows as the sweet-wine subset
sweet_df = df.loc[mentions_sweet]
# Preview the first rows of the subset
sweet_df.head()

In [None]:
# Print a summary of the filtered subset (row count, non-null counts, dtypes)
# to see how many wines matched the 'sweet' filter
sweet_df.info()

In [None]:
# Write the filtered subset to disk as JSON.
# orient="records" emits a list with one {column: value} object per row
# (the row index is not written); indent=4 pretty-prints the output.
sweet_df.to_json(
    path_or_buf="sweet-wine-ratings.json",
    orient="records",
    indent=4,
)