## Loading data from a CSV file

### 1. Using Python dictionaries (`csv.DictReader`)

In [None]:
from csv import DictReader

In [None]:
# Read the CSV file into a list of dictionaries, one per data row.
# - newline='' is what the csv module asks for when it is given a file object,
#   so that line breaks embedded inside quoted fields are parsed correctly.
# - errors='ignore' silently drops bytes that are not valid UTF-8 instead of
#   raising UnicodeDecodeError.
with open('sample_data/wine-ratings-small.csv', encoding='utf-8',
          errors='ignore', newline='') as f:
    reader = DictReader(f)
    wines = list(reader)

In [None]:
# Keep only the rows whose 'variety' field contains the text 'Red Wine'
red_wines = [row for row in wines if 'Red Wine' in row['variety']]

In [None]:
# Show the first five red-wine rows, one dictionary per line
for row in red_wines[:5]:
    print(row)

In [None]:
# Keep only the rows whose 'variety' field contains the text 'White Wine'
white_wines = [row for row in wines if 'White Wine' in row['variety']]

In [None]:
# Show the first five white-wine rows, one dictionary per line
for row in white_wines[:5]:
    print(row)

In [None]:
# Keep only the rows whose 'region' field contains the text 'Napa'
napa_wines = [row for row in wines if 'Napa' in row['region']]

In [None]:
# Show the first five Napa rows, one dictionary per line
for row in napa_wines[:5]:
    print(row)

In [None]:
# Collect the wines whose rating is strictly greater than 90.
# The full list is intentionally not printed here (it can be very long);
# slice it, e.g. highest_rated_wines[:5], to inspect a few rows.
highest_rated_wines = []

for wine in wines:
    try:
        # CSV values are read as strings; convert for a numerical comparison
        rating = float(wine['rating'])
    except (TypeError, ValueError):
        # Missing (None) or non-numeric rating: the row cannot qualify, skip it
        continue
    if rating > 90.0:
        highest_rated_wines.append(wine)


In [None]:
# Show the first five highest-rated rows, one dictionary per line
for row in highest_rated_wines[:5]:
    print(row)

### Saving each filtered list to its own JSON file

In [None]:
import json

In [None]:
# Write the red-wine rows to 'red_wines.json' with 4-space indentation.
# The encoding is given explicitly so the file does not depend on the
# platform's default locale encoding.
with open('red_wines.json', 'w', encoding='utf-8') as f:
    json.dump(red_wines, f, indent=4)

print("red_wines list has been saved to 'red_wines.json'")

In [None]:
# Write the white-wine rows to 'white_wines.json' with 4-space indentation.
# The encoding is given explicitly so the file does not depend on the
# platform's default locale encoding.
with open('white_wines.json', 'w', encoding='utf-8') as f:
    json.dump(white_wines, f, indent=4)

print("white_wines list has been saved to 'white_wines.json'")

In [None]:
# Write the Napa rows to 'napa_wines.json' with 4-space indentation.
# The encoding is given explicitly so the file does not depend on the
# platform's default locale encoding.
with open('napa_wines.json', 'w', encoding='utf-8') as f:
    json.dump(napa_wines, f, indent=4)

print("napa_wines list has been saved to 'napa_wines.json'")

In [None]:
# Write the highest-rated rows to 'highest_rated_wines.json' with 4-space
# indentation. The encoding is given explicitly so the file does not depend
# on the platform's default locale encoding.
with open('highest_rated_wines.json', 'w', encoding='utf-8') as f:
    json.dump(highest_rated_wines, f, indent=4)

print("highest_rated_wines list has been saved to 'highest_rated_wines.json'")

### 2. Using Pandas

In [None]:
import pandas as pd

In [None]:
# Load the CSV into a DataFrame, using the file's first column as the row index
df = pd.read_csv(
    'sample_data/wine-ratings-small.csv',
    index_col=0,
)
# Preview the first rows
df.head()

In [None]:
# Rows whose 'variety' is exactly "Red Wine" (boolean-mask selection)
red_wines = df.loc[df['variety'] == "Red Wine"]
red_wines.head()

In [None]:
# Rows whose 'variety' is exactly "White Wine" (boolean-mask selection)
white_wines = df.loc[df['variety'] == "White Wine"]
white_wines.head()

In [None]:
# Rows whose 'rating' is strictly greater than 90
highest_rated_wines = df.loc[df['rating'] > 90]
highest_rated_wines.head()

In [None]:
# Rows whose 'region' is exactly 'Italy'
italy_wines = df.loc[df['region'] == 'Italy']
italy_wines.head()

In [None]:
# Alternative: isin() tests membership in a list of accepted region values
italy_wines = df.loc[df['region'].isin(['Italy'])]
italy_wines.head()

**Note:** Applying the same equality filter with 'Napa' returns an empty DataFrame, as shown below, because 'Napa' appears only as a substring of the values in the `region` column, never as an entire value. To filter on a substring of a column, use `.str.contains()` instead.

In [None]:
# Exact-match filter on 'region' using the value 'Napa'
napa_wines = df.loc[df['region'] == 'Napa']
napa_wines.head()

In [None]:
# Substring filter on 'region'.
# - na=False makes rows with a missing region count as non-matches; without it
#   missing values become NaN in the mask and the boolean indexing raises
#   ValueError.
# - regex=False matches 'Napa' as literal text rather than as a regex pattern.
napa_wines = df[df['region'].str.contains('Napa', na=False, regex=False)]
napa_wines.head()