# Imports and Copy-on-Write (CoW)

In [20]:
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns

In [21]:
# Turn on pandas copy-on-write mode (noted as the default from pandas 3.0).
pd.options.mode.copy_on_write = True

# Read in data

In [22]:
# Copy-on-write is switched on in this cell as well.
pd.set_option("mode.copy_on_write", True)
cwd = Path.cwd()

# Dataset locations, given as relative paths.
wine_quality_red_filename = Path('winequality-red.csv')
wine_quality_white_filename = Path('winequality-white.csv')

# Stop with an error that names the file if either dataset is missing
# (red is checked first, then white).
for dataset_path in (wine_quality_red_filename, wine_quality_white_filename):
    if not dataset_path.exists():
        raise FileNotFoundError(f"Dataset file not found: {dataset_path}")

# The files are semicolon-separated.
wine_red = pd.read_csv(wine_quality_red_filename, sep=';')  # 1599 data points
wine_white = pd.read_csv(wine_quality_white_filename, sep=';')  # 4898 data points

# Question 1

In [None]:
# Quality score column of each dataset.
wine_red_quality = wine_red['quality']  # Range 3-8
wine_white_quality = wine_white['quality']  # Range 3-9

# Print the observed min/max scores (this is where the ranges noted above come from).
red_low, red_high = wine_red_quality.min(), wine_red_quality.max()
print("Red wine quality scores range: [{0},{1}]".format(red_low, red_high))
white_low, white_high = wine_white_quality.min(), wine_white_quality.max()
print("White wine quality scores range: [{0},{1}]".format(white_low, white_high))

# Bin edges at 0.5, 1.5, ..., 10.5 so that each integer score 1-10 sits in the
# middle of its own bar.
bins = [edge + 0.5 for edge in range(11)]

# One figure with two panels: red on the left, white on the right.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12,5))

# Both panels are drawn the same way; only the data, bar colour and title differ.
for panel, scores, bar_color, heading in (
    (ax1, wine_red_quality, 'red', "Red Wine Quality"),
    (ax2, wine_white_quality, 'gold', "White Wine Quality"),
):
    panel.hist(scores, bins=bins, color=bar_color, edgecolor='black')
    panel.set_xticks(range(0, 11))
    panel.set_title(heading)
    panel.set_xlabel("Quality")
    panel.set_ylabel("Count")

plt.tight_layout()
plt.show()

# Question 2

In [None]:
def _boxplot_red_vs_white(red_values, white_values, red_title, white_title, y_limits):
    """Show a red-wine and a white-wine box plot side by side.

    Both panels are given the same y-axis limits and have their x ticks
    removed. The figure is laid out with ``tight_layout`` and shown.

    Parameters
    ----------
    red_values, white_values
        The data passed to ``Axes.boxplot`` for the left (red) and right
        (white) panel respectively.
    red_title, white_title
        Titles of the left and right panel.
    y_limits
        ``(low, high)`` pair applied to the y-axis of both panels.

    Returns
    -------
    tuple
        ``(fig, axes)`` as returned by ``plt.subplots(1, 2, ...)``; ``axes``
        unpacks into the left and right panel.
    """
    fig, axes = plt.subplots(1, 2, figsize=(12, 5))  # side by side
    panels = zip(axes, (red_values, white_values), (red_title, white_title))
    for ax, values, title in panels:
        ax.boxplot(values)
        ax.set_title(title)
        ax.set_ylim(*y_limits)
        ax.set_xticks([])  # Hides the 1
    plt.tight_layout()
    plt.show()
    return fig, axes


# The per-column Series and fig/ax1/ax2 are still bound at top level, exactly
# as before, so any later cell that refers to them keeps working.

# Fixed acidity
wine_red_fixed_acidity = wine_red['fixed acidity']
wine_white_fixed_acidity = wine_white['fixed acidity']
fig, (ax1, ax2) = _boxplot_red_vs_white(
    wine_red_fixed_acidity,
    wine_white_fixed_acidity,
    "Red Wine Fixed acidity",
    "White Wine Fixed acidity",
    (3, 17),
)

# Volatile acidity
wine_red_volatile_acidity = wine_red['volatile acidity']
wine_white_volatile_acidity = wine_white['volatile acidity']
fig, (ax1, ax2) = _boxplot_red_vs_white(
    wine_red_volatile_acidity,
    wine_white_volatile_acidity,
    "Red Wine Volatile acidity",
    "White Wine Volatile acidity",
    (0, 1.7),
)

# pH
wine_red_pH = wine_red['pH']
wine_white_pH = wine_white['pH']
fig, (ax1, ax2) = _boxplot_red_vs_white(
    wine_red_pH,
    wine_white_pH,
    "Red Wine pH",
    "White Wine pH",
    (2.6, 4.2),
)

# Density
wine_red_density = wine_red['density']
wine_white_density = wine_white['density']
fig, (ax1, ax2) = _boxplot_red_vs_white(
    wine_red_density,
    wine_white_density,
    "Red Wine density",
    "White Wine density",
    (0.98, 1.04),
)