# Wine Dataset
DataSet Link: https://www.kaggle.com/datasets/salohiddindev/wine-dataset-scraping-from-wine-com
# Load the dataset



In [15]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset
file_path = r"C:\Users\david\OneDrive\Documents\DaveShevy\Datasets\Wine Dataset\vivno_dataset.xlsx"
wine_data = pd.read_excel(file_path)

# Data Cleaning Process

# Removing $ sign and converting Prices to numeric
wine_data['Prices'] = wine_data['Prices'].replace('[\$,]', '', regex=True).astype(float)

# Removing duplicates
wine_data = wine_data.drop_duplicates()

# Replace 0 ABV with NaN since 0 is not a valid alcohol by volume percentage
wine_data['ABV %'] = wine_data['ABV %'].replace(0, pd.NA)

# Adjusting the 'color_wine' column to replace '0' with 'Other'
wine_data['color_wine'] = wine_data['color_wine'].replace(['0', 0, 'O'], 'Other')
wine_data['color_wine'] = wine_data['color_wine'].replace('O', 'Other')

# Define the price bins and labels for categorizing prices
price_bins = [0, 20, 50, 100, 500, 1000, wine_data['Prices'].max()]
price_labels = ["$0-20", "$20-50", "$50-100", "$100-500", "$500-1000", "$1000+"]
wine_data['Price Range'] = pd.cut(wine_data['Prices'], bins=price_bins, labels=price_labels)

# Set the seaborn style to dark for a full dark theme
sns.set_theme(style="darkgrid", palette="deep")
sns.set_context("talk")

# Set the default color of the plot to dark
plt.rcParams['axes.facecolor'] = '#313133'

# Visualization 1: Wine Price Distribution with Gradient Bars
plt.figure(figsize=(10, 6))
price_range_order = wine_data['Price Range'].value_counts().index
sns.countplot(data=wine_data, y='Price Range', order=price_range_order, palette=sns.cubehelix_palette(n_colors=len(price_range_order), start=.5, rot=-.75, reverse=True))
plt.title('Wine Price Distribution', color='white')
plt.xlabel('Number of Wines', color='white')
plt.ylabel('Price Range ($)', color='white')
plt.xticks(color='white')
plt.yticks(color='white')
plt.tight_layout()
plt.savefig(r"C:\Users\david\OneDrive\Documents\DaveShevy\Datasets\Wine Dataset\price_distribution_dark.png", facecolor='#313133')  # Save with dark background
plt.close()

# Visualization 2: Distribution of Wine Types with Gradient Bars
plt.figure(figsize=(12, 8))
wine_types_order = wine_data['color_wine'].value_counts().index
sns.countplot(data=wine_data, y='color_wine', order=wine_types_order, palette=sns.cubehelix_palette(n_colors=len(wine_types_order), start=.5, rot=-.75, reverse=True))
plt.title('Distribution of Wine Types', color='white')
plt.xlabel('Number of Wines', color='white')
plt.ylabel('Wine Type', color='white')
plt.xticks(color='white')
plt.yticks(color='white')
plt.tight_layout()
plt.savefig(r"C:\Users\david\OneDrive\Documents\DaveShevy\Datasets\Wine Dataset\wine_types_dark.png", facecolor='#313133')  # Save with dark background
plt.close()



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.countplot(data=wine_data, y='Price Range', order=price_range_order, palette=sns.cubehelix_palette(n_colors=len(price_range_order), start=.5, rot=-.75, reverse=True))

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.countplot(data=wine_data, y='color_wine', order=wine_types_order, palette=sns.cubehelix_palette(n_colors=len(wine_types_order), start=.5, rot=-.75, reverse=True))
