## 📌 Description
This notebook presents a detailed exploratory data analysis (EDA) and visualization of the Wine Quality dataset. The goal is to understand how different physicochemical properties relate to the perceived wine quality.

The dataset includes 11 physicochemical features such as acidity, sugar content, and alcohol, along with a quality score rated on a scale from 0 to 10 and a row identifier column (`Id`). This notebook provides a visual overview of distributions, correlations, and relationships between variables.

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
# Installs a blanket "ignore" filter: no category or module is specified, so
# every warning raised after this point (including library deprecation
# notices) is suppressed.
warnings.filterwarnings('ignore')

In [None]:
# Location of the Wine Quality CSV inside the Kaggle input directory.
csv_path = "/kaggle/input/wine-quality-dataset/WineQT.csv"
data = pd.read_csv(csv_path)

# Bare last expression: the notebook renders the first rows as a table.
data.head()

In [None]:
# Per-column summary statistics (count, mean, std, min, quartiles, max),
# displayed as the cell's output.
data.describe()

In [None]:
# Columns to plot: everything numeric except the `Id` column.
numeric_columns = data.select_dtypes(include='number').columns.drop('Id')

# Grid layout: fixed number of columns, as many rows as needed (ceiling division).
n_cols = 3
n_rows = (len(numeric_columns) + n_cols - 1) // n_cols

fig = plt.figure(figsize=(n_cols * 5, n_rows * 4))

# One histogram with a KDE overlay per column; subplot positions are 1-based.
for position, column in enumerate(numeric_columns, start=1):
    ax = fig.add_subplot(n_rows, n_cols, position)
    sns.histplot(data[column], kde=True, color='skyblue', ax=ax)
    ax.set_title(f"Distribution of {column}")
    ax.set_xlabel(column)
    ax.set_ylabel("Count")

fig.tight_layout()
plt.show()


In [None]:
# Columns shown in the pair plot; `quality` is passed as the hue variable.
selected_features = [
    'alcohol',
    'sulphates',
    'citric acid',
    'density',
    'quality',
]

pairplot_frame = data.loc[:, selected_features]
sns.pairplot(data=pairplot_frame, hue='quality', palette='husl')
plt.show()

In [None]:
# Mean of every column (except `Id`) within each quality score.
feature_means_by_quality = data.drop(columns="Id").groupby('quality').mean()

# Grouped bar chart: one group per quality score, one bar per feature.
ax = feature_means_by_quality.plot.bar(figsize=(12, 6), rot=0)
ax.set_title('Average Feature Values by Wine Quality')
ax.set_ylabel('Mean Value')

# Anchor the legend to the right of the axes so it does not cover the bars.
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.tight_layout()
plt.show()

In [None]:
# Pairwise correlation matrix of the numeric columns. The `Id` column is
# excluded, as in the histogram and group-mean cells, so that it is not
# reported as if it were a wine property.
feature_corr = data.select_dtypes(include='number').drop(columns='Id').corr()

plt.figure(figsize=(10, 6))
# Two fixed decimals keep the annotations the same width in every cell.
sns.heatmap(feature_corr, annot=True, fmt=".2f")
plt.title('Correlation Matrix of Wine Features')
plt.tight_layout()
# Explicit show() so the cell output is the figure, not the Axes repr.
plt.show()

In [None]:

# One box per quality score showing the spread of alcohol content.
ax = sns.boxplot(data=data, x='quality', y='alcohol')
ax.set_title('Alcohol Distribution by Wine Quality')
plt.show()

In [None]:
# Number of wines per quality score, sorted from the rarest score to the most common.
data_qualityScoreCount = data["quality"].value_counts().sort_values()

labels = data_qualityScoreCount.index
sizes = data_qualityScoreCount.values
total = sum(sizes)

# Pie chart.
# A fresh figure is requested without an explicit number: asking for figure 1
# would re-activate an already open figure 1 (ignoring figsize) and draw the
# pie on top of whatever that figure contains.
plt.figure(figsize=(10, 6))
wedges, texts = plt.pie(
    sizes,
    labels=None,  # wedge labels are omitted; the legend carries them instead
    startangle=90,
    shadow=False
)

# Legend entries: quality score followed by its share of all wines.
legend_labels = [f"{label} ({size / total:.1%})" for label, size in zip(labels, sizes)]

plt.legend(
    wedges,
    legend_labels,
    title="Quality Scores",
    loc="center left",
    bbox_to_anchor=(1, 0, 0.5, 1)
)

# Title and axis names (the axes are only symbolic in a pie chart)
plt.xlabel('Quality Classes')
plt.ylabel('Percentage')
plt.title('Distribution of Quality Scores')

plt.tight_layout()
plt.show()

In [None]:
# Correlation of each numeric column with `quality`, sorted ascending (most
# negative first). `Id` is dropped before correlating so it is not ranked as a
# feature, and `quality` itself is removed because its self-correlation is 1.
correlation_with_quality = (
    data.select_dtypes(include='number')
    .drop(columns='Id')
    .corr()['quality']
    .drop('quality')
    .sort_values(ascending=True)
)

# A new figure is created without a fixed number; requesting figure 1 would
# reuse an already open figure 1 instead of starting from a blank canvas.
fig, ax = plt.subplots(figsize=(10, 6))
correlation_with_quality.plot(kind='barh', color='skyblue', ax=ax)
ax.set_xlabel('Correlation Coefficient')
ax.set_title('Correlation of Features with Quality')
# Vertical guide lines only, to help read bar lengths.
ax.grid(True, axis='x', linestyle='--', alpha=0.7)
fig.tight_layout()
plt.show()

In [None]:
# Figure size for a single-row heatmap
plt.figure(figsize=(12, 1))

# Shape the correlations as a 1 x N matrix so they render as one heatmap row.
corr_row = correlation_with_quality.values.reshape(1, -1)

sns.heatmap(
    corr_row,
    annot=True,
    cmap='coolwarm',
    xticklabels=correlation_with_quality.index,
    yticklabels=["Quality"],
)
plt.title("Feature Correlations with Quality")
plt.show()