In [2]:
import pandas as pd
import numpy as np

# Exploratory overview of the UCI red-wine quality dataset.
# The source file is semicolon-delimited, hence the explicit separator.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
df = pd.read_csv(url, sep=";")

# --- Size and a first look at the rows --------------------------------------
print("1. Data shape:")
print("Rows: {}, Columns: {}".format(*df.shape))

print("\n2. First 5 rows:", df.head(5), sep="\n")

# DataFrame.info() writes its report straight to stdout and returns None,
# so it is called on its own instead of being passed to print().
print("\n3. Data info:")
df.info()

# --- Column-level summaries -------------------------------------------------
print("\n4. Basic statistics:", df.describe(), sep="\n")

print("\n5. Data types:", df.dtypes, sep="\n")

print("\n6. Missing values:", df.isnull().sum(), sep="\n")

# --- The 'quality' column ---------------------------------------------------
# Counts are re-ordered by quality score rather than by frequency.
print("\n7. Quality value counts:", df["quality"].value_counts().sort_index(), sep="\n")

# --- Alcohol content and its relation to quality ----------------------------
print("\n8. Sort by alcohol:")
sorted_df = df.sort_values(by="alcohol", ascending=False)
print(sorted_df.loc[:, ["alcohol", "quality", "pH"]].head())

print("\n9. Group by quality:", df.groupby("quality")["alcohol"].mean(), sep="\n")

print("\n10. Pivot table:")
pivot = df.pivot_table(
    values="alcohol",
    index="quality",
    aggfunc=["mean", "min", "max"],
)
print(pivot)

# --- Correlation of every column with quality, strongest positive first -----
print("\n11. Correlation with quality:")
corr = df.corr().loc[:, "quality"].sort_values(ascending=False)
print(corr.round(decimals=3))

# --- Distinct quality scores ------------------------------------------------
print("\n12. Unique quality values:", df["quality"].unique(), sep="\n")

print("\n13. Number of unique quality scores:", df["quality"].nunique(), sep="\n")



1. Data shape:
Rows: 1599, Columns: 12

2. First 5 rows:
   fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
0            7.4              0.70         0.00             1.9      0.076   
1            7.8              0.88         0.00             2.6      0.098   
2            7.8              0.76         0.04             2.3      0.092   
3           11.2              0.28         0.56             1.9      0.075   
4            7.4              0.70         0.00             1.9      0.076   

   free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
0                 11.0                  34.0   0.9978  3.51       0.56   
1                 25.0                  67.0   0.9968  3.20       0.68   
2                 15.0                  54.0   0.9970  3.26       0.65   
3                 17.0                  60.0   0.9980  3.16       0.58   
4                 11.0                  34.0   0.9978  3.51       0.56   

   alcohol  quality  
0      