4.	Perform the following operations in python on given wine quality dataset [WnieQT.csv]
a.	Subset data for wines with quality ≥ 7 and alcohol > 10%.
b.	Merge red and white wine datasets to form a complete dataset with a new type column.
c.	Sort wines by citric acid and residual sugar.
d.	Transpose summary statistics of chemical properties for different quality levels.
e.	Reshape using pivot_table() to show average values of key features by wine quality

In [2]:
import pandas as pd

In [16]:
df = pd.read_csv("WineQT.csv")

In [18]:
print(df.head())

   fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
0            7.4              0.70         0.00             1.9      0.076   
1            7.8              0.88         0.00             2.6      0.098   
2            7.8              0.76         0.04             2.3      0.092   
3           11.2              0.28         0.56             1.9      0.075   
4            7.4              0.70         0.00             1.9      0.076   

   free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
0                 11.0                  34.0   0.9978  3.51       0.56   
1                 25.0                  67.0   0.9968  3.20       0.68   
2                 15.0                  54.0   0.9970  3.26       0.65   
3                 17.0                  60.0   0.9980  3.16       0.58   
4                 11.0                  34.0   0.9978  3.51       0.56   

   alcohol  quality  Id  
0      9.4        5   0  
1      9.8        5   1  
2      9

# a. Subset wines with quality ≥ 7 and alcohol > 10%


In [43]:
subset_df = df[(df['quality'] >= 7) & (df['alcohol'] > 10)]
print("\nSubset of wines with quality ≥ 7 and alcohol > 10%:\n", subset_df.head())


Subset of wines with quality ≥ 7 and alcohol > 10%:
      fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
12             8.5              0.28         0.56             1.8      0.092   
89             8.0              0.59         0.16             1.8      0.065   
143            9.6              0.32         0.47             1.4      0.056   
145           12.8              0.30         0.74             2.6      0.095   
146           12.8              0.30         0.74             2.6      0.095   

     free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
12                  35.0                 103.0  0.99690  3.30       0.75   
89                   3.0                  16.0  0.99620  3.42       0.92   
143                  9.0                  24.0  0.99695  3.22       0.82   
145                  9.0                  28.0  0.99940  3.20       0.77   
146                  9.0                  28.0  0.99940  3.20       0.77   

     alc

# b. Merge red and white wine datasets to form a complete dataset with 'type' column

In [45]:
red_wine_df = df[df['type'] == 'red']
red_wine_df['type'] = 'red'

KeyError: 'type'

In [26]:
white_df = pd.read_csv("winequality-white.csv")
white_df['type'] = 'white'

FileNotFoundError: [Errno 2] No such file or directory: 'winequality-white.csv'

In [None]:
merged_df = pd.concat([red_df, white_df], ignore_index=True)
print(merged_df[['type', 'quality']].head())

# c. Sort wines by citric acid and residual sugar

In [30]:
sorted_df = df.sort_values(by=['citric acid', 'residual sugar'])
print(sorted_df[['citric acid', 'residual sugar']].head())

     citric acid  residual sugar
7            0.0             1.2
933          0.0             1.2
935          0.0             1.2
451          0.0             1.4
733          0.0             1.4


# d. Transpose summary statistics of chemical properties for different quality levels

In [33]:
chemical_cols = ['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
                 'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
                 'pH', 'sulphates', 'alcohol']

In [35]:
summary = df.groupby('quality')[chemical_cols].mean().transpose()
print(summary)

quality                       3          4          5          6          7  \
fixed acidity          8.450000   7.809091   8.161077   8.317749   8.851049   
volatile acidity       0.897500   0.700000   0.585280   0.504957   0.393671   
citric acid            0.211667   0.165758   0.240124   0.263680   0.386573   
residual sugar         2.666667   2.566667   2.540476   2.444805   2.760140   
chlorides              0.105333   0.094788   0.091770   0.085281   0.075217   
free sulfur dioxide    8.166667  14.848485  16.612836  15.215368  14.538462   
total sulfur dioxide  24.500000  40.606061  55.299172  39.941558  37.489510   
density                0.997682   0.996669   0.997073   0.996610   0.996071   
pH                     3.361667   3.391212   3.302091   3.323788   3.287133   
sulphates              0.550000   0.637879   0.613375   0.676537   0.743566   
alcohol                9.691667  10.260606   9.902277  10.655339  11.482634   

quality                       8  
fixed acidity    

# e. Reshape using pivot_table to show average values of key features by wine quality

In [38]:
pivot_df = df.pivot_table(
    index='quality',
    values=['alcohol', 'pH', 'citric acid', 'sulphates', 'residual sugar'],
    aggfunc='mean',
    observed=False
)

In [40]:
print(pivot_df)

           alcohol  citric acid        pH  residual sugar  sulphates
quality                                                             
3         9.691667     0.211667  3.361667        2.666667   0.550000
4        10.260606     0.165758  3.391212        2.566667   0.637879
5         9.902277     0.240124  3.302091        2.540476   0.613375
6        10.655339     0.263680  3.323788        2.444805   0.676537
7        11.482634     0.386573  3.287133        2.760140   0.743566
8        11.937500     0.432500  3.240625        2.643750   0.766250
