# Statistical Analysis: Wine Dataset
We will calculate the following statistics for each attribute
- [] Central Tendancy
    - Mean
    - Median
    - Mode
- [] Dispersion
    - Range
    - Quartiles
    - InterQuartile Range
    - Variance
    - Santard Deviation
- [] Covariance Matrix
- [] Correlation Matrix


## Importing necessary Libraries

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

## Load datasets

In [3]:
wine_df = pd.read_csv('../wine/wine.data', header=None)
wine_df.columns = [
    "Class", "Alcohol", "Malic_acid", "Ash", "Alcalinity_of_ash", 
    "Magnesium", "Total_phenols", "Flavanoids", "Nonflavanoid_phenols",
    "Proanthocyanins", "Color_intensity", "Hue", "OD280/OD315_of_diluted_wines", "Proline"
]
wine_df

Unnamed: 0,Class,Alcohol,Malic_acid,Ash,Alcalinity_of_ash,Magnesium,Total_phenols,Flavanoids,Nonflavanoid_phenols,Proanthocyanins,Color_intensity,Hue,OD280/OD315_of_diluted_wines,Proline
0,1,14.23,1.71,2.43,15.6,127,2.80,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,1,13.20,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.40,1050
2,1,13.16,2.36,2.67,18.6,101,2.80,3.24,0.30,2.81,5.68,1.03,3.17,1185
3,1,14.37,1.95,2.50,16.8,113,3.85,3.49,0.24,2.18,7.80,0.86,3.45,1480
4,1,13.24,2.59,2.87,21.0,118,2.80,2.69,0.39,1.82,4.32,1.04,2.93,735
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
173,3,13.71,5.65,2.45,20.5,95,1.68,0.61,0.52,1.06,7.70,0.64,1.74,740
174,3,13.40,3.91,2.48,23.0,102,1.80,0.75,0.43,1.41,7.30,0.70,1.56,750
175,3,13.27,4.28,2.26,20.0,120,1.59,0.69,0.43,1.35,10.20,0.59,1.56,835
176,3,13.17,2.59,2.37,20.0,120,1.65,0.68,0.53,1.46,9.30,0.60,1.62,840


## Compute central tendency (mean, median, mode)

In [6]:
mean_values = wine_df.mean()
median_values = wine_df.median()
mode_values = wine_df.mode().iloc[0]  # mode() returns a dataframe, so we take the first row

# Combine them into a single DataFrame
central_tendency_table = pd.DataFrame({
    'Mean': mean_values,
    'Median': median_values,
    'Mode': mode_values
})

# Display the table
central_tendency_table

Unnamed: 0,Mean,Median,Mode
Class,1.938202,2.0,2.0
Alcohol,13.000618,13.05,12.37
Malic_acid,2.336348,1.865,1.73
Ash,2.366517,2.36,2.28
Alcalinity_of_ash,19.494944,19.5,20.0
Magnesium,99.741573,98.0,88.0
Total_phenols,2.295112,2.355,2.2
Flavanoids,2.02927,2.135,2.65
Nonflavanoid_phenols,0.361854,0.34,0.26
Proanthocyanins,1.590899,1.555,1.35


## Measures of Dispersion (range, quartiles, InterQuartile Ranges, Variance, Standard Deviations)

In [13]:
wine_dispersion = wine_df.describe().T
wine_dispersion['IQR'] = wine_dispersion['75%'] - wine_dispersion['25%']
wine_dispersion['Variance'] = wine_dispersion['std'] ** 2

wine_dispersion

Unnamed: 0,count,mean,std,min,25%,50%,75%,max,IQR,Variance
Class,178.0,1.938202,0.775035,1.0,1.0,2.0,3.0,3.0,2.0,0.600679
Alcohol,178.0,13.000618,0.811827,11.03,12.3625,13.05,13.6775,14.83,1.315,0.659062
Malic_acid,178.0,2.336348,1.117146,0.74,1.6025,1.865,3.0825,5.8,1.48,1.248015
Ash,178.0,2.366517,0.274344,1.36,2.21,2.36,2.5575,3.23,0.3475,0.075265
Alcalinity_of_ash,178.0,19.494944,3.339564,10.6,17.2,19.5,21.5,30.0,4.3,11.152686
Magnesium,178.0,99.741573,14.282484,70.0,88.0,98.0,107.0,162.0,19.0,203.989335
Total_phenols,178.0,2.295112,0.625851,0.98,1.7425,2.355,2.8,3.88,1.0575,0.39169
Flavanoids,178.0,2.02927,0.998859,0.34,1.205,2.135,2.875,5.08,1.67,0.997719
Nonflavanoid_phenols,178.0,0.361854,0.124453,0.13,0.27,0.34,0.4375,0.66,0.1675,0.015489
Proanthocyanins,178.0,1.590899,0.572359,0.41,1.25,1.555,1.95,3.58,0.7,0.327595


# Compute covariance and correlation matrices

In [8]:
wine_covariance = wine_df.cov()
wine_correlation = wine_df.corr()

### WINE Covariance Matrix

In [10]:
wine_covariance

Unnamed: 0,Class,Alcohol,Malic_acid,Ash,Alcalinity_of_ash,Magnesium,Total_phenols,Flavanoids,Nonflavanoid_phenols,Proanthocyanins,Color_intensity,Hue,OD280/OD315_of_diluted_wines,Proline
Class,0.600679,-0.206515,0.379039,-0.010555,1.340364,-2.315495,-0.348835,-0.656091,0.047177,-0.221413,0.477339,-0.109368,-0.433737,-154.667651
Alcohol,-0.206515,0.659062,0.085611,0.047115,-0.841093,3.139878,0.146887,0.192033,-0.015754,0.063518,1.028283,-0.013313,0.041698,164.567185
Malic_acid,0.379039,0.085611,1.248015,0.050277,1.076332,-0.87078,-0.234338,-0.45863,0.040733,-0.141147,0.644838,-0.143326,-0.292447,-67.548867
Ash,-0.010555,0.047115,0.050277,0.075265,0.406208,1.122937,0.022146,0.031535,0.006358,0.001516,0.164654,-0.004682,0.000762,19.319739
Alcalinity_of_ash,1.340364,-0.841093,1.076332,0.406208,11.152686,-3.97476,-0.671149,-1.172083,0.150422,-0.377176,0.145024,-0.209118,-0.656234,-463.355345
Magnesium,-2.315495,3.139878,-0.87078,1.122937,-3.97476,203.989335,1.91647,2.793087,-0.455563,1.932832,6.620521,0.180851,0.669308,1769.1587
Total_phenols,-0.348835,0.146887,-0.234338,0.022146,-0.671149,1.91647,0.39169,0.54047,-0.035045,0.219373,-0.079998,0.062039,0.311021,98.171057
Flavanoids,-0.656091,0.192033,-0.45863,0.031535,-1.172083,2.793087,0.54047,0.997719,-0.066867,0.373148,-0.399169,0.124082,0.558262,155.447492
Nonflavanoid_phenols,0.047177,-0.015754,0.040733,0.006358,0.150422,-0.455563,-0.035045,-0.066867,0.015489,-0.02606,0.040121,-0.007471,-0.044469,-12.203586
Proanthocyanins,-0.221413,0.063518,-0.141147,0.001516,-0.377176,1.932832,0.219373,0.373148,-0.02606,0.327595,-0.033504,0.038665,0.210933,59.554334


### WINE Correlation Matrix

In [11]:
wine_correlation

Unnamed: 0,Class,Alcohol,Malic_acid,Ash,Alcalinity_of_ash,Magnesium,Total_phenols,Flavanoids,Nonflavanoid_phenols,Proanthocyanins,Color_intensity,Hue,OD280/OD315_of_diluted_wines,Proline
Class,1.0,-0.328222,0.437776,-0.049643,0.517859,-0.209179,-0.719163,-0.847498,0.489109,-0.49913,0.265668,-0.617369,-0.78823,-0.633717
Alcohol,-0.328222,1.0,0.094397,0.211545,-0.310235,0.270798,0.289101,0.236815,-0.155929,0.136698,0.546364,-0.071747,0.072343,0.64372
Malic_acid,0.437776,0.094397,1.0,0.164045,0.2885,-0.054575,-0.335167,-0.411007,0.292977,-0.220746,0.248985,-0.561296,-0.36871,-0.192011
Ash,-0.049643,0.211545,0.164045,1.0,0.443367,0.286587,0.12898,0.115077,0.18623,0.009652,0.258887,-0.074667,0.003911,0.223626
Alcalinity_of_ash,0.517859,-0.310235,0.2885,0.443367,1.0,-0.083333,-0.321113,-0.35137,0.361922,-0.197327,0.018732,-0.273955,-0.276769,-0.440597
Magnesium,-0.209179,0.270798,-0.054575,0.286587,-0.083333,1.0,0.214401,0.195784,-0.256294,0.236441,0.19995,0.055398,0.066004,0.393351
Total_phenols,-0.719163,0.289101,-0.335167,0.12898,-0.321113,0.214401,1.0,0.864564,-0.449935,0.612413,-0.055136,0.433681,0.699949,0.498115
Flavanoids,-0.847498,0.236815,-0.411007,0.115077,-0.35137,0.195784,0.864564,1.0,-0.5379,0.652692,-0.172379,0.543479,0.787194,0.494193
Nonflavanoid_phenols,0.489109,-0.155929,0.292977,0.18623,0.361922,-0.256294,-0.449935,-0.5379,1.0,-0.365845,0.139057,-0.26264,-0.50327,-0.311385
Proanthocyanins,-0.49913,0.136698,-0.220746,0.009652,-0.197327,0.236441,0.612413,0.652692,-0.365845,1.0,-0.02525,0.295544,0.519067,0.330417
