# 1. Problem Information
- **Name:** Wine Type Classification
- **Date:** 12/02/2026
- **Type:** Multi-class classification

# 2. Imports

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# 3. Data preparation

In [2]:
# Read both CSV splits from the relative data/ directory: the training set
# (which carries the `target` column) and the test set that is scored later.
train, test = (pd.read_csv(csv_path) for csv_path in ("data/train.csv", "data/test.csv"))
# Report (rows, columns), then preview the first five rows as the cell output.
print(train.shape)
train.head(n=5)

(142, 15)


Unnamed: 0,SampleID,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline,target
0,37,13.28,1.64,2.84,15.5,110.0,2.6,2.68,0.34,1.36,4.6,1.09,2.78,880.0,0
1,31,13.73,1.5,2.7,22.5,101.0,3.0,3.25,0.29,2.38,5.7,1.19,2.71,1285.0,0
2,27,13.39,1.77,2.62,16.1,93.0,2.85,2.94,0.34,1.45,4.8,0.92,3.22,1195.0,0
3,13,13.75,1.73,2.41,16.0,89.0,2.6,2.76,0.29,1.81,5.6,1.15,2.9,1320.0,0
4,149,13.32,3.24,2.38,21.5,92.0,1.93,0.76,0.45,1.25,8.42,0.55,1.62,650.0,2


In [3]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns of the training set. The table also covers SampleID and target,
# which are not used as model features.
train.describe()

Unnamed: 0,SampleID,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline,target
count,142.0,142.0,142.0,142.0,142.0,142.0,142.0,142.0,142.0,142.0,142.0,142.0,142.0,142.0,142.0
mean,89.260563,12.971479,2.34,2.364859,19.625352,99.633803,2.273732,1.985352,0.360141,1.600211,4.989648,0.948986,2.606197,739.478873,0.93662
std,51.279886,0.802521,1.101285,0.26838,3.380848,14.937448,0.621454,0.951952,0.119211,0.579777,2.334947,0.226647,0.68965,301.499014,0.773816
min,1.0,11.03,0.74,1.36,10.6,70.0,0.98,0.34,0.13,0.42,1.28,0.48,1.27,278.0,0.0
25%,45.25,12.37,1.61,2.2,17.2,88.0,1.705,1.2275,0.27,1.25,3.0575,0.78,1.97,496.25,0.0
50%,87.5,13.01,1.81,2.36,19.5,97.0,2.335,2.135,0.34,1.56,4.64,0.96,2.78,673.5,1.0
75%,135.5,13.56,3.17,2.56,21.5,107.75,2.8,2.775,0.43,1.95,6.1825,1.09,3.16,981.25,2.0
max,176.0,14.83,5.8,3.22,30.0,162.0,3.88,3.74,0.63,3.58,13.0,1.71,3.92,1515.0,2.0


# 4. Models

In [4]:
# Feature matrix: every training column except SampleID and the label.
# Columns are dropped by name rather than with a positional slice (1:-1), so
# the selection stays correct even if the column order in the CSV changes.
X = train.drop(columns=["SampleID", "target"])
# The target column holds the class label to predict.
Y = train['target']

In [5]:
# Standardise the features, then fit a support-vector classifier.
# make_pipeline names each step after its lowercased class name, which is
# why the grid key below carries the "svc__" prefix.
pipeline = make_pipeline(StandardScaler(), SVC())

# Candidate values for the SVC regularisation parameter C.
params = {
    'svc__C': [0.1, 0.2, 0.3, 0.5, 1, 1.5, 2, 5, 10, 20],
}

# Exhaustive search over the grid using 4-fold cross-validation,
# ranking candidates by macro-averaged F1.
grin_search = GridSearchCV(
    estimator=pipeline,
    param_grid=params,
    scoring='f1_macro',
    cv=4,
)
grin_search.fit(X, Y)

print("Best score: ",round(grin_search.best_score_,4))
print("Best parameters: ",grin_search.best_params_)

Best score :  0.9864
Best parameters:  {'svc__C': 0.3}


In [6]:
# With the default refit=True, GridSearchCV refits the best configuration on
# the whole training set, so best_estimator_ is ready to predict.
best_pipeline = grin_search.best_estimator_
# Select the test features by the training column names instead of a
# positional slice: this guarantees the same columns in the same order as
# during fitting, and fails loudly (KeyError) if a feature is missing.
predictions = best_pipeline.predict(test[X.columns])

# 5. Submission

In [7]:
# Submission table: the SampleID of each test row next to its predicted
# class, stored under the column name "label".
submission = test[["SampleID"]].assign(label=predictions)
# Preview the first rows as the cell output.
submission.head()

Unnamed: 0,SampleID,label
0,11,0
1,135,2
2,29,0
3,122,1
4,63,1


In [8]:
# Write the submission to the working directory; index=False leaves the
# DataFrame index out of the file so only SampleID and label are written.
submission.to_csv("submission.csv", index=False)