# Machine Learning using decision tree

The objective of this notebook is to train a model that will later be used to assign wines to classes (1-3) based on their features (alcohol, ash, magnesium, etc.).

## 1) Imports

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
import joblib
from sklearn import tree

## 2) Creating a list of column names (the CSV file has no header row)

In [2]:
# Column labels supplied by hand because the data file carries no header row:
# the class label comes first, followed by the 13 measured attributes.
col_names = [
    'Class',
    'Alcohol',
    'Malic acid',
    'Ash',
    'Alcalinity of ash',
    'Magnesium',
    'Total phenols',
    'Flavanoids',
    'Nonflavanoid phenols',
    'Proanthocyanins',
    'Color intensity',
    'Hue',
    'OD280/OD315 of diluted wines',
    'Proline',
]

## 3) Reading csv file

In [3]:
# header=None tells pandas the first line of the file is data, not labels;
# the labels are taken from col_names instead.
df_wine = pd.read_csv('wine.data', names=col_names, header=None)

# Preview the first ten rows as a quick sanity check of the parsed columns.
df_wine.head(n=10)

Unnamed: 0,Class,Alcohol,Malic acid,Ash,Alcalinity of ash,Magnesium,Total phenols,Flavanoids,Nonflavanoid phenols,Proanthocyanins,Color intensity,Hue,OD280/OD315 of diluted wines,Proline
0,1,14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050
2,1,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185
3,1,14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480
4,1,13.24,2.59,2.87,21.0,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735
5,1,14.2,1.76,2.45,15.2,112,3.27,3.39,0.34,1.97,6.75,1.05,2.85,1450
6,1,14.39,1.87,2.45,14.6,96,2.5,2.52,0.3,1.98,5.25,1.02,3.58,1290
7,1,14.06,2.15,2.61,17.6,121,2.6,2.51,0.31,1.25,5.05,1.06,3.58,1295
8,1,14.83,1.64,2.17,14.0,97,2.8,2.98,0.29,1.98,5.2,1.08,2.85,1045
9,1,13.86,1.35,2.27,16.0,98,2.98,3.15,0.22,1.85,7.22,1.01,3.55,1045


## 4) Separating the class values (labels) from the other attributes and splitting the data into train and test dataframes (70%:30% split)

In [4]:
# Features are every column except the label; the label is the 'Class' column.
X = df_wine.drop('Class', axis=1)
y = df_wine['Class']

# 70%/30% train/test split.
# random_state pins the shuffle so that Restart & Run All reproduces the same
# split (and therefore the same accuracy) on every run.
# stratify=y keeps the class proportions the same in both partitions, which
# matters for a small multi-class dataset where a purely random split can
# under-represent a class in the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=42,
    stratify=y,
)

## 5) Creating and training the model

In [5]:
# scikit-learn randomly permutes the features at every split, so two fits on
# the same data can yield different trees when several splits tie. Fixing
# random_state makes the trained model (and the file saved from it)
# reproducible across runs.
model = DecisionTreeClassifier(random_state=42)

# fit() returns the estimator itself, which the notebook displays as the cell output.
model.fit(X_train, y_train)

DecisionTreeClassifier()

## 6) Calculating accuracy of the model

In [6]:
# Predict the class of every held-out sample.
y_pred = model.predict(X_test)

# accuracy_score returns the fraction of correct predictions; scale it to a percentage.
acc = 100 * accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy of the model: {acc}%')

Accuracy of the model: 94.44444444444444%


## 7) Saving the trained model to a file

In [7]:
# Persist the fitted classifier to disk; joblib.dump returns the list of
# file names it wrote, which the notebook shows as the cell output.
joblib.dump(value=model, filename='dt_wines.joblib')

['dt_wines.joblib']