# Problem 1 Abalone Data Set
### Created By: Ivor Zalud 

# Gradient-Boosted Trees

### Import Python modules

In [149]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import classification_report, confusion_matrix

## 1. Load the data and convert Sex to int

- M: 0
- F: 1
- I: 2

In [150]:
df_train = pd.read_csv('Data/abalone.data')
df_train['Sex'] = df_train['Sex'].map({'M': 0, 'F': 1, 'I': 2})
## Drop rows that contain only one instance of Ring values, y values need at least two instances for the model.
df_train = df_train[~df_train['Rings'].isin([1,26,29,2,25])]



## 2. Split the data set into a training and test set

In [151]:
## Define our indepedent and dependant variables
data_column_names = [column for column in df_train.columns]
x = df_train.loc[:, data_column_names]
y = df_train.loc[:,'Rings']



x_train, x_test, y_train, y_test = train_test_split(x,y, test_size=0.2)

print(y_train.value_counts())

9     557
10    501
8     458
11    385
7     313
12    215
6     201
13    159
14    104
5      93
15     83
16     53
17     49
4      42
18     37
19     22
20     22
3      13
21     12
23      8
22      6
24      2
27      2
Name: Rings, dtype: int64


## 3. Create the Gradient-boosted Trees Model and fit with the training data

In [153]:
GBT = GradientBoostingClassifier(n_estimators=5000,
                                       learning_rate=0.05,
                                       max_depth=3,
                                       subsample=0.5,
                                       validation_fraction=0.1,
                                       n_iter_no_change=20,
                                       max_features='log2',
                                       verbose=1).fit(x_train,y_train)

GBT.score(x_test,y_test)

predictions = GBT.predict(x_test)

print(classification_report(y_test, predictions))

      Iter       Train Loss      OOB Improve   Remaining Time 
         1           1.9940           0.4042            3.08m
         2           1.7617           0.2370            3.62m
         3           1.6364           0.1301            3.75m
         4           1.5203           0.1272            3.85m




         5           1.3706           0.1314            3.96m
         6           1.2700           0.0914            4.02m
         7           1.2012           0.0606            4.08m
         8           1.1038           0.0994            4.13m
         9           1.0303           0.0692            4.19m
        10           0.9738           0.0490            4.23m
        20           0.5317           0.0332            4.28m
        30           0.3147           0.0170            4.15m
        40           0.1899           0.0058            4.09m
        50           0.1223           0.0051            4.03m
        60           0.0771           0.0038            3.98m
        70           0.0496           0.0027            3.94m
        80           0.0328           0.0012            3.93m
        90           0.0212           0.0004            3.90m
       100           0.0134           0.0003            3.88m
       200           0.0003           0.0000            3.70m
       