### Importing the library

In [76]:
import pandas as pd

### First, set the paths of original_table and released_table

In [77]:
# Path of the original table; it is loaded below without a header row.
original_table_path = "abalone.data"
# Path of the released table; it is loaded below with its first row treated as a header.
released_table_path = "k-anonmity+L-Diversity+T-closeness_abalone_data.csv"

### Importing and Preprocessing dataset

In [78]:
# Column labels applied to both tables; "Rings" is deliberately last because
# the later cells treat the last column as the prediction target.
names = (
    "Sex",
    "Length",
    "Diameter",
    "Height",
    "Whole-weight",
    "Shucked-weight",
    "Viscera-weight",
    "Shell-weight",
    "Rings"
)
# header=None: the original file has no header line, so every row is data.
original_table = pd.read_csv(original_table_path, header=None, names=names)
# header=0 combined with names=...: the file's own header row is consumed and
# replaced by `names`.
released_table = pd.read_csv(released_table_path, header=0, names=names)

In [79]:
# Preview the first rows of the original table.
original_table.head()

Unnamed: 0,Sex,Length,Diameter,Height,Whole-weight,Shucked-weight,Viscera-weight,Shell-weight,Rings
0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
1,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
2,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
3,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
4,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7


In [80]:
# Preview the first rows of the released table.
released_table.head()

Unnamed: 0,Sex,Length,Diameter,Height,Whole-weight,Shucked-weight,Viscera-weight,Shell-weight,Rings
0,I,[0.075~0.155],[0.055~0.13],[0.01~0.05],[0.002~0.029],[0.001~0.0125],[0.0005~0.0065],[0.0015~0.008],0
1,I,[0.075~0.155],[0.055~0.13],[0.01~0.05],[0.002~0.029],[0.001~0.0125],[0.0005~0.0065],[0.0015~0.008],0
2,I,[0.075~0.155],[0.055~0.13],[0.01~0.05],[0.002~0.029],[0.001~0.0125],[0.0005~0.0065],[0.0015~0.008],0
3,M,[0.075~0.155],[0.055~0.13],[0.01~0.05],[0.002~0.029],[0.001~0.0125],[0.0005~0.0065],[0.0015~0.008],0
4,I,[0.075~0.155],[0.055~0.13],[0.01~0.05],[0.002~0.029],[0.001~0.0125],[0.0005~0.0065],[0.0015~0.008],0


In [81]:
def reScale(rings):
    """Map a raw ring count onto a coarse class index.

    The bins are five rings wide with inclusive upper edges: values up to 5
    give 0, values up to 10 give 1, and so on until values up to 30 give 5.

    Args:
        rings: Numeric ring count.

    Returns:
        The bin index (0-5), or None when `rings` is not <= 30. NaN also
        yields None because every comparison against it is false.
    """
    upper_edges = (5, 10, 15, 20, 25, 30)
    # The first edge that is not exceeded determines the bin.
    for label, edge in enumerate(upper_edges):
        if rings <= edge:
            return label
    return None
# Overwrite the last column (Rings) in place with its binned class index.
# NOTE: this cell is not idempotent - running it a second time feeds the
# already-binned values (0-5) back into reScale, which maps all of them to 0.
# Reload original_table before re-running.
original_table.iloc[:,-1] = original_table.iloc[:,-1].apply(reScale)

In [82]:
from sklearn.preprocessing import StandardScaler, RobustScaler


def encoder_and_scaler(table):
    """Split a table into a standardized feature matrix and a label array.

    Every column except the last is treated as a feature; non-numeric feature
    columns are one-hot encoded with `pd.get_dummies`, and the resulting
    matrix is standardized with a `StandardScaler` fitted on this table.

    Args:
        table: DataFrame whose last column holds the labels.

    Returns:
        A `(features, labels)` tuple: the scaled feature array and the values
        of the last column.
    """
    labels = table.iloc[:, -1].values
    features = table.iloc[:, :-1]
    encoded = pd.get_dummies(features)
    # A fresh scaler is fitted on every call, i.e. on the full table passed in.
    scaled = StandardScaler().fit_transform(encoded.values)
    return scaled, labels

# Build feature/label arrays for both tables; each call fits its own scaler
# on the table it is given.
original_x, original_y = encoder_and_scaler(original_table)
released_x, released_y = encoder_and_scaler(released_table)

### Training

In [83]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, recall_score, f1_score
# Import models from sklearn
from sklearn.svm import SVC
from xgboost import XGBClassifier

def print_metrics(y_true, preds):
    """Print the accuracy of `preds` measured against `y_true`.

    Args:
        y_true: Ground-truth labels.
        preds: Predicted labels, aligned with `y_true`.
    """
    accuracy = accuracy_score(y_true, preds)
    print('Accuracy score: ', format(accuracy))

def train_and_predict(X,y):
    """Train an SVC and an XGBClassifier and print their test accuracies.

    The data is split 75/25 with a fixed random state. Both models are fitted
    on the training part, then evaluated on the held-out part. Two accuracy
    lines are printed (SVC first, XGBClassifier second), followed by a
    separator line.

    Args:
        X: Feature matrix.
        y: Label array aligned with `X`.
    """
    features_train, features_test, labels_train, labels_test = train_test_split(
        X, y, random_state=5, test_size=0.25
    )

    # The two models under comparison, in the order their scores are printed.
    classifiers = (
        SVC(),
        XGBClassifier(verbosity=0, use_label_encoder=False),
    )

    # Fit both models before any prediction is made.
    for clf in classifiers:
        clf.fit(features_train, labels_train)

    predictions = [clf.predict(features_test) for clf in classifiers]

    # Report one accuracy line per model, then a separator.
    for predicted in predictions:
        print_metrics(labels_test, predicted)
    print("====================================")

In [84]:
# Each call prints two accuracy lines: the SVC score first, then the
# XGBClassifier score. The first call evaluates the original table, the
# second the released table.
train_and_predict(original_x, original_y)
train_and_predict(released_x, released_y)

Accuracy score:  0.7119617224880382
Accuracy score:  0.6813397129186602
Accuracy score:  0.6775119617224881
Accuracy score:  0.6641148325358852
