In [1]:
import numpy as np
import xgboost as xgb
import catboost as cb
import pandas as pd

from sklearn.datasets import load_breast_cancer, load_diabetes, load_wine
from sklearn.metrics import auc, accuracy_score, confusion_matrix, mean_squared_error
from sklearn.model_selection import cross_val_score, GridSearchCV, KFold, RandomizedSearchCV, train_test_split
from sklearn.preprocessing import LabelEncoder

# Diabetes Regression

In [56]:
# Load the diabetes dataset and pull out the feature matrix and the target.
diabetes = load_diabetes()
X, y = diabetes.data, diabetes.target

# Cell output: dimensions of the feature matrix.
X.shape

(442, 10)

In [None]:
# XGBoost regressor for the diabetes data (squared-error objective,
# n_estimators=1000, learning_rate=0.1). Printing it shows the configuration.
xgb_model = xgb.XGBRegressor(
    objective="reg:squarederror",
    n_estimators=1000,
    learning_rate=0.1,
)
print(xgb_model)

# Seeded split so the reported numbers are repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# The held-out split is also handed to fit() as the evaluation set.
eval_pairs = [(X_test, y_test)]
xgb_model.fit(X_train, y_train, eval_set=eval_pairs)

# Root of the mean squared error on the held-out split.
y_pred = xgb_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(np.sqrt(mse))

# Cell output: the estimator's own score() on the held-out split.
xgb_model.score(X_test, y_test)

In [None]:
# CatBoost regressor for the diabetes data (RMSE loss, 1000 iterations,
# learning rate 0.1). Printing get_params() shows the explicit configuration.
catboost = cb.CatBoostRegressor(
    loss_function='RMSE',
    iterations=1000,
    learning_rate=0.1,
)
print(catboost.get_params())

# Seeded split so the reported numbers are repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# The test split is passed as eval_set only so that its metric is tracked
# during training. CatBoost enables use_best_model by default whenever an
# eval_set is supplied, which would keep only the trees up to the iteration
# that scores best on the *test* data and make the metrics below
# optimistically biased. Turn it off so every iteration is kept and the test
# split plays no part in model selection.
catboost.fit(X_train, y_train, eval_set=(X_test, y_test), use_best_model=False)

# Root of the mean squared error on the held-out split.
y_pred = catboost.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(np.sqrt(mse))

# Cell output: the estimator's own score() on the held-out split.
catboost.score(X_test, y_test)

# Cancer Binary Classification

In [66]:
# Breast-cancer dataset: features, target, and one seeded train/test split
# that the classifier cells below reuse without re-splitting.
cancer = load_breast_cancer()
X, y = cancer.data, cancer.target

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [None]:
# XGBoost binary classifier (logistic objective, n_estimators=1000,
# learning_rate=0.1) trained on the split prepared in the previous cell.
xgb_model = xgb.XGBClassifier(
    objective="binary:logistic",
    n_estimators=1000,
    learning_rate=0.1,
)

# The held-out split is also handed to fit() as the evaluation set.
eval_pairs = [(X_test, y_test)]
xgb_model.fit(X_train, y_train, eval_set=eval_pairs)

# Predictions for the held-out split, used for the confusion matrix below.
y_pred = xgb_model.predict(X_test)

# Report the estimator's score() first, then the confusion matrix.
print(xgb_model.score(X_test, y_test))
print(confusion_matrix(y_test, y_pred))

In [None]:
# CatBoost classifier for the breast-cancer data (l2_leaf_reg=0.5,
# 1000 iterations, learning rate 0.1), trained on the existing split.
catboost = cb.CatBoostClassifier(
    l2_leaf_reg=0.5,
    iterations=1000,
    learning_rate=0.1,
)

# The test split is passed as eval_set only so that its metric is tracked
# during training. CatBoost enables use_best_model by default whenever an
# eval_set is supplied, which would keep only the trees up to the iteration
# that scores best on the *test* data and inflate the score printed below.
# Turn it off so the test split plays no part in model selection.
catboost.fit(X_train, y_train, eval_set=(X_test, y_test), use_best_model=False)
print(catboost.score(X_test, y_test))

# Confusion matrix of the held-out predictions.
y_pred = catboost.predict(X_test)

print(confusion_matrix(y_test, y_pred))

# Wine Multiclass Classification

In [87]:
# Wine dataset: features, target, and one seeded train/test split that the
# classifier cells below reuse without re-splitting.
wine = load_wine()
X, y = wine.data, wine.target

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [None]:
# XGBoost multi-class classifier (softprob objective, n_estimators=1000,
# learning_rate=0.1) trained on the split prepared in the previous cell.
xgb_model = xgb.XGBClassifier(
    objective="multi:softprob",
    n_estimators=1000,
    learning_rate=0.1,
)

# The held-out split is also handed to fit() as the evaluation set.
eval_pairs = [(X_test, y_test)]
xgb_model.fit(X_train, y_train, eval_set=eval_pairs)

# Predictions for the held-out split, used for the confusion matrix below.
y_pred = xgb_model.predict(X_test)

# Report the estimator's score() first, then the confusion matrix.
print(xgb_model.score(X_test, y_test))
print(confusion_matrix(y_test, y_pred))

In [None]:
# CatBoost classifier for the wine data (1000 iterations, learning rate 0.1),
# trained on the existing split.
catboost = cb.CatBoostClassifier(
    iterations=1000,
    learning_rate=0.1,
)

# The test split is passed as eval_set only so that its metric is tracked
# during training. CatBoost enables use_best_model by default whenever an
# eval_set is supplied, which would keep only the trees up to the iteration
# that scores best on the *test* data and inflate the score printed below.
# Turn it off so the test split plays no part in model selection.
catboost.fit(X_train, y_train, eval_set=(X_test, y_test), use_best_model=False)
print(catboost.score(X_test, y_test))

# Confusion matrix of the held-out predictions.
y_pred = catboost.predict(X_test)

print(confusion_matrix(y_test, y_pred))

# Custom Dataset

In [None]:
# Read the housing CSV (path is relative to the working directory) and
# display the resulting frame as the cell output.
df = pd.read_csv(filepath_or_buffer='Melbourne_housing_FULL.csv')
df

In [None]:
# Columns kept for modelling. 'Rooms' and 'Price' serve as targets in the
# sections that follow.
cols_to_use = ['Suburb', 'Rooms', 'Type', 'Method', 'SellerG', 'Regionname', 'Propertycount',
               'Distance', 'CouncilArea', 'Bedroom2', 'Bathroom', 'Car', 'Landsize', 'BuildingArea', 'Price']
# Take an explicit copy of the column subset: assigning into a frame obtained
# by slicing otherwise triggers pandas' SettingWithCopyWarning.
df = df[cols_to_use].copy()

# Missing values in these columns are replaced with 0.
cols_to_fill_zero = ['Propertycount', 'Distance', 'Bedroom2', 'Bathroom', 'Car']
df[cols_to_fill_zero] = df[cols_to_fill_zero].fillna(0)

# Missing areas are replaced with the column mean.
df['Landsize'] = df['Landsize'].fillna(df['Landsize'].mean())
df['BuildingArea'] = df['BuildingArea'].fillna(df['BuildingArea'].mean())

# Drop every row that still has a missing value (reassigning instead of
# mutating in place), then expand the remaining categorical columns with
# get_dummies, dropping the first level of each.
df = df.dropna()
df = pd.get_dummies(df, drop_first=True)
df

# Rooms Classification

In [107]:
# Target is the 'Rooms' column; the features are every other column of df.
y = df['Rooms']
X = df.drop(columns=['Rooms'])

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [None]:
# Encode the Rooms labels as consecutive integers. The mapping is learned
# from the training targets only.
le = LabelEncoder()
y_train = le.fit_transform(y_train)

# The test targets must go through the *same* mapping. Refitting the encoder
# on y_test could assign different integers to the same room count whenever a
# rare class is missing from one of the splits, silently corrupting the score
# and the confusion matrix. Labels that never occur in the training split
# cannot be encoded (or predicted), so those test rows are dropped with a
# notice instead of being mis-mapped.
seen = np.isin(y_test, le.classes_)
if not seen.all():
    print(f"Dropping {int((~seen).sum())} test rows whose Rooms value is absent from the training split")
    X_test, y_test = X_test[seen], y_test[seen]
y_test = le.transform(y_test)

# XGBoost multi-class classifier (softprob objective, n_estimators=100,
# learning_rate=0.1); the held-out split is also handed to fit() as eval_set.
xgb_model = xgb.XGBClassifier(
    objective="multi:softprob",
    n_estimators=100,
    learning_rate=0.1,
)
xgb_model.fit(X_train, y_train, eval_set=[(X_test, y_test)])
print(xgb_model.score(X_test, y_test))

# Confusion matrix of the held-out predictions (in encoded label space).
y_pred = xgb_model.predict(X_test)

print(confusion_matrix(y_test, y_pred))

In [None]:
# CatBoost classifier for the Rooms target (2000 iterations, learning rate
# 0.1). It trains on whatever X_train / y_train the preceding cells left in
# the namespace.
catboost = cb.CatBoostClassifier(
    iterations=2000,
    learning_rate=0.1,
)

# The test split is passed as eval_set only so that its metric is tracked
# during training. CatBoost enables use_best_model by default whenever an
# eval_set is supplied, which would keep only the trees up to the iteration
# that scores best on the *test* data and inflate the score printed below.
# Turn it off so the test split plays no part in model selection.
catboost.fit(X_train, y_train, eval_set=(X_test, y_test), use_best_model=False)
print(catboost.score(X_test, y_test))

# Confusion matrix of the held-out predictions.
y_pred = catboost.predict(X_test)

print(confusion_matrix(y_test, y_pred))

# Price regression

In [109]:
# Target is the 'Price' column; the features are every other column of df.
y = df['Price']
X = df.drop(columns=['Price'])

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [None]:
# XGBoost regressor for the Price target (squared-error objective,
# n_estimators=1000, learning_rate=0.1).
xgb_model = xgb.XGBRegressor(
    objective="reg:squarederror",
    n_estimators=1000,
    learning_rate=0.1,
)

# Same seeded split as the cell above; repeating it keeps this cell
# runnable on its own.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# The held-out split is also handed to fit() as the evaluation set.
eval_pairs = [(X_test, y_test)]
xgb_model.fit(X_train, y_train, eval_set=eval_pairs)

# Root of the mean squared error on the held-out split.
y_pred = xgb_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(np.sqrt(mse))

# Cell output: the estimator's own score() on the held-out split.
xgb_model.score(X_test, y_test)

In [None]:
# CatBoost regressor for the Price target (RMSE loss, 5000 iterations,
# learning rate 0.1). Printing get_params() shows the explicit configuration.
catboost = cb.CatBoostRegressor(
    loss_function='RMSE',
    iterations=5000,
    learning_rate=0.1,
)
print(catboost.get_params())

# Same seeded split as the cells above; repeating it keeps this cell
# runnable on its own.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# The test split is passed as eval_set only so that its metric is tracked
# during training. CatBoost enables use_best_model by default whenever an
# eval_set is supplied, which would keep only the trees up to the iteration
# that scores best on the *test* data and make the metrics below
# optimistically biased. Turn it off so every iteration is kept and the test
# split plays no part in model selection.
catboost.fit(X_train, y_train, eval_set=(X_test, y_test), use_best_model=False)

# Root of the mean squared error on the held-out split.
y_pred = catboost.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(np.sqrt(mse))

# Cell output: the estimator's own score() on the held-out split.
catboost.score(X_test, y_test)