## Importing libraries

In [8]:
import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import KFold, cross_val_score, cross_validate, train_test_split
from sklearn.preprocessing import LabelEncoder

## Importing dataset

In [2]:
# Load the two CSV tables: df_x supplies the model features and df_y the labels
# (see the cell that builds train_x / train_y).
x_csv_path = '../dataframe/05-working with skills/df_x.csv'
y_csv_path = '../dataframe/05-working with skills/df_y.csv'
df_x = pd.read_csv(x_csv_path)
df_y = pd.read_csv(y_csv_path)

## Fixing NaN values

In [3]:
# Missing labels become empty strings so the next cell can filter them out
# with a plain equality test.
label_column = "current experience"
df_y[label_column] = df_y[label_column].fillna("")

In [None]:
# Rows whose label is blank (the filled-in NaNs) or "others" are excluded.
unusable_label = df_y["current experience"].isin(["", "others"])
indexes = df_y.index[unusable_label]
# Drop the same row labels from both frames so features and labels stay aligned.
# NOTE(review): this assumes df_x and df_y share the same index — confirm.
df_y.drop(indexes, inplace=True)
df_x.drop(indexes, inplace=True)
# Show the label values that remain after filtering.
df_y["current experience"].unique()

In [5]:
# Encoder used to turn the class labels into integer codes; it is fitted in the
# following cell.
le = LabelEncoder()

In [None]:
# Feature matrix: every column of df_x except the first.
# NOTE(review): the first column is presumably a saved index/id column — confirm.
# Kept as a NumPy array: a round-trip through nested Python lists only adds
# overhead, because the estimators convert their input to arrays anyway.
train_x = df_x.iloc[:, 1:].values

# Select the label column by name rather than flattening the whole frame:
# `df_y.values.ravel()` would mix any additional column into the labels and
# break the one-label-per-row alignment with train_x.
train_y = le.fit_transform(df_y["current experience"].values)

# Sanity check: both arrays should have the same number of rows.
np.shape(train_x), np.shape(train_y)

## K-fold cross-validation setup

In [7]:
# Shuffled 10-fold splitter; the fixed seed keeps the fold assignment repeatable.
cv = KFold(n_splits=10, shuffle=True, random_state=1)

## Train test split

In [None]:
# Hold out 20% of the rows as a test set; shuffling uses a fixed seed so the
# split is the same on every run.
x_train, x_test, y_train, y_test = train_test_split(
    train_x,
    train_y,
    test_size=0.20,
    shuffle=True,
    random_state=0,
)
y_train

## Training the model

In [None]:
# Fit a gradient-boosting classifier on the training split (`fit` returns the
# estimator itself, so construction and fitting can be chained).
clf = GradientBoostingClassifier(
    n_estimators=200,
    learning_rate=0.1,
    max_depth=4,
    random_state=0,
).fit(x_train, y_train)
# Score on the data the model was trained on — an optimistic figure, shown only
# for comparison with the held-out score.
clf.score(x_train, y_train)

## Cross validation using negative mean absolute error

In [None]:
# Evaluate both error metrics on every fold in a single cross-validation pass.
# NOTE(review): MAE/RMSE are computed on the label-encoded class ids, so they
# treat the classes as ordered numbers — confirm that ordering is meaningful,
# otherwise prefer a classification metric such as accuracy.
cv_results = cross_validate(
    clf,
    x_train,
    y_train,
    scoring={"mae": "neg_mean_absolute_error", "mse": "neg_mean_squared_error"},
    cv=cv,
    n_jobs=1,
)
# scikit-learn reports errors negated ("higher is better"); flip the sign back.
# `scores` keeps the per-fold negative-MAE values, as the original cell did.
scores = cv_results["test_mae"]
mae_per_fold = -scores
mse_per_fold = -cv_results["test_mse"]
## Mean Absolute Error
print(f"MAE: {np.mean(mae_per_fold)}")
## Root mean squared error
# RMSE is the square root of the mean *squared* error; taking the square root
# of the MAE does not give an RMSE.
print(f"RMSE: {np.sqrt(np.mean(mse_per_fold))}")

## Cross-validation using the classifier's default score (accuracy)

In [None]:
# Dedicated 20-fold splitter for this check. It gets its own name so the
# 10-fold `cv` used by the MAE cell is not silently replaced when cells are
# re-run out of order.
cv_20_fold = KFold(n_splits=20, shuffle=True, random_state=1)
# With no `scoring` argument, cross_val_score uses the estimator's default scorer.
scores = cross_val_score(clf, x_train, y_train, cv=cv_20_fold)

# Per-fold scores plus their spread and average.
print(f"scores: {scores}\nmin: {np.min(scores)}\nmax: {np.max(scores)}\nmean: {np.mean(scores)}")

In [11]:
# Score on the held-out test split. `score` runs the prediction internally, so
# a separate `predict` call whose result is discarded is not needed.
clf.score(x_test, y_test)

0.5632183908045977

## Using XGBoost

In [12]:
import xgboost as xgb

In [None]:
# XGBoost classifier with default hyper-parameters. The seed is pinned
# explicitly, like every other estimator and splitter in this notebook, so the
# run does not depend on the library's implicit default.
xgb_classifier = xgb.XGBClassifier(random_state=0)
xgb_classifier.fit(x_train, y_train)

In [14]:
# Predicted class codes for the held-out test split; scored in the next cell.
predictions = xgb_classifier.predict(x_test)

In [None]:
# Fraction of test rows whose predicted class matches the true class.
accuracy_score(y_true=y_test, y_pred=predictions)

In [None]:
# Re-create and re-fit the gradient-boosting model; hyper-parameters and seed
# are the same as in the earlier "Training the model" cell.
gb_params = dict(n_estimators=200, learning_rate=0.1, max_depth=4, random_state=0)
clf = GradientBoostingClassifier(**gb_params)
clf.fit(x_train, y_train)
# Score on the training split itself.
clf.score(x_train, y_train)

In [None]:
# Score of the re-fitted model on the held-out test split. `score` predicts
# internally, so no separate `predict` call is required.
clf.score(x_test, y_test)

## Using XGBoost

In [40]:
import xgboost as xgb

In [None]:
# Second XGBoost fit with default hyper-parameters. The seed is pinned
# explicitly, matching the seeded estimators elsewhere in this notebook.
xgb_classifier = xgb.XGBClassifier(random_state=0)
xgb_classifier.fit(x_train, y_train)

In [19]:
# Predicted class codes for the held-out test split, from the re-fitted XGBoost model.
predictions = xgb_classifier.predict(x_test)

In [None]:
# Share of test rows where the predicted class equals the true class.
accuracy_score(y_true=y_test, y_pred=predictions)