# Modeling
>### Here, I will analyze the results from 10 tried-and-tested models. After that, I will make a few deductions for our perusal. Note that you may fork this notebook and help others see other directions to take.

### First, let me import the libraries I will need

In [1]:
# Data splitting utilities.
# NOTE: `sklearn.cross_validation` was removed in scikit-learn 0.20; the same
# `train_test_split` lives in `sklearn.model_selection`, which this notebook
# already depends on for the shuffle-split classes below.
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import ShuffleSplit

# Evaluation metrics (`m` is the module alias used by the evaluation loop).
from sklearn import metrics as m
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score

# Candidate classifiers.
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier



### Next, I will define the set of 10 classifiers I have in mind to tackle the problem

In [2]:
# Candidate models keyed by the display name used in the results table and
# plots. Every estimator uses its library defaults except where noted.
classifiers = {
    'Gradient Boosting Classifier': GradientBoostingClassifier(),
    'XGBoost Classifier': XGBClassifier(),
    'Adaptive Boosting Classifier': AdaBoostClassifier(),
    'Linear Discriminant Analysis': LinearDiscriminantAnalysis(),
    'Logistic Regression': LogisticRegression(),
    'Random Forest Classifier': RandomForestClassifier(),
    # 8 neighbours instead of the library default.
    'K Nearest Neighbour': KNeighborsClassifier(n_neighbors=8),
    'Decision Tree Classifier': DecisionTreeClassifier(),
    'Gaussian Naive Bayes Classifier': GaussianNB(),
    # probability=True turns on probability estimates for the SVC.
    'Support Vector Classifier': SVC(probability=True),
}

### Drop, drop, drop! Next, I will drop all the fields that failed my causality test. That's why I went to the length of verifying the causal variables in the 3rd notebook of this repository.
>### Note: the dataframe at this point is the 2.5MB one, not the original one

In [None]:
# The target column.
ydata = df['y']

# Feature matrix: everything in `df` except the target and the fields
# excluded from modeling.
dropped_columns = [
    'y',
    'duration',
    'default',
    'euribor3m',
    'numEmployed',
    'pdays',
    'empVarRate',
    'contact',
    'consPriceIdx',
]
Xdata = df.drop(dropped_columns, axis=1)

# Show which features remain.
print(Xdata.columns)

In [None]:
# Layout of the results table: the classifier's display name followed by one
# column per evaluation metric.
log_cols = ["Classifier"] + [
    "Accuracy",
    "Precision Score",
    "Recall Score",
    "F1-Score",
    "roc-auc_Score",
]

# Empty table that the evaluation loop fills with one row per fitted model.
log = pd.DataFrame(data=None, columns=log_cols)

In [None]:
import warnings

from sklearn.preprocessing import StandardScaler

# Silence library warnings so the results table printed below stays readable.
warnings.filterwarnings('ignore')

# Three stratified 70/30 train/test splits; the fixed seed keeps the run
# reproducible.
rs = StratifiedShuffleSplit(n_splits=3, test_size=0.3, random_state=0)

# One row per (classifier, split), collected in a plain list and turned into
# a DataFrame once at the end (DataFrame.append was removed in pandas 2.0 and
# copies the whole table on every call).
rows = []
for Name, classify in classifiers.items():
    for train_index, test_index in rs.split(Xdata, ydata):
        X, X_test = Xdata.iloc[train_index], Xdata.iloc[test_index]
        y, y_test = ydata.iloc[train_index], ydata.iloc[test_index]

        # Fit the scaler on the training fold only, then apply the same
        # transformation to the held-out fold, so no test statistics leak
        # into training.
        sc_X = StandardScaler()
        X = sc_X.fit_transform(X)
        X_test = sc_X.transform(X_test)

        cls = classify.fit(X, y)
        y_out = cls.predict(X_test)

        accuracy = m.accuracy_score(y_test, y_out)
        precision = m.precision_score(y_test, y_out, average='macro')
        recall = m.recall_score(y_test, y_out, average='macro')
        f1_score = m.f1_score(y_test, y_out, average='macro')
        # roc_auc_score expects (y_true, y_score). Score with the predicted
        # probability of the positive class rather than hard labels so the
        # AUC reflects ranking quality.
        # NOTE(review): assumes a binary target whose positive class is
        # cls.classes_[1] - confirm against the prepared dataframe.
        roc_auc = roc_auc_score(y_test, cls.predict_proba(X_test)[:, 1])

        rows.append([Name, accuracy, precision, recall, f1_score, roc_auc])

# Add the new rows to whatever `log` already holds; when `log` is still the
# empty table, use the new rows directly so the metric columns keep their
# numeric dtype.
results = pd.DataFrame(rows, columns=log_cols)
log = results if log.empty else pd.concat([log, results], ignore_index=True)

print(log)

# Horizontal bar chart of accuracy per classifier; each classifier has one
# row per split in `log`.
sns.set_color_codes("muted")
sns.barplot(x='Accuracy', y='Classifier', data=log, color="b")
plt.xlabel('Accuracy')
plt.title('Classifier Accuracy')
plt.show()

In [None]:
# Scatter every logged run by its recall (x) against its precision (y).
recall_values = log['Recall Score']
precision_values = log['Precision Score']
plt.scatter(
    recall_values,
    precision_values,
    color='navy',
    label='Precision-Recall',
)

# Both scores live in [0, 1], so pin the axes to that range.
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.0])

plt.title('Precision-Recall')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.legend(loc="lower left")
plt.show()