# Importing Libraries

In [None]:
#import the necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA


%matplotlib inline
sns.set()
plt.style.use('seaborn-whitegrid')
sns.set_style("white")
import warnings
warnings.filterwarnings("ignore")

import scipy.stats as st
#import pingouin as pg
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV, RepeatedStratifiedKFold
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, BaggingClassifier
from sklearn.svm import SVC, NuSVC
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, recall_score, precision_score, f1_score, auc, roc_auc_score, roc_curve, plot_confusion_matrix, classification_report
from xgboost import XGBClassifier
from catboost import CatBoostClassifier

from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest, f_regression, RFE
#from mlxtend.feature_selection import SequentialFeatureSelector as sfs
from statsmodels.api import OLS, add_constant


# EDA

## Description of Variables

In [None]:
# information on the variables
names=f"""ID	ID
Age	age
Gender	gender
Education	level of education
Country	country of current residence
Ethnicity	ethnicity
Nscore	NEO-FFI-R Neuroticism
Escore	NEO-FFI-R Extraversion
Oscore	NEO-FFI-R Openness
Ascore	NEO-FFI-R Agreeableness
Cscore	NEO-FFI-R Conscientiousness
Impulsive	BIS-11 Impulsiveness
SS	ImpSS sensation
Alcohol	alcohol consumption (output attribute)
Amphet	amphetamines consumption (output attribute)
Amyl	amyl nitrite consumption (output attribute)
Benzos	benzodiazepine consumption (output attribute)
Caff	caffeine consumption (output attribute)
Cannabis	cannabis consumption (output attribute)
Choc	chocolate consumption (output attribute)
Coke	cocaine consumption (output attribute)
Crack	crack consumption (output attribute)
Ecstasy	ecstasy consumption (output attribute)
Heroin	heroin consumption (output attribute)
Ketamine	ketamine consumption (output attribute)
Legalh	legal highs consumption (output attribute)
LSD	LSD consumption (output attribute)
Meth	methadone consumption (output attribute)
Mushrooms	magic mushrooms consumption (output attribute)
Nicotine	nicotine consumption (output attribute)
Semer	fictitious drug Semeron consumption (output attribute) Should be removed from our dataset, because of overclaiming observers!
VSA	volatile substance abuse consumption (output attribute)"""

## Downloading the data

In [None]:
# url to the dataset
url=f"https://archive.ics.uci.edu/ml/machine-learning-databases/00373/drug_consumption.data"

In [None]:
# Download dataset with the column titles
def download(url, titles):
    """Read the headerless CSV at ``url`` and name its columns from ``titles``.

    Parameters
    ----------
    url : str or file-like
        Anything ``pd.read_csv`` accepts.
    titles : str
        One line per column, each line being the column name, a tab and a
        description. Only the part in front of the first tab is used.

    Returns
    -------
    pandas.DataFrame
        The loaded data with the column names applied.
    """
    df = pd.read_csv(url, header=None)
    # Keep only the short column name of every "name<TAB>description" line.
    df.columns = [row.split("\t")[0] for row in titles.split("\n")]
    # DataFrame.info() prints its report itself and returns None, so wrapping
    # it in display() only added a stray "None" to the output.
    df.info()
    print("Missing Values: ")
    display(df.isna().sum())
    print("Sample of the dataset: ")
    display(df.head())
    return df

In [None]:
df=download(url, names)

In [None]:
# personality test scores columns
scores= ["Nscore", "Escore", "Oscore", "Ascore", "Cscore", "Impulsive", "SS"]

In [None]:
# groups labels for categorical variables
age_lb=["18-24", "25-34", "35-44", "45-54", "55-64", "65+"]
gender_lb=["Male", "Female"]
education_lb=["Left before 16", "Left at 16", "Left at 17", "Left at 18","Left at Uni", "Certificate/Diploma", "University", "Masters", "PhD"]
country_lb=["USA", "New Zealand", "Other", "Australia",  "Republic of Ireland", "Canada", "UK"]
ethnicity_lb=["Black", "Asian", "White", "Mixed-White/Black", "Other", "Mixed-White/Asian", "Mixed-Black/Asian"]
user_lb=["Non-user", "Drug user"]

### Removing overclaimers from our dataset!

In [None]:
# Respondents who claim to have used the fictitious drug Semeron exaggerate
# their drug use: their rows are removed, then the Semer column itself.
def semer(df):
    """Drop overclaimers from ``df`` and return a copy without ``Semer``.

    Every row whose ``Semer`` value is not "CL0" is removed from ``df`` in
    place. The returned frame is a new object without the ``Semer`` column.
    """
    display(df["Semer"].value_counts())
    overclaimers = df.index[df["Semer"] != "CL0"]
    df.drop(index=overclaimers, inplace=True)
    return df.drop(columns=["Semer"])

In [None]:
df=semer(df)

In [None]:
df

In [None]:
## transforming the non-numerical columns
def non_num_transform(df):
    """Label-encode every non-numeric column of ``df`` in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Modified in place: each non-numeric column is replaced by the integer
        codes produced by a fresh ``LabelEncoder``.

    Returns
    -------
    list of str
        Names of the encoded columns, in the order they appear in ``df``.
    """
    # select_dtypes is the public way to split columns by dtype and keeps the
    # DataFrame's column order, so the result is the same on every run.
    output = df.select_dtypes(exclude=["number", "bool"]).columns.tolist()
    for o in output:
        df[o] = LabelEncoder().fit_transform(df[o])
    return output

output_attributes=non_num_transform(df)

In [None]:
output_attributes

### Changing the Country variable type to categorical one

In [None]:
# Convert the numeric country code into a country name.
# Each entry is (lower bound, upper bound, name); both bounds are exclusive.
_COUNTRY_BANDS = (
    (-0.5, -0.4, 'New Zealand'),
    (-0.4, -0.2, 'Other'),
    (-0.2, 0, 'Australia'),
    (0, 0.23, 'Ireland'),
    (0.23, 0.9, 'Canada'),
)


def _country_label(code):
    """Return the country name whose band contains ``code``."""
    if code < -0.5:
        return 'USA'
    for low, high, name in _COUNTRY_BANDS:
        if low < code < high:
            return name
    # Everything that matched no band above is labelled as the UK.
    return 'UK'


country = [_country_label(code) for code in df['Country']]

In [None]:
df['Country'] = country

### Changing the Gender variable type to categorical one

In [None]:
# Convert the numeric gender code into a readable label.
# The codes are rounded to the 5 decimals they are written with before the
# lookup, so the result does not depend on exact float equality with the value
# parsed from the CSV. Every code other than the male one is labelled 'Female'.
gender = (
    df['Gender']
    .round(5)
    .map({-0.48246: 'Male'})
    .fillna('Female')
    .tolist()
)

In [None]:
df['Gender'] = gender

In [None]:
# Share of participants (in %) per gender.
Gender_count = df['Gender'].value_counts()
sns.set(style="darkgrid")
# x and y are passed by keyword: seaborn >= 0.12 rejects positional data here.
sns.barplot(x=Gender_count.index, y=Gender_count.values*100/len(df.index), alpha=0.9)
plt.title('Frequency Distribution of Gender', fontsize=14)
plt.ylabel('Fraction of Participants (%)', fontsize=12)
plt.xlabel('Gender', fontsize=12)
plt.xticks(rotation=60)
plt.show()

### Changing the Age variable type to categorical one

In [None]:
# Convert the numeric age code into its age-band label.
# The codes are rounded to the 5 decimals they are written with before the
# lookup, so the result does not depend on exact float equality with the value
# parsed from the CSV. Codes not listed here are labelled '65+'.
_AGE_CODES = {
    -0.95197: '18-24',
    -0.07854: '25-34',
    0.49788: '35-44',
    1.09449: '45-54',
    1.82213: '55-64',
}
age = df['Age'].round(5).map(_AGE_CODES).fillna('65+').tolist()

In [None]:
df['Age'] = age

In [None]:
# Share of participants (in %) per age band.
Age_count = df['Age'].value_counts()
sns.set(style="darkgrid")
# x and y are passed by keyword: seaborn >= 0.12 rejects positional data here.
sns.barplot(x=Age_count.index, y=Age_count.values*100/len(df.index), alpha=0.9)
plt.title('Frequency Distribution of Age', fontsize=14)
plt.ylabel('Fraction of Participants (%)', fontsize=12)
plt.xlabel('Age', fontsize=12)
plt.xticks(rotation=60)
plt.show()

### Changing the Ethnicity variable type to categorical one

In [None]:
# Convert the numeric ethnicity code into a readable label.
# The codes are rounded to the 5 decimals they are written with before the
# lookup, so the result does not depend on exact float equality with the value
# parsed from the CSV. Codes not listed here are labelled "Others".
_ETHNICITY_CODES = {
    -0.50212: "Asian",
    -1.10702: "Black",
    1.90725: "Mixed-Black/Asian",
    0.12600: "Mixed-White/Asian",
    -0.22166: "Mixed-White/Black",
    -0.31685: "White",
}
ethnicity = df['Ethnicity'].round(5).map(_ETHNICITY_CODES).fillna("Others").tolist()

In [None]:
df['Ethnicity'] = ethnicity

In [None]:
# Share of participants (in %) per ethnicity.
Ethnicity_count = df['Ethnicity'].value_counts()
sns.set(style="darkgrid")
# x and y are passed by keyword: seaborn >= 0.12 rejects positional data here.
sns.barplot(x=Ethnicity_count.index, y=Ethnicity_count.values*100/len(df.index), alpha=0.9)
plt.title('Frequency Distribution of Ethnicity', fontsize=14)
plt.ylabel('Fraction of Participants (%)', fontsize=12)
plt.xlabel('Ethnicity', fontsize=12)
plt.xticks(rotation=60)
plt.show()

In [None]:
# Share of participants (in %) per education level.
Education_count = df['Education'].value_counts()
sns.set(style="darkgrid")
c = 'Education'
# Direct reference to the label list; no eval() is needed for a fixed name.
label = education_lb
# Education is still numeric here. The bars are ordered by ascending code
# explicitly so that they line up with education_lb, which is assumed to list
# the levels in that same order (TODO confirm against the dataset description).
# x and y are passed by keyword: seaborn >= 0.12 rejects positional data here.
g = sns.barplot(
    x=Education_count.index,
    y=Education_count.values*100/len(df.index),
    order=sorted(Education_count.index),
    alpha=0.9,
)
g.set_xticklabels(label)
plt.title('Frequency Distribution of Education', fontsize=14)
plt.ylabel('Fraction of Participants (%)', fontsize=12)
plt.xlabel('Education', fontsize=12)
plt.xticks(rotation=60)
plt.show()

In [None]:
df['Education'].value_counts()

In [None]:
# Share of participants (in %) per country.
Country_count = df['Country'].value_counts()
sns.set(style="darkgrid")
# x and y are passed by keyword: seaborn >= 0.12 rejects positional data here.
sns.barplot(x=Country_count.index, y=Country_count.values*100/len(df.index), alpha=0.9)
plt.title('Frequency Distribution of Country', fontsize=14)
plt.ylabel('Fraction of Participants (%)', fontsize=12)
plt.xlabel('Country', fontsize=12)
plt.xticks(rotation=60)
plt.show()

### Removing ID as non-important Feature!

In [None]:
# removing the ID column from dataset
df.drop(columns="ID", inplace=True)

### Removing the Ethnicity feature because the data is highly biased!

In [None]:
df.drop(df[df.Ethnicity!='White'].index, axis=0, inplace=True)

In [None]:
# removing the Ethnicty column from dataset
df.drop(columns="Ethnicity", inplace=True)

### Finding the overclaimers and removing these observations from our dataset!

## Hypothesis:

* Higher Nscore, higher probability of using hard drugs.
* Higher Oscore, higher probability of using hard drugs.
* Lower Ascore, higher probability of using hard drugs.
* Lower Cscore, higher probability of using hard drugs.
* Higher Impulsive score, higher probability of using hard drugs.
* Higher SS, higher probability of using hard drugs.
* Men use more drugs than women.
* Young people use more drugs than older people.
* Less educated people use more drugs than highly educated people.

## Defining Hard Drugs:

In [None]:
# define what are hard drugs
hard_drugs=["Amphet", "Benzos", "Coke", "Crack", "Ecstasy", "Heroin", "Legalh", "Meth"]

In [None]:
# Add a column flagging hard-drug users: anyone whose encoded consumption
# level is above 2 for at least one hard drug (level 3 = "Used in Last Year",
# assuming the levels were label-encoded as 0-6).
def hard_drug_user(df, hard_drugs):
    """Add the boolean ``hard`` column to ``df`` and show summary tables.

    Parameters
    ----------
    df : pandas.DataFrame
        Modified in place. Must hold the columns in ``hard_drugs`` with
        numeric consumption levels, plus the seven personality score columns.
    hard_drugs : list of str
        Columns that count as hard drugs.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, now with the ``hard`` column.
    """
    # Compare only the hard-drug columns; the previous df[mask].any(axis=1)
    # went through every column of the frame, including the text ones.
    df["hard"] = (df[hard_drugs] > 2).any(axis=1)
    print("The distribution of hard drug user:")
    display(df.hard.value_counts(normalize=True).round(2))
    print("Descriptive summary of the hard drug user vs non user on personality test scores:")
    score_cols = ["Nscore", "Escore", "Oscore", "Ascore", "Cscore", "Impulsive", "SS"]
    # Select the score columns before aggregating: mean/std/median cannot be
    # computed on text columns such as Age, Gender or Country.
    summary = df.groupby("hard")[score_cols].agg(["mean", "std", "median", "min", "max"]).round(2)
    display(summary.stack()[score_cols])
    return df

In [None]:
df=hard_drug_user(df, hard_drugs)

In [None]:
# Share of participants (in %) that are / are not hard-drug users.
hard_count = df['hard'].value_counts()
sns.set(style="darkgrid")
# x and y are passed by keyword: seaborn >= 0.12 rejects positional data here.
sns.barplot(x=hard_count.index, y=hard_count.values*100/len(df.index), alpha=0.9)
plt.title('Frequency Distribution of Hard Drug Users', fontsize=14)
plt.ylabel('Fraction of Participants (%)', fontsize=12)
plt.xlabel('Being Hard Drug User', fontsize=12)
plt.xticks(rotation=60)
plt.show()

## Value	Description
0 - Never Used

1 - Used over a Decade Ago

2 - Used in Last Decade

3 - Used in Last Year

4 - Used in Last Month

5 - Used in Last Week

6 - Used in Last Day

In [None]:
def plot_demo(df, cat):
    """Plot the percentage of hard-drug users per group, split by gender.

    One figure is drawn for every column named in ``cat``.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the boolean ``hard`` column, a ``Gender`` column and every
        column named in ``cat``.
    cat : iterable of str
        Grouping columns. For a numeric column ``X`` a module-level list
        ``x_lb`` (lower-cased name + "_lb") supplies the tick labels.

    Returns
    -------
    None
    """
    plt.style.use('seaborn-whitegrid')
    graph_name="graph/hard drug user by {}.png"
    sns.set(style="white", font_scale = 1.5)
    for c in cat:
        _, ax = plt.subplots(figsize=(15, 7))
        users=pd.pivot_table(df[df.hard==True], values="hard", columns="Gender", index=c, aggfunc="count")
        everyone=pd.pivot_table(df, values="hard", columns="Gender", index=c, aggfunc="count")
        p=round(users/everyone*100, 2).reset_index().melt(id_vars=[c])
        g=sns.barplot(x=c, y="value", data=p, hue='Gender', palette=["b","r"], ax=ax)
        # The *_lb lists are written in the order of the numeric codes, so
        # they are only applied while the column is still numeric. A column
        # that already holds text keeps its own values as tick labels;
        # overwriting those with the list would mislabel the bars.
        if pd.api.types.is_numeric_dtype(df[c]):
            g.set_xticklabels(globals()[c.lower()+"_lb"])
        g.set_ylabel("Hard Drug User Percentage")
        g.set_xlabel(f"{c} Group")
        g.set_title(f"Distribution of Hard Drug User by {c} and Gender")
        # Same reasoning for the legend: relabel only numeric gender codes.
        if pd.api.types.is_numeric_dtype(df["Gender"]):
            for t, l in zip(g.legend().texts, gender_lb): t.set_text(l)
        g.set_xticklabels(g.get_xticklabels(), rotation=15)
        #plt.savefig(graph_name.format(c) , transparent=True)
        plt.show()
    return None

In [None]:
# define categories
cat=["Age", "Education", "Country"]

In [None]:
plot_demo(df,cat)

In [None]:
cat=["Age", "Education", "Country"]

In [None]:
def plot_demo(df, cat, score="Nscore"):
    """Plot the mean of ``score`` per group for hard-drug users vs non-users.

    One figure is drawn for every column named in ``cat``.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the boolean ``hard`` column, the ``score`` column and every
        column named in ``cat``.
    cat : iterable of str
        Grouping columns. For a numeric column ``X`` a module-level list
        ``x_lb`` (lower-cased name + "_lb") supplies the tick labels.
    score : str, default "Nscore"
        Column whose mean is plotted.

    Returns
    -------
    None
    """
    plt.style.use('seaborn-whitegrid')
    graph_name="graph/hard drug user by {}.png"
    sns.set(style="white", font_scale = 1.5)
    for c in cat:
        _, ax = plt.subplots(figsize=(15, 7))
        means=pd.pivot_table(df, values=score, columns="hard", index=c, aggfunc="mean")
        p=round(means, 5).reset_index().melt(id_vars=[c])
        g=sns.barplot(x=c, y="value", data=p, hue='hard', palette=["b","r"], ax=ax)
        # The *_lb lists are written in the order of the numeric codes, so
        # they are only applied while the column is still numeric. A column
        # that already holds text keeps its own values as tick labels;
        # overwriting those with the list would mislabel the bars.
        if pd.api.types.is_numeric_dtype(df[c]):
            g.set_xticklabels(globals()[c.lower()+"_lb"])
        g.set_ylabel(f"Mean of {score}")
        g.set_xlabel(f"{c} Group")
        g.set_title(f"Variation of mean {score} by {c} Group and User")
        for t, l in zip(g.legend().texts, user_lb): t.set_text(l)
        g.set_xticklabels(g.get_xticklabels(), rotation=15)
        #plt.savefig(graph_name.format(c) , transparent=True)
        plt.show()
    return None

In [None]:
plot_demo(df,cat)

In [None]:
cat=["Age", "Education", "Country"]

In [None]:
def plot_demo(df, cat, score="Oscore"):
    """Plot the mean of ``score`` per group for hard-drug users vs non-users.

    One figure is drawn for every column named in ``cat``.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the boolean ``hard`` column, the ``score`` column and every
        column named in ``cat``.
    cat : iterable of str
        Grouping columns. For a numeric column ``X`` a module-level list
        ``x_lb`` (lower-cased name + "_lb") supplies the tick labels.
    score : str, default "Oscore"
        Column whose mean is plotted.

    Returns
    -------
    None
    """
    plt.style.use('seaborn-whitegrid')
    graph_name="graph/hard drug user by {}.png"
    sns.set(style="white", font_scale = 1.5)
    for c in cat:
        _, ax = plt.subplots(figsize=(15, 7))
        means=pd.pivot_table(df, values=score, columns="hard", index=c, aggfunc="mean")
        p=round(means, 5).reset_index().melt(id_vars=[c])
        g=sns.barplot(x=c, y="value", data=p, hue='hard', palette=["b","r"], ax=ax)
        # The *_lb lists are written in the order of the numeric codes, so
        # they are only applied while the column is still numeric. A column
        # that already holds text keeps its own values as tick labels;
        # overwriting those with the list would mislabel the bars.
        if pd.api.types.is_numeric_dtype(df[c]):
            g.set_xticklabels(globals()[c.lower()+"_lb"])
        g.set_ylabel(f"Mean of {score}")
        g.set_xlabel(f"{c} Group")
        g.set_title(f"Variation of mean {score} by {c} Group and User")
        for t, l in zip(g.legend().texts, user_lb): t.set_text(l)
        g.set_xticklabels(g.get_xticklabels(), rotation=15)
        #plt.savefig(graph_name.format(c) , transparent=True)
        plt.show()
    return None

In [None]:
plot_demo(df,cat)

In [None]:
def plot_demo(df, cat, score="Cscore"):
    """Plot the mean of ``score`` per group for hard-drug users vs non-users.

    One figure is drawn for every column named in ``cat``.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the boolean ``hard`` column, the ``score`` column and every
        column named in ``cat``.
    cat : iterable of str
        Grouping columns. For a numeric column ``X`` a module-level list
        ``x_lb`` (lower-cased name + "_lb") supplies the tick labels.
    score : str, default "Cscore"
        Column whose mean is plotted.

    Returns
    -------
    None
    """
    plt.style.use('seaborn-whitegrid')
    graph_name="graph/hard drug user by {}.png"
    sns.set(style="white", font_scale = 1.5)
    for c in cat:
        _, ax = plt.subplots(figsize=(15, 7))
        means=pd.pivot_table(df, values=score, columns="hard", index=c, aggfunc="mean")
        p=round(means, 5).reset_index().melt(id_vars=[c])
        g=sns.barplot(x=c, y="value", data=p, hue='hard', palette=["b","r"], ax=ax)
        # The *_lb lists are written in the order of the numeric codes, so
        # they are only applied while the column is still numeric. A column
        # that already holds text keeps its own values as tick labels;
        # overwriting those with the list would mislabel the bars.
        if pd.api.types.is_numeric_dtype(df[c]):
            g.set_xticklabels(globals()[c.lower()+"_lb"])
        g.set_ylabel(f"Mean of {score}")
        g.set_xlabel(f"{c} Group")
        g.set_title(f"Variation of mean {score} by {c} Group and User")
        for t, l in zip(g.legend().texts, user_lb): t.set_text(l)
        g.set_xticklabels(g.get_xticklabels(), rotation=15)
        #plt.savefig(graph_name.format(c) , transparent=True)
        plt.show()
    return None

In [None]:
plot_demo(df,cat)

In [None]:
def plot_demo(df, cat, score="Ascore"):
    """Plot the mean of ``score`` per group for hard-drug users vs non-users.

    One figure is drawn for every column named in ``cat``.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the boolean ``hard`` column, the ``score`` column and every
        column named in ``cat``.
    cat : iterable of str
        Grouping columns. For a numeric column ``X`` a module-level list
        ``x_lb`` (lower-cased name + "_lb") supplies the tick labels.
    score : str, default "Ascore"
        Column whose mean is plotted.

    Returns
    -------
    None
    """
    plt.style.use('seaborn-whitegrid')
    graph_name="graph/hard drug user by {}.png"
    sns.set(style="white", font_scale = 1.5)
    for c in cat:
        _, ax = plt.subplots(figsize=(15, 7))
        means=pd.pivot_table(df, values=score, columns="hard", index=c, aggfunc="mean")
        p=round(means, 5).reset_index().melt(id_vars=[c])
        g=sns.barplot(x=c, y="value", data=p, hue='hard', palette=["b","r"], ax=ax)
        # The *_lb lists are written in the order of the numeric codes, so
        # they are only applied while the column is still numeric. A column
        # that already holds text keeps its own values as tick labels;
        # overwriting those with the list would mislabel the bars.
        if pd.api.types.is_numeric_dtype(df[c]):
            g.set_xticklabels(globals()[c.lower()+"_lb"])
        g.set_ylabel(f"Mean of {score}")
        g.set_xlabel(f"{c} Group")
        g.set_title(f"Variation of mean {score} by {c} Group and User")
        for t, l in zip(g.legend().texts, user_lb): t.set_text(l)
        g.set_xticklabels(g.get_xticklabels(), rotation=15)
        #plt.savefig(graph_name.format(c) , transparent=True)
        plt.show()
    return None

In [None]:
plot_demo(df,cat)

In [None]:
# Independent variables (demographics + personality scores) and the target.
iv = ['Age', 'Gender', 'Education', 'Country', 'Nscore', 'Escore', 'Oscore', 'Ascore', 'Cscore', 'Impulsive', 'SS']
dv = ['hard']
# Age, Gender and Country were turned into text labels earlier in the
# notebook, and scikit-learn estimators cannot be fitted on text. Every
# non-numeric feature is therefore one-hot encoded (first level dropped to
# avoid a redundant column); numeric features pass through unchanged.
X = pd.get_dummies(df[iv], drop_first=True, dtype=float)
y = df[dv]
# 70/30 split, stratified on the target so both parts keep the class balance.
X_train, X_test,y_train,y_test = train_test_split(X, y, test_size= 0.3, random_state=17, stratify=y)
# Flatten the single-column target frames into 1-D boolean arrays.
y_train=y_train.values.astype(bool).ravel()
y_test=y_test.values.astype(bool).ravel()

In [None]:
# list of models to construct
model_list=["LogisticRegression", "SVC", "NuSVC", "GaussianNB", "DecisionTreeClassifier", "RandomForestClassifier", "AdaBoostClassifier", "KNeighborsClassifier", "CatBoostClassifier"]
#"XGBClassifier"

# Constructor expressions for the models that need non-default arguments.
# A plain string is one model; a nested dict holds several named variants of
# the same estimator. The "{}" in the KNN entry is filled with the chosen k.
# max_iter is written as an integer literal: the float 1e8 is rejected by the
# parameter validation of current scikit-learn releases.
dct={
    "LogisticRegression": {
        "LogisticRegression": "LogisticRegression(max_iter=100000000)",
        "LogisticRegressionBalanced": "LogisticRegression(max_iter=100000000, class_weight='balanced')",
    },
    "SVC": "SVC(probability= True)",
    "NuSVC": "NuSVC(nu=0.1, probability= True)",
    "RandomForestClassifier": {
        "RandomForestClassifier": "RandomForestClassifier()",
        "RandomForestClassifierBalanced": "RandomForestClassifier(class_weight='balanced')",
    },
    "KNeighborsClassifier": {
        "KNeighborsClassifier": "KNeighborsClassifier(n_neighbors={})",
        "KNeighborsClassifierWeightedDistance": "KNeighborsClassifier(weights='distance')",
    },
}

In [None]:
def fit_model(model, m, X_train, X_test, y_train, y_test):
    """Fit ``model``, score it on the test split and show its confusion matrix.

    Parameters
    ----------
    model : estimator
        Must provide ``fit``, ``predict`` and ``predict_proba``.
    m : str
        Display name of the model; "CatBoostClassifier" gets special handling.
    X_train, X_test, y_train, y_test
        Train and test split; the targets are binary.

    Returns
    -------
    tuple
        ``([m, accuracy, precision, recall, f1, roc_auc], [fpr, tpr], table)``
        where the scores are rounded to 2 decimals and ``table`` is the raw
        confusion matrix as a labelled DataFrame.
    """
    model.fit(X_train, y_train)
    y_pred=model.predict(X_test)
    if m=="CatBoostClassifier":
        # The predictions are presumably the strings 'True'/'False' here (the
        # previous code eval()'d each item); compare the text instead of
        # evaluating it.
        y_pred=[str(i) == "True" for i in y_pred]
    # Probability of the positive class, used for both the AUC and the curve.
    y_score=model.predict_proba(X_test)[:,1]

    conf=confusion_matrix(y_test, y_pred)
    acc=accuracy_score(y_test,y_pred)
    rec=recall_score(y_test,y_pred)
    pr=precision_score(y_test,y_pred)
    f1=f1_score(y_test,y_pred)

    np.set_printoptions(precision=2)

    # Raw confusion matrix as a labelled table
    c=pd.DataFrame(conf, columns=["Predicted Non-User", "Predicted User"], index=["Actual Non-User", "Actual User"])
    c.index.name=m

    if m!="CatBoostClassifier":
        # Plot normalized confusion matrix
        cm = plot_confusion_matrix(model, X_test, y_test, display_labels=["Non-User", "User"], cmap=plt.cm.Blues, normalize="true")
        cm.ax_.set_title(f"{m} Normalized confusion matrix")
        plt.show()
    else:
        display(c)

    # The area under the ROC curve must be computed from scores, not from the
    # hard 0/1 predictions, which collapse the curve to a single point.
    model_roc = roc_auc_score(y_test, y_score)
    fpr,tpr,thresholds=roc_curve(y_test, y_score)
    # Built-in round() works whether the metrics are numpy or plain floats.
    scores=[round(float(v), 2) for v in (acc, pr, rec, f1, model_roc)]
    return [m] + scores, [fpr,tpr], c

In [None]:
def knn_elbow(knn, X_train, X_test, y_train, y_test):
    """Plot the error rate for a range of k values and return the best k.

    Parameters
    ----------
    knn : str
        Constructor expression with one "{}" placeholder for ``n_neighbors``,
        e.g. "KNeighborsClassifier(n_neighbors={})".
    X_train, X_test, y_train, y_test
        Train and test split. The error is measured on the test split.

    Returns
    -------
    int
        The k (number of neighbours) with the lowest error rate.

    Raises
    ------
    ValueError
        If the training set is too small to give any candidate k.
    """
    from math import log10, floor
    def round_1sf(x):
        return round(x, -int(floor(log10(abs(x)))))
    # Candidates run from 1 up to sqrt(n_train) rounded to 1 significant figure.
    upper = int(round_1sf(X_train.shape[0]**0.5))
    k_values = range(1,upper)
    if len(k_values) == 0:
        raise ValueError("training set too small: no candidate k to evaluate")
    error_rate = []
    for k in k_values:
        # NOTE: eval() builds the estimator from the template string; the
        # template must come from trusted code only.
        model=eval(knn.format(k))
        model.fit(X_train, y_train)
        y_pred=model.predict(X_test)
        error_rate.append(np.mean(y_pred != y_test))
    print("Elbow curve for k")
    plt.figure(figsize=(10,6))
    plt.plot(k_values,error_rate,color='blue', linestyle='dashed',
             marker='o',markerfacecolor='red', markersize=10)
    plt.title('Error Rate vs. K Value')
    plt.xlabel('K')
    plt.ylabel('Error Rate')
    plt.show()
    # error_rate[0] belongs to k=1, so the list position has to be translated
    # back into k; returning the bare position was off by one and could be 0.
    best_k = k_values[error_rate.index(min(error_rate))]
    print("Minimum error:-",min(error_rate),"at K =",best_k)
    return int(best_k)

In [None]:
def classification_models(models, X_train, X_test, y_train, y_test):
    """Fit and evaluate every model named in ``models``.

    A name found in the module-level ``dct`` is built from the constructor
    expression(s) stored there; any other name is treated as a class name and
    instantiated with default arguments.

    Parameters
    ----------
    models : iterable of str
        Model names.
    X_train, X_test, y_train, y_test
        Train and test split passed on to ``fit_model``.

    Returns
    -------
    tuple
        ``(perf, ROC, confusion)``: a DataFrame of scores indexed by model
        name, the list of ``[fpr, tpr]`` pairs and the list of confusion
        tables, all in evaluation order.
    """
    performance=[]
    ROC=[]
    confusion=[]

    def _record(model, name):
        # Evaluate one model and store its three result parts.
        p, r, c = fit_model(model, name, X_train, X_test, y_train, y_test)
        performance.append(p)
        ROC.append(r)
        confusion.append(c)

    for m in models:
        spec = dct.get(m)
        # NOTE: eval() turns the stored expressions / class names into
        # objects; they must come from trusted code only.
        if isinstance(spec, str):
            _record(eval(spec), m)
        elif isinstance(spec, dict):
            for x, expr in spec.items():
                if x == "KNeighborsClassifier":
                    # The plain KNN variant gets its k from the elbow search.
                    i=knn_elbow(expr, X_train, X_test, y_train, y_test)
                    model=eval(expr.format(i))
                else:
                    model=eval(expr)
                _record(model, x)
        else:
            _record(eval(m)(), m)

    # fit_model returns its scores as [name, accuracy, precision, recall, f1,
    # roc]; the column labels follow that order (Recall and Precision used to
    # be swapped here).
    perf=pd.DataFrame(performance, columns=["Model", "Accuracy", "Precision", "Recall", "F1", "ROC"]).set_index("Model")
    display(perf)
    return perf, ROC, confusion

In [None]:
performance, ROC, confusion = classification_models(model_list, X_train, X_test, y_train, y_test)

In [None]:
# Hyperparameter tuning

In [None]:
# list of models for GridSearch
grid_model_list=["LogisticRegression", "SVC", "RandomForestClassifier", "KNeighborsClassifier"]

# parameters for GridSearch
from math import log10, floor
def round_1sf(x):
    """Round ``x`` to one significant figure (33.2 -> 30.0, 0.0123 -> 0.01).

    Zero is returned unchanged: it has no leading digit and ``log10(0)`` is
    undefined.
    """
    if x == 0:
        return x
    return round(x, -int(floor(log10(abs(x)))))
# Upper bound for the KNN neighbour search: sqrt(n_train) rounded to one
# significant figure.
upper = int(round_1sf(X_train.shape[0]**0.5))

# Parameter grids for GridSearchCV, keyed by model name.
grid_dct={
    "LogisticRegression": {
        "solver": ['newton-cg', 'lbfgs', 'liblinear'],
        "penalty": ['l2'],
        "C": [100, 10, 1.0, 0.1, 0.01, 0.001],
    },
    "SVC": {
        "kernel": ['poly', 'rbf', 'sigmoid'],
        "C": [50, 10, 1.0, 0.1, 0.01],
        "gamma": ['scale'],
    },
    "RandomForestClassifier": {
        "n_estimators": [10, 100, 1000],
        "max_features": ['sqrt', 'log2'],
    },
    "KNeighborsClassifier": {
        "n_neighbors": range(1, upper, 2),
        "weights": ['uniform', 'distance'],
        "metric": ['euclidean', 'manhattan', 'minkowski'],
    },
}

In [None]:
def grid_models(models, parameters, X_train, X_test, y_train, y_test):
    """Tune every model in ``models`` with a grid search and evaluate it.

    Each model is tuned with repeated stratified 10-fold cross-validation
    (3 repeats), optimising recall, and refitted on the whole training split.
    A normalized confusion matrix and a ROC curve are plotted per model.

    Parameters
    ----------
    models : iterable of str
        Model class names.
    parameters : dict
        Maps every model name to its ``param_grid``.
    X_train, X_test, y_train, y_test
        Train and test split; the targets are binary.

    Returns
    -------
    tuple
        ``(perf, ROC, confusion)``: a DataFrame of test scores indexed by
        model name, the list of ``[fpr, tpr]`` pairs and the list of raw
        confusion tables, all in evaluation order.
    """
    performance=[]
    ROC=[]
    confusion=[]
    for m in models:
        if m=="SVC":
            model=SVC(probability= True)
        elif m=="LogisticRegression":
            # Integer iteration cap: the float 1e8 is rejected by the
            # parameter validation of current scikit-learn releases.
            model=LogisticRegression(max_iter=100000000)
        else:
            # NOTE: eval() resolves the class by name; names must come from
            # trusted code only.
            model=eval(m)()

        cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
        gs = GridSearchCV(estimator=model, param_grid=parameters[m], n_jobs=-1, cv=cv, scoring='recall',error_score=0, refit=True)
        grid_result = gs.fit(X_train, y_train)
        print("Best for %s: %f using %s" % (m, grid_result.best_score_, grid_result.best_params_))
        y_pred = gs.predict(X_test)
        # Probability of the positive class, used for both the AUC and the curve.
        y_score = gs.predict_proba(X_test)[:,1]

        conf=confusion_matrix(y_test, y_pred)
        acc=accuracy_score(y_test,y_pred)
        rec=recall_score(y_test,y_pred)
        pr=precision_score(y_test,y_pred)
        f1=f1_score(y_test,y_pred)
        np.set_printoptions(precision=2)

        # Raw confusion matrix as a labelled table
        c=pd.DataFrame(conf, columns=["Predicted Non-User", "Predicted User"], index=["Actual Non-User", "Actual User"])
        c.index.name=m
        # Plot normalized confusion matrix
        cm = plot_confusion_matrix(gs, X_test, y_test, display_labels=["Non-User", "User"], cmap=plt.cm.Blues, normalize="true")
        cm.ax_.set_title(f"{m} Normalized confusion matrix")
        plt.show()

        # The area under the ROC curve must be computed from scores, not from
        # the hard 0/1 predictions, which collapse the curve to one point.
        # Built-in round() works whether the metrics are numpy or plain floats.
        model_roc = round(float(roc_auc_score(y_test, y_score)), 2)
        fpr,tpr,thresholds=roc_curve(y_test, y_score)

        plt.style.use('seaborn-whitegrid')
        sns.set(style="white", font_scale = 1)
        graph_name="graph/ROC Grid {}.png"

        plt.figure(figsize=(10, 10))
        plt.plot(fpr,tpr, label=f'ROC curve={model_roc}')
        plt.plot([0, 1], [0, 1], 'k--')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.0])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title(f'{m} ROC Grid')
        plt.legend(loc="lower right")
        #plt.savefig(graph_name.format(m) , transparent=True)
        plt.show()

        performance.append([m] + [round(float(v), 2) for v in (acc, pr, rec, f1)] + [model_roc])
        ROC.append([fpr,tpr])
        confusion.append(c)

    # The column labels follow the order of the rows built above (Recall and
    # Precision used to be swapped).
    perf=pd.DataFrame(performance, columns=["Model", "Accuracy", "Precision", "Recall", "F1", "ROC"]).set_index("Model")
    display(perf)
    return perf, ROC, confusion

In [None]:
grid_performance, grid_ROC, grid_confusion = grid_models(grid_model_list, grid_dct, X_train, X_test, y_train, y_test)

In [None]:
# Fit an AdaBoost classifier with default settings on the training split and
# read the per-feature importance scores from the fitted model.
model = AdaBoostClassifier().fit(X_train, y_train)
importances = model.feature_importances_

In [None]:
# visuals_script is an external module that is not defined in this file.
# feature_plot receives the importances and the training data, presumably to
# chart the most important features (TODO confirm against visuals_script).
import visuals_script as vs
vs.feature_plot(importances, X_train, y_train)