In [15]:
import tensorflow as tf
from tf.keras.models import Sequential
from tf.keras.layers import Dense
from tf.keras.wrappers.scikit_learn import KerasClassifier
import pandas as pd
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold

ModuleNotFoundError: No module named 'tf'

In [12]:
def loadData(file):
    return pd.read_csv(f"/data/projects/car-insurance/data/{file}")

def dropColumns(df):
    df.drop(inplace=True, columns=["default_or_not", "last_contact_month", "no_of_contacts", "days_passed", "last_contact_day", "communication", "car_loan", "balance_amt", "education_level", "marital_status", "job_type", "prev_attempts"])

def getSeconds(time):
    return 3600*int(time[0])+60*int(time[1])+int(time[2])
    
def duration(start, end):
    slst = start.split(':')
    elst = end.split(':')
    ss = getSeconds(slst)
    ee = getSeconds(elst)
    dur = ee-ss
    if dur<0:
        return dur+3600*24
    return dur

# Inserts the call duration into the dataframe inplace
def insertCallDur(df):
    call_duration = [duration(row['call_start'], row['call_end']) for _, row in df.iterrows()]
    df.drop(inplace=True, columns=['call_start', 'call_end'])
    df.insert(3,'duration', call_duration)
    
def dataProcess(df):
    df.loc[(df.Outcome=="success"), "Outcome"] = 1
    df.loc[(df.Outcome=="other") | (df.Outcome.isna()), "Outcome"] = 0
    df.loc[(df.Outcome=="failure"), "Outcome"] = -1

def createDataFrame(file):
    df = loadData(file)
    # Part of the data cleanup, removing columns
    dropColumns(df)
    # Part of preprocessing the data, finding the duration
    insertCallDur(df)
    # More data processing, this time setting outcome from string to numerical values.
    dataProcess(df)
    return df
def savePredictionsv1(preds, name):
    with open(f"/data/projects/car-insurance/data/{name}", "w") as csv_file:
        fieldnames = ["prediction"]
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        for i, pred in enumerate(target):
            writer.writerow({"prediction": str(pred)})
            
def savePredictionsv2(preds, name, outcomes):
    with open(f"/data/projects/car-insurance/data/{name}", "w") as csv_file:
        fieldnames = ["prediction"]
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        for i, pred in enumerate(target):
            if outcomes[i]==1:
                writer.writerow({"prediction": str(1)})
            else:
                writer.writerow({"prediction": str(pred)})
def savePredictionsv3(preds, name, outcomes, duration):
    with open(f"/data/projects/car-insurance/data/{name}", "w") as csv_file:
        fieldnames = ["prediction"]
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        for i, pred in enumerate(target):
            if outcomes[i]==1 or duration[i]>1000:
                writer.writerow({"prediction": str(1)})
            else:
                writer.writerow({"prediction": str(pred)})
                
def compAlgorithms(X_train, y_train):
    models = []
    models.append(("LR", LogisticRegression(solver="liblinear")))
    models.append(("LDA", LinearDiscriminantAnalysis()))
    models.append(("KNN", KNeighborsClassifier()))
    models.append(("CART", DecisionTreeClassifier()))
    models.append(("NB", GaussianNB()))
    models.append(("SVM", SVC(gamma="auto")))
    models.append(("SVMscale", SVC(gamma="scale")))
    # Evaluate each model in turn
    results = []
    names = []
    for name, model in models:
        kfold = StratifiedKFold(n_splits=5, random_state=1, shuffle=True)
        cv_results = cross_val_score(model, X_train, y_train, cv=kfold, scoring="accuracy")
        results.append(cv_results)
        names.append(name)
        print(f"{name}: {cv_results.mean()} ({cv_results.std()})")
    # compare algorithms
    plt.boxplot(results, labels=names)
    plt.title("Algorithm Comparison")
    plt.show()

In [19]:
df_test = createDataFrame('Test_data.csv')
X_test = np.array(df_test)
df_train = createDataFrame('Train_data.csv')
X_train, y_train = np.array(df_train.drop(columns = 'car_insurance')).astype('float32'), np.array(df_train.car_insurance)
def createModel():
    model = Sequential()
    model.add(Dense(4, input_dim=4, activation='relu'))
    # using sigmoid because I want an output of 0 or 1, binary classification
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
# def compNN():
    
estimator = KerasClassifier(build_fn=createModel, epochs=5, batch_size=10, verbose=1)
kfold = StratifiedKFold(n_splits=5, shuffle=True)
results = cross_val_score(estimator, X_train, y_train, cv=kfold
                          

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [20]:
print(results)

[0.82608694 0.74396133 0.81451613 0.77903223 0.74032259]
