# Sklearn Binary

In [74]:
import pandas as pd
# Load the dataset from 'binary_dataset.csv' into a DataFrame. The path is
# relative, so the file must be in the kernel's working directory.
binaryDf = pd.read_csv('binary_dataset.csv')

In [46]:
# Feature matrix: the six columns named below, selected by label.
X = binaryDf.loc[
    :,
    [
        'WHITE',
        'BLACK_OR_AFRICAN_AMERICAN',
        'AMERICAN_INDIAN_AND_ALASKA_NATIVE',
        'ASIAN',
        'NATIVE_HAWAIIAN_AND_OTHER_PACIFIC_ISLANDER',
        'HISPANIC_OR_LATINO',
    ],
]

# Target: the 'GrowthRate' column.
# NOTE(review): presumably a two-class label given the classifiers used below - confirm.
y = binaryDf.loc[:, 'GrowthRate']

In [59]:
from sklearn.model_selection import train_test_split
import time

# Hold out 25% of the rows for testing; random_state=0 keeps the split
# the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

# Four separate registries, each keyed by the model's display name.
models, accuracies, trainingTimes, predictionTimes = {}, {}, {}, {}

In [60]:
def processModel(model, modelKey):
    """Fit, time and score one estimator on the notebook's train/test split.

    Reads the module-level ``X_train``, ``y_train``, ``X_test`` and ``y_test``
    (they are not parameters), so the cell that creates the split must have
    been run first.

    Args:
        model: Object exposing ``fit(X, y)``. Whatever ``fit`` returns must
            expose ``predict(X)`` and ``score(X, y)``.
        modelKey: Label used only in the progress messages.

    Returns:
        Tuple ``(accuracy, trainingTime, predictionTime)``: the value returned
        by ``score(X_test, y_test)`` followed by the elapsed fit and predict
        durations in seconds.
    """
    separator = '----- ----- -----'

    # Train Model
    print(separator)
    print('Training model', modelKey)
    # perf_counter() is monotonic and high-resolution; time.time() is a wall
    # clock that can jump and may be too coarse for millisecond-scale timings.
    trainingStartTime = time.perf_counter()
    trainedModel = model.fit(X_train, y_train)
    trainingTime = time.perf_counter() - trainingStartTime
    print('Finished training in ' + str(trainingTime) + 's')

    # Predict: the predictions themselves are not used; this call exists only
    # so that inference can be timed.
    print('Predicting on model', modelKey)
    predictionStartTime = time.perf_counter()
    trainedModel.predict(X_test)
    predictionTime = time.perf_counter() - predictionStartTime
    print('Finished predictions in ' + str(predictionTime) + 's')

    # Get Accuracy
    # score() is handed X_test rather than the predictions above, so
    # scikit-learn estimators predict a second time here; that cost is
    # deliberately outside predictionTime.
    print('Getting accuracies on model', modelKey)
    accuracy = trainedModel.score(X_test, y_test)

    # Finished
    print()
    print('Finished processing model', modelKey)
    print('Accuracy: ' + str(accuracy))
    print('Training time: ' + str(trainingTime) + 's')
    print('Prediction time: ' + str(predictionTime) + 's')
    print(separator)

    return (accuracy, trainingTime, predictionTime)

In [61]:
from sklearn.svm import SVC

# Register a linear-kernel SVC (C = 1) under its display name, then fit, time
# and score it; the three results are filed under the same name.
model = 'SVM Linear Kernel'
models[model] = SVC(kernel='linear', C=1)
(
    accuracies[model],
    trainingTimes[model],
    predictionTimes[model],
) = processModel(models[model], model)

----- ----- -----
Training model SVM Linear Kernel
Finished training in 233.9371931552887s
Predicting on model SVM Linear Kernel
Finished predictions in 49.902108907699585s
Getting accuracies on model SVM Linear Kernel

Finished processing model SVM Linear Kernel
Accuracy: 0.8622945052133307
Training time: 233.9371931552887s
Prediction time: 49.902108907699585s
----- ----- -----


In [64]:
from sklearn.tree import DecisionTreeClassifier

# Register a decision tree limited to depth 2 under its display name, then fit,
# time and score it; the three results are filed under the same name.
model = 'Decision Tree Classifier'
models[model] = DecisionTreeClassifier(max_depth=2)
(
    accuracies[model],
    trainingTimes[model],
    predictionTimes[model],
) = processModel(models[model], model)

----- ----- -----
Training model Decision Tree Classifier
Finished training in 0.051001787185668945s
Predicting on model Decision Tree Classifier
Finished predictions in 0.006173372268676758s
Getting accuracies on model Decision Tree Classifier

Finished processing model Decision Tree Classifier
Accuracy: 0.8622945052133307
Training time: 0.051001787185668945s
Prediction time: 0.006173372268676758s
----- ----- -----


In [66]:
from sklearn.linear_model import LogisticRegression

# Register a LogisticRegression with default settings under its display name,
# then fit, time and score it; the three results are filed under the same name.
model = 'Logistic Regression'
models[model] = LogisticRegression()
(
    accuracies[model],
    trainingTimes[model],
    predictionTimes[model],
) = processModel(models[model], model)

----- ----- -----
Training model Logistic Regression
Finished training in 0.22043704986572266s
Predicting on model Logistic Regression
Finished predictions in 0.0049359798431396484s
Getting accuracies on model Logistic Regression

Finished processing model Logistic Regression
Accuracy: 0.8622945052133307
Training time: 0.22043704986572266s
Prediction time: 0.0049359798431396484s
----- ----- -----


In [67]:
# Assemble one row per registered model. Each metric dict becomes a Series and
# is aligned to the model names by key, not by position. A model that is in
# `models` but missing from a metric dict (for example because its run was
# interrupted) therefore shows NaN instead of raising a length-mismatch error.
resultsDf = pd.DataFrame(
    {
        'Accuracy': pd.Series(accuracies, dtype='float64'),
        'TrainingTime': pd.Series(trainingTimes, dtype='float64'),
        'PredictionTime': pd.Series(predictionTimes, dtype='float64'),
    },
    index=list(models),
)

# NOTE(review): in the recorded run all three models report the identical
# accuracy (0.8622945052133307). Presumably each one predicts a single class
# for every test row - confirm against the class balance of y_test before
# reading anything into these scores.

resultsDf

Unnamed: 0,Accuracy,TrainingTime,PredictionTime
SVM Linear Kernel,0.862295,233.937193,49.902109
Decision Tree Classifier,0.862295,0.051002,0.006173
Logistic Regression,0.862295,0.220437,0.004936
