# Naughty or Nice - Santa's Logistic Regression




In [0]:
# Load bamboolib and switch it on for this session.
# NOTE(review): presumably this activates bamboolib's interactive DataFrame
# UI inside the notebook -- confirm against the bamboolib docs.
import bamboolib as bam
bam.enable()

## Import Libraries

In [0]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics

%matplotlib inline

## Prepare Data
### Read in the Dataset, Using the First Column as the Index

In [0]:
# Load the mock dataset; the first CSV column becomes the DataFrame index.
santaslist = pd.read_csv(
    "data/MOCK_DATA.csv",
    index_col=0,
)

### Clean Data - Map Gender & Count of Nasty Incidents

In [0]:
# Encode gender as a numeric indicator so it can be used as a model feature.
# Series.map turns any value that is not a key of the dict into NaN.
gender_codes = {'Male': 1, 'Female': 0}
santaslist['gender'] = santaslist['gender'].map(gender_codes)

In [0]:
# Collapse the textual incident frequencies into a 0-3 score:
#   Never / Once / Seldom -> 0, Yearly -> 1, Monthly / Often -> 2,
#   Weekly / Daily -> 3.
# The dictionary could be written out literally; the two parallel lists are
# paired with zip() on purpose, to demonstrate zip().
nasties = [
    'Never', 'Once', 'Seldom', 'Yearly',
    'Monthly', 'Often', 'Weekly', 'Daily',
]
scores = [0, 0, 0, 1, 2, 2, 3, 3]

nasty_map = {label: points for label, points in zip(nasties, scores)}

# Replace each label in the column with its score.
santaslist['Count_of_nasty_incidents'] = (
    santaslist['Count_of_nasty_incidents'].map(nasty_map)
)

In [0]:
# Every column is a model feature except the two name columns and the
# target column `Nice`. list.remove raises ValueError if a column is absent.
feature_cols = santaslist.columns.tolist()
for excluded in ('first_name', 'Nice', 'last_name'):
    feature_cols.remove(excluded)

In [0]:
# Train a logistic-regression classifier for the `Nice` target and report
# its accuracy and confusion matrix on a held-out test split.

# Build Logistic Regression Model
logreg = LogisticRegression(solver='lbfgs', max_iter=1000)

# Define Features & target
X = santaslist[feature_cols]
y = santaslist.Nice

# Split for train vs test; the fixed random_state keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=142)

# Fit the Model on the training portion only
logreg.fit(X_train, y_train)

# Predicted class labels for the held-out rows
y_pred = logreg.predict(X_test)

# Mean accuracy on the held-out rows (score() calls predict() internally)
score = logreg.score(X_test, y_test)

# Predicted probability of the second class in logreg.classes_
logit_pred_proba = logreg.predict_proba(X_test)[:, 1]

# Build the confusion matrix from the predicted labels themselves rather than
# from a boolean `proba > .5` mask: this keeps y_true and y_pred in the same
# label space and guarantees the matrix describes the same predictions that
# `score` was computed from.
confusion = metrics.confusion_matrix(y_true=y_test, y_pred=y_pred)
print(feature_cols)
print(score)
print(confusion)


In [0]:
# Score every row of the dataset (training and test rows alike) and store the
# predicted probability of the second class in logreg.classes_.
# NOTE: the column name 'Probabilty' (sic) is kept exactly as it was.
all_rows_proba = logreg.predict_proba(santaslist[feature_cols])
santaslist['Probabilty'] = all_rows_proba[:, 1]

In [0]:
# Evaluate the DataFrame as the cell's last expression so the notebook
# renders it, including the probability column added in the previous cell.
santaslist