In [72]:
from sklearn.model_selection import train_test_split

import numpy as np
import pandas as pd

# helper for logistic regression explanations
from scipy.special import expit
from scipy.special import logit
from sklearn import metrics
from sklearn.linear_model import LogisticRegression

import warnings
warnings.filterwarnings('ignore')

In [73]:
df = pd.read_csv("Binary_FINAL_DF.csv")
df = df.iloc[:,2:]  ## ignoring double index
df = df.drop(['Day', 'Year'], axis=1)


### Splitting the Data

In [74]:

def split_df_to_x_y(df, col_to_pred):
    X = df.drop(['Common Dolphin', 'Bottlenose Dolphin', 'Pacific White sided Dolphin',
                 'Rissos Dolphin', 'Whales', 'Gray Whales','Other' ], axis=1)  ## Dropping all the animals from table
    y = df[col_to_pred]
    return X,y


In [75]:
## Getting X and y for every animal

X_Common, y_Common = split_df_to_x_y(df,'Common Dolphin')
X_Bottlenose, y_Bottlenose = split_df_to_x_y(df,'Bottlenose Dolphin')
X_Pacific, y_Pacific = split_df_to_x_y(df,'Pacific White sided Dolphin')
X_Rissos, y_Rissos = split_df_to_x_y(df,'Rissos Dolphin')
X_Whales, y_Whales = split_df_to_x_y(df,'Whales')
X_Gray_Whale, y_Gray_Whale = split_df_to_x_y(df,'Whales')
X_Other, y_Other = split_df_to_x_y(df,'Other')


In [76]:
## Splitting to train and test for each animal

X_train_Common, X_test_Common, y_train_Common, y_test_Common = train_test_split(X_Common, y_Common, test_size=0.2, random_state=42)
X_train_Pacific, X_test_Pacific, y_train_Pacific, y_test_Pacific = train_test_split(X_Pacific, y_Pacific, test_size=0.2, random_state=42)
X_train_Whales, X_test_Whales, y_train_Whales, y_test_Whales = train_test_split(X_Whales, y_Whales, test_size=0.2, random_state=42)
X_train_Bottlenose, X_test_Bottlenose, y_train_Bottlenose, y_test_Bottlenose = train_test_split(X_Bottlenose, y_Bottlenose, test_size=0.2, random_state=42)
X_train_Rissos, X_test_Rissos, y_train_Rissos, y_test_Rissos = train_test_split(X_Rissos, y_Rissos, test_size=0.2, random_state=42)
X_train_Gray_Whale, X_test_Gray_Whale, y_train_Gray_Whale, y_test_Gray_Whale = train_test_split(X_Gray_Whale, y_Gray_Whale, test_size=0.2, random_state=42)
X_train_Other, X_test_Other, y_train_Other, y_test_Other = train_test_split(X_Other, y_Other, test_size=0.2, random_state=42)


### Logistic Reggresion Model

In [77]:
def Logistic_Reg_model(X_train, y_train, Col_to_pred):
    trained_model = LogisticRegression().fit(X_train, y_train)
    print('\n\nLogistic Regression Model for ' +  Col_to_pred + ':\n')
    return trained_model

### The prediction for each animal (seen = 1/ not seen = 0)

In [78]:
def predict_Animal(trained_model, X_test):
    predicted_vals = trained_model.predict(X_test)
    return predicted_vals

### Evaluations for each animal 

In [79]:
#                     | Positive | Negative
#   __________________|__________|_____________
#      Positive       |          |
#   (Animal was seen) |    TP    |    FP
#  ___________________|__________|_____________
#    Negativ (Absent) |    FN    |    TN
#                     |          |
#

def evaluate_performance(y_test, y_predicted):
    print("Confusion Matrix:\n", metrics.confusion_matrix(y_test, y_predicted))
    print("accuracy is:", metrics.accuracy_score(y_test, y_predicted))  ## Accurate correct (All table)
    print("precision is:", metrics.precision_score(y_test, y_predicted))  ##  Accurate correct, first row (regarding to Falsly correct prediction)          
    print("recall is:", metrics.recall_score(y_test, y_predicted))  ## Accurate correct, first col (regarding to Falsly False prediction)
    print("f1 is:", metrics.f1_score(y_test, y_predicted))  ## Weighting of precision and recall
    pred_df=pd.DataFrame({'y_test':y_test, 'y_predicted':y_predicted})
    print(pred_df.head(10))

In [80]:
trained_model_Common = Logistic_Reg_model(X_train_Common, y_train_Common, 'Common Dolphin')
y_pred_Common = predict_Animal(trained_model_Common, X_test_Common)
evaluate_performance(y_test_Common, y_pred_Common)



Logistic Regression Model for Common Dolphin:

Confusion Matrix:
 [[  13  206]
 [   9 1075]]
accuracy is: 0.8349961627014582
precision is: 0.8391881342701015
recall is: 0.9916974169741697
f1 is: 0.9090909090909092
      y_test  y_predicted
1263       0            1
3951       1            1
6200       1            1
6081       1            1
96         1            1
1477       1            1
3159       1            1
1400       0            1
3767       1            1
3017       0            1


In [81]:
trained_model_Gray_Whale = Logistic_Reg_model(X_train_Gray_Whale, y_train_Gray_Whale, 'Gray_Whale')
y_pred_Gray_Whale = predict_Animal(trained_model_Gray_Whale, X_test_Gray_Whale)
evaluate_performance(y_test_Gray_Whale, y_pred_Gray_Whale)



Logistic Regression Model for Gray_Whale:

Confusion Matrix:
 [[305 290]
 [159 549]]
accuracy is: 0.6554105909439755
precision is: 0.6543504171632897
recall is: 0.7754237288135594
f1 is: 0.7097608274078864
      y_test  y_predicted
1263       1            1
3951       1            0
6200       0            0
6081       0            1
96         0            1
1477       1            1
3159       1            1
1400       0            0
3767       1            1
3017       1            1


In [82]:
trained_model_Bottlenose = Logistic_Reg_model(X_train_Bottlenose, y_train_Bottlenose, 'Bottlenose Dolphin')
y_pred_Bottlenose = predict_Animal(trained_model_Bottlenose, X_test_Bottlenose)
evaluate_performance(y_test_Bottlenose, y_pred_Bottlenose)



Logistic Regression Model for Bottlenose Dolphin:

Confusion Matrix:
 [[825  30]
 [395  53]]
accuracy is: 0.6738296239447429
precision is: 0.6385542168674698
recall is: 0.11830357142857142
f1 is: 0.19962335216572508
      y_test  y_predicted
1263       1            0
3951       1            0
6200       1            0
6081       1            0
96         0            0
1477       0            0
3159       1            0
1400       1            0
3767       0            0
3017       1            0


In [83]:
trained_model_Whales = Logistic_Reg_model(X_train_Whales, y_train_Whales, 'Whales')
y_pred_Whales = predict_Animal(trained_model_Whales, X_test_Whales)
evaluate_performance(y_test_Whales, y_pred_Whales)



Logistic Regression Model for Whales:

Confusion Matrix:
 [[305 290]
 [159 549]]
accuracy is: 0.6554105909439755
precision is: 0.6543504171632897
recall is: 0.7754237288135594
f1 is: 0.7097608274078864
      y_test  y_predicted
1263       1            1
3951       1            0
6200       0            0
6081       0            1
96         0            1
1477       1            1
3159       1            1
1400       0            0
3767       1            1
3017       1            1


In [84]:
trained_model_Pacific = Logistic_Reg_model(X_train_Pacific, y_train_Pacific, 'Pacific White-sided Dolphin')
y_pred_Pacific = predict_Animal(trained_model_Pacific, X_test_Pacific)
evaluate_performance(y_test_Pacific, y_pred_Pacific)



Logistic Regression Model for Pacific White-sided Dolphin:

Confusion Matrix:
 [[1079   24]
 [ 189   11]]
accuracy is: 0.8365310821181888
precision is: 0.3142857142857143
recall is: 0.055
f1 is: 0.09361702127659575
      y_test  y_predicted
1263       0            0
3951       0            0
6200       1            0
6081       0            0
96         0            0
1477       0            0
3159       0            0
1400       0            0
3767       1            0
3017       0            0


In [85]:
trained_model_Rissos = Logistic_Reg_model(X_train_Rissos, y_train_Rissos, 'Rissos Dolphin')
y_pred_Rissos = predict_Animal(trained_model_Rissos, X_test_Rissos)
evaluate_performance(y_test_Rissos, y_pred_Rissos)



Logistic Regression Model for Rissos Dolphin:

Confusion Matrix:
 [[1134    0]
 [ 169    0]]
accuracy is: 0.8702993092862624
precision is: 0.0
recall is: 0.0
f1 is: 0.0
      y_test  y_predicted
1263       0            0
3951       0            0
6200       0            0
6081       0            0
96         1            0
1477       1            0
3159       0            0
1400       0            0
3767       0            0
3017       0            0


In [86]:
trained_model_Other = Logistic_Reg_model(X_train_Other, y_train_Other, 'Other')
y_pred_Other = predict_Animal(trained_model_Other, X_test_Other)
evaluate_performance(y_test_Other, y_pred_Other)



Logistic Regression Model for Other:

Confusion Matrix:
 [[1049    4]
 [ 245    5]]
accuracy is: 0.8089025326170376
precision is: 0.5555555555555556
recall is: 0.02
f1 is: 0.03861003861003861
      y_test  y_predicted
1263       0            0
3951       0            0
6200       0            0
6081       0            0
96         0            0
1477       1            0
3159       1            0
1400       0            0
3767       0            0
3017       0            0
