# Imports

In [1]:
import numpy as np
import pandas as pd

from pathlib import Path

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, roc_auc_score

import sacred
from sacred import Experiment
from sacred.observers import FileStorageObserver

  import pkg_resources


In [2]:
ex = Experiment("my_experiment", interactive=True)
ex.observers.append(FileStorageObserver("../sacred_experiments"))

In [3]:
# ex.run(config_updates={'C': 0.1, 'n_samples': 500, 'k': 5})

In [4]:
# path_str = '../data/sonar-data.csv'
# dataset_path = Path(path_str)

In [5]:
@ex.config
def config():
    penalty='l2'
    C=1.0
    class_weight=None
    n_jobs=None
    data_path_str = str('../data/sonar-data.csv')
    test_size = float(0.2)
    random_state = 42

In [6]:
@ex.capture
def load_model(penalty, C, class_weight, n_jobs):
    return LogisticRegression(
        penalty=penalty,
        C=C,
        class_weight=class_weight,
        n_jobs=n_jobs,
                              )

In [7]:
%tb

@ex.main
def my_main(data_path_str, test_size, random_state):
    
    # Loading data into DataFrame
    dataset_path = Path(data_path_str)
    sonar_data = pd.read_csv(dataset_path, header=None)

    # Printing DataFrame dimensions
    print("Sonar DataFrame dimmensions: ", sonar_data.shape)

    # Checking Label Distribution
    print("Value counts on Label:", sonar_data[60].value_counts(dropna=False))
    
    # Checking Label Distribution in %
    print("Value Distribution on Label(%):", sonar_data[60].value_counts(dropna=False, normalize=True)*100)
    
    # Dividing Dataset into X and y
    X = sonar_data.drop(columns=60, axis=1)
    y = sonar_data[60]

    print(y.shape)

    # Train/Test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)
    print(X_train.shape)
    print(y_train.shape)
    print(X_test.shape)
    print(y_test.shape)
    print("hey")
    # Declaring the Logistic Reg
    model = load_model()

    # Fitting the model
    model.fit(X_train, y_train)
    print("trained")
    # Get Train performance
    y_train_pred = model.predict(X_train)
    print(y_train_pred.shape)
    print(y_test.shape)
    print(y_train_pred.shape)
    train_accuracy = accuracy_score(y_true=y_train, y_pred=y_train_pred)
    print(train_accuracy)
    train_recall = recall_score(y_true=y_train, y_pred=y_train_pred, labels=['M', 'R'], pos_label='M')
    print(train_recall)
    train_precision = precision_score(y_true=y_train, y_pred=y_train_pred, labels=['M', 'R'], pos_label='M')
    print(train_precision)
    train_f1 = f1_score(y_true=y_train, y_pred=y_train_pred, labels=['M', 'R'], pos_label='M')
    print(train_f1)

    ex.log_scalar(name="train accuracy", value=train_accuracy)
    ex.log_scalar(name="train recall", value=train_recall)
    ex.log_scalar(name="train precision", value=train_precision)
    ex.log_scalar(name="train f1", value=train_f1)

    # Get Test performance
    y_test_pred = model.predict(X_test)
    test_accuracy = accuracy_score(y_true=y_test, y_pred=y_test_pred)
    test_recall = recall_score(y_true=y_test, y_pred=y_test_pred, labels=['M', 'R'], pos_label='M')
    test_precision = precision_score(y_true=y_test, y_pred=y_test_pred, labels=['M', 'R'], pos_label='M')
    test_f1 = f1_score(y_true=y_test, y_pred=y_test_pred, labels=['M', 'R'], pos_label='M')

    ex.log_scalar(name="test accuracy", value=test_accuracy)
    ex.log_scalar(name="test recall", value=test_recall)
    ex.log_scalar(name="test precision", value=test_precision)
    ex.log_scalar(name="test f1", value=test_f1)

ex.run()

No traceback available to show.
INFO - my_experiment - Running command 'my_main'
INFO - my_experiment - Started run with ID "1"
INFO - my_experiment - Completed after 0:00:00


Sonar DataFrame dimmensions:  (208, 61)
Value counts on Label: 60
M    111
R     97
Name: count, dtype: int64
Value Distribution on Label(%): 60
M    53.365385
R    46.634615
Name: proportion, dtype: float64
(208,)
(166, 60)
(166,)
(42, 60)
(42,)
hey
trained
(166,)
(42,)
(166,)
0.8373493975903614
0.8705882352941177
0.8222222222222222
0.8457142857142858


<sacred.run.Run at 0x702f2fb4ab50>

# Loading Data

In [8]:
# dataset_path = Path(data_path_str)
# sonar_data = pd.read_csv(dataset_path, header=None)

In [9]:
# sonar_data.head()

In [10]:
# sonar_data.info()

## Checking Features

In [11]:
# sonar_data.describe()

There are 59 feature columns of type float64, values are between 0 and 1.
There is a single 

## Checking Label

In [12]:
# # Checking the label
# sonar_data[60].value_counts(dropna=False)

In [13]:
# sonar_data[60].value_counts(dropna=False, normalize=True)*100

In [14]:
# X = sonar_data.drop(columns=60, axis=1)
# Y = sonar_data[60]