In [1]:
import numpy as np
import pandas as pd
import os
import kagglehub
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# -----------------------------
# Data Loading
# -----------------------------
def load_data():
    """Download the Sonar dataset from KaggleHub and load it into a DataFrame.

    Returns:
        pandas.DataFrame: The parsed contents of the dataset's CSV file.

    Raises:
        FileNotFoundError: If the downloaded dataset directory contains no
            ``.csv`` file.
    """
    path = kagglehub.dataset_download("rupakroy/sonarcsv")
    print(f"✅ Dataset downloaded to: {path}")

    # os.listdir() returns entries in arbitrary order and the directory may
    # hold non-CSV files, so select the CSV explicitly (sorted so the choice
    # is deterministic) instead of blindly taking the first entry.
    csv_files = sorted(
        name for name in os.listdir(path) if name.lower().endswith(".csv")
    )
    if not csv_files:
        raise FileNotFoundError(f"No CSV file found in dataset directory: {path}")
    csv_path = os.path.join(path, csv_files[0])

    # NOTE(review): read_csv treats the first row as the header by default.
    # The notebook's captured output shows column labels that look like
    # measurements (0.0200, 0.0371, ...), which suggests the file has no
    # header row and the first sample is being consumed as column names.
    # Left unchanged because the rest of the notebook selects the label
    # column by the resulting name "R" -- confirm before switching to
    # header=None.
    data = pd.read_csv(csv_path)

    return data

In [3]:
# -----------------------------
# Data Preparation
# -----------------------------
def prepare_data(data, label_col="R"):
    """Split dataset into features and labels.

    Args:
        data: DataFrame containing the feature columns plus one label column.
        label_col: Name of the label column. Defaults to ``"R"``, the name
            this notebook uses for the label column.

    Returns:
        tuple: ``(X, y)`` where ``X`` is ``data`` without the label column and
        ``y`` is the label column itself. ``data`` is not modified.

    Raises:
        KeyError: If ``label_col`` is not a column of ``data``.
    """
    if label_col not in data.columns:
        raise KeyError(f"Label column {label_col!r} not found in data")

    # `columns=` already targets the column axis; no separate `axis` needed.
    X = data.drop(columns=label_col)
    y = data[label_col]
    return X, y


def split_data(X, y, test_size=0.1, random_state=1):
    """Partition features and labels into train and test subsets.

    The split is stratified on ``y`` and seeded with ``random_state``.

    Args:
        X: Feature data.
        y: Labels, also used as the stratification target.
        test_size: Passed through to ``train_test_split`` (default 0.1).
        random_state: Seed passed through to ``train_test_split`` (default 1).

    Returns:
        Whatever ``train_test_split`` returns for the two inputs; the caller
        in this notebook unpacks it as ``X_train, X_test, y_train, y_test``.
    """
    split_options = {
        "test_size": test_size,
        "stratify": y,
        "random_state": random_state,
    }
    return train_test_split(X, y, **split_options)


In [4]:
# -----------------------------
# Model Training & Evaluation
# -----------------------------
def train_model(X_train, y_train):
    """Fit a logistic-regression classifier on the training data.

    Args:
        X_train: Training features.
        y_train: Training labels.

    Returns:
        The fitted ``LogisticRegression`` instance (built with
        ``max_iter=1000``).
    """
    classifier = LogisticRegression(max_iter=1000)
    classifier.fit(X_train, y_train)
    return classifier


def evaluate_model(model, X_train, y_train, X_test, y_test):
    """Evaluate the model and return training/testing accuracy.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_train: Training features.
        y_train: True labels for ``X_train``.
        X_test: Held-out features.
        y_test: True labels for ``X_test``.

    Returns:
        tuple: ``(train_accuracy, test_accuracy)`` as computed by
        ``accuracy_score``.
    """
    # accuracy_score's signature is (y_true, y_pred): ground truth goes first,
    # predictions second.
    train_accuracy = accuracy_score(y_train, model.predict(X_train))
    test_accuracy = accuracy_score(y_test, model.predict(X_test))
    return train_accuracy, test_accuracy

In [6]:
# -----------------------------
# Prediction
# -----------------------------
def predict_sample(model, input_data):
    """Classify one sample and translate the label to a readable name.

    Args:
        model: Object exposing ``predict``; called with a single-row 2-D array.
        input_data: Sequence of feature values for one sample.

    Returns:
        str: ``"Rock"`` when the predicted label equals ``"R"``, otherwise
        ``"Mine"``.
    """
    # Reshape to one row so the model sees a single sample.
    sample_row = np.asarray(input_data).reshape(1, -1)
    predicted_label = model.predict(sample_row)[0]

    if predicted_label == "R":
        return "Rock"
    return "Mine"

In [7]:

# -----------------------------
# Main Flow
# -----------------------------
def main():
    """Run the whole pipeline: load, inspect, split, train, evaluate, predict."""
    # --- Load the dataset and print a quick overview ---
    sonar_df = load_data()
    print("\n🔎 First 5 rows:")
    print(sonar_df.head())
    print("\n⚖️ Class distribution:")
    label_counts = sonar_df["R"].value_counts()
    print(label_counts)

    # --- Separate features from labels, then hold out a test set ---
    features, labels = prepare_data(sonar_df)
    X_train, X_test, y_train, y_test = split_data(features, labels)

    # --- Fit the classifier ---
    classifier = train_model(X_train, y_train)

    # --- Report accuracy on both partitions ---
    accuracies = evaluate_model(classifier, X_train, y_train, X_test, y_test)
    train_acc, test_acc = accuracies
    print(f"\n✅ Training Accuracy: {train_acc:.2%}")
    print(f"✅ Testing Accuracy: {test_acc:.2%}")

    # --- Classify one hand-entered sample of 60 feature values ---
    sample_readings = (
        0.0629, 0.1065, 0.1526, 0.1229, 0.1437, 0.1190, 0.0884, 0.0907,
        0.2107, 0.3597, 0.5466, 0.5205, 0.5127, 0.5395, 0.6558, 0.8705,
        0.9786, 0.9335, 0.7917, 0.7383, 0.6908, 0.3850, 0.0671, 0.0502,
        0.2717, 0.2839, 0.2234, 0.1911, 0.0408, 0.2531, 0.1979, 0.1891,
        0.2433, 0.1956, 0.2667, 0.1340, 0.1073, 0.2023, 0.1794, 0.0227,
        0.1313, 0.1775, 0.1549, 0.1626, 0.0708, 0.0129, 0.0795, 0.0762,
        0.0117, 0.0061, 0.0257, 0.0089, 0.0262, 0.0108, 0.0138, 0.0187,
        0.0230, 0.0057, 0.0113, 0.0131
    )
    verdict = predict_sample(classifier, sample_readings)
    print(f"\n🔮 Prediction on custom input: 👉 {verdict}")


# Run the full pipeline only when this code is executed directly
# (i.e. __name__ is "__main__"), not when it is imported as a module.
if __name__ == "__main__":
    main()

✅ Dataset downloaded to: C:\Users\trive\.cache\kagglehub\datasets\rupakroy\sonarcsv\versions\2

🔎 First 5 rows:
   0.0200  0.0371  0.0428  0.0207  0.0954  0.0986  0.1539  0.1601  0.3109  \
0  0.0453  0.0523  0.0843  0.0689  0.1183  0.2583  0.2156  0.3481  0.3337   
1  0.0262  0.0582  0.1099  0.1083  0.0974  0.2280  0.2431  0.3771  0.5598   
2  0.0100  0.0171  0.0623  0.0205  0.0205  0.0368  0.1098  0.1276  0.0598   
3  0.0762  0.0666  0.0481  0.0394  0.0590  0.0649  0.1209  0.2467  0.3564   
4  0.0286  0.0453  0.0277  0.0174  0.0384  0.0990  0.1201  0.1833  0.2105   

   0.2111  ...  0.0027  0.0065  0.0159  0.0072  0.0167  0.0180  0.0084  \
0  0.2872  ...  0.0084  0.0089  0.0048  0.0094  0.0191  0.0140  0.0049   
1  0.6194  ...  0.0232  0.0166  0.0095  0.0180  0.0244  0.0316  0.0164   
2  0.1264  ...  0.0121  0.0036  0.0150  0.0085  0.0073  0.0050  0.0044   
3  0.4459  ...  0.0031  0.0054  0.0105  0.0110  0.0015  0.0072  0.0048   
4  0.3039  ...  0.0045  0.0014  0.0038  0.0013  0.0089 

