In [2]:
# Step 1: Import Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [3]:
# Step 2: Load the Dataset
file_path = 'kidney-stone-dataset.csv'  # Update this path as per your file location

try:
    # Load the dataset
    df = pd.read_csv(file_path)
    print("Dataset Loaded Successfully!")
    print("First 5 Rows of the Dataset:")
    print(df.head())
except FileNotFoundError:
    print("Error: The file 'kidney-stone-dataset.csv' was not found. Please check the file path.")
    exit()

Dataset Loaded Successfully!
First 5 Rows of the Dataset:
   gravity    ph  osmo  cond  urea  calc  target
0    1.021  4.91   725  14.0   443  2.45       0
1    1.017  5.74   577  20.0   296  4.49       0
2    1.008  7.20   321  14.9   101  2.36       0
3    1.011  5.51   408  12.6   224  2.15       0
4    1.005  6.52   187   7.5    91  1.16       0


In [4]:
# Step 3: Data Preprocessing
print("\nChecking for Missing Values:")
print(df.isnull().sum())

# Drop rows with missing values (optional: can replace missing values with mean/median if needed)
df = df.dropna()
print(f"\nAfter Dropping Missing Values, Dataset Shape: {df.shape}")

# Verify column names
print("\nDataset Columns:", df.columns)

# Ensure the expected columns are present
expected_columns = {'gravity', 'ph', 'osmo', 'cond', 'urea', 'calc', 'target'}
if not expected_columns.issubset(df.columns):
    print("Error: Dataset does not have the expected columns. Exiting...")
    exit()

# Define features (X) and target (y)
X = df.drop(columns=['target'])  # Features: gravity, ph, osmo, cond, urea, calc
y = df['target']  # Target: Binary classification (0 or 1)

# Scale numerical features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


Checking for Missing Values:
gravity    0
ph         0
osmo       0
cond       0
urea       0
calc       0
target     0
dtype: int64

After Dropping Missing Values, Dataset Shape: (79, 7)

Dataset Columns: Index(['gravity', 'ph', 'osmo', 'cond', 'urea', 'calc', 'target'], dtype='object')


In [5]:
# Step 4: Split the Data
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42, stratify=y)
print("\nData Split Successfully!")
print(f"Training Samples: {X_train.shape[0]}, Testing Samples: {X_test.shape[0]}")


Data Split Successfully!
Training Samples: 63, Testing Samples: 16


In [6]:
# Step 5: Train the Logistic Regression Model
log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)
print("\nLogistic Regression Model Trained Successfully!")


Logistic Regression Model Trained Successfully!


In [7]:
# Step 6: Evaluate the Model
y_pred = log_reg.predict(X_test)

# Model Performance
accuracy = accuracy_score(y_test, y_pred)
print(f"\nModel Accuracy: {accuracy:.2f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))


Model Accuracy: 0.75

Classification Report:
              precision    recall  f1-score   support

           0       0.86      0.67      0.75         9
           1       0.67      0.86      0.75         7

    accuracy                           0.75        16
   macro avg       0.76      0.76      0.75        16
weighted avg       0.77      0.75      0.75        16

Confusion Matrix:
[[6 3]
 [1 6]]


In [8]:
# Step 6: Evaluate the Model
y_pred = log_reg.predict(X_test)

# Model Performance
accuracy = accuracy_score(y_test, y_pred)
print(f"\nModel Accuracy: {accuracy:.2f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

# Step 7: Predict for New User Input
print("\n--- Kidney Stone Detection Prediction ---")
print("Enter the following details to predict kidney stone presence:")

# Collect user input dynamically
user_input = {}

try:
    user_input['gravity'] = float(input("Gravity (e.g., 1.005 to 1.035): "))
    user_input['ph'] = float(input("pH Level (e.g., 4.5 to 8): "))
    user_input['osmo'] = float(input("Osmolality (e.g., 100 to 1200): "))
    user_input['cond'] = float(input("Conductivity (e.g., 5 to 30): "))
    user_input['urea'] = float(input("Urea Level (mg/dL): "))
    user_input['calc'] = float(input("Calcium Level (mg/dL): "))

    # Convert input into DataFrame
    user_data = pd.DataFrame([user_input])

    # Scale input features
    user_data_scaled = scaler.transform(user_data)

    # Predict
    prediction = log_reg.predict(user_data_scaled)

    # Output result
    if prediction[0] == 1:
        print("\nPrediction: Kidney Stone Detected (Positive Diagnosis).")
    else:
        print("\nPrediction: No Kidney Stone Detected (Negative Diagnosis).")

except Exception as e:
    print("Error in input. Please ensure correct numeric values are entered.")
    print("Details:", e)


Model Accuracy: 0.75

Classification Report:
              precision    recall  f1-score   support

           0       0.86      0.67      0.75         9
           1       0.67      0.86      0.75         7

    accuracy                           0.75        16
   macro avg       0.76      0.76      0.75        16
weighted avg       0.77      0.75      0.75        16

Confusion Matrix:
[[6 3]
 [1 6]]

--- Kidney Stone Detection Prediction ---
Enter the following details to predict kidney stone presence:

Prediction: Kidney Stone Detected (Positive Diagnosis).
