<a href="https://colab.research.google.com/github/Devsachin2003/Prognosticating-Different-stages-of-Goiter-using-ML-algorithms-/blob/main/Logistic_regression_for_Goiter_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 1. Import Libraries and Mount Drive
First, we import the necessary libraries and mount Google Drive to access the dataset.

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from google.colab import drive

# Mount Google Drive at /content/drive; the dataset path used when loading
# the CSV later in this notebook lives under this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


## 2. Load the Dataset
We load the CSV file from the specified path in Google Drive.

In [None]:
# Location of the dataset CSV inside the mounted Google Drive.
file_path = '/content/drive/MyDrive/Copy of sick_csv.csv'

# Read the whole file into a DataFrame.
df = pd.read_csv(filepath_or_buffer=file_path)

# Show the top five rows as a quick sanity check that the load worked.
display(df.head(n=5))

Unnamed: 0,age,sex,on_thyroxine,query_on_thyroxine,on_antithyroid_medication,sick,pregnant,thyroid_surgery,I131_treatment,query_hypothyroid,...,TT4_measured,TT4,T4U_measured,T4U,FTI_measured,FTI,TBG_measured,TBG,referral_source,Class
0,41.0,F,f,f,f,f,f,f,f,f,...,t,125.0,t,1.14,t,109.0,f,,SVHC,negative
1,23.0,F,f,f,f,f,f,f,f,f,...,t,102.0,f,,f,,f,,other,negative
2,46.0,M,f,f,f,f,f,f,f,f,...,t,109.0,t,0.91,t,120.0,f,,other,negative
3,70.0,F,t,f,f,f,f,f,f,f,...,t,175.0,f,,f,,f,,other,negative
4,70.0,F,f,f,f,f,f,f,f,f,...,t,61.0,t,0.87,t,70.0,f,,SVI,negative


## 3. Data Preprocessing
Here we handle categorical variables. We map the target variable `goitre` and other binary columns to numerical values (0 and 1).

In [None]:
# Encode the target column: 'f' -> 0, 't' -> 1.
df['goitre'] = df['goitre'].map({'f': 0, 't': 1})

# Binary columns to convert to 0/1 with a dictionary mapping.
binary_categorical_cols = ['sex', 'on_thyroxine', 'query_on_thyroxine', 'on_antithyroid_medication',
                           'sick', 'pregnant', 'thyroid_surgery', 'I131_treatment', 'query_hypothyroid',
                           'query_hyperthyroid', 'lithium', 'tumor', 'hypopituitary', 'psych',
                           'TSH_measured', 'T3_measured', 'TT4_measured', 'T4U_measured', 'FTI_measured',
                           'TBG_measured']

# Default encoding for the 'f'/'t' flag columns.
flag_mapping = {'f': 0, 't': 1}

# `sex` is coded 'F'/'M' (see the df.head() preview), not 'f'/'t'. Mapping it
# with `flag_mapping` would turn every value into NaN and silently discard the
# feature, so it gets its own dictionary.
column_mappings = {'sex': {'F': 0, 'M': 1}}

for col in binary_categorical_cols:
    # Values absent from the chosen mapping become NaN and are left for the
    # imputation step that follows.
    df[col] = df[col].map(column_mappings.get(col, flag_mapping))

## 4. Feature Engineering and Imputation
We perform One-Hot Encoding on multi-category columns, define our features (X) and target (y), and impute missing values.

In [None]:
multi_categorical_cols = ['referral_source', 'Class']

# One-hot encode multi-category columns
df = pd.get_dummies(df, columns=multi_categorical_cols)

# Define features and target variable.
# 'Unnamed: 0' appears to be the row index that was written out with the CSV
# (0, 1, 2, ... in the preview); it carries no clinical information, so it is
# excluded from the features. errors="ignore" keeps this working if the column
# is absent.
X = df.drop(columns="goitre").drop(columns="Unnamed: 0", errors="ignore")  # Features
y = df["goitre"]  # Target

# Impute missing values with the per-column mean.
# NOTE(review): the imputer is fitted on all rows before the train/test split,
# so test-set values contribute to the means; fit on the training rows only if
# a strictly leakage-free evaluation is required.
# NOTE(review): with strategy='mean', SimpleImputer discards columns that are
# entirely NaN (TBG looks like such a column in the preview), so X_imputed can
# have fewer columns than X.
imputer = SimpleImputer(strategy='mean')
X_imputed = imputer.fit_transform(X)



## 5. Model Training
We split the data into training and testing sets and train a Logistic Regression classifier.

In [None]:
# Train-test split.
# The previous test_size=0.00099 held out fewer than 0.1% of the rows, which is
# too few samples for the accuracy figure to mean anything. Hold out 20%
# instead, and stratify on the target so both splits keep the same proportion
# of goitre / non-goitre cases.
X_train, X_test, y_train, y_test = train_test_split(
    X_imputed, y, test_size=0.2, random_state=42, stratify=y
)

# Train the Logistic Regression classifier.
# C=0.0001 (very strong L2 regularisation) is kept from the original notebook.
# max_iter is raised from the default 100 so the solver has room to converge on
# these unscaled features.
logreg_classifier = LogisticRegression(C=0.0001, random_state=42, max_iter=1000)
logreg_classifier.fit(X_train, y_train)

## 6. Evaluation
Finally, we make predictions on the test set and calculate the accuracy of the model.

In [None]:
# Predict the goitre label for every row of the held-out test set.
y_pred = logreg_classifier.predict(X_test)

# Evaluate: fraction of test rows whose predicted label equals the true label.
logical_regression = accuracy_score(y_true=y_test, y_pred=y_pred)

# Report the score.
print("Accuracy:", logical_regression)

Accuracy: 0.75
