<a href="https://colab.research.google.com/github/rakesh22230/Machine_Learning_Practise/blob/main/222902030_CSE312_222D3_LabReport01_LinearRegression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:
# ---------------------- Import libraries ----------------------
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split

# ---------------------- Load Dataset ----------------------
# Read diabetes.csv directly from its raw GitHub URL into a DataFrame.
url = "https://raw.githubusercontent.com/rakesh22230/Machine_Learning_Practise/refs/heads/main/diabetes.csv"
dataset = pd.read_csv(filepath_or_buffer=url)

# Preview the data exactly as loaded, before any preprocessing touches it.
print("Original Dataset (First 5 Rows):", dataset.head(), sep="\n")

# ---------------------- Data Preprocessing ----------------------

# In the columns listed below a 0 is treated as a missing reading: it is
# blanked out to NaN and then imputed with the mean of the remaining values.
# NOTE(review): the means are taken over the whole dataset, i.e. before the
# train/test split performed further down — confirm this is intended.
cols_with_zero = ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI']

for col in cols_with_zero:
    without_zeros = dataset[col].replace(0, np.nan)
    dataset[col] = without_zeros.fillna(without_zeros.mean())


# The first record's Glucose is overwritten with the column maximum.
max_glucose = dataset['Glucose'].max()
dataset.loc[0, 'Glucose'] = max_glucose

# Every record at the minimum age receives the minimum glucose value.
# The minimum is read after the first-row overwrite above.
min_age, min_glucose = dataset['Age'].min(), dataset['Glucose'].min()
youngest = dataset['Age'].eq(min_age)
dataset.loc[youngest, 'Glucose'] = min_glucose

# Preview the frame after all preprocessing steps.
print("\nAfter Preprocessing (First 5 Rows):", dataset.head(), sep="\n")

# ---------------------- Data Modeling ----------------------

# Step 1: Separate input features (X) and target (y).
# Every column except 'Outcome' is used as a feature.
X = dataset.drop('Outcome', axis=1)
y = dataset['Outcome']

# Step 2: Split into training and test sets (30% held out; random_state is
# fixed so the split is the same on every run).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

# Step 3: Train the Linear Regression model on the training portion only.
model = LinearRegression()
model.fit(X_train, y_train)

# Step 4: Predict outcomes and convert them to class labels 0 or 1.
y_pred = model.predict(X_test)
# Linear regression output is unbounded, so rounding alone can produce labels
# such as -1 or 2 for predictions below -0.5 or above 1.5. Those are not valid
# classes and make the binary precision/recall/F1 metrics fail. Clipping after
# rounding keeps every in-range prediction's label unchanged and maps any
# out-of-range prediction to the nearest valid class.
y_pred_rounded = np.clip(np.round(y_pred), 0, 1).astype(int)

# ---------------------- Evaluation ----------------------

# Step 1: Score the thresholded predictions against the held-out labels.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_rounded)
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred_rounded)
precision = precision_score(y_true=y_test, y_pred=y_pred_rounded)
recall = recall_score(y_true=y_test, y_pred=y_pred_rounded)
f1 = f1_score(y_true=y_test, y_pred=y_pred_rounded)

# Step 2: Assemble the report line by line and emit it in one go.
report_lines = [
    "\n\n--------- Model Evaluation ----------\n",
    f"Accuracy: {accuracy:.2f}",
    "Confusion Matrix:",
    str(conf_matrix),
    f"Precision: {precision:.2f}",
    f"Recall: {recall:.2f}",
    f"F1 Score: {f1:.2f}",
]
print("\n".join(report_lines))


Original Dataset (First 5 Rows):
   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0            6      148             72             35        0  33.6   
1            1       85             66             29        0  26.6   
2            8      183             64              0        0  23.3   
3            1       89             66             23       94  28.1   
4            0      137             40             35      168  43.1   

   DiabetesPedigreeFunction  Age  Outcome  
0                     0.627   50        1  
1                     0.351   31        0  
2                     0.672   32        1  
3                     0.167   21        0  
4                     2.288   33        1  

After Preprocessing (First 5 Rows):
   Pregnancies  Glucose  BloodPressure  SkinThickness     Insulin   BMI  \
0            6    199.0           72.0       35.00000  155.548223  33.6   
1            1     85.0           66.0       29.00000  155.548223  26.6   
2       