<a href="https://colab.research.google.com/github/MDJubayer255/ml_Lab01/blob/main/Lab_Report_01_ML.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score

# Colab's Google Drive helper; the CSV path below assumes Drive is already mounted at /content/drive
from google.colab import drive
# Read the diabetes CSV from the Google Drive folder.
csv_path = '/content/drive/My Drive/Colab Notebooks/Csvfiles/diabetes.csv'
dataset = pd.read_csv(csv_path)

# Preview the raw data before any cleaning is applied.
print("Original Dataset (First 5 Rows):", dataset.head(), sep="\n")

# In the selected columns, treat 0 as a missing value and fill it with
# the mean of that column's remaining (non-zero) entries.
cols = ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI']
zeros_as_nan = dataset[cols].replace(0, np.nan)
dataset[cols] = zeros_as_nan.fillna(zeros_as_nan.mean())

# The row labelled 0 receives the largest glucose value in the dataset.
# This runs first, so the minimum computed below already sees the update.
dataset.loc[0, 'Glucose'] = dataset['Glucose'].max()

# Every patient whose age equals the minimum age receives the smallest
# glucose value.
min_age = dataset['Age'].min()
min_glucose = dataset['Glucose'].min()
is_youngest = dataset['Age'].eq(min_age)
dataset.loc[is_youngest, 'Glucose'] = min_glucose

# Preview the data again now that the cleaning steps have run.
print("\nAfter Preprocessing (First 5 Rows):", dataset.head(), sep="\n")

# Features are every column except 'Outcome'; 'Outcome' is the target.
y = dataset['Outcome']
X = dataset.drop(columns='Outcome')

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# Train a linear regression model on the training split.
model = LinearRegression()
model.fit(X_train, y_train)

# Turn the continuous regression output into class labels.
# A linear model's output is not bounded to [0, 1], so plain rounding can
# yield labels such as -1 or 2. Clipping after rounding guarantees every
# prediction is exactly 0 or 1, which the binary metrics computed from
# y_pred expect; predictions already inside the range are unaffected.
raw_scores = model.predict(X_test)
y_pred = np.clip(np.round(raw_scores), 0, 1).astype(int)

# Score the predicted labels against the held-out true labels.
acc, conf = accuracy_score(y_test, y_pred), confusion_matrix(y_test, y_pred)
prec, rec, f1 = (
    metric(y_test, y_pred)
    for metric in (precision_score, recall_score, f1_score)
)

# Emit the evaluation summary: scalar metrics to two decimals, with the
# confusion matrix printed on its own lines under its heading.
print("\n--------- Model Evaluation ---------")
print(f"Accuracy: {acc:.2f}")
print("Confusion Matrix:", conf, sep="\n")
print(
    f"Precision: {prec:.2f}",
    f"Recall: {rec:.2f}",
    f"F1 Score: {f1:.2f}",
    sep="\n",
)


Original Dataset (First 5 Rows):
   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0            6      148             72             35        0  33.6   
1            1       85             66             29        0  26.6   
2            8      183             64              0        0  23.3   
3            1       89             66             23       94  28.1   
4            0      137             40             35      168  43.1   

   DiabetesPedigreeFunction  Age  Outcome  
0                     0.627   50        1  
1                     0.351   31        0  
2                     0.672   32        1  
3                     0.167   21        0  
4                     2.288   33        1  

After Preprocessing (First 5 Rows):
   Pregnancies  Glucose  BloodPressure  SkinThickness     Insulin   BMI  \
0            6    199.0           72.0       35.00000  155.548223  33.6   
1            1     85.0           66.0       29.00000  155.548223  26.6   
2       