<a href="https://colab.research.google.com/github/MDJubayer255/ml_Lab01/blob/main/Lab_Report_01_ML.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
# Import required libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score

# Step 1: Read the CSV file from Google Drive
file_path = '/content/drive/My Drive/Colab Notebooks/Csvfiles/diabetes.csv'
dataset = pd.read_csv(file_path)

# Step 2: Replace 0s in key columns with mean
# Zeros are first turned into NaN so they are excluded from the column mean,
# then every NaN is filled with the mean of the remaining values.
cols = ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI']
for col in cols:
    dataset[col] = dataset[col].replace(0, np.nan)
    # Assign the filled column back instead of calling
    # dataset[col].fillna(..., inplace=True): the in-place call operates on an
    # intermediate Series, triggers a pandas FutureWarning, and stops updating
    # `dataset` altogether under pandas 3.0.
    dataset[col] = dataset[col].fillna(dataset[col].mean())

# Step 3: Replace the first row's Glucose with the maximum Glucose value
dataset.at[0, 'Glucose'] = dataset['Glucose'].max()

# Step 4: Replace Glucose of the youngest person(s) with minimum Glucose value
# The minimum is computed after Step 3, and every row whose Age equals the
# minimum Age is overwritten (including row 0, if it is among the youngest).
min_age = dataset['Age'].min()
min_glucose = dataset['Glucose'].min()
dataset.loc[dataset['Age'] == min_age, 'Glucose'] = min_glucose

# Step 5: Feature and label separation
# Every column except 'Outcome' is a feature; 'Outcome' is the label.
X = dataset.drop('Outcome', axis=1).values
y = dataset['Outcome'].values

# Step 6: Split into training and testing data
# 20% of the rows are held out for testing; random_state=0 makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Step 7: Train Linear Regression model
regressor = LinearRegression()
regressor.fit(X_train, y_train)

# Step 8: Predict the results and round to 0 or 1
# Linear regression output is unbounded, so plain rounding can produce labels
# such as -1 or 2. Clipping keeps the predictions binary; otherwise the
# precision/recall/F1 calls (default average='binary') would raise on a
# multiclass target.
y_pred = regressor.predict(X_test)
y_pred = np.clip(np.round(y_pred), 0, 1).astype(int)

# Step 9: Put the true and predicted labels side by side and show the first rows
df = pd.DataFrame(dict(Actual=y_test, Predicted=y_pred))
print(df.head())

# Step 10: Evaluation metrics
# Each entry pairs the printed label with the scoring function that produces
# its value; all of them are called as metric(y_test, y_pred).
metric_reports = [
    ("Accuracy:", accuracy_score),
    ("Confusion Matrix:\n", confusion_matrix),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1 Score:", f1_score),
]
for label, metric in metric_reports:
    print(label, metric(y_test, y_pred))


   Actual  Predicted
0       1          1
1       0          0
2       0          0
3       1          1
4       0          0
Accuracy: 0.8116883116883117
Confusion Matrix:
 [[97 10]
 [19 28]]
Precision: 0.7368421052631579
Recall: 0.5957446808510638
F1 Score: 0.6588235294117647


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  dataset[col].fillna(dataset[col].mean(), inplace=True)
