In [53]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report,mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder

# Load the dataset
# The toy dataset is defined inside this cell so that it runs on a fresh kernel
# instead of depending on a `data` variable left behind by another cell.
data = {
    'BMI':[10,8,7,18,40,23,3,13,17,6],
    'Age': [25,20,25,20,50,15,10,30,22,27],
    'Sugar':[1,0,1,1,0,1,0,0,1,0],
    'Diabetes':[1,1,0,1,0,1,0,0,1,0]
}
df = pd.DataFrame(data)

# Display basic info
# df.info() writes its summary to stdout itself and returns None, so wrapping
# it in print() would only append a stray "None" line.
df.info()
print(df.head())

# --- LOGISTIC REGRESSION MODEL ---

# Define features and target variable
X = df[['BMI','Age','Sugar']]
y = df['Diabetes']

# Preprocessing: scale the numeric features and one-hot encode the categorical one.
# handle_unknown='ignore' keeps predict() from raising if the (tiny) training
# split happens to miss a category that shows up in the test split.
preprocessor = ColumnTransformer(transformers=[
    ('num', StandardScaler(), ['BMI','Age']),
    ('cat', OneHotEncoder(handle_unknown='ignore'), ['Sugar'])
])

# Create pipeline with logistic regression
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', LogisticRegression(max_iter=1000))
])

# Split data into training and test sets (fixed seed for a reproducible split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train the model
model.fit(X_train, y_train)

# Predict on test data
y_pred = model.predict(X_test)

# Measure accuracy and build the classification report.
# zero_division=0 reports 0.0 for a metric whose denominator is zero (e.g. a
# class with no samples in the test split) instead of emitting a warning.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, zero_division=0)

# Error metrics on the 0/1 labels. Because every error is exactly 0 or 1,
# MAE and MSE both equal the misclassification rate (1 - accuracy).
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", np.sqrt(mse))
print(f'Accuracy: {accuracy}')
print('Classification Report:')
print(report)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype
---  ------    --------------  -----
 0   BMI       10 non-null     int64
 1   Age       10 non-null     int64
 2   Sugar     10 non-null     int64
 3   Diabetes  10 non-null     int64
dtypes: int64(4)
memory usage: 452.0 bytes
None
   BMI  Age  Sugar  Diabetes
0   10   25      1         1
1    8   20      0         1
2    7   25      1         0
3   18   20      1         1
4   40   50      0         0
Mean Absolute Error (MAE): 0.6666666666666666
Mean Squared Error (MSE): 0.6666666666666666
Root Mean Squared Error (RMSE): 0.816496580927726
Accuracy: 0.3333333333333333
Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       1.00      0.33      0.50         3

    accuracy                           0.33         3
   macro avg       0.50      0.17      0.25 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [84]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.metrics import accuracy_score, confusion_matrix, mean_absolute_error, mean_squared_error
import numpy as np
from sklearn.preprocessing import LabelEncoder

# Toy dataset: three features and a binary "Diabetes" label.
data = {
    'BMI':[10,8,7,18,40,23,3,13,17,6],
    'Age': [25,20,25,20,50,15,10,30,22,27],
    'Sugar':[1,0,1,1,0,1,0,0,1,0],
    'Diabetes':[1,1,0,1,0,1,0,0,1,0]
}
df = pd.DataFrame(data)


# Prepare dataset
X = df.drop(columns=["Diabetes"])
y = df["Diabetes"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Decision Tree Classifier on the training split only, so the test rows
# never influence the fitted tree. random_state makes tie-breaking between
# equally good splits reproducible.
clf = DecisionTreeClassifier(max_depth=1, random_state=42).fit(X_train, y_train)

# clf.tree_.threshold holds one entry per node (size equals clf.tree_.node_count).
# Manually modifying a threshold must happen AFTER the final fit: calling fit()
# again rebuilds the tree and silently discards any manual edit.
# NOTE(review): with max_depth=1 the tree has at most a root and two leaves, so
# node 2 is a leaf and its threshold is not used for prediction — this override
# only changes results if the tree is made deeper. Confirm the intended node.
# clf.tree_.threshold[1] = 10.0
clf.tree_.threshold[2] = 1

# Predict with the (manually adjusted) tree; no refit here on purpose.
y_pred = clf.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

# Error metrics on the 0/1 labels; the MSE is computed once and reused for the RMSE.
mse = mean_squared_error(y_test, y_pred)
print("Mean Absolute Error (MAE):", mean_absolute_error(y_test, y_pred))
print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", np.sqrt(mse))



Accuracy: 0.5
Confusion Matrix:
 [[0 0]
 [1 1]]
Mean Absolute Error (MAE): 0.5
Mean Squared Error (MSE): 0.5
Root Mean Squared Error (RMSE): 0.7071067811865476


In [None]:
#Modification

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.metrics import accuracy_score, confusion_matrix, mean_absolute_error, mean_squared_error
import numpy as np
from sklearn.preprocessing import LabelEncoder

# Load the dataset from a CSV file in the working directory (this cell replaces
# the in-notebook toy dataset used earlier).
# NOTE(review): the file is expected to contain a binary "Outcome" target column
# plus numeric feature columns — confirm against the actual CSV.
df=pd.read_csv('diabetes.csv')

# Prepare dataset
X = df.drop(columns=["Outcome"])
y = df["Outcome"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Decision Tree Classifier on the training split only, so the test rows
# never influence the fitted tree. random_state makes tie-breaking between
# equally good splits reproducible.
clf = DecisionTreeClassifier(max_depth=5, random_state=42).fit(X_train, y_train)

# Manually modifying thresholds: node id -> new split threshold.
# clf.tree_.threshold holds one entry per node (size equals clf.tree_.node_count).
# The overrides are applied AFTER the final fit, because calling fit() again
# rebuilds the tree and silently discards any manual edit.
# NOTE(review): node ids depend on the fitted tree; an override on a leaf node
# (clf.tree_.children_left[node_id] == -1) has no effect on predictions. Verify
# that nodes 2 and 6 are the internal nodes you intend to change.
threshold_overrides = {2: 200, 6: 20}
for node_id, new_threshold in threshold_overrides.items():
    clf.tree_.threshold[node_id] = new_threshold

# Predict with the manually adjusted tree; no refit here on purpose.
y_pred = clf.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

# Error metrics on the 0/1 labels; the MSE is computed once and reused for the RMSE.
mse = mean_squared_error(y_test, y_pred)
print("Mean Absolute Error (MAE):", mean_absolute_error(y_test, y_pred))
print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", np.sqrt(mse))

