In [16]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report

# Load the dataset
data = {
    'BMI':[2,8,7,8,40,23,3,13,17,6],
    'Age': [25,20,25,20,50,15,10,30,22,27],
    'Sugar':["Yes","No","Yes","Yes","No","Yes","No","No","Yes","Yes"],
    'Diabetes':["Yes","Yes","No","Yes","No","Yes","No","No","Yes","No"]
}
df = pd.DataFrame(data)
# Display basic info
print(df.info())
print(df.head())

# --- LOGISTIC REGRESSION MODEL ---

# Define features and target variable
X = df[['BMI','Age','Sugar']]
y = df['Diabetes']

# Preprocessing: Scale numeric features and one-hot encode categorical features
preprocessor = ColumnTransformer(transformers=[
    ('num', StandardScaler(), ['BMI','Age']),
    ('cat', OneHotEncoder(), ['Sugar'])
])

# Create pipeline with logistic regression
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', LogisticRegression(max_iter=1000))
])

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model
model.fit(X_train, y_train)

# Predict on test data
y_pred = model.predict(X_test)

# Measure accuracy and display classification report
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f'Accuracy: {accuracy:.2%}')
print('Classification Report:')
print(report)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   BMI       10 non-null     int64 
 1   Age       10 non-null     int64 
 2   Sugar     10 non-null     object
 3   Diabetes  10 non-null     object
dtypes: int64(2), object(2)
memory usage: 452.0+ bytes
None
   BMI  Age Sugar Diabetes
0    2   25   Yes      Yes
1    8   20    No      Yes
2    7   25   Yes       No
3    8   20   Yes      Yes
4   40   50    No       No
Accuracy: 50.00%
Classification Report:
              precision    recall  f1-score   support

          No       0.00      0.00      0.00         0
         Yes       1.00      0.50      0.67         2

    accuracy                           0.50         2
   macro avg       0.50      0.25      0.33         2
weighted avg       1.00      0.50      0.67         2



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [18]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.metrics import accuracy_score, confusion_matrix, mean_absolute_error, mean_squared_error
import numpy as np
from sklearn.preprocessing import LabelEncoder

data = {
    'BMI':[2,8,7,8,40,23,3,13,17,6],
    'Age': [20,25,25,20,50,15,10,30,22,27],
    'Sugar':[1,0,1,1,0,1,0,0,1,0],
    'Diabetes':[1,1,0,1,0,1,0,0,1,0]
        # 'Sugar':["Yes","No","Yes","Yes","No","Yes","No","No","Yes","No"],
    # 'Diabetes':["Yes","Yes","No","Yes","No","Yes","No","No","Yes","No"]
}
iris_df = pd.DataFrame(data)


# Load datasets
# iris_df = pd.read_csv("iris.csv")

# Prepare IRIS dataset
X_iris = iris_df.drop(columns=["Diabetes"])
y_iris = iris_df["Diabetes"]
X_train_iris, X_test_iris, y_train_iris, y_test_iris = train_test_split(X_iris, y_iris, test_size=0.2, random_state=42)

# Train Decision Tree Classifier for IRIS
iris_clf = DecisionTreeClassifier(random_state=42)
iris_clf.fit(X_train_iris, y_train_iris)
y_pred_iris = iris_clf.predict(X_test_iris)

# IRIS Metrics
print("Iris Accuracy:", accuracy_score(y_test_iris, y_pred_iris))
print("Iris Confusion Matrix:\n", confusion_matrix(y_test_iris, y_pred_iris))



Iris Accuracy: 0.5
Iris Confusion Matrix:
 [[0 0]
 [1 1]]
