In [1]:
# Import pandas and sklearn libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Baseline: multinomial logistic regression predicting asthma severity from the
# remaining columns of the processed dataset.

# Read the dataset from a csv file
df = pd.read_csv('/kaggle/input/asthma-disease-prediction/processed-data.csv')

# The three indicator columns that together encode the target class
SEVERITY_COLUMNS = ['Severity_Mild', 'Severity_Moderate', 'Severity_None']

# The weighted sum used for the label is only unambiguous when at most one
# indicator is set per row (assumes 0/1 indicators - TODO confirm against the
# dataset description). Otherwise e.g. Mild + Moderate = 3 would silently be
# counted as Severity_None, so fail loudly instead.
if (df[SEVERITY_COLUMNS].sum(axis=1) > 1).any():
    raise ValueError(
        'Severity indicator columns are not mutually exclusive; '
        'cannot combine them into a single target class'
    )

# Features: every column except the severity indicators
X = df.drop(columns=SEVERITY_COLUMNS)

# Combine the indicators into one integer target class
# 1: Severity_Mild, 2: Severity_Moderate, 3: Severity_None, or 0 if none of the three
y = df['Severity_Mild'] + 2 * df['Severity_Moderate'] + 3 * df['Severity_None']

# Split the data into train and test sets with a 80-20 ratio (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and fit a logistic regression model on the train set
model = LogisticRegression()
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model performance using accuracy, confusion matrix, and classification report.
# sep='' keeps print() from inserting a space after the '\n', which would shift the
# first matrix row and the report header one column to the right.
# NOTE(review): the recorded run scores ~0.24 accuracy on four roughly equal-sized
# classes, i.e. about chance level - worth checking whether the features carry signal.
print('Accuracy:', accuracy_score(y_test, y_pred))
print('Confusion matrix:\n', confusion_matrix(y_test, y_pred), sep='')
print('Classification report:\n', classification_report(y_test, y_pred), sep='')

Accuracy: 0.24035669191919193
Confusion matrix:
 [[5858 2825 1390 5646]
 [6282 2630 1411 5652]
 [6111 2765 1366 5667]
 [6229 2795 1358 5375]]
Classification report:
               precision    recall  f1-score   support

           0       0.24      0.37      0.29     15719
           1       0.24      0.16      0.19     15975
           2       0.25      0.09      0.13     15909
           3       0.24      0.34      0.28     15757

    accuracy                           0.24     63360
   macro avg       0.24      0.24      0.22     63360
weighted avg       0.24      0.24      0.22     63360



In [2]:
# Import pandas and keras libraries
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Same severity-prediction task as a single softmax layer (multinomial logistic
# regression) trained with Keras.

# Seed Python, NumPy and the Keras backend so weight initialisation and batch
# shuffling are repeatable between runs (some GPU ops may still be nondeterministic).
from keras.utils import set_random_seed
set_random_seed(42)

# Read the dataset from a csv file
df = pd.read_csv('/kaggle/input/asthma-disease-prediction/processed-data.csv')

# The three indicator columns that together encode the target class
SEVERITY_COLUMNS = ['Severity_Mild', 'Severity_Moderate', 'Severity_None']

# The weighted sum used for the label is only unambiguous when at most one
# indicator is set per row (assumes 0/1 indicators - TODO confirm against the
# dataset description). Otherwise e.g. Mild + Moderate = 3 would silently be
# counted as Severity_None, so fail loudly instead.
if (df[SEVERITY_COLUMNS].sum(axis=1) > 1).any():
    raise ValueError(
        'Severity indicator columns are not mutually exclusive; '
        'cannot combine them into a single target class'
    )

# Features: every column except the severity indicators
X = df.drop(columns=SEVERITY_COLUMNS)

# Combine the indicators into one integer target class
# 1: Severity_Mild, 2: Severity_Moderate, 3: Severity_None, or 0 if none of the three
y = df['Severity_Mild'] + 2 * df['Severity_Moderate'] + 3 * df['Severity_None']

# Convert the target class to one-hot encoding; the number of columns is the
# number of classes Keras inferred from the labels.
y = to_categorical(y)
n_classes = y.shape[1]

# Split the data into train and test sets with a 80-20 ratio (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and compile a sequential model with a dense layer for logistic regression.
# The output width follows the one-hot target instead of a hard-coded 4, so the
# layer cannot drift out of sync with the label encoding.
model = Sequential()
model.add(Dense(n_classes, input_dim=X.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Fit the model on the train set
model.fit(X_train, y_train, epochs=2, batch_size=32)

# Make predictions on the test set (per-class probabilities, one row per sample)
y_pred = model.predict(X_test)

# Collapse one-hot targets and predicted probabilities to class indices once,
# rather than recomputing argmax for every metric.
y_test_labels = y_test.argmax(axis=1)
y_pred_labels = y_pred.argmax(axis=1)

# Evaluate the model performance using accuracy, confusion matrix, and classification report.
# sep='' keeps print() from inserting a space after the '\n', which would shift the
# first matrix row and the report header one column to the right.
print('Accuracy:', accuracy_score(y_test_labels, y_pred_labels))
print('Confusion matrix:\n', confusion_matrix(y_test_labels, y_pred_labels), sep='')
print('Classification report:\n', classification_report(y_test_labels, y_pred_labels), sep='')


Epoch 1/2
Epoch 2/2
Accuracy: 0.2482638888888889
Confusion matrix:
 [[1150 7619 4020 2930]
 [1243 7513 4185 3034]
 [1206 7560 4126 3017]
 [1189 7375 4252 2941]]
Classification report:
               precision    recall  f1-score   support

           0       0.24      0.07      0.11     15719
           1       0.25      0.47      0.33     15975
           2       0.25      0.26      0.25     15909
           3       0.25      0.19      0.21     15757

    accuracy                           0.25     63360
   macro avg       0.25      0.25      0.23     63360
weighted avg       0.25      0.25      0.23     63360

