In [13]:
# Import pandas and sklearn libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Read the dataset from a csv file
df = pd.read_csv('/kaggle/input/asthma-disease-prediction/processed-data.csv')

# Define the input and output columns
input_columns = ['Tiredness','Dry-Cough','Difficulty-in-Breathing','Sore-Throat','None_Sympton','Pains','Nasal-Congestion','Runny-Nose','None_Experiencing','Age_0-9','Age_10-19','Age_20-24','Age_25-59','Age_60+','Gender_Female','Gender_Male']
output_columns = ['Severity_Mild','Severity_Moderate','Severity_None']

# Convert the output columns into one target column with three classes
# Use a dictionary to map the class names to numbers
class_dict = {'Severity_Mild': 1, 'Severity_Moderate': 2, 'Severity_None': 3}
df['target'] = df[output_columns].idxmax(axis=1).map(class_dict)

# Drop the output columns from the dataframe
df.drop(output_columns, axis=1, inplace=True)

# Split the data into train, test and validation sets
X_train, X_test, y_train, y_test = train_test_split(df[input_columns], df['target'], test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=42)

# Create and fit a logistic regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred_test = model.predict(X_test)

# Evaluate the model performance using accuracy, confusion matrix and classification report
print('Accuracy on test set:', accuracy_score(y_test, y_pred_test))
print('Confusion matrix on test set:\n', confusion_matrix(y_test, y_pred_test))
print('Classification report on test set:\n', classification_report(y_test, y_pred_test))


Accuracy on test set: 0.5002209595959596
Confusion matrix on test set:
 [[31694     0     0]
 [15909     0     0]
 [15757     0     0]]
Classification report on test set:
               precision    recall  f1-score   support

           1       0.50      1.00      0.67     31694
           2       0.00      0.00      0.00     15909
           3       0.00      0.00      0.00     15757

    accuracy                           0.50     63360
   macro avg       0.17      0.33      0.22     63360
weighted avg       0.25      0.50      0.33     63360



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [15]:
# Import Keras and other libraries
import keras
from keras.models import Sequential
from keras.layers import Dense, Flatten
from keras.optimizers import SGD
import pandas as pd

# Read the dataset from a csv file
df = pd.read_csv('/kaggle/input/asthma-disease-prediction/processed-data.csv')

# Define the input and output columns
input_columns = ['Tiredness','Dry-Cough','Difficulty-in-Breathing','Sore-Throat','None_Sympton','Pains','Nasal-Congestion','Runny-Nose','None_Experiencing','Age_0-9','Age_10-19','Age_20-24','Age_25-59','Age_60+','Gender_Female','Gender_Male']
output_columns = ['Severity_Mild','Severity_Moderate','Severity_None']

# Convert the output columns into one target column with three classes
# Use a dictionary to map the class names to numbers
class_dict = {'Severity_Mild': 0, 'Severity_Moderate': 1, 'Severity_None': 2}
df['target'] = df[output_columns].idxmax(axis=1).map(class_dict)

# Drop the output columns from the dataframe
df.drop(output_columns, axis=1, inplace=True)

# Split the data into features and labels
X = df[input_columns].values
y = df['target'].values

# One-hot encode the labels
y = keras.utils.to_categorical(y, num_classes=3)

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a sequential model
model = Sequential()

# Add a flatten layer to convert the input shape to a 1D vector
model.add(Flatten(input_shape=(16,)))

# Add a dense layer with 512 units and ReLU activation
model.add(Dense(512, activation='relu'))

# Add another dense layer with 512 units and ReLU activation
model.add(Dense(512, activation='relu'))

# Add an output layer with 3 units and softmax activation
model.add(Dense(3, activation='softmax'))

# Compile the model with SGD optimizer, categorical crossentropy loss and accuracy metric
model.compile(optimizer=SGD(learning_rate=0.01), loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model for 2 epochs with a batch size of 32
model.fit(X_train, y_train, epochs=2, batch_size=32)

# Evaluate the model on the test set and print the results
test_loss, test_acc = model.evaluate(X_test, y_test)
print('Test loss:', test_loss)
print('Test accuracy:', test_acc)


Epoch 1/2
Epoch 2/2
Test loss: 1.0406248569488525
Test accuracy: 0.5002209544181824
