
# Disease Type Prediction Using Classification

This notebook outlines how to predict the disease type (the `HealthTopic` column) with a random forest classifier, using features such as time, region, number of cases, population type, and the specific indicator.


In [None]:

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# This cell assumes the data has already been loaded and cleaned in an earlier setup step
# and is available in the `data` variable; the model is set up from it below.

# Target: the health topic recorded for each row.
y_class = data['HealthTopic']

# Features: one-hot encode the selected columns; drop_first=True drops the
# first level of every encoded column.
feature_columns = ['Time', 'RegionCode', 'NumValue', 'Population', 'Indicator']
X_class = pd.get_dummies(data[feature_columns], drop_first=True)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_class,
    y_class,
    test_size=0.2,
    random_state=42,
)

# Build a 100-tree random forest and train it on the training portion
# (fit returns the fitted estimator itself).
classifier = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Predict labels for the held-out rows.
y_pred = classifier.predict(X_test)

# Score the predictions: overall accuracy plus the per-class report.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

# Show the results.
print("Accuracy:", accuracy)
print("Classification Report:\n", report)
