# Assignment 04: Random Forest Classifier

In [None]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Train a decision tree and a random forest on the cancer-patient dataset,
# compare their accuracy on a held-out test set, and report which features
# the random forest relied on most.

# Single seed shared by the split and both estimators so that repeated runs
# print the same accuracies and the same feature ranking.
RANDOM_STATE = 42

# Load the dataset
dataset = pd.read_csv('/content/cancer_patients.csv')

# Separating Dependent and Independent variables:
# every column except the last is a feature; the last column is the label.
x = dataset.iloc[:, :-1]
y = dataset.iloc[:, -1]

# Split the data into training (70%) and test (30%) sets.
# stratify=y keeps the class proportions the same in both parts.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=.3, random_state=RANDOM_STATE, stratify=y
)

# Create decision tree and random forest classifiers.
# Both are seeded: without random_state the tree's tie-breaking and the
# forest's bootstrap/feature sampling differ from run to run, which makes
# the reported scores and importances non-reproducible.
dtc = DecisionTreeClassifier(random_state=RANDOM_STATE)
rfc = RandomForestClassifier(random_state=RANDOM_STATE)

# Fit the classifiers to the training data
dtc.fit(x_train, y_train)
rfc.fit(x_train, y_train)

# Predict the labels for the test data
dtc_predictions = dtc.predict(x_test)
rfc_predictions = rfc.predict(x_test)

# Evaluate the accuracy of the classifiers (fraction of test rows predicted correctly)
# NOTE(review): the recorded run shows 1.0 for both models; a perfect score is
# worth double-checking for duplicated rows or label leakage -- TODO confirm.
dtc_accuracy = accuracy_score(y_test, dtc_predictions)
rfc_accuracy = accuracy_score(y_test, rfc_predictions)

# Print the accuracy of the classifiers
print('Decision tree accuracy:', dtc_accuracy)
print('Random forest accuracy:', rfc_accuracy)

# Find the important features of the random forest classifier.
# feature_importances_ holds one score per column of x, in column order.
important_features = rfc.feature_importances_
print('Important features:', important_features)

# Pair each score with its column name and keep the five largest.
# The tuples are ordered (importance, name), so sorting ranks by importance
# first and only falls back to the column name when two scores are equal.
top_five_features = sorted(zip(important_features, x.columns), reverse=True)[:5]
print('Top five features:', top_five_features)

Decision tree accuracy: 1.0
Random forest accuracy: 1.0
Important features: [0.00870802 0.00047022 0.0100365  0.11039561 0.09635973 0.07836926
 0.0614726  0.03124193 0.01623781 0.06372261 0.01006434 0.02216957
 0.0121462  0.07836709 0.09220703 0.02188102 0.03903425 0.07136908
 0.05792585 0.03355827 0.03005693 0.01601133 0.03819476]
Top five features: [(0.11039561092379742, 'Alcohol use'), (0.09635972897105312, 'Dust Allergy'), (0.09220702541796003, 'Fatigue'), (0.0783692584355671, 'OccuPational Hazards'), (0.07836709092518963, 'Coughing of Blood')]
