In [3]:
# Now, let's load the dataset and compute the accuracy results based on the models trained in the notebook.
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import VotingClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

# Read the survey responses from the CSV file in the working directory.
df = pd.read_csv("survey lung cancer.csv")

# Replace the values of both categorical columns with integer codes.
# One encoder is refit for each column in turn, so once the loop finishes
# it holds the classes of LUNG_CANCER only.
le = preprocessing.LabelEncoder()
for column in ('GENDER', 'LUNG_CANCER'):
    df[column] = le.fit_transform(df[column])

# Target is the encoded LUNG_CANCER column; the features are every remaining
# column except AGE, GENDER and SHORTNESS OF BREATH, which this step leaves
# out (as was done in the notebook).
y = df['LUNG_CANCER']
X = df.drop(columns=['AGE', 'GENDER', 'SHORTNESS OF BREATH', 'LUNG_CANCER'])

# Hold out 20% of the rows for testing; the fixed seed makes the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Initializing the classifiers.
# The random forest, the MLP and the decision tree all draw random numbers
# while fitting; giving each a fixed seed (the same value used for the
# train/test split) makes the reported accuracies repeatable across runs.
svm = SVC(kernel='linear')
rf = RandomForestClassifier(n_estimators=100, random_state=42)
knn = KNeighborsClassifier(n_neighbors=5)
ann = MLPClassifier(max_iter=500, random_state=42)
log = LogisticRegression()
dc = DecisionTreeClassifier(random_state=42)

# Voting and Stacking Classifiers.
# eclf: hard (majority-label) vote over SVM, random forest, k-NN, logistic
#       regression and decision tree.
# sc:   stacks SVM, random forest, k-NN and logistic regression, with a
#       logistic regression as the final estimator.
# The MLP (ann) is evaluated on its own and is not part of either ensemble.
eclf = VotingClassifier(
    estimators=[('svm', svm), ('rf', rf), ('knn', knn), ('log', log), ('dc', dc)],
    voting='hard',
)
sc = StackingClassifier(
    estimators=[('svm', svm), ('rf', rf), ('knn', knn), ('log', log)],
    final_estimator=log,
)

# Fit every model on the training split and record its accuracy on the test
# split, keyed by the model's class name.
models = [svm, rf, knn, ann, eclf, sc, log, dc]
accuracy_results = {}

for model in models:
    # fit() hands back the fitted estimator, so prediction can be chained.
    y_pred = model.fit(X_train, y_train).predict(X_test)
    accuracy_results[type(model).__name__] = accuracy_score(y_test, y_pred)

# Bare expression so the notebook displays the dictionary as the cell output.
accuracy_results


{'SVC': 0.967741935483871,
 'RandomForestClassifier': 0.9516129032258065,
 'KNeighborsClassifier': 0.9354838709677419,
 'MLPClassifier': 0.967741935483871,
 'VotingClassifier': 0.9354838709677419,
 'StackingClassifier': 0.967741935483871,
 'LogisticRegression': 0.967741935483871,
 'DecisionTreeClassifier': 0.9193548387096774}