In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

In [10]:
# Load the raw table, then pull the model inputs and the label out by column position.
dataset = pd.read_csv('data.csv')

feature_columns = [0, 1]  # first two columns are used as model inputs
target_column = 3         # fourth column is the label; column 2 is not used

x = dataset.iloc[:, feature_columns].values
y = dataset.iloc[:, target_column].values

In [11]:
# Hold out half of the rows for evaluation; the seed makes the split repeatable.
xtrain, xtest, ytrain, ytest = train_test_split(
    x,
    y,
    test_size=0.5,
    random_state=0,
)

# Learn the scaling statistics from the training half only, then apply the
# same fitted scaler to both halves so no test information enters the fit.
sc_x = StandardScaler().fit(xtrain)
xtrain = sc_x.transform(xtrain)
xtest = sc_x.transform(xtest)

In [12]:
# Candidate models, keyed by the display name used when reporting results.
#
# random_state is pinned on every estimator that draws random numbers while
# fitting, so that the accuracies (and therefore the "best" model picked from
# them) are the same on every Restart & Run All:
#   - the decision tree permutes features when searching for splits,
#   - the random forest bootstraps rows and subsamples features,
#   - SVC with probability=True shuffles data for its internal
#     cross-validated probability calibration.
# The seed value matches the one used for the train/test split.
classifiers = {
    'Logistic Regression': LogisticRegression(),
    'Decision Tree': DecisionTreeClassifier(random_state=0),
    'Random Forest': RandomForestClassifier(random_state=0),
    'KNN': KNeighborsClassifier(),
    # probability=True is required so predict_proba is available on the SVM.
    'SVM': SVC(probability=True, random_state=0)
}

In [13]:
# Fit each candidate on the training split and record its accuracy on the
# test split, keyed by the same display name used in `classifiers`.
accuracies = {}
for name, classifier in classifiers.items():
    # fit() returns the fitted estimator, so prediction can be chained onto it.
    y_pred = classifier.fit(xtrain, ytrain).predict(xtest)
    accuracy = accuracy_score(ytest, y_pred)
    accuracies[name] = accuracy

In [14]:
# Build the report as a list of lines (header first, then one line per model
# with the accuracy rounded to two decimals) and emit it in a single print.
report_lines = ["Accuracies:"]
for name, accuracy in accuracies.items():
    report_lines.append(f"{name}: {accuracy:.2f}")
print("\n".join(report_lines))


Accuracies:
Logistic Regression: 0.90
Decision Tree: 0.83
Random Forest: 0.83
KNN: 0.87
SVM: 0.97


In [15]:
# Pick the model with the highest recorded accuracy; on a tie, max() keeps the
# first such entry in the dict's insertion order.
# NOTE(review): the accuracies were measured on the test split, so choosing the
# winner from them makes its reported score optimistic — consider a separate
# validation split or cross-validation for model selection.
best_classifier_name = max(
    accuracies,
    key=lambda model_name: accuracies[model_name],
)
best_classifier = classifiers[best_classifier_name]

In [16]:
# Decision cut-off applied to the predicted probabilities.
threshold = 0.5

# Second column of predict_proba for every test row.
# NOTE(review): assumes that column is the class of interest — confirm
# against best_classifier.classes_.
y_prob = best_classifier.predict_proba(xtest)[:, 1]

# Share of test rows whose probability is strictly above the cut-off,
# expressed as a percentage.
above_threshold = y_prob > threshold
percentage_schizophrenia = above_threshold.mean() * 100

In [17]:
# percentage_schizophrenia is the share of test-split samples whose predicted
# probability exceeds `threshold`. It is a statistic over the whole test set,
# not the likelihood that any one person has the condition, so the message
# says exactly that.
# NOTE(review): wording assumes column 1 of predict_proba is the schizophrenia
# class — confirm against best_classifier.classes_.
print(
    f"Percentage of test samples that {best_classifier_name} predicts as schizophrenia "
    f"(probability > {threshold}): {percentage_schizophrenia:.2f}%"
)

The percentage likelihood of suffering from schizophrenia using SVM is: 63.33%
