In [1]:
!pip install pymrmr

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pymrmr
  Downloading pymrmr-0.1.11.tar.gz (69 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.5/69.5 KB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pymrmr
  Building wheel for pymrmr (setup.py) ... [?25l[?25hdone
  Created wheel for pymrmr: filename=pymrmr-0.1.11-cp38-cp38-linux_x86_64.whl size=353787 sha256=eb04511281413725691e75f51e1887132f7f147ea8369133b5b36f5ba3fabdf6
  Stored in directory: /root/.cache/pip/wheels/a7/f0/23/3cb98b0a2ac66bc6bf8930b1ed06d1d5bdc541e175d9f581b7
Successfully built pymrmr
Installing collected packages: pymrmr
Successfully installed pymrmr-0.1.11


In [12]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from pymrmr import mRMR

In [23]:
df = pd.read_csv('Autism-Adult.csv')
df.columns

Index(['id', 'A1_Score', 'A2_Score', 'A3_Score', 'A4_Score', 'A5_Score',
       'A6_Score', 'A7_Score', 'A8_Score', 'A9_Score', 'A10_Score', 'age',
       'gender', 'ethnicity', 'jundice', 'austim', 'contry_of_res',
       'used_app_before', 'result', 'age_desc', 'relation', 'Class/ASD'],
      dtype='object')

In [24]:
# Split the data into features and target
X = df.drop(['id','Class/ASD','austim','used_app_before','age_desc','relation','result','ethnicity'],axis=1)
y=df['Class/ASD']

In [25]:
# Categorical encoding
from sklearn.preprocessing import LabelEncoder
le=LabelEncoder()
X.columns

Index(['A1_Score', 'A2_Score', 'A3_Score', 'A4_Score', 'A5_Score', 'A6_Score',
       'A7_Score', 'A8_Score', 'A9_Score', 'A10_Score', 'age', 'gender',
       'jundice', 'contry_of_res'],
      dtype='object')

In [27]:
# Encoding Sex
le.fit(X['gender'])
X['gender']=le.transform(X['gender'])
# Encoding Country of residence
le.fit(X['contry_of_res'])
X['contry_of_res']=le.transform(X['contry_of_res'])
# Encoding Jaundice
le.fit(X['jundice'])
X['jundice']=le.transform(X['jundice'])
# Encode Age as it is in object format
le.fit(X['age'])
X['age']=le.transform(X['age'])

In [28]:
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 704 entries, 0 to 703
Data columns (total 14 columns):
 #   Column         Non-Null Count  Dtype
---  ------         --------------  -----
 0   A1_Score       704 non-null    int64
 1   A2_Score       704 non-null    int64
 2   A3_Score       704 non-null    int64
 3   A4_Score       704 non-null    int64
 4   A5_Score       704 non-null    int64
 5   A6_Score       704 non-null    int64
 6   A7_Score       704 non-null    int64
 7   A8_Score       704 non-null    int64
 8   A9_Score       704 non-null    int64
 9   A10_Score      704 non-null    int64
 10  age            704 non-null    int64
 11  gender         704 non-null    int64
 12  jundice        704 non-null    int64
 13  contry_of_res  704 non-null    int64
dtypes: int64(14)
memory usage: 77.1 KB


In [29]:
# Define the range of number of features to select
K_range = range(1, len(X.columns) + 1)
K_range

range(1, 15)

In [30]:
# Initialize a dictionary to store the accuracy scores for different k values
acc_scores = {}

In [31]:
for k in K_range:
    # Select the top K features using mRMR
    top_k_indices = mRMR(X, 'MID', k)
    X_mrmr = X.loc[:, top_k_indices]

    # Train a Random Forest classifier on the selected features
    clf = RandomForestClassifier()
    clf.fit(X_mrmr, y)
    # Predict on the training data
    y_pred = clf.predict(X_mrmr)
    # Compute the accuracy
    acc = accuracy_score(y, y_pred)
    acc_scores["Random Forest, k = {}".format(k)] = acc
    print("Random Forest, k = {}: Accuracy: {:.2f}%".format(k, acc * 100))

Random Forest, k = 1: Accuracy: 76.70%
Random Forest, k = 2: Accuracy: 81.68%
Random Forest, k = 3: Accuracy: 83.66%
Random Forest, k = 4: Accuracy: 85.09%
Random Forest, k = 5: Accuracy: 85.94%
Random Forest, k = 6: Accuracy: 91.48%
Random Forest, k = 7: Accuracy: 94.18%
Random Forest, k = 8: Accuracy: 96.45%
Random Forest, k = 9: Accuracy: 98.58%
Random Forest, k = 10: Accuracy: 99.72%
Random Forest, k = 11: Accuracy: 99.86%
Random Forest, k = 12: Accuracy: 100.00%
Random Forest, k = 13: Accuracy: 100.00%
Random Forest, k = 14: Accuracy: 100.00%


In [32]:
# Decision Tree
from sklearn.tree import DecisionTreeClassifier
for k in K_range:
    # Select the top K features using mRMR
    top_k_indices = mRMR(X, 'MID', k)
    X_mrmr = X.loc[:, top_k_indices]

   
    dec = DecisionTreeClassifier()
    dec.fit(X_mrmr, y)
    # Predict on the training data
    y_pred = dec.predict(X_mrmr)
    # Compute the accuracy
    acc = accuracy_score(y, y_pred)
    acc_scores["Decision Tree, k = {}".format(k)] = acc
    print("Decision Tree, k = {}: Accuracy: {:.2f}%".format(k, acc * 100))

Decision Tree, k = 1: Accuracy: 76.70%
Decision Tree, k = 2: Accuracy: 81.68%
Decision Tree, k = 3: Accuracy: 83.66%
Decision Tree, k = 4: Accuracy: 85.09%
Decision Tree, k = 5: Accuracy: 85.94%
Decision Tree, k = 6: Accuracy: 91.48%
Decision Tree, k = 7: Accuracy: 94.18%
Decision Tree, k = 8: Accuracy: 96.45%
Decision Tree, k = 9: Accuracy: 98.72%
Decision Tree, k = 10: Accuracy: 99.72%
Decision Tree, k = 11: Accuracy: 99.86%
Decision Tree, k = 12: Accuracy: 100.00%
Decision Tree, k = 13: Accuracy: 100.00%
Decision Tree, k = 14: Accuracy: 100.00%


In [33]:
# KNN Classification
from sklearn.neighbors import KNeighborsClassifier
for k in K_range:
    # Select the top K features using mRMR
    top_k_indices = mRMR(X, 'MID', k)
    X_mrmr = X.loc[:, top_k_indices]

    knn = KNeighborsClassifier()
    knn.fit(X_mrmr, y)
    # Predict on the training data
    y_pred = knn.predict(X_mrmr)
    # Compute the accuracy
    acc = accuracy_score(y, y_pred)
    acc_scores["KNN, k = {}".format(k)] = acc
    print("KNN, k = {}: Accuracy: {:.2f}%".format(k, acc * 100))

KNN, k = 1: Accuracy: 75.57%
KNN, k = 2: Accuracy: 79.40%
KNN, k = 3: Accuracy: 80.97%
KNN, k = 4: Accuracy: 81.11%
KNN, k = 5: Accuracy: 81.96%
KNN, k = 6: Accuracy: 87.07%
KNN, k = 7: Accuracy: 86.93%
KNN, k = 8: Accuracy: 89.63%
KNN, k = 9: Accuracy: 90.34%
KNN, k = 10: Accuracy: 93.75%
KNN, k = 11: Accuracy: 94.74%
KNN, k = 12: Accuracy: 94.74%
KNN, k = 13: Accuracy: 89.20%
KNN, k = 14: Accuracy: 89.35%


In [34]:
# Naive Bayes classification
from sklearn.naive_bayes import GaussianNB
for k in K_range:
    # Select the top K features using mRMR
    top_k_indices = mRMR(X, 'MID', k)
    X_mrmr = X.loc[:, top_k_indices]

    naive = GaussianNB()
    naive.fit(X_mrmr, y)
    # Predict on the training data
    y_pred = naive.predict(X_mrmr)
    # Compute the accuracy
    acc = accuracy_score(y, y_pred)
    acc_scores["Naive Bayes, k = {}".format(k)] = acc
    print("Naive Bayes, k = {}: Accuracy: {:.2f}%".format(k, acc * 100))

Naive Bayes, k = 1: Accuracy: 73.15%
Naive Bayes, k = 2: Accuracy: 73.15%
Naive Bayes, k = 3: Accuracy: 75.00%
Naive Bayes, k = 4: Accuracy: 75.00%
Naive Bayes, k = 5: Accuracy: 75.43%
Naive Bayes, k = 6: Accuracy: 82.67%
Naive Bayes, k = 7: Accuracy: 84.94%
Naive Bayes, k = 8: Accuracy: 87.93%
Naive Bayes, k = 9: Accuracy: 89.91%
Naive Bayes, k = 10: Accuracy: 92.90%
Naive Bayes, k = 11: Accuracy: 93.75%
Naive Bayes, k = 12: Accuracy: 94.60%
Naive Bayes, k = 13: Accuracy: 94.46%
Naive Bayes, k = 14: Accuracy: 96.59%


In [35]:
# Logistic Regression
from sklearn.linear_model import LogisticRegression
for k in K_range:
    # Select the top K features using mRMR
    top_k_indices = mRMR(X, 'MID', k)
    X_mrmr = X.loc[:, top_k_indices]

    log = LogisticRegression(max_iter=1000)
    log.fit(X_mrmr, y)
    # Predict on the training data
    y_pred = log.predict(X_mrmr)
    # Compute the accuracy
    acc = accuracy_score(y, y_pred)
    acc_scores["Logistic Regression, k = {}".format(k)] = acc
    print("Logistic Regression, k = {}: Accuracy: {:.2f}%".format(k, acc * 100))

Logistic Regression, k = 1: Accuracy: 73.15%
Logistic Regression, k = 2: Accuracy: 73.15%
Logistic Regression, k = 3: Accuracy: 74.72%
Logistic Regression, k = 4: Accuracy: 75.99%
Logistic Regression, k = 5: Accuracy: 76.56%
Logistic Regression, k = 6: Accuracy: 83.38%
Logistic Regression, k = 7: Accuracy: 85.94%
Logistic Regression, k = 8: Accuracy: 89.49%
Logistic Regression, k = 9: Accuracy: 91.76%
Logistic Regression, k = 10: Accuracy: 95.03%
Logistic Regression, k = 11: Accuracy: 96.45%
Logistic Regression, k = 12: Accuracy: 97.59%
Logistic Regression, k = 13: Accuracy: 98.30%
Logistic Regression, k = 14: Accuracy: 100.00%


In [36]:
# SVM with different kernels
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
for k in K_range:
    # Select the top K features using mRMR
    top_k_indices = mRMR(X, 'MID', k)
    X_mrmr = X.loc[:, top_k_indices]
    for kernel in ['linear', 'poly', 'rbf']:
      # Define the SVM classifier with different kernels
      svm = SVC(kernel=kernel)
      svm.fit(X_mrmr, y)
      # Predict on the training data
      y_pred = svm.predict(X_mrmr)
      # Compute the accuracy
      acc = accuracy_score(y, y_pred)
      print(f"Accuracy for k={k} and kernel={kernel}: {acc:.2f}")
      

Accuracy for k=1 and kernel=linear: 0.73
Accuracy for k=1 and kernel=poly: 0.73
Accuracy for k=1 and kernel=rbf: 0.73
Accuracy for k=2 and kernel=linear: 0.73
Accuracy for k=2 and kernel=poly: 0.73
Accuracy for k=2 and kernel=rbf: 0.73
Accuracy for k=3 and kernel=linear: 0.75
Accuracy for k=3 and kernel=poly: 0.73
Accuracy for k=3 and kernel=rbf: 0.73
Accuracy for k=4 and kernel=linear: 0.75
Accuracy for k=4 and kernel=poly: 0.73
Accuracy for k=4 and kernel=rbf: 0.73
Accuracy for k=5 and kernel=linear: 0.75
Accuracy for k=5 and kernel=poly: 0.73
Accuracy for k=5 and kernel=rbf: 0.73
Accuracy for k=6 and kernel=linear: 0.82
Accuracy for k=6 and kernel=poly: 0.73
Accuracy for k=6 and kernel=rbf: 0.73
Accuracy for k=7 and kernel=linear: 0.85
Accuracy for k=7 and kernel=poly: 0.73
Accuracy for k=7 and kernel=rbf: 0.73
Accuracy for k=8 and kernel=linear: 0.89
Accuracy for k=8 and kernel=poly: 0.73
Accuracy for k=8 and kernel=rbf: 0.73
Accuracy for k=9 and kernel=linear: 0.91
Accuracy for k=