In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import LocalOutlierFactor
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Load the OASIS longitudinal CSV into a DataFrame
dataset = pd.read_csv('/content/oasis_longitudinal.csv')

# Have a peek at the data
# head() returns the first 5 rows
print(dataset.head())

  Subject ID         MRI ID        Group  Visit  MR Delay M/F Hand  Age  EDUC  \
0  OAS2_0001  OAS2_0001_MR1  Nondemented      1         0   M    R   87    14   
1  OAS2_0001  OAS2_0001_MR2  Nondemented      2       457   M    R   88    14   
2  OAS2_0002  OAS2_0002_MR1     Demented      1         0   M    R   75    12   
3  OAS2_0002  OAS2_0002_MR2     Demented      2       560   M    R   76    12   
4  OAS2_0002  OAS2_0002_MR3     Demented      3      1895   M    R   80    12   

   SES  MMSE  CDR  eTIV   nWBV    ASF  
0  2.0  27.0  0.0  1987  0.696  0.883  
1  2.0  30.0  0.0  2004  0.681  0.876  
2  NaN  23.0  0.5  1678  0.736  1.046  
3  NaN  28.0  0.5  1738  0.713  1.010  
4  NaN  22.0  0.5  1698  0.701  1.034  


In [3]:
# Summarize each column's dtype and non-null count
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 373 entries, 0 to 372
Data columns (total 15 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Subject ID  373 non-null    object 
 1   MRI ID      373 non-null    object 
 2   Group       373 non-null    object 
 3   Visit       373 non-null    int64  
 4   MR Delay    373 non-null    int64  
 5   M/F         373 non-null    object 
 6   Hand        373 non-null    object 
 7   Age         373 non-null    int64  
 8   EDUC        373 non-null    int64  
 9   SES         354 non-null    float64
 10  MMSE        371 non-null    float64
 11  CDR         373 non-null    float64
 12  eTIV        373 non-null    int64  
 13  nWBV        373 non-null    float64
 14  ASF         373 non-null    float64
dtypes: float64(5), int64(5), object(5)
memory usage: 43.8+ KB


In [4]:
# Discard the identifier, handedness and visit-tracking columns
dataset = dataset.drop(columns=['Subject ID', 'MRI ID', 'Hand', 'Visit', 'MR Delay'])
dataset.shape

(373, 10)

In [5]:
# Count how many rows carry each 'Group' label
dataset['Group'].value_counts()

Nondemented    190
Demented       146
Converted       37
Name: Group, dtype: int64

In [6]:
# Fold the 'Converted' label into 'Demented'
dataset['Group'] = dataset['Group'].replace({'Converted': 'Demented'})

In [7]:
# Encode the categorical columns as integers:
#   'M/F':   F -> 0, M -> 1
#   'Group': Nondemented -> 0, Demented -> 1
dataset['M/F'] = dataset['M/F'].replace({'F': 0, 'M': 1})
dataset['Group'] = dataset['Group'].replace({'Demented': 1, 'Nondemented': 0})

In [8]:
# Display the frame after 'M/F' and 'Group' were replaced with integer codes
dataset

Unnamed: 0,Group,M/F,Age,EDUC,SES,MMSE,CDR,eTIV,nWBV,ASF
0,0,1,87,14,2.0,27.0,0.0,1987,0.696,0.883
1,0,1,88,14,2.0,30.0,0.0,2004,0.681,0.876
2,1,1,75,12,,23.0,0.5,1678,0.736,1.046
3,1,1,76,12,,28.0,0.5,1738,0.713,1.010
4,1,1,80,12,,22.0,0.5,1698,0.701,1.034
...,...,...,...,...,...,...,...,...,...,...
368,1,1,82,16,1.0,28.0,0.5,1693,0.694,1.037
369,1,1,86,16,1.0,26.0,0.5,1688,0.675,1.040
370,0,0,61,13,2.0,30.0,0.0,1319,0.801,1.331
371,0,0,63,13,2.0,30.0,0.0,1327,0.796,1.323


In [9]:
# Count missing values per column
dataset.isnull().sum()

Group     0
M/F       0
Age       0
EDUC      0
SES      19
MMSE      2
CDR       0
eTIV      0
nWBV      0
ASF       0
dtype: int64

In [10]:
# Fill missing SES with the most frequent value (first mode)
dataset['SES'] = dataset['SES'].fillna(value= dataset['SES'].mode().iloc[0])
# Fill missing MMSE with the median (not the mean)
dataset['MMSE'] = dataset['MMSE'].fillna(value =dataset['MMSE'].median())

In [11]:
# Display the frame after the missing SES and MMSE values were filled
dataset

Unnamed: 0,Group,M/F,Age,EDUC,SES,MMSE,CDR,eTIV,nWBV,ASF
0,0,1,87,14,2.0,27.0,0.0,1987,0.696,0.883
1,0,1,88,14,2.0,30.0,0.0,2004,0.681,0.876
2,1,1,75,12,2.0,23.0,0.5,1678,0.736,1.046
3,1,1,76,12,2.0,28.0,0.5,1738,0.713,1.010
4,1,1,80,12,2.0,22.0,0.5,1698,0.701,1.034
...,...,...,...,...,...,...,...,...,...,...
368,1,1,82,16,1.0,28.0,0.5,1693,0.694,1.037
369,1,1,86,16,1.0,26.0,0.5,1688,0.675,1.040
370,0,0,61,13,2.0,30.0,0.0,1319,0.801,1.331
371,0,0,63,13,2.0,30.0,0.0,1327,0.796,1.323


In [12]:
import os
import pandas as pd
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
from cryptography.hazmat.backends import default_backend
import base64
from cryptography.hazmat.primitives import padding
from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC
from cryptography.hazmat.primitives.hashes import SHA256
from sklearn.model_selection import train_test_split

# Function to generate a secure key using PBKDF2
def generate_key(password, salt, iterations=100000):
    """Derive a 32-byte key from a password with PBKDF2-HMAC-SHA256.

    Args:
        password: The secret to stretch, as ``bytes``. A ``str`` is accepted
            as a convenience and is UTF-8 encoded before derivation.
        salt: Salt bytes mixed into the derivation. The same salt must be
            supplied again to reproduce the same key.
        iterations: Number of PBKDF2 rounds. Defaults to 100000, the value
            this notebook has always used; raise it for stronger resistance
            to brute force at the cost of a slower derivation.

    Returns:
        bytes: The derived 32-byte key (the length AES-256 expects).
    """
    if isinstance(password, str):
        # kdf.derive only takes bytes-like input
        password = password.encode('utf-8')

    kdf = PBKDF2HMAC(
        algorithm=SHA256(),
        length=32,
        salt=salt,
        iterations=iterations,
        backend=default_backend()
    )
    return kdf.derive(password)

# Encrypt using AES algorithm in CFB mode
def encrypt_df(df, key, iv, skip_columns=('Group',)):
    """Encrypt every cell of ``df`` with AES-CFB and return base64 strings.

    Each value is turned into text with ``str()``, UTF-8 encoded, encrypted
    with a fresh encryptor created from the same ``key``/``iv`` pair, and
    base64-encoded. Columns listed in ``skip_columns`` are copied through
    unchanged.

    NOTE(review): every cell is encrypted from the same key and IV, so equal
    plaintexts produce equal ciphertexts. The downstream cells rely on that
    determinism to train on the encoded values, but it also means the output
    reveals which cells are equal. Do not treat it as strong confidentiality.

    Args:
        df: DataFrame to encrypt.
        key: AES key bytes.
        iv: Initialization vector bytes for CFB mode.
        skip_columns: Column name, or collection of column names, to leave
            unencrypted. Defaults to ``('Group',)``, the target column.

    Returns:
        pandas.DataFrame: Same columns and row index as ``df``; encrypted
        columns hold base64 text, skipped columns hold the original values.
    """
    if isinstance(skip_columns, str):
        # A bare string would otherwise be matched by substring, not by name
        skip_columns = (skip_columns,)

    cipher = Cipher(algorithms.AES(key), modes.CFB(iv), backend=default_backend())

    def _encrypt_value(value):
        # An encryptor is single-use, so build one per cell
        encryptor = cipher.encryptor()
        ciphertext = encryptor.update(str(value).encode('utf-8')) + encryptor.finalize()
        return base64.b64encode(ciphertext).decode()

    encrypted_data = {}
    for col in df.columns:
        if col in skip_columns:
            encrypted_data[col] = df[col].tolist()
        else:
            encrypted_data[col] = [_encrypt_value(value) for value in df[col]]

    # Keep the caller's row labels so the result lines up with ``df``
    return pd.DataFrame(encrypted_data, index=df.index)

# Convert base64-encoded DataFrame to integer
def base64_to_int(df):
    """Decode every base64 string in ``df`` and return it as an integer.

    Each cell is base64-decoded and the resulting bytes are read as one
    big-endian unsigned integer. An empty string decodes to ``0``.

    Args:
        df: DataFrame whose cells are all base64 text (``str``).

    Returns:
        pandas.DataFrame: Same shape and labels as ``df`` with integer cells.
        NOTE(review): values decoded from more than 8 bytes will not fit in
        int64; confirm the resulting dtype if longer inputs are expected.
    """
    def _decode(cell):
        return int.from_bytes(base64.b64decode(cell.encode()), byteorder='big')

    # DataFrame.applymap is deprecated since pandas 2.1 in favour of
    # DataFrame.map; use whichever this pandas version provides.
    if hasattr(pd.DataFrame, 'map'):
        return pd.DataFrame.map(df, _decode)
    return pd.DataFrame.applymap(df, _decode)

# Example password and salt (for demonstration purposes only)
# NOTE(review): the password is hard-coded in the notebook; read it from an
# environment variable or a prompt before using this outside a demo.
password = b'SecurePassword123'
# A fresh random 16-byte salt is drawn on every run, so the derived key
# differs from run to run.
salt = os.urandom(16)
print(salt)

# Generate a secure key
key = generate_key(password, salt)

# Generate a random IV (Initialization Vector)
# NOTE(review): salt and IV are only printed here, not stored; values encrypted
# in one run cannot be reproduced in another unless both are saved.
iv = os.urandom(16)
print(iv)

# Encrypt the DataFrame
dataset_encrypted = encrypt_df(dataset, key, iv)

# The remaining lines are disabled code, kept as comments.
# NOTE(review): they refer to `loan_dataset_encrypted`, which is not defined in
# the visible cells.
# # Split the dataset into train and test
# train_encrypted, test_encrypted = train_test_split(loan_dataset_encrypted, test_size=0.2, random_state=42)

# # Convert DataFrame to base64
# train_base64 = train_encrypted.applymap(lambda x: base64.b64encode(x.encode()).decode())
# test_base64 = test_encrypted.applymap(lambda x: base64.b64encode(x.encode()).decode())

# # Convert base64-encoded DataFrame to integer for training and testing
# train_decrypted = base64_to_int(train_base64)
# test_decrypted = base64_to_int(test_base64)


b'd\xfa\xce\xb4\xe5\x8b\xfa\xda\xd4\xb3cL\t\x08\xc5\xe1'
b'\xd3F\x9d\xcf\x9e9\xfd4X\xa0\xe8\xd1\xb2\xa4\x97\\'


In [13]:
# Display the encrypted frame ('Group' is passed through unencrypted)
dataset_encrypted

Unnamed: 0,Group,M/F,Age,EDUC,SES,MMSE,CDR,eTIV,nWBV,ASF
0,0,/g==,9xY=,/hU=,/Q9q,/RZ0DA==,/w9q,/hhiCw==,/w9sBew=,/w9iBOk=
1,0,/g==,9xk=,/hU=,/Q9q,/BF0DA==,/w9q,/RFqCA==,/w9sBOs=,/w9iC+w=
2,1,/g==,+BQ=,/hM=,/Q9q,/RJ0DA==,/w9v,/hdtBA==,/w9tD+w=,/g9qCOw=
3,1,/g==,+Bc=,/hM=,/Q9q,/Rl0DA==,/w9v,/hZpBA==,/w9tDek=,/g9qDQ==
4,1,/g==,9xE=,/hM=,/Q9q,/RN0DA==,/w9v,/hdjBA==,/w9tDOs=,/g9qD+4=
...,...,...,...,...,...,...,...,...,...,...
368,1,/g==,9xM=,/hc=,/g9q,/Rl0DA==,/w9v,/hdjDw==,/w9sBe4=,/g9qD+0=
369,1,/g==,9xc=,/hc=,/g9q,/Rd0DA==,/w9v,/hdiBA==,/w9sC+8=,/g9qCA==
370,0,/w==,+RA=,/hI=,/Q9q,/BF0DA==,/w9q,/hJrBQ==,/w9iDOs=,/g9pD+s=
371,0,/w==,+RI=,/hI=,/Q9q,/BF0DA==,/w9q,/hJoCw==,/w9tBew=,/g9pDuk=


In [14]:
# NOTE(review): this repeats the previous cell's display of dataset_encrypted
dataset_encrypted

Unnamed: 0,Group,M/F,Age,EDUC,SES,MMSE,CDR,eTIV,nWBV,ASF
0,0,/g==,9xY=,/hU=,/Q9q,/RZ0DA==,/w9q,/hhiCw==,/w9sBew=,/w9iBOk=
1,0,/g==,9xk=,/hU=,/Q9q,/BF0DA==,/w9q,/RFqCA==,/w9sBOs=,/w9iC+w=
2,1,/g==,+BQ=,/hM=,/Q9q,/RJ0DA==,/w9v,/hdtBA==,/w9tD+w=,/g9qCOw=
3,1,/g==,+Bc=,/hM=,/Q9q,/Rl0DA==,/w9v,/hZpBA==,/w9tDek=,/g9qDQ==
4,1,/g==,9xE=,/hM=,/Q9q,/RN0DA==,/w9v,/hdjBA==,/w9tDOs=,/g9qD+4=
...,...,...,...,...,...,...,...,...,...,...
368,1,/g==,9xM=,/hc=,/g9q,/Rl0DA==,/w9v,/hdjDw==,/w9sBe4=,/g9qD+0=
369,1,/g==,9xc=,/hc=,/g9q,/Rd0DA==,/w9v,/hdiBA==,/w9sC+8=,/g9qCA==
370,0,/w==,+RA=,/hI=,/Q9q,/BF0DA==,/w9q,/hJrBQ==,/w9iDOs=,/g9pD+s=
371,0,/w==,+RI=,/hI=,/Q9q,/BF0DA==,/w9q,/hJoCw==,/w9tBew=,/g9pDuk=


In [15]:
# Feature matrix: every encrypted column except 'CDR'
X = dataset_encrypted.loc[:, ['M/F', 'Age', 'EDUC', 'SES', 'MMSE', 'eTIV', 'nWBV', 'ASF']]
# Labels come from the unencrypted 'Group' column, as a NumPy array
Y = dataset_encrypted['Group'].values

In [16]:
# Feature names grouped by kind.
# NOTE(review): neither list is referenced by the later cells visible here.
numerical_features = ['Age', 'EDUC',  'MMSE', 'eTIV', 'nWBV', 'ASF']
categorical_features = ['M/F', 'SES']

In [17]:
# Wrap each ciphertext string in a second base64 layer. base64_to_int strips
# that layer again, so each resulting integer is the big-endian value of the
# ciphertext string's own bytes.
# DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map;
# use whichever this pandas version provides.
_apply_cellwise = pd.DataFrame.map if hasattr(pd.DataFrame, 'map') else pd.DataFrame.applymap
data_base64 = _apply_cellwise(X, lambda x: base64.b64encode(x.encode()).decode())

# Convert base64-encoded DataFrame to integer
data_final = base64_to_int(data_base64)

In [18]:
# Use the integer-encoded frame as the model feature matrix.
# NOTE(review): this rebinds X to integers, so re-running the earlier cell that
# calls x.encode() on X's cells will fail unless X is rebuilt first.
X = data_final

In [19]:
# Number of distinct encoded values in each feature column
data_final.nunique()

M/F       2
Age      39
EDUC     12
SES       5
MMSE     18
eTIV    286
nWBV    136
ASF     265
dtype: int64

In [20]:
# Check the dtype of each feature column
X.dtypes

M/F     int64
Age     int64
EDUC    int64
SES     int64
MMSE    int64
eTIV    int64
nWBV    int64
ASF     int64
dtype: object

In [21]:
# Hold out 20% of the rows for testing, with a fixed seed for repeatability
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)
print(x_train.shape, x_test.shape, sep='\n')

(298, 8)
(75, 8)


In [22]:
from sklearn.svm import SVC
from sklearn.metrics import classification_report

# Fit a default-parameter support vector classifier and score it on the hold-out rows.
# NOTE(review): the features are raw integers of very different magnitudes and are
# not scaled; that may explain the weak report below and is worth confirming.
model = SVC().fit(x_train, y_train)
y_pred = model.predict(x_test)

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.42      0.97      0.59        32
           1       0.50      0.02      0.04        43

    accuracy                           0.43        75
   macro avg       0.46      0.50      0.32        75
weighted avg       0.47      0.43      0.28        75



In [23]:
from xgboost import XGBClassifier

# Gradient-boosted trees with default settings.
# `model` is rebound here, so later uses of `model` refer to this classifier.
model = XGBClassifier().fit(x_train, y_train)
y_pred = model.predict(x_test)

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.74      0.88      0.80        32
           1       0.89      0.77      0.82        43

    accuracy                           0.81        75
   macro avg       0.81      0.82      0.81        75
weighted avg       0.83      0.81      0.81        75



In [24]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report

# Single decision tree, seeded so repeated runs build the same tree
tree_model = DecisionTreeClassifier(random_state=42).fit(x_train, y_train)
y_pred_tree = tree_model.predict(x_test)

print(
    "Decision Tree Classification Report:",
    classification_report(y_test, y_pred_tree),
    sep="\n",
)


Decision Tree Classification Report:
              precision    recall  f1-score   support

           0       0.72      0.72      0.72        32
           1       0.79      0.79      0.79        43

    accuracy                           0.76        75
   macro avg       0.75      0.75      0.75        75
weighted avg       0.76      0.76      0.76        75



In [25]:
# Display the integer-encoded feature matrix
X

Unnamed: 0,M/F,Age,EDUC,SES,MMSE,eTIV,nWBV,ASF
0,795295037,964188477,795366717,793852273,3409887031222943037,3416095118673263933,3420265609209149245,3420265566258031421
1,795295037,964193085,795366717,793852273,3405361441363017021,3409865320146484541,3420265609207706429,3420265566272452413
2,795295037,725766461,795364669,793852273,3409869439036898621,3416090767851076925,3420265613533869885,3415762001007179581
3,795295037,725771069,795364669,793852273,3409906822432243005,3416079755554929981,3420265613537667901,3415762001024073021
4,795295037,964183357,795364669,793852273,3409873837083409725,3416090724901403965,3420265613536228157,3415762001021580349
...,...,...,...,...,...,...,...,...
368,795295037,964185405,795370301,795294065,3409906822432243005,3416090724938497341,3420265609209132093,3415762001021579325
369,795295037,964191037,795370301,795294065,3409898026339220797,3416090720606436669,3420265609222109245,3415762001006247229
370,796343613,726810941,795363645,793852273,3405361441363017021,3416062171959868733,3420265566291587901,3415761996726629181
371,796343613,726812989,795363645,793852273,3405361441363017021,3416062159094234429,3420265613504116541,3415761996731476797


In [26]:
import pandas as pd

# One integer-encoded sample, with columns in the same order as the training features.
# NOTE(review): these encoded values do not match any shown in the X display
# (e.g. 'M/F'); confirm they were produced with the same key and IV as the
# training data.
new_data = pd.DataFrame([{
    'M/F': 1951481149,
    'Age': 1932217661,
    'EDUC': 1951356221,
    'SES': 1949264440,
    'MMSE': 8391721684730723645,
    'eTIV': 8381014391392779581,
    'nWBV': 8387508111361011517,
    'ASF': 8381315657576970045,
}])

# Classify the sample with whichever classifier `model` currently refers to
predictions = model.predict(new_data)

# Display the predictions
print(predictions)

[1]


In [27]:
import pandas as pd

# One integer-encoded sample, with columns in the same order as the training features.
# NOTE(review): these encoded values do not match any shown in the X display
# (e.g. 'M/F'); confirm they were produced with the same key and IV as the
# training data.
new_data = pd.DataFrame([{
    'M/F': 1950432573,
    'Age': 1986348349,
    'EDUC': 1951353149,
    'SES': 1949264440,
    'MMSE': 8370863949151812925,
    'eTIV': 8380969315727785277,
    'nWBV': 8387508136911253565,
    'ASF': 8387508111142905661,
}])

# Classify the sample with whichever classifier `model` currently refers to
predictions = model.predict(new_data)

# Display the predictions
print(predictions)

[1]


In [28]:
import pandas as pd

# One integer-encoded sample, with columns in the same order as the training features.
# NOTE(review): these encoded values do not match any shown in the X display
# (e.g. 'M/F'); confirm they were produced with the same key and IV as the
# training data.
new_data = pd.DataFrame([{
    'M/F': 1950432573,
    'Age': 1937067325,
    'EDUC': 1951355197,
    'SES': 1949264440,
    'MMSE': 8370881541337857341,
    'eTIV': 8380992435300416829,
    'nWBV': 8387508141426815037,
    'ASF': 8381315696249876541,
}])

# Classify the sample with whichever classifier `model` currently refers to
predictions = model.predict(new_data)

# Display the predictions
print(predictions)

[1]


In [30]:
# Display the label array
Y

array([0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0,
       0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1,
       1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1,
       1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0,
       0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0,
       0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
       1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
       1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
       0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1,
       1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
       1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,