In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn import svm

#### Read the dataset

In [None]:
# Load the dataset from the CSV file (path is relative to the working
# directory), then show the dtype pandas inferred for every column.
df = pd.read_csv(filepath_or_buffer='APIC_ESTEE_LDA_SVM.csv')
df.dtypes

In [None]:
# df.columns

In [None]:
# Pick the columns used for the module-1 analysis and normalise three of the
# labels via an explicit old-name -> new-name mapping.
module1_df = df[['PM2.5_group', 'BC_group', 'mcp_1', 'leptin', 'PAI_1', 'TNFa']].rename(
    columns={'PM2.5_group': 'PM2.5_Group', 'mcp_1': 'MCP_1', 'TNFa': 'TNF_a'}
)

# Features: the four measurement columns.
module1_data = module1_df[['MCP_1', 'leptin', 'PAI_1', 'TNF_a']]
# Outcome: PM2.5_Group, selected with a list so it stays a one-column
# DataFrame rather than a Series.
module1_target = module1_df[['PM2.5_Group']]

In [None]:
# standarlize the module1_data
def standarlize(arr):
    """Standardize values to zero mean and unit standard deviation (z-score).

    Parameters
    ----------
    arr : array-like
        Numeric values, e.g. a NumPy array or a single DataFrame column
        handed in by ``DataFrame.apply``.

    Returns
    -------
    Same kind of object as ``arr - mean``
        ``(arr - mean) / sd``, with both statistics taken from ``np.mean``
        and ``np.std``. When the standard deviation is exactly zero (every
        value equal), the centred values (all zeros) are returned instead of
        dividing by zero.
    """
    arr_mean = np.mean(arr)
    arr_sd = np.std(arr)
    centered = arr - arr_mean
    # A constant input has sd == 0; dividing would turn every entry into NaN.
    # The np.ndim check restricts the guard to scalar statistics so that
    # non-scalar results keep going through the plain division below.
    if np.ndim(arr_sd) == 0 and arr_sd == 0:
        return centered
    return centered / arr_sd

# test_arr = np.array([1, 2, 3, 4, 5])
# standarlize(test_arr)

# Z-score every feature column on its own (axis='index' passes standarlize
# one column at a time), then preview the first five rows.
# NOTE(review): the mean/sd used here are computed on the whole table, before
# the train/validation split performed later in this file -- confirm that
# scaling before splitting is intended.
module1_data = module1_data.apply(standarlize, axis='index')
module1_data.head(n=5)

#### Try SVM on module1_df: hold-out split, then 5-fold cross-validation

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 40% of the rows for validation; random_state=0 pins the shuffle so
# the same split is produced on every run.
(
    module1_data_train,
    module1_data_validate,
    module1_target_train,
    module1_target_test,
) = train_test_split(
    module1_data,
    module1_target,
    test_size=0.4,
    random_state=0,
)

In [None]:
# Sanity check: print the (features, target) shapes for the training part
# and then for the held-out part of the split.
for _features, _labels in (
    (module1_data_train, module1_target_train),
    (module1_data_validate, module1_target_test),
):
    print(_features.shape, "  ", _labels.shape)

In [None]:
# Hold-out evaluation of an SVM classifier on the train/validation split.
# The original author recorded a score of 0.61 for kernel='linear', C=1 on
# this split; the sigmoid kernel is tried here instead.
# (help(svm.SVC) lists the available kernels and parameters.)
apic_svm = svm.SVC(C=1, kernel='sigmoid')

# np.ravel flattens the one-column target frames into 1-D label arrays.
apic_svm.fit(module1_data_train, np.ravel(module1_target_train))
apic_svm.score(module1_data_validate, np.ravel(module1_target_test))

In [None]:
from sklearn.model_selection import cross_val_score

# 5-fold cross-validation of a polynomial-kernel SVM over the full feature
# table; each fold is scored by ROC AUC, giving one value per fold.
apic_svm = svm.SVC(C=1, kernel='poly')
scores = cross_val_score(
    apic_svm,
    module1_data,
    np.ravel(module1_target),
    scoring='roc_auc',
    cv=5,
)
print("Scores: ", scores)

In [None]:
# `scores` holds per-fold ROC AUC values (cross_val_score was called with
# scoring='roc_auc'), so the summary is labelled ROC AUC rather than accuracy.
# The +/- term is two standard deviations of the fold scores.
print("ROC AUC: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

In [None]:
# Another option is to use an iterable yielding (train, test) splits as arrays of indices.
# Iterator review: design an iterator that outputs 0 1 1 2 3 5 8 ...

class fib_iterator:
    """Iterator over the Fibonacci numbers 0, 1, 1, 2, 3, 5, 8, ... below a limit.

    Parameters
    ----------
    limit : int, optional
        Exclusive upper bound on the values produced (default 500).

    Notes
    -----
    State is initialised in ``__init__`` so that ``next()`` works on a fresh
    instance without calling ``iter()`` first. ``__iter__`` rewinds the
    sequence to 0, so every ``for`` loop over the same object starts from
    the beginning.
    """

    def __init__(self, limit=500):
        self.limit = limit
        self.curr_num = 0
        self.next_num = 1

    def __iter__(self):
        """Rewind to the start of the sequence and return this object."""
        self.curr_num = 0
        self.next_num = 1
        return self

    def __next__(self):
        """Return the current Fibonacci number and advance by one step.

        Raises
        ------
        StopIteration
            Once the current number is no longer below ``limit``.
        """
        if self.curr_num >= self.limit:
            raise StopIteration
        x = self.curr_num
        # Simultaneous update: new current = old next, new next = old sum.
        self.curr_num, self.next_num = self.next_num, self.curr_num + self.next_num
        return x
    
# Build a ready-to-use iterator over the Fibonacci values.
myiter = iter(fib_iterator())

# Example usage, left disabled. Written as comments rather than a bare
# triple-quoted string so the cell does not echo the string as its output.
# for x in myiter:
#     print(x)