### Importing libraries

In [22]:
import pandas as pd

from sklearn.pipeline import Pipeline
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.naive_bayes import GaussianNB

from sklearn.linear_model import LogisticRegressionCV
from sklearn.linear_model import LinearRegression

### Loading data

##### Processed dataset with pre-selected features (by Random Forest)

In [23]:
# Kept for reference: loads the processed CSVs instead of the raw ones.
# If enabled, skip the raw-data cell below, which rebinds the same two names.
# data_train = pd.read_csv('data/processed/train_meaningfull.csv')
# data_test = pd.read_csv('data/processed/test_meaningfull.csv')

##### Raw dataset (>500 features)

In [24]:
# Read the raw train and test splits in one pass; the cells below expect each
# frame to contain an 'Activity' label column.
data_train, data_test = map(pd.read_csv, ('data/raw/train.csv', 'data/raw/test.csv'))

##### Processing data

In [25]:
# Separate the 'Activity' label from the feature columns of the training split.
y_train = data_train['Activity']
X_train = data_train.drop(columns='Activity')

# Indicator form of the target (one column per label value), used as the
# multi-output target by the linear-regression cells.
y_train_encoded = pd.get_dummies(y_train)

In [26]:
# Separate the 'Activity' label from the feature columns of the test split.
X_test = data_test.drop('Activity', axis='columns')
y_test = data_test['Activity']

# Encode against the training label columns rather than independently: a label
# that is missing from (or only present in) the test split would otherwise give
# a different set/order of indicator columns and silently misalign the targets
# that the regressors were fitted on. When both splits hold the same labels the
# result is identical to a plain pd.get_dummies(y_test).
y_test_encoded = pd.get_dummies(y_test).reindex(
    columns=y_train_encoded.columns, fill_value=0
)

### Testing performance of different models

#### Trying Linear Discriminant Analysis (LDA)

In [27]:
# Fit LDA as a classifier on the training features; the cell's displayed value
# is its score on the test split.
lda = LDA().fit(X_train, y_train)
lda.score(X_test, y_test)

0.9643705463182898

#### Trying Gaussian Naive Bayes Classifier

In [28]:
# Same protocol with Gaussian Naive Bayes: fit on train, display the test score.
clf = GaussianNB().fit(X_train, y_train)
clf.score(X_test, y_test)

0.7702748557855447

#### Trying Linear Regression (on one-hot encoded targets; the score is $R^2$, not accuracy)

In [29]:
# Least-squares fit against the one-hot label matrix. The displayed value is
# the regressor's R^2 on those indicator columns, so it is not on the same
# scale as the accuracy figures reported by the classifiers above.
linReg = LinearRegression().fit(X_train, y_train_encoded)
linReg.score(X_test, y_test_encoded)

0.7746125566264807

### Joining LDA transformation with Linear and Logistic Regression

In [30]:
# Refit LDA and keep its projection of both splits as input for the
# regression models that follow.
lda = LDA()
lda.fit(X_train, y_train)
X_transformed_train = lda.transform(X_train)
X_transformed_test = lda.transform(X_test)

# Baseline: LDA used directly as the classifier.
score_lda = lda.score(X_test, y_test)
print('LDA test score =', score_lda)

  
# Cross-validated logistic regression trained on the LDA-projected features.
logReg = LogisticRegressionCV(solver='liblinear').fit(X_transformed_train, y_train)

score_logisticReg = logReg.score(X_transformed_test, y_test)
print('Logistic Regression test score =', score_logisticReg)

# Linear regression on the LDA-projected features against the one-hot labels.
# The printed score is R^2, not classification accuracy.
linearReg = LinearRegression().fit(X_transformed_train, y_train_encoded)

score_linearReg = linearReg.score(X_transformed_test, y_test_encoded)
print('Linear Regression test score =', score_linearReg)

LDA test score = 0.9643705463182898
Logistic Regression test score = 0.9630132337970818
Linear Regression test score = 0.7748996203372972
