In [2]:
from src.features import pse_knc
from src.data import Species, load_psi
from src.experiment.report import Report
from src.experiment.k_fold_report import KFoldReport

from sklearn.model_selection import KFold
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

In [25]:
# Load the PSI datasets per species. The second positional argument is True
# for the *_test_dataset variables and False for the *_train_dataset ones --
# presumably a "test split" flag; verify against load_psi's signature.
human_test_dataset, human_train_dataset = (
    load_psi(Species.human, True),
    load_psi(Species.human, False),
)

# Only the False-flag (train) variant is loaded for mouse in this notebook.
mouse_train_dataset = load_psi(Species.mouse, False)

yeast_test_dataset, yeast_train_dataset = (
    load_psi(Species.yeast, True),
    load_psi(Species.yeast, False),
)

In [26]:
# Shared feature encoder, built with default arguments. experiment() reads
# this module-level name as a global, so this cell must run before any call.
encoder = pse_knc.Encoder()

In [21]:
def experiment(k, train_dataset, test_dataset, model_factory):
    """Cross-validate a model on the training set, then score it on the test set.

    Features are produced by the module-level ``encoder``; the function fails
    with ``NameError`` if that global has not been defined yet.

    Parameters
    ----------
    k : int
        Number of cross-validation folds (passed to ``KFold(n_splits=k)``).
    train_dataset, test_dataset
        Objects exposing ``samples`` and ``targets`` attributes.
        ``train_dataset.targets`` is indexed with the index arrays yielded by
        ``KFold.split`` -- assumes a NumPy-style array; TODO confirm.
    model_factory : callable
        Zero-argument callable returning a fresh, unfitted estimator that
        has a ``fit(x, y)`` method. It is called once per fold and once more
        for the final model.

    Returns
    -------
    tuple
        ``(k_fold_report, report)``: the ``KFoldReport`` built from the
        per-fold reports, and the ``Report`` of a model fitted on the whole
        training set and evaluated on ``test_dataset``.
    """
    # Fixed seed keeps the fold assignment reproducible between runs.
    k_fold = KFold(n_splits=k, shuffle=True, random_state=42)

    # NOTE(review): the encoder is fitted on the full training set before the
    # folds are split. If Encoder.fit learns anything from the data, every
    # fold's held-out part has leaked into the features -- confirm.
    x = encoder.fit_transform(train_dataset.samples)
    y = train_dataset.targets

    reports = []
    for train_index, test_index in k_fold.split(x):
        # A fresh model per fold so no fitted state carries over.
        fold_model = model_factory()
        fold_model.fit(x[train_index], y[train_index])
        reports.append(
            Report.create_report(fold_model, (x[test_index], y[test_index]))
        )
    k_fold_report = KFoldReport(reports)

    # NOTE(review): this re-fits the shared encoder on the test samples. If
    # the encoder is stateful, the test features may not be comparable with
    # the training features; a plain transform would be the usual choice,
    # but the Encoder API is not visible here -- confirm before changing.
    test_x = encoder.fit_transform(test_dataset.samples)
    test_y = test_dataset.targets

    # Final model: trained on all training data, scored on the held-out set.
    model = model_factory()
    model.fit(x, y)
    report = Report.create_report(model, (test_x, test_y))

    return k_fold_report, report

In [22]:
# Human train/test data, k=5, LogisticRegression with default hyperparameters.
# The class itself serves as the zero-argument model factory.
experiment(5, human_train_dataset, human_test_dataset, LogisticRegression)

0.51
