In [47]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import mean_squared_error
import numpy as np
import matplotlib.pyplot as plt
import cv2 as cv
import os

In [48]:
# train logistic regression model
def train_lr(x, y):
    lr_model = LogisticRegression(penalty='none', solver='lbfgs').fit(x, y)
    n_feature = x.shape[1]
    theta = []
    for i in range(0, n_feature + 1):
        if i == 0:
            theta.append(lr_model.intercept_[0])
        else:
            theta.append(lr_model.coef_[0][i - 1])
    return (lr_model, theta)

In [49]:
Children_test = "Image/Children_test"
Children_train = "Image/Children_train"
Adults_test = "Image/Adults_test"
Adults_train = "Image/Adults_train"


def read_img_batch(path, endpoint=None):
    container = []
    for root, dirs, files in os.walk(path):
        for file in files:
            path = os.path.join(root, file)
            container.append((cv.imread(path, cv.IMREAD_GRAYSCALE)))
    return container


# read image from each group
x_children_train = read_img_batch(Children_train)
x_children_test = read_img_batch(Children_test)
x_adults_train = read_img_batch(Adults_train)
x_adults_test = read_img_batch(Adults_test)

# set label according to each image set
# children 0; adults 1
y_children_train = np.zeros(len(x_children_train), dtype=int)
y_children_test = np.zeros(len(x_children_test), dtype=int)
y_adults_train = np.ones(len(x_adults_train), dtype=int)
y_adults_test = np.ones(len(x_adults_test), dtype=int)

# combine training set and testing set
x_train = np.array(x_children_train + x_adults_train)
y_train = np.append(y_children_train, y_adults_train)
x_test = np.array(x_children_test + x_adults_test)
y_test = np.append(y_children_test, y_adults_test)

def cross_validation(model, x, y, k):
    kf = StratifiedKFold(n_splits=k)
    mean_squared_errors = []
    for train, test in kf.split(x):
        model.fit(x.iloc[train, :], y.iloc[train])
        y_pred = model.predict(x.iloc[test, :])
        mean_squared_errors.append(mean_squared_error(y[test], y_pred))
    mean_err = np.array(mean_squared_errors).mean()
    std_dev = np.array(mean_squared_errors).std()
    return (mean_err, std_dev)

def plotter_c_error(c_values, mean_square_errors, std_devs, title):
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.set_title(title)
    ax.set_xlabel(r"$C$")
    ax.set_ylabel("Mean squared error")
    ax.errorbar(c_values, mean_square_errors, yerr=std_devs, label='mean squared error')
    ax.legend()
    plt.show()

In [51]:
x_train.shape

AttributeError: 'list' object has no attribute 'shape'

In [50]:
model = train_lr(x_train, y_train)
mean_err, std_dev = cross_validation(model, x_train, y_train, 5)

ValueError: Found array with dim 3. Estimator expected <= 2.