In [4]:
from sklearn import datasets
import numpy as np
from sklearn import svm
from sklearn.linear_model import LinearRegression
import optunity
import optunity.metrics
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPRegressor as mlpr
import copy
import random    


def build_set_accordance_bitmask(cur_bitmask, X, Y):
    """Split (X, Y) into train and test parts according to a bitmask.

    Sample ``i`` goes to the training part when ``cur_bitmask[i]`` equals the
    character ``'1'``; every other value sends it to the test part.

    Args:
        cur_bitmask: Sized sequence of characters (e.g. a string of
            ``'0'``/``'1'``) with at least ``len(X)`` entries. Entries past
            ``len(X)`` are ignored.
        X: Sized sequence of feature rows.
        Y: Sized sequence of targets with at least ``len(X)`` entries.

    Returns:
        Tuple ``(X_train, Y_train, X_test, Y_test)`` of lists. Each target is
        wrapped in a one-element list, i.e. ``Y[i]`` is stored as ``[Y[i]]``.

    Raises:
        IndexError: If ``cur_bitmask`` or ``Y`` is shorter than ``X``.
    """
    n_samples = len(X)
    # zip() would silently truncate; fail loudly on short inputs instead.
    if len(cur_bitmask) < n_samples or len(Y) < n_samples:
        raise IndexError("cur_bitmask and Y must have at least len(X) entries")

    X_train, Y_train = [], []
    X_test, Y_test = [], []

    for bit, features, target in zip(cur_bitmask, X, Y):
        if bit == '1':
            X_train.append(features)
            Y_train.append([target])
        else:
            X_test.append(features)
            Y_test.append([target])

    return X_train, Y_train, X_test, Y_test
    
def Train_and_Calc_Q(X_train, Y_train, X_test, Y_test):
    """Fit a LinearRegression on the train part and score it on the test part.

    Args:
        X_train: Feature rows used for fitting.
        Y_train: Targets used for fitting.
        X_test: Feature rows to predict.
        Y_test: Targets the predictions are compared against.

    Returns:
        The value of ``optunity.metrics.mse(Y_test, predictions)``.
    """
    # fit() returns the estimator itself, so the calls can be chained.
    regressor = LinearRegression().fit(X_train, Y_train)
    predicted = regressor.predict(X_test)
    return optunity.metrics.mse(Y_test, predicted)
  

def CCV(X, Y, model_name = 'linear_regression'):
    """Average the test score over every split with non-empty train and test.

    Every bitmask from ``1`` to ``2**len(X) - 2`` is turned into a train/test
    split with ``build_set_accordance_bitmask`` and scored with
    ``Train_and_Calc_Q``. The all-zeros and all-ones masks are skipped because
    they would leave the train or the test part empty. A ``#`` progress mark
    is printed every 10000 masks, followed by a newline at the end.

    Args:
        X: Sized sequence of feature rows.
        Y: Sequence of targets aligned with ``X``.
        model_name: Accepted for interface compatibility; currently not used.

    Returns:
        The mean of the per-split scores over the ``2**len(X) - 2`` splits.

    Raises:
        ValueError: If ``X`` has fewer than two samples, since no split with
            both parts non-empty exists.
    """
    n_samples = len(X)
    if n_samples < 2:
        raise ValueError(
            "CCV needs at least 2 samples to build non-empty train and test sets"
        )

    # Number of masks actually evaluated: all 2**n except all-zeros/all-ones.
    n_splits = 2 ** n_samples - 2
    total_Q = 0.0

    for mask in range(1, n_splits + 1):
        cur_bitmask = bin(mask)[2:].zfill(n_samples)

        X_train, Y_train, X_test, Y_test = build_set_accordance_bitmask(cur_bitmask, X, Y)

        total_Q += Train_and_Calc_Q(X_train, Y_train, X_test, Y_test)

        if mask % 10000 == 0:
            print("#", end = '')

    print()
    # Divide by the count of evaluated splits so the result is a true mean.
    return total_Q / n_splits
    
        
        
def Get_Data(size_of_small_dataset = 20):
    """Draw a small random sample from the sklearn diabetes dataset.

    Row indices are drawn independently with ``random.randint``, so the same
    row can be selected more than once.

    Args:
        size_of_small_dataset: Number of rows to draw. Defaults to 20.

    Returns:
        Tuple ``(X, Y)`` of lists holding the selected rows of ``data.data``
        and the matching entries of ``data.target``.
    """
    data = datasets.load_diabetes()
    x = data.data
    y = data.target

    X = []
    Y = []

    for _ in range(size_of_small_dataset):
        index = random.randint(0, len(x)-1)
        X.append(x[index])
        Y.append(y[index])

    return X, Y
        

def Get_Data_Random_Subsets(X, Y):
    """Split (X, Y) into a random train/test pair with both parts non-empty.

    A mask is drawn uniformly from ``1 .. 2**len(X) - 2`` and handed to
    ``build_set_accordance_bitmask``.

    Args:
        X: Sized sequence of feature rows.
        Y: Sequence of targets aligned with ``X``.

    Returns:
        Tuple ``(X_train, Y_train, X_test, Y_test)`` as produced by
        ``build_set_accordance_bitmask``.

    Raises:
        ValueError: If ``X`` has fewer than two samples, since no split with
            both parts non-empty exists.
    """
    n_samples = len(X)
    if n_samples < 2:
        raise ValueError(
            "need at least 2 samples to build non-empty train and test sets"
        )

    rand_gen = random.Random()
    # randint includes both endpoints: stopping at 2**n - 2 excludes the
    # all-ones mask, which would put every sample in train and none in test.
    mask = rand_gen.randint(1, 2**n_samples - 2)
    cur_bitmask = bin(mask)[2:].zfill(n_samples)

    return build_set_accordance_bitmask(cur_bitmask, X, Y)
  
def max_bound(x, y, N = 100):
    """Print how often the largest sampled score occurs, next to 1/(N+1).

    Draws ``N`` random splits of ``(x, y)`` with ``Get_Data_Random_Subsets``,
    scores each one with ``Train_and_Calc_Q``, then prints the fraction of
    draws equal to the maximum score and the value ``1/(N+1)``.

    Args:
        x: Feature rows passed on to ``Get_Data_Random_Subsets``.
        y: Targets passed on to ``Get_Data_Random_Subsets``.
        N: Number of random splits to draw.

    Returns:
        None. The results are only printed.
    """
    scores = [
        Train_and_Calc_Q(*Get_Data_Random_Subsets(x, y))
        for _ in range(N)
    ]

    largest = max(scores)
    share_of_largest = scores.count(largest) / N

    print('F(cnt_of_max_Q): ', share_of_largest)
    print('1/(N+1): ', 1/(N+1))
    


def Two_side_interval(x, y, N = 120):
    """Print how often the extreme sampled scores occur, next to 2/(N+1).

    Draws ``N`` random splits of ``(x, y)`` with ``Get_Data_Random_Subsets``,
    scores each one with ``Train_and_Calc_Q``, then prints the fraction of
    draws equal to the maximum score, the fraction equal to the minimum
    score, and the value ``2/(N+1)``.

    Args:
        x: Feature rows passed on to ``Get_Data_Random_Subsets``.
        y: Targets passed on to ``Get_Data_Random_Subsets``.
        N: Number of random splits to draw.

    Returns:
        None. The results are only printed.
    """
    Q_list = [
        Train_and_Calc_Q(*Get_Data_Random_Subsets(x, y))
        for _ in range(N)
    ]

    max_Q = max(Q_list)
    cnt_of_max_Q = Q_list.count(max_Q)

    min_Q = min(Q_list)
    cnt_of_min_Q = Q_list.count(min_Q)

    print('Number_of_occurences(cnt_of_max_Q): ', cnt_of_max_Q / N)
    print('Number_of_occurences(cnt_of_min_Q): ', cnt_of_min_Q / N)
    # The label must name the quantity actually printed, which is 2/(N+1).
    print('2/(N+1): ', 2/(N+1))

     
def main():
    """Run the whole experiment and print its results.

    Loads a small sample with ``Get_Data``, prints the ``CCV`` score for it,
    then runs ``max_bound`` and ``Two_side_interval`` on the same sample,
    separated by a line of 20 dashes.
    """
    X, Y = Get_Data()
    Q = CCV(X, Y, 'linear_regression')
    print(Q)

    max_bound(X, Y)
    print('-' * 20)
    Two_side_interval(X, Y)


# Guarded so that importing this module does not start the experiment.
if __name__ == "__main__":
    main()

########################################################################################################
3314141655.932523
F(cnt_of_max_Q):  0.01
1/(N+1):  0.009900990099009901
--------------------
Number_of_occurences(cnt_of_max_Q):  0.008333333333333333
Number_of_occurences(cnt_of_min_Q):  0.008333333333333333
1/(N+1):  0.01652892561983471
