In [49]:
import matplotlib.pyplot as plt
import scipy.stats as stats
import numpy as np
import math
import csv
import pandas as pd


def generate_selections(correlation, mean_x, mean_y, std_x, std_y, size, random_state=None):
    """Draw a sample from a bivariate normal distribution.

    Parameters
    ----------
    correlation : float
        Correlation coefficient between the two components.
    mean_x, mean_y : float
        Means of the first and second component.
    std_x, std_y : float
        Standard deviations of the first and second component.
    size : int
        Number of observations to draw.
    random_state : optional
        Forwarded to ``rvs``; pass a seed or generator to make the draw
        reproducible. ``None`` keeps the previous (unseeded) behaviour.

    Returns
    -------
    numpy.ndarray
        The drawn observations; column 0 holds x and column 1 holds y.
    """
    # cov(X, Y) = rho * sigma_x * sigma_y, and the diagonal of a covariance
    # matrix holds variances (sigma ** 2), not standard deviations.
    cov_xy = correlation * std_x * std_y
    cov_matrix = [[std_x ** 2, cov_xy], [cov_xy, std_y ** 2]]
    distribution = stats.multivariate_normal(mean=[mean_x, mean_y], cov=cov_matrix)
    return distribution.rvs(size=size, random_state=random_state)

def generate_complex_selection(correlation1, mean_x1, mean_y1, std_x1, std_y1, correlation2, mean_x2, mean_y2, std_x2,
                               std_y2, size):
    """Combine two independently generated samples with weights 0.9 and 0.1.

    The first five arguments and the next five arguments are forwarded, in
    order, to two separate ``generate_selections`` calls of ``size``
    observations each. The result is ``0.9 * first + 0.1 * second``.

    NOTE(review): this is a weighted sum of the two draws, not a draw from a
    0.9/0.1 mixture distribution -- confirm which one is intended.
    """
    first_params = (correlation1, mean_x1, mean_y1, std_x1, std_y1)
    second_params = (correlation2, mean_x2, mean_y2, std_x2, std_y2)

    # The first component is drawn before the second one.
    primary = generate_selections(*first_params, size)
    secondary = generate_selections(*second_params, size)

    return 0.9 * primary + 0.1 * secondary

def columns(sample):
    """Return the first and second column of a 2-D sample as a tuple."""
    first_column = sample[:, 0]
    second_column = sample[:, 1]
    return first_column, second_column

def to_sample(x, y):
    """Join two columns into a single 2-D sample.

    Parameters
    ----------
    x, y : array-like
        Either 1-D sequences of equal length (such as the arrays returned by
        ``columns``) or 2-D arrays with the same number of rows.

    Returns
    -------
    numpy.ndarray
        2-D array whose columns are ``x`` followed by ``y``.
    """
    # column_stack turns 1-D inputs into columns first; concatenating 1-D
    # arrays along axis=1 would raise because that axis does not exist.
    return np.column_stack((x, y))

def get_quadrant_counter(x1, y1, offset=(0,0)):
    """Build a function that counts the points lying in a given quadrant.

    Parameters
    ----------
    x1, y1 : iterable
        Coordinates of the points; they are paired with ``zip``.
    offset : sequence of two numbers, optional
        ``(x0, y0)`` origin relative to which the quadrants are defined.

    Returns
    -------
    callable
        ``counter(quadrant_n)`` returning how many points fall strictly
        inside quadrant ``quadrant_n`` (1: x > x0, y > y0; 2: x < x0, y > y0;
        3: x < x0, y < y0; 4: x > x0, y < y0). Points lying exactly on either
        axis belong to no quadrant. ``counter`` raises ``IndexError`` when
        ``quadrant_n`` is outside 1..4.
    """
    predicates = (
        lambda x, y: x > offset[0] and y > offset[1],
        lambda x, y: x < offset[0] and y > offset[1],
        lambda x, y: x < offset[0] and y < offset[1],
        lambda x, y: x > offset[0] and y < offset[1]
    )

    def counter(quadrant_n):
        # Without this check 0 and negative numbers would silently wrap
        # around through negative indexing and count the wrong quadrant.
        if not 1 <= quadrant_n <= len(predicates):
            raise IndexError('quadrant number must be between 1 and 4, got {!r}'.format(quadrant_n))

        in_quadrant = predicates[quadrant_n - 1]
        return sum(1 for x, y in zip(x1, y1) if in_quadrant(x, y))

    return counter

def quadrantr(sample):
    """Compute the quadrant correlation coefficient of a two-column sample.

    Quadrants are taken relative to the point formed by the medians of the
    two columns. The result is the number of points in quadrants 1 and 3
    minus the number in quadrants 2 and 4, divided by ``len(sample)``.
    """
    xs, ys = columns(sample)
    median_point = (np.median(xs), np.median(ys))
    count_in = get_quadrant_counter(xs, ys, median_point)

    same_sign = count_in(1) + count_in(3)
    opposite_sign = count_in(2) + count_in(4)
    return (same_sign - opposite_sign) / len(sample)
        
def research_generator():
    """Yield ``(function, name)`` pairs for each correlation estimator.

    Every function takes a two-column sample and returns one coefficient:
    Spearman, Pearson, and the quadrant coefficient, in that order.
    """
    def spearman(sample):
        return stats.spearmanr(sample, axis=0)[0]

    def pearson(sample):
        return stats.pearsonr(*columns(sample))[0]

    estimators = (
        (spearman, 'Spearman'),
        (pearson, 'Pearson'),
        (quadrantr, 'Quadrant'),
    )

    yield from estimators

def var_mean_to_csv(filename, res):
    """Write the per-column mean and variance of ``res`` to ``<filename>.csv``.

    Parameters
    ----------
    filename : str
        Output path without the ``.csv`` extension.
    res : dict
        Maps a column name to a list of numeric observations.

    The CSV has one column per key of ``res`` and two rows: ``E(x)`` (mean)
    and ``D(x)`` (population variance, ``ddof=0``), rounded to 4 decimals.
    """
    frame = pd.DataFrame.from_dict(res)

    # Reduce explicitly along axis=0. Going through np.mean / np.var hands
    # axis=None to pandas, which newer versions treat as a reduction over the
    # whole frame instead of per column.
    means = frame.mean(axis=0)
    variances = frame.var(axis=0, ddof=0)

    summary = pd.DataFrame([means, variances], index=['E(x)', 'D(x)']).round(4)
    summary.to_csv(filename + '.csv')

def do_research(get_sample, table_name, repetitions=1000):
    """Repeat an experiment and save the mean/variance of each estimator.

    Parameters
    ----------
    get_sample : callable
        Called with no arguments once per repetition; returns a sample.
    table_name : str
        Passed to ``var_mean_to_csv`` as the output file name (without
        extension).
    repetitions : int, optional
        Number of samples to generate.

    For every sample, each ``(function, name)`` pair produced by
    ``research_generator`` is evaluated and the value is collected under
    ``name``; the collected values are then summarised by ``var_mean_to_csv``.
    """
    res = dict()

    for _ in range(repetitions):
        sample = get_sample()
        for fun, f_name in research_generator():
            value = fun(sample)
            # setdefault keeps the statistic call outside any KeyError
            # handler, so an error raised inside `fun` is never swallowed
            # and the statistic is never evaluated a second time.
            res.setdefault(f_name, []).append(value)

    var_mean_to_csv(table_name, res)
        

In [None]:
# Sample factories used by the experiments:
#   getters[0](size, c) -- generate_selections with correlation `c`, zero
#                          means and unit scale arguments;
#   getters[1](size)    -- generate_complex_selection with fixed parameters.
getters = (
    lambda size, c: generate_selections(c, 0, 0, 1, 1, size),
    lambda size: generate_complex_selection(
        0.9, 0, 0, 1, 1,
        -0.9, 0, 0, 10, 10,
        size,
    ),
)
sizes = [20, 60, 100]
cor_list = [0, 0.5, 0.9]

for size in sizes:
    # Disabled experiment over `cor_list` (one table per correlation/size):
    #     for c in cor_list:
    #         do_research(lambda : getters[0](size, c), str(c) + '-' + str(size))

    # Writes one table per sample size: complex-20.csv, complex-60.csv, ...
    table_name = 'complex-' + str(size)
    do_research(lambda: getters[1](size), table_name)