# In [1]:
import csv
import numpy as np
import random
import pandas as pd
import matplotlib.pyplot as plt
import json
import zipfile

# Version 4/7: q2 only supports two curves and mse. noise supports normal and uniform.

def noise(low, high, d='normal'):
    """Draw a single random noise value scaled to the range [low, high].

    Args:
        low: Lower end of the output range.
        high: Upper end of the output range.
        d: Distribution name. ``'normal'`` draws a standard normal sample
            truncated to [-3, 3] and maps it linearly onto [low, high];
            ``'uniform'`` draws from ``np.random.uniform(low, high)``;
            the empty string disables noise.

    Returns:
        The sampled noise value, or ``0`` when ``d`` is the empty string.

    Raises:
        NotImplementedError: If ``d`` names any other distribution.
    """
    if d == '':
        return 0
    if d == 'uniform':
        return np.random.uniform(low, high)
    if d != 'normal':
        # Other distributions could be added here later.
        raise NotImplementedError

    # Rejection sampling: redraw anything outside three standard deviations
    # (this happens less than 1% of the time).
    sample = np.random.randn()
    while abs(sample) > 3:
        sample = np.random.randn()

    # Map [-3, 3] onto [0, 1], then stretch to [low, high].
    unit = sample / 6 + 0.5
    return unit * (high - low) + low

def q1(data, freq, volumes, noise_max=5, noise_min=0, noise_type='normal'):
    """Return the first volume that exceeds the noisy value at ``freq``.

    The value ``data[freq]`` is perturbed by one draw of
    ``noise(noise_min, noise_max, noise_type)`` and compared against each
    entry of ``volumes`` in order.

    Args:
        data: Indexable container holding a value for ``freq``.
        freq: Key/index into ``data``.
        volumes: Volumes that can be played. MUST be in INCREASING order.
        noise_max: Upper bound passed to ``noise``.
        noise_min: Lower bound passed to ``noise``.
        noise_type: Distribution name passed to ``noise``.

    Returns:
        The first entry of ``volumes`` strictly greater than the noisy value,
        or the last entry of ``volumes`` when none is greater.
    """
    noisy_value = data[freq] + noise(noise_min, noise_max, noise_type)
    for level in volumes:
        if noisy_value < level:
            return level
    # Nothing in the list exceeds the noisy value: fall back to the last one.
    return volumes[-1]


def cov_calc(nums, k=0.5):
    """Score covariance matrices by a squared, asymmetric gap.

    For each matrix ``m`` in ``nums`` the gap ``m[0][0] - m[0][1]`` is
    computed (for a matrix produced by ``np.cov(a, b)`` that is the variance
    of ``a`` minus the covariance of ``a`` and ``b``). Negative gaps are
    scaled by ``k`` before squaring, so they are weighted differently from
    positive gaps.

    Args:
        nums: Iterable of matrices indexable as ``m[0][0]`` and ``m[0][1]``.
        k: Multiplier applied to negative gaps before squaring. Defaults to
            ``0.5``, the value previously hard-coded in this function.

    Returns:
        ``np.ndarray`` with one squared score per matrix, in input order
        (an empty array when ``nums`` is empty).
    """
    gaps = [matrix[0][0] - matrix[0][1] for matrix in nums]
    return np.array([(gap * k if gap < 0 else gap) ** 2 for gap in gaps])

def mse_calc(original, curves, k=1/3):
    """Compute an asymmetric mean squared error of each curve against ``original``.

    For every point the difference ``original - curve`` is taken. Positive
    differences are squared as-is; zero or negative differences are scaled by
    ``k`` before squaring.

    Args:
        original: Reference sequence of numbers.
        curves: Iterable of candidate sequences.
        k: Multiplier applied to non-positive differences before squaring.
            Defaults to ``1/3``, the value previously hard-coded here.

    Returns:
        ``np.ndarray`` of scores, one per curve whose length equals
        ``len(original)``.

    Note:
        Curves whose length differs from ``original`` are skipped silently,
        so the result can be shorter than ``curves`` and its indices then no
        longer line up with ``curves``. This is kept for backward
        compatibility; callers that index back into ``curves`` should
        validate lengths first.
    """
    reference = np.asarray(original)
    scores = []
    for curve in curves:
        if len(original) != len(curve):
            continue
        diff = reference - np.asarray(curve)
        # Down-weight points where the difference is not positive.
        weighted = np.where(diff > 0, diff, k * diff)
        scores.append(np.mean(weighted ** 2))
    return np.array(scores)

def q2(data, curves, similar_thres=1, unclear_thres=20, mse_cov_weight=(1, 0), noise_max=5, noise_min=0, noise_type='normal'):
    """Pick which of two curves scores closest to the noisy data.

    One noise value is drawn and added to every point of ``data``. Each curve
    then gets a score ``w_mse * mse_calc(...) + w_cov * cov_calc(...)``, where
    a lower score means a closer match.

    Args:
        data: Object exposing ``to_numpy()`` and ``len()`` (e.g. a pandas
            Series) holding the reference values.
        curves: Sequence of candidate curves, each as long as ``data``.
            Only two curves are supported: the similarity check compares
            ``scores[0]`` with ``scores[1]`` only.
        similar_thres: If the two scores differ by less than this, the
            curves are reported as equally clear.
        unclear_thres: If every score exceeds this, the curves are reported
            as equally unclear.
        mse_cov_weight: Pair ``(w_mse, w_cov)`` of weights for the two score
            components.
        noise_max: Upper bound passed to ``noise``.
        noise_min: Lower bound passed to ``noise``.
        noise_type: Distribution name passed to ``noise``.

    Returns:
        An int in ``[0, len(curves) + 1]``: the index of the lowest-scoring
        curve, ``len(curves)`` for "equally clear", or ``len(curves) + 1``
        for "equally unclear".
    """
    x = data.to_numpy()
    assert len(data) == len(curves[0]), "data and curves[0] must have the same length"

    # Forward noise_type so the caller's choice of distribution is honoured
    # (it used to be dropped, so 'normal' was always used).
    # Alternative left by the original author: scale the noise bounds to the
    # data, e.g. noise(x.min() * 0.1, x.max() * 0.1).
    error = noise(noise_min, noise_max, noise_type)
    noisy = x + error

    mse_scores = mse_calc(noisy, curves)
    cov_scores = cov_calc([np.cov(noisy, curve) for curve in curves])
    mse_weight, cov_weight = mse_cov_weight[0], mse_cov_weight[1]
    scores = mse_weight * mse_scores + cov_weight * cov_scores

    # Alternative left by the original author: derive the thresholds from the
    # signal, e.g. unclear_thres = np.mean(x**2), similar_thres = 0.1 * that.

    if np.all(scores > unclear_thres):
        # Both unclear
        return len(curves) + 1
    if abs(scores[0] - scores[1]) < similar_thres:
        # Both clear
        return len(curves)
    # Plain int so every return path has the same type.
    return int(np.argmin(scores))