In [None]:
# Correlation

import math

def mean(data):
    """Return the arithmetic mean of the numbers in ``data``.

    ``data`` must be a non-empty sized collection; an empty one makes the
    division fail with ZeroDivisionError.
    """
    total = sum(data)
    count = len(data)
    return total / count

def var(data):
    """Return the sum of squared deviations of ``data`` from its mean.

    Note that, despite the name, the result is NOT divided by the number of
    elements: it is the raw sum of squares, which is what the correlation
    computation in this cell needs.

    Returns 0 for an empty sequence.
    """
    if len(data) == 0:
        return 0

    # The mean is loop-invariant: compute it once instead of once per element.
    centre = mean(data)

    # Accumulate in a local that does not shadow the builtin ``sum``.
    squared_deviations = 0
    for value in data:
        squared_deviations += (value - centre) ** 2
    return squared_deviations

def cov(dt1, dt2):
    """Return the sum of products of paired deviations of ``dt1`` and ``dt2``.

    This is the un-normalised co-deviation sum
    ``sum((x - mean(dt1)) * (y - mean(dt2)))``; it is not divided by the
    number of pairs.

    Returns 0 when both sequences are empty.

    Raises:
        ValueError: if the two sequences do not have the same length, since
            the values could not be paired one-to-one.
    """
    if len(dt1) != len(dt2):
        raise ValueError("cov() requires two sequences of equal length")
    if len(dt1) == 0:
        return 0

    # Both means are loop-invariant: compute each once instead of per element.
    centre1 = mean(dt1)
    centre2 = mean(dt2)

    # Accumulate in a local that does not shadow the builtin ``sum``.
    co_deviation = 0
    for first, second in zip(dt1, dt2):
        co_deviation += (first - centre1) * (second - centre2)
    return co_deviation

# Two paired samples of ten values each.
physics = [15.0, 12.0, 8.0, 8.0, 7.0, 7.0, 7.0, 6.0, 5.0, 3.0]
history = [10.0, 25.0, 17.0, 11.0, 13.0, 17.0, 20.0, 13.0, 9.0, 15.0]

mean_history = mean(history)
mean_physics = mean(physics)

# var() returns sums of squared deviations (not divided by n).
var_physics = var(physics)
var_history = var(history)

# Store the result under its own name: assigning it to `cov` would replace the
# cov() function with a float and break every later call to cov().
cov_physics_history = cov(physics, history)

# Square root of the product of the two sums of squares: the denominator of
# the correlation coefficient.
std = math.sqrt(var_physics * var_history)

r = cov_physics_history / std
print(round(r, 3))

In [None]:
# Slope

def mean(data):
    """Average of ``data``: the sum of its items divided by how many there are."""
    total, count = sum(data), len(data)
    return total / count

# Paired samples; physics is the series the slope is expressed against.
physics = [15.0, 12.0, 8.0, 8.0, 7.0, 7.0, 7.0, 6.0, 5.0, 3.0]
history = [10.0, 25.0, 17.0, 11.0, 13.0, 17.0, 20.0, 13.0, 9.0, 15.0]

mean_history = mean(history)
mean_physics = mean(physics)

# Sum of squared deviations of physics from its mean.
var_physics = sum((score - mean_physics) ** 2 for score in physics)

# Sum of the products of the paired deviations.
sum_phy_his = 0
for phy_score, his_score in zip(physics, history):
    sum_phy_his += (phy_score - mean_physics) * (his_score - mean_history)

# Slope of the least-squares line of history on physics.
slope = sum_phy_his / var_physics
print(round(slope, 3))

In [None]:
# Regression

def mean(data):
    """Return ``sum(data) / len(data)``, the arithmetic mean of ``data``."""
    numerator = sum(data)
    return numerator / len(data)

# Input data: physics is x (the independent variable),
# history is y (the dependent variable).
physics = [15.0, 12.0, 8.0, 8.0, 7.0, 7.0, 7.0, 6.0, 5.0, 3.0]
history = [10.0, 25.0, 17.0, 11.0, 13.0, 17.0, 20.0, 13.0, 9.0, 15.0]

mean_physics = mean(physics)
mean_history = mean(history)

# Σ (x - mean(x))²
var_physics = sum((x_value - mean_physics) ** 2 for x_value in physics)

# Σ (x - mean(x)) * (y - mean(y))
sum_phy_his = 0
for x_value, y_value in zip(physics, history):
    sum_phy_his += (x_value - mean_physics) * (y_value - mean_history)

# Slope: b = Σ(x - mean(x)) * (y - mean(y)) / Σ (x - mean(x))²
b = sum_phy_his / var_physics

# Intercept: a = mean(y) - b * mean(x)
a = mean_history - b * mean_physics

# Value of the fitted line y = b*x + a at x = 10.
result = b*10 + a
print(round(result, 1))

In [None]:
# Correlation

import math

def pearson(first_data, second_data, n=None):
    """Return the Pearson correlation coefficient of two paired series.

    Uses the computational formula
    ``(n*Σxy - Σx*Σy) / (sqrt(n*Σx² - (Σx)²) * sqrt(n*Σy² - (Σy)²))``
    and rounds the result to two decimal places.

    Args:
        first_data: sequence of numbers (the x values).
        second_data: sequence of numbers (the y values), same length as
            ``first_data``.
        n: number of pairs. Optional; defaults to ``len(first_data)``. It is
            kept as a parameter so existing callers that pass it explicitly
            continue to work.

    Returns:
        The correlation coefficient rounded to 2 decimals.

    Raises:
        ValueError: if the two series differ in length.
        ZeroDivisionError: if either series has no spread (all values equal),
            in which case the coefficient is undefined.
    """
    if len(first_data) != len(second_data):
        raise ValueError("pearson() requires two series of equal length")
    if n is None:
        n = len(first_data)

    sum_first = sum(first_data)
    sum_second = sum(second_data)
    sum_products = sum(x * y for x, y in zip(first_data, second_data))
    sum_first_sq = sum(x ** 2 for x in first_data)
    sum_second_sq = sum(y ** 2 for y in second_data)

    numerator = (n * sum_products) - (sum_first * sum_second)

    # Mathematically these terms are >= 0, but floating-point round-off can
    # push them slightly below zero for (near-)constant series, which would
    # make math.sqrt fail with "math domain error". Clamp at zero.
    spread_first = max((n * sum_first_sq) - sum_first ** 2, 0.0)
    spread_second = max((n * sum_second_sq) - sum_second ** 2, 0.0)

    denominator = math.sqrt(spread_first) * math.sqrt(spread_second)
    if denominator == 0:
        raise ZeroDivisionError(
            "Pearson correlation is undefined when a series is constant"
        )

    return round(numerator / denominator, 2)


# First line of input: the number of records. Each following line holds at
# least three whitespace-separated numbers; the first three are taken as the
# mathematics, physics and chemistry values of one record.
n = int(input())
mathematics, physics, chemistry = [], [], []

for _ in range(n):
    scores = [float(token) for token in input().split()]
    mathematics.append(scores[0])
    physics.append(scores[1])
    chemistry.append(scores[2])

# pearson() receives the record count as a float.
record_count = float(n)
print(pearson(mathematics, physics, record_count))
print(pearson(mathematics, chemistry, record_count))
print(pearson(chemistry, physics, record_count))

In [None]:
# Linear Regression

from sklearn import linear_model

# Header line: "<number of feature columns> <number of training rows>".
features, rows = map(int, input().split())
X, Y = [], []

# Each training line holds the feature values followed by the target value.
# NOTE(review): every row is prefixed with a constant 0 column; this looks
# redundant if LinearRegression fits its own intercept -- confirm before removing.
for _ in range(rows):
    values = [float(token) for token in input().split()]
    x = [0]
    for position, value in enumerate(values):
        if position < features:
            x.append(value)
        else:
            Y.append(value)
    X.append(x)

model = linear_model.LinearRegression()
model.fit(X, Y)
a = model.intercept_
b = model.coef_

# Rows to predict: a count line, then one line of feature values per row,
# prefixed with the same constant 0 column used for training.
new_rows = int(input())
new_X = [
    [0] + [float(token) for token in input().split()]
    for _ in range(new_rows)
]

result = model.predict(new_X)

for prediction in result:
    print(round(prediction, 2))

In [None]:
# Polynomial Regression

from sklearn import linear_model
from sklearn.preprocessing import PolynomialFeatures
import numpy as np

# Header line: "<number of feature columns> <number of training rows>".
features, rows = map(int, input().split())
X, Y = [], []

# Each training line holds `features` feature values followed by the target.
# NOTE(review): every row is prefixed with a constant 0 column, as in the
# original code; it looks redundant -- confirm before removing.
for _ in range(rows):
    elements = list(map(float, input().split()))
    X.append([0] + elements[:features])
    Y.extend(elements[features:])

poly = PolynomialFeatures(degree = 3)

# Fit the polynomial expansion on the training data only, then fit the model
# on the expanded features.
model = linear_model.LinearRegression()
model.fit(poly.fit_transform(np.array(X)), Y)

# Rows to predict: a count line, then one line of feature values per row.
new_rows = int(input())
new_X = [[0] + list(map(float, input().split())) for _ in range(new_rows)]

# Use transform(), not fit_transform(): prediction inputs must be expanded with
# the transformer fitted on the training data, never refitted on new data.
result = model.predict(poly.transform(np.array(new_X)))
for prediction in result:
    print(round(prediction, 2))

In [None]:
# Linear Regression

import sys
import pandas as pd
from sklearn import linear_model
import matplotlib.pyplot as plt

# Two-column, header-less CSV: column 0 is the charging time (see
# `timeCharged` below), column 1 is the battery life (see the y-axis label).
dataset = pd.read_csv('https://s3.amazonaws.com/hr-testcases/399/assets/trainingdata.txt', header = None)

plt.plot(dataset.iloc[:,0], dataset.iloc[:,1], 'ro')
plt.ylabel('Laptop battery life')
plt.show()

# Keep only rows whose battery life is below 8; predictions above 8 are
# printed as 8.0 further down, so 8 acts as the ceiling of the model.
dataset = dataset[dataset.iloc[:, 1] < 8]

# Prepend a constant 0 column, so the frame becomes
# [0, charging time, battery life].
# NOTE(review): the 0 column looks redundant for LinearRegression -- confirm
# before removing; predict() below passes a matching leading 0.
dataset.insert(0, len(dataset.columns), 0)

# `.as_matrix()` was removed from pandas (AttributeError on current versions);
# `.values` returns the same NumPy array and works on old and new releases.
X = dataset.iloc[:,0:2].values
Y = dataset.iloc[:,2].values

model = linear_model.LinearRegression()
model.fit(X, Y)

timeCharged = float(input().strip())
result = model.predict([[0, timeCharged]])
if result[0] > 8:
    print(8.0)
else:
    print(round(result[0], 2))

In [None]:
# Basic Statistics

import numpy as np
import math
import statistics as stpy
from scipy import stats

def mean_confidence_interval(length, mean, stdev, z=1.96):
    """Return the half-width of a confidence interval around a mean.

    The value returned is ``z * stdev / sqrt(length)``; callers subtract it
    from and add it to the mean to obtain the interval bounds.

    Args:
        length: number of observations (must be > 0).
        mean: unused by the computation; kept so existing call sites that
            pass it positionally keep working.
        stdev: standard deviation of the observations.
        z: critical value multiplying the standard error. Defaults to 1.96
            (presumably the two-sided 95% normal critical value).

    Returns:
        The margin to place on either side of the mean.

    Raises:
        ZeroDivisionError: if ``length`` is 0.
    """
    standard_error = stdev / math.sqrt(length)
    return z * standard_error

# Input: a count line followed by one line of whitespace-separated integers.
total = int(input())
numbers = [int(token) for token in input().split()]

# Descriptive statistics of the sample (pstdev is the population standard deviation).
mean = np.mean(numbers)
median = np.median(numbers)
mode = int(stats.mode(numbers)[0])
stdev = stpy.pstdev(numbers)

# Bounds of the interval: mean minus / plus the margin, rounded to one decimal.
confidence_interval = mean_confidence_interval(total, mean, stdev)
min_confidence = round(mean - confidence_interval, 1)
max_confidence = round(mean + confidence_interval, 1)

# One statistic per line: mean, median, mode, standard deviation.
for statistic in (round(mean, 1), round(median, 1), mode, round(stdev, 1)):
    print(statistic)
print("{} {}".format(min_confidence, max_confidence))

In [None]:
# Support vector machine

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.svm import SVR

url = "https://s3.amazonaws.com/hr-testcases/399/assets/trainingdata.txt"
names = ["class", "hours"]
dataset = pd.read_csv(url, names = names)

# Take features and targets straight from the frame so every X row stays
# paired with its own y value. The previous code sorted the rows by the
# target and then sorted X on its own, which breaks that pairing whenever
# the two columns are not perfectly monotonically related.
X = dataset[["class"]].values   # 2-D array with one feature column
y = dataset["hours"].values     # 1-D target array

svr_rbf = SVR(kernel = 'rbf', C = 1e3, gamma = 0.1)
# Fit the model; y_rbf holds the in-sample predictions.
y_rbf = svr_rbf.fit(X, y).predict(X)

n = float(input())

# Predict for the single value read from input and print it.
print(*svr_rbf.predict(np.array([n]).reshape(-1, 1)))

In [None]:
# Polynomial Regression

import numpy as np
import matplotlib.pyplot as plt

def Main():
    """Read training pairs and query values from stdin, then plot the fits.

    Input format, as read by the code:
      * a header line with two integers (the first is read but not used,
        the second is the number of training rows N);
      * N lines of numbers, of which the first is taken as x and the second
        as y;
      * a line with the number of query values n, followed by n lines with
        one float each.

    The x values are sorted in ascending order (so the fitted curves are
    drawn left to right) and the y values are reordered with them.
    """
    # NOTE(review): the first header field is never used; only data[0] and
    # data[1] of each row are read -- confirm the input has a single feature.
    f, N = map(int, input().split())

    X = []
    y = []

    for _ in range(N):
        data = list(map(float, input().split()))
        X.append(data[0])
        y.append(data[1])

    np_x = np.array(X)
    np_y = np.array(y)

    # Sort x and apply the SAME permutation to y. Sorting x alone (as the
    # code did before) pairs each x with the wrong y, so the polynomial fits
    # would be computed on scrambled data.
    order = np.argsort(np_x, kind = 'mergesort')
    np_x = np_x[order]
    np_y = np_y[order]

    n = int(input())
    predict = [float(input()) for _ in range(n)]

    plot(np_x, np_y, predict)
    
    
def plot(x, y, predicted):
    """Fit polynomials of degree 1, 3 and 5 to (x, y) and plot the fitted curves.

    Args:
        x: 1-D array of x values; each curve is evaluated and drawn at these
            points, in the order given.
        y: 1-D array of y values paired with ``x``.
        predicted: accepted for the caller's convenience but currently not
            used by this function.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # (polynomial degree, matplotlib format string) for each curve.
    curve_styles = ((1, 'r-'), (3, 'b--'), (5, 'm:'))

    for degree, line_format in curve_styles:
        coefficients = np.polyfit(x, y, deg = degree)
        # Label each curve with its real degree (all three used to be
        # labelled 'Degree 1' by copy-paste).
        plt.plot(x, np.polyval(coefficients, x), line_format,
                 label = 'Degree {}'.format(degree))

    # The explicit list given here is what the legend displays, in plot order.
    plt.legend(['d1', 'd3', 'd5'])

    plt.show()
    
    
# Call Main() only when this code runs as the top-level program
# (__name__ is "__main__"), not when it is imported as a module.
if __name__ == "__main__":
    Main()