In [1]:
import numpy as np
from scipy import stats
import pandas as pd
import matplotlib.pyplot as plt
import math
from sklearn import linear_model
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [None]:
# Load the tab-separated measurements; the first row supplies the column names.
p_d = pd.read_csv('physical.txt', sep='\t', header=0)

# Ordinary least squares with Mass as the response and the ten columns named
# in the formula as predictors. `result` holds the fitted intercept and
# coefficients along with the other statistics statsmodels reports for a fit.
result = smf.ols(
    formula='Mass ~ Fore + Bicep + Chest + Neck + Shoulder + Waist + Height + Calf + Thigh + Head',
    data=p_d,
).fit()

print('Parameters: ', result.params.keys())
print('Parameters: ', result.params.values)

# vals_zero maps each term name ('Intercept', 'Fore', ...) to its coefficient.
vals_zero = result.params

# Rebuild the fitted values by hand: start from the intercept and add one
# (column * coefficient) product per predictor, in the order of the formula.
predictors = ('Fore', 'Bicep', 'Chest', 'Neck', 'Shoulder',
              'Waist', 'Height', 'Calf', 'Thigh', 'Head')
pred = vals_zero['Intercept']
for column in predictors:
    pred = pred + (p_d[column] * vals_zero[column])

# Residual = observed Mass - fitted value.
residuals = p_d['Mass'] - pred

# Residuals against fitted values, with a dotted red line marking zero.
fig, ax = plt.subplots()
ax.axhline(y=0, color='r', linestyle='dotted')
ax.scatter(pred, residuals)
ax.set_title('Residuals in Original Coordinates')
ax.set_xlabel('Fitted Value')
ax.set_ylabel('Residual value')
plt.show()



In [None]:
# Refit the same regression with the response (Mass) replaced by its cube
# root, then plot the residuals on that transformed scale.

# np.cbrt is applied to the whole column at once instead of calling
# np.power(x, 1/3) element by element through Series.apply. The name `cbr`
# is kept in case other cells use it.
cbr = np.cbrt
mod_df = p_d.copy(deep=True)  # deep copy so p_d keeps the untransformed Mass
mod_df['Mass'] = cbr(mod_df['Mass'])

result = smf.ols(
    formula='Mass ~ Fore + Bicep + Chest + Neck + Shoulder + Waist + Height + Calf + Thigh + Head',
    data=mod_df,
).fit()

print('Parameters: ', result.params.keys())
print('Parameters: ', result.params.values)

# Positional coefficient array (intercept first); kept because the following
# cell indexes `vals` by position.
vals = result.params.values

# Build the fitted values by looking coefficients up by name, so a change to
# the formula cannot silently pair a column with the wrong coefficient.
coefs = result.params
predictors = ('Fore', 'Bicep', 'Chest', 'Neck', 'Shoulder',
              'Waist', 'Height', 'Calf', 'Thigh', 'Head')
pred = coefs['Intercept']
for column in predictors:
    pred = pred + mod_df[column] * coefs[column]

# Show only the first few fitted values as a sanity check rather than
# dumping the whole Series into the notebook output.
print(pred.head())

# Residual = observed cube-root Mass - fitted value (both on the cube-root scale).
residuals = mod_df['Mass'] - pred

fig, ax = plt.subplots()
ax.axhline(y=0, color='r', linestyle='dotted')  # zero-residual reference line
ax.scatter(pred, residuals)
ax.set_title('Residuals in Cube-Root Coordinates')
ax.set_xlabel('Fitted Value')
ax.set_ylabel('Residual value')
plt.show()



In [None]:
# Residuals of the cube-root model, expressed back in the original Mass units.
# Relies on `vals` (coefficient array, intercept first) and `mod_df` from the
# cube-root fit in the previous cell, and on the untransformed `p_d`.

# Fitted values on the cube-root scale: intercept plus one
# (column * coefficient) product per predictor, added in formula order.
predictors = ('Fore', 'Bicep', 'Chest', 'Neck', 'Shoulder',
              'Waist', 'Height', 'Calf', 'Thigh', 'Head')
pred = vals[0]
for column, coef in zip(predictors, vals[1:]):
    pred = pred + mod_df[column] * coef

# Undo the cube-root transform so the predictions are comparable to p_d['Mass'].
pred2 = np.power(pred.values, 3)

plt.figure()
plt.axhline(y=0, color='r', linestyle='dotted')  # zero-residual reference line

# Residual = observed Mass - back-transformed fitted value.
plt.scatter(pred2, p_d['Mass'] - pred2)
# Both axes are in original Mass units here; the previous title was copied
# from the cube-root plot and mislabelled this figure.
plt.title('Residuals in Original Coordinates (Cube-Root Model)')
plt.xlabel('Fitted Value')
plt.ylabel('Residual value')
plt.show()
