In [1]:
import pandas as pd
from sklearn.linear_model import LinearRegression
import numpy as np
from itertools import combinations
import statsmodels.api as sm

In [2]:
def import_data(experimental_file, response_file, response):
    """Read the design and response CSV files and rescale the response.

    Parameters
    ----------
    experimental_file : str
        Experimental design CSV filename, e.g. ``experimental.csv``. The
        first column is skipped; every remaining column becomes a predictor.
    response_file : str
        Results CSV filename, e.g. ``Response.csv``.
    response : str
        Name of the response column to model, e.g.
        ``rheomix final deg time min`` or ``rheomix stability time min``.

    Returns
    -------
    y_norm : list
        Response values mapped linearly so the minimum becomes -1 and the
        maximum becomes +1.
    X_linear : numpy.ndarray
        Predictor matrix (all design columns except the first).
    experimental_df, response_df : pandas.DataFrame
        The two frames exactly as read from disk.
    """
    experimental_df = pd.read_csv(experimental_file)
    response_df = pd.read_csv(response_file)

    # Everything but the leading column of the design file is a predictor.
    X_linear = experimental_df.iloc[:, 1:].values

    observed = response_df[response].values
    lowest = min(observed)
    highest = max(observed)
    span = highest - lowest

    y_norm = []
    for value in observed:
        y_norm.append(2*((value - lowest)/span) - 1)

    return y_norm, X_linear, experimental_df, response_df
    

In [3]:
def total_fit(y, X_total):
    """Fit an OLS model on every supplied column to obtain the benchmark AIC.

    Parameters
    ----------
    y : sequence
        Response values.
    X_total : array-like
        Design matrix containing all candidate terms.

    Returns
    -------
    tuple
        ``(AIC_prev_min, AIC_cur_min)``. ``AIC_cur_min`` is the AIC of the
        fitted model; ``AIC_prev_min`` is that value plus one, so that a
        ``while AIC_cur_min < AIC_prev_min`` loop is entered at least once.
    """
    benchmark_aic = sm.OLS(y, X_total).fit().aic
    return benchmark_aic + 1, benchmark_aic

In [4]:
def model_type_func(array_, indexs, model_type):
    """Compute one non-linear model term from columns of the linear design.

    Parameters
    ----------
    array_ : numpy.ndarray
        2-D matrix whose columns are the linear terms.
    indexs : list
        Describes the term to build:

        * ``[i, 'inv']``  -> ``1 / x_i``
        * ``[i, 'log']``  -> ``log(x_i)``
        * ``[i, j]``      -> two-component term
        * ``[i, j, k]``   -> three-component term
    model_type : str
        ``'scheffe'`` (product of the columns) or ``'becker'`` (product of
        the columns divided by their sum). Ignored for ``inv``/``log`` terms.

    Returns
    -------
    numpy.ndarray
        1-D array with one value per row of ``array_``.

    Raises
    ------
    ValueError
        If ``indexs`` does not have 2 or 3 entries, or ``model_type`` is not
        ``'scheffe'`` or ``'becker'`` for a product-type term. Previously these
        cases silently returned ``None``.
    """
    # Single-column transforms are flagged by a string in the second slot.
    if len(indexs) == 2 and (indexs[1] == 'inv' or indexs[1] == 'log'):
        column = array_[:, indexs[0]]
        if indexs[1] == 'inv':
            return 1/column
        return np.log(column)

    if len(indexs) not in (2, 3):
        raise ValueError(
            'indexs must hold 2 or 3 entries, got {}'.format(len(indexs)))

    columns = [array_[:, idx] for idx in indexs]

    product = columns[0]
    for column in columns[1:]:
        product = product*column

    if model_type == 'scheffe':
        return product

    if model_type == 'becker':
        total = columns[0]
        for column in columns[1:]:
            total = total + column

        # Rows where every involved component is absent would give 0/0 = NaN
        # and poison the regression; such rows contribute zero to the term
        # instead (the output buffer is pre-filled with zeros and only rows
        # with a non-zero denominator are divided).
        blended = np.zeros(np.shape(product), dtype=float)
        np.divide(product, total, out=blended, where=(total != 0))
        return blended

    raise ValueError(
        "model_type must be 'scheffe' or 'becker', got {!r}".format(model_type))
    
def model_terms_name(list_, terms, indexs, model_type):
    """Append a ``[label, index, ...]`` record for one model term to ``list_``.

    Parameters
    ----------
    list_ : list
        Term list that is extended in place.
    terms : list of str
        Names of the one, two or three linear terms involved.
    indexs : list
        For a single term: ``[column_index, 'inv' | 'log']``.
        For two or three terms: the column indices of those terms.
    model_type : str
        ``'scheffe'`` gives a product label (``a*b``); ``'becker'`` gives a
        product-over-sum label (``(a*b)/(a+b)``). Not used for single terms.

    Nothing is appended when the transform or model type is not recognised.
    The function returns ``None``.
    """
    n_components = len(terms)

    if n_components == 1:
        transform = indexs[1]
        if transform == 'inv':
            label = '1/' + terms[0]
        elif transform == 'log':
            label = 'log(' + terms[0] + ')'
        else:
            return
        list_.append([label, indexs[0], transform])
        return

    if n_components not in (2, 3):
        return

    product_label = '*'.join(terms)
    if model_type == 'scheffe':
        label = product_label
    elif model_type == 'becker':
        label = '(' + product_label + ')/(' + '+'.join(terms) + ')'
    else:
        return

    # Record layout: label followed by one column index per component.
    list_.append([label] + [indexs[pos] for pos in range(n_components)])

In [5]:
def model_terms_list(experimental_df, response_df, model_type, order, inv_log):
    """Build the list of all candidate model terms, each with its column key.

    Parameters
    ----------
    experimental_df : pandas.DataFrame
        Design frame; every column after the first is a linear term.
    response_df : pandas.DataFrame
        Accepted for signature compatibility; not used here.
    model_type : str
        Passed through to ``model_terms_name`` to label interaction terms.
    order : int
        ``3`` adds three-component terms on top of the two-component ones.
    inv_log : str or None
        ``'inv'`` or ``'log'`` adds one transformed term per linear term;
        anything else adds none.

    Returns
    -------
    list
        ``[name, i]`` for each linear term, followed by the records appended
        by ``model_terms_name`` for pairs, then triples, then transforms.
    """
    linear_terms = experimental_df.columns.values.tolist()[1:]
    positions = range(len(linear_terms))

    model_terms = [[name, pos] for pos, name in enumerate(linear_terms)]

    # combinations() yields index tuples in the same i < j (< k) order as
    # nested loops would.
    for pair in combinations(positions, 2):
        names = [linear_terms[pos] for pos in pair]
        model_terms_name(model_terms, names, list(pair), model_type)

    if order == 3:
        for triple in combinations(positions, 3):
            names = [linear_terms[pos] for pos in triple]
            model_terms_name(model_terms, names, list(triple), model_type)

    if inv_log in ('log', 'inv'):
        for pos in positions:
            model_terms_name(model_terms, [linear_terms[pos]], [pos, inv_log], model_type)

    return model_terms

In [6]:
def X_total(model_terms, X_linear, model_type):
    """Assemble the full design matrix for every entry in ``model_terms``.

    Parameters
    ----------
    model_terms : list
        Term records. Records of length 3 or 4 (a label followed by two or
        three index entries) each add one column; other records are skipped.
    X_linear : numpy.ndarray
        Matrix of linear terms; used as the leading columns and as the source
        for every derived column.
    model_type : str
        Forwarded to ``model_type_func``.

    Returns
    -------
    numpy.ndarray
        ``X_linear`` with one extra column appended per qualifying record, in
        list order. ``X_linear`` itself is returned when nothing qualifies.
    """
    design = X_linear
    for record in model_terms:
        # A label plus 2 or 3 index entries marks a derived (non-linear) term.
        if len(record) not in (3, 4):
            continue
        derived_column = model_type_func(X_linear, record[1:], model_type)
        design = np.column_stack((design, derived_column))
    return design
    

In [7]:
def model_fit(experimental_file, response_file, response, model_type, order, inv_log):
    """Fit a model by backward stepwise elimination on AIC.

    Starts from the model containing every candidate term and, on each pass,
    drops the single non-linear term whose removal gives the lowest AIC.
    Stops when no removal lowers the AIC. Linear terms (records of length 2)
    are never removed.

    Parameters
    ----------
    experimental_file : str
        Experimental design CSV filename.
    response_file : str
        Results CSV filename.
    response : str
        Name of the response column.
    model_type : str
        ``'scheffe'`` or ``'becker'``.
    order : int
        ``2`` or ``3``; forwarded to ``model_terms_list``.
    inv_log : str or None
        ``'inv'``, ``'log'`` or ``None``; forwarded to ``model_terms_list``.

    Returns
    -------
    final : fitted OLS results
        Results of the selected model. When no removal improves the AIC this
        is the fit of the full starting model (previously this case raised
        ``UnboundLocalError`` because ``final`` was never assigned).
    model_terms : list
        Term records remaining in the selected model.
    X : numpy.ndarray
        Design matrix of the selected model; column ``i`` matches
        ``model_terms[i]``.
    y : list
        Normalised response as returned by ``import_data``.
    """
    y, X_linear, experimental_df, response_df = import_data(experimental_file, response_file, response)

    model_terms = model_terms_list(experimental_df, response_df, model_type, order, inv_log)
    X = X_total(model_terms, X_linear, model_type)

    # Fit the full model first: it is the AIC benchmark and the fallback
    # result if elimination never finds an improvement.
    final = sm.OLS(y, X).fit()
    AIC_cur_min = final.aic

    while True:
        best_step = None

        for column, term in enumerate(model_terms):
            # Records of length 2 are linear terms and always stay in.
            if len(term) <= 2:
                continue

            X_candidate = np.delete(X, column, axis=1)
            candidate = sm.OLS(y, X_candidate).fit()

            if candidate.aic < AIC_cur_min:
                AIC_cur_min = candidate.aic
                best_step = (column, X_candidate, candidate)

        if best_step is None:
            break

        # Commit the best removal; dropping by position keeps the columns of
        # X aligned with model_terms.
        column, X, final = best_step
        del model_terms[column]

    return final, model_terms, X, y

In [8]:
# Usage: model_fit(experimental_file, response_file, response, model_type, order, inv_log)
#
# response   : 'rheomix final deg time min' or 'rheomix stability time min'
test1 = 'rheomix final deg time min'
test2 = 'rheomix stability time min'

# model_type : 'scheffe' or 'becker'
# order      : 2 or 3 for second order or special cubic
# inv_log    : 'inv', 'log' or None to add inverse terms, log terms or neither

final, terms, X, y = model_fit('experimental.csv', 'Response.csv', test2, 'scheffe', 3, None)
final.summary(), terms, final.params

  return 1 - np.divide(self.nobs - self.k_constant, self.df_resid) * (1 - self.rsquared)
  return 1 - np.divide(self.nobs - self.k_constant, self.df_resid) * (1 - self.rsquared)
  return self.ssr/self.df_resid
  return np.dot(wresid, wresid) / self.df_resid


(<class 'statsmodels.iolib.summary.Summary'>
 """
                             OLS Regression Results                            
 Dep. Variable:                      y   R-squared:                       1.000
 Model:                            OLS   Adj. R-squared:                    nan
 Method:                 Least Squares   F-statistic:                     0.000
 Date:                Fri, 30 Aug 2019   Prob (F-statistic):                nan
 Time:                        16:58:51   Log-Likelihood:                 1391.6
 No. Observations:                  46   AIC:                            -2691.
 Df Residuals:                       0   BIC:                            -2607.
 Df Model:                          45                                         
 Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [95.0% Conf. Int.]
 ---------------------------------------------------------------------