In [21]:
import pandas as pd
import numpy as np
from itertools import combinations

In [14]:
def get_slope(X, Y):
    z = np.polyfit(X, Y, 1)
    p = np.poly1d(z)
    xp = np.linspace(0, 22, 100)
    return z[0]

In [15]:
def get_slope_diff(diet_1_data, diet_2_data):
    X1, Y1 = diet_1_data
    X2, Y2 = diet_2_data
    return (get_slope(X1, Y1) - get_slope(X2, Y2))

In [16]:
def shuffle_diet_labels(data):
    new_df = pd.DataFrame(columns=["weight","Time","Chick","Diet"])
    for i in range(22):
        temp = data.loc[data['Time']==i]
        count_row = temp.shape[0]
        if(count_row==0):
            continue
        else:
            temp['Diet'] = np.random.permutation(temp['Diet'].values)
            new_df = pd.concat([new_df, temp], ignore_index=True)
    return new_df

In [43]:
def get_slope_diff_count(data, observed_slope_diff, diet_1, diet_2, num_shuffles):
    count = 0
    weight = 0
    time = 1 
    for i in range(num_shuffles):
        if (i>0 and i%20 == 0):
            print("Done: ", i)
        new_df = shuffle_diet_labels(data)
        new_df_diet_1 = new_df[:][new_df.Diet == diet_1]
        new_df_diet_2 = new_df[:][new_df.Diet == diet_2]

        new_df_diet_1_values = new_df_diet_1.values
        new_df_diet_2_values = new_df_diet_2.values

        X1 = np.array(new_df_diet_1_values[:,time], dtype=float)
        Y1 = np.array(new_df_diet_1_values[:,weight], dtype=float)
        X2 = np.array(new_df_diet_2_values[:,time], dtype=float)
        Y2 = np.array(new_df_diet_2_values[:,weight], dtype=float)

        slope_diff = get_slope_diff((X1,Y1),(X2,Y2))   
        if observed_slope_diff < 0 and slope_diff <= observed_slope_diff:
            count += 1
        elif observed_slope_diff >= 0 and slope_diff >= observed_slope_diff:
            count += 1
    return count


In [44]:
observed_diff = {(1,2): -1.769,
                 (1,3): -4.5828,
                 (1,4):  -2.874,
                 (2,3): -2.8137,
                 (2,4): -1.105,
                 (3,4):1.7085}

In [45]:
def slope_diff_sig_test(chickweight):
    unique_diets = chickweight.Diet.unique()
    diet_comparisons = list(combinations(unique_diets, 2))
    num_shuffles = 100
    for ele in diet_comparisons:
        observed_slope_diff = observed_diff[ele]
        diet_1, diet_2 = ele
        data = chickweight.loc[(chickweight['Diet'] == diet_1) | (chickweight['Diet'] == diet_2)]
        count = get_slope_diff_count(data, observed_slope_diff, diet_1, diet_2, num_shuffles)
        ######################################
        #
        # Output
        #
        ######################################
        print("**********Diet: ", diet_1, " vs Diet: ", diet_2,"**********")

        print ("Observed difference of two slopes: %.2f" % observed_slope_diff)
        print (count, "out of", num_shuffles, "experiments had a difference of two means", end=" ")
        if observed_slope_diff < 0:
            print ("less than or equal to", end=" ")
        else:
            print ("greater than or equal to", end=" ")
        print ("%.2f" % observed_slope_diff, ".")
        print ("The chance of getting a difference of two means", end=" ")
        if observed_slope_diff < 0:
            print ("less than or equal to", end=" ")
        else:
            print ("greater than or equal to", end=" ")
        print ("%.2f" % observed_slope_diff, "is", (count / float(num_shuffles)), "\n")

In [46]:
chickweight = pd.read_csv('chick_weight.csv')
chickweight = chickweight.drop(['Unnamed: 0'], axis=1)

slope_diff_sig_test(chickweight)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


Done:  20
Done:  40
Done:  60
Done:  80
**********Diet:  1  vs Diet:  2 **********
Observed difference of two slopes: -1.77
1 out of 100 experiments had a difference of two means less than or equal to -1.77 .
The chance of getting a difference of two means less than or equal to -1.77 is 0.01 

Done:  20
Done:  40
Done:  60
Done:  80
**********Diet:  1  vs Diet:  3 **********
Observed difference of two slopes: -4.58
0 out of 100 experiments had a difference of two means less than or equal to -4.58 .
The chance of getting a difference of two means less than or equal to -4.58 is 0.0 

Done:  20
Done:  40
Done:  60
Done:  80
**********Diet:  1  vs Diet:  4 **********
Observed difference of two slopes: -2.87
0 out of 100 experiments had a difference of two means less than or equal to -2.87 .
The chance of getting a difference of two means less than or equal to -2.87 is 0.0 

Done:  20
Done:  40
Done:  60
Done:  80
**********Diet:  2  vs Diet:  3 **********
Observed difference of two slopes: