In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
sns.set()


In [2]:
clean_ncaa = pd.read_excel('All Data Clean Rd5.xlsx')

In [3]:
def permutation_sample(data1, data2):
    """Generate a permutation sample from two data sets."""

    # Concatenate the data sets: data
    data = np.concatenate((data1, data2))

    # Permute the concatenated array: permuted_data
    permuted_data = np.random.permutation(data)

    # Split the permuted array into two: perm_sample_1, perm_sample_2
    perm_sample_1 = permuted_data[:len(data1)]
    perm_sample_2 = permuted_data[len(data1):]

    return perm_sample_1, perm_sample_2

In [4]:
def draw_perm_reps(data_1, data_2, func, size=1):
    """Generate multiple permutation replicates."""

    # Initialize array of replicates: perm_replicates
    perm_replicates = np.empty(size)

    for i in range(size):
        # Generate permutation sample
        perm_sample_1, perm_sample_2 = permutation_sample(data_1, data_2)

        # Compute the test statistic
        perm_replicates[i] = func(perm_sample_1, perm_sample_2)
    return perm_replicates

In [5]:
before = clean_ncaa[clean_ncaa['SC Change'] == 'Before']
after = clean_ncaa[clean_ncaa['SC Change'] == 'After']

In [6]:
def diff_of_means(data_1, data_2):
    """Difference in means of two arrays."""

    # The difference of means of data_1, data_2: diff
    diff = np.mean(data_1) - np.mean(data_2)

    return diff


In [7]:
 def pre_post_sc_pvalue(stat):   
    # Compute difference of mean impact force from experiment: empirical_diff_means
    empirical_diff_means = diff_of_means(before[stat], after[stat])

    # Draw 10,000 permutation replicates: perm_replicates
    perm_replicates = draw_perm_reps(before[stat], after[stat],
                                     diff_of_means, size=10000)

    # Compute p-value: p
    if empirical_diff_means > 0:
        print("Decrease in:", stat, "after the shot clock change")
        p = np.sum(perm_replicates >= empirical_diff_means) / len(perm_replicates)
    else:
        print("Increase in:", stat, "after the shot clock change")
        p = np.sum(perm_replicates <= empirical_diff_means) / len(perm_replicates)

    # Print the result
    print('p-value =', p)

In [8]:
pre_post_sc_pvalue('PQ average')

Increase in: PQ average after the shot clock change
p-value = 0.2011


In [9]:
pre_post_sc_pvalue('FGA')

Increase in: FGA after the shot clock change
p-value = 0.0


In [10]:
pre_post_sc_pvalue('3PA%')

Increase in: 3PA% after the shot clock change
p-value = 0.0


In [11]:
pre_post_sc_pvalue('ORB')

Decrease in: ORB after the shot clock change
p-value = 0.0


In [12]:
pre_post_sc_pvalue('3P%')

Increase in: 3P% after the shot clock change
p-value = 0.0


In [13]:
pre_post_sc_pvalue('BLK')

Decrease in: BLK after the shot clock change
p-value = 0.0089


In [14]:
pre_post_sc_pvalue('SRS')

Decrease in: SRS after the shot clock change
p-value = 0.4212


In [15]:
pre_post_sc_pvalue('FG%')

Increase in: FG% after the shot clock change
p-value = 0.0


In [16]:
np.corrcoef(clean_ncaa['FG%'], clean_ncaa['3PA%'])[0,1]

0.01017771587060229

In [17]:
clean_ncaa['2pa'] = clean_ncaa['FGA'] - clean_ncaa['3PA']
clean_ncaa['2pm'] = clean_ncaa['FG'] - clean_ncaa['3P']
clean_ncaa['2P%'] = clean_ncaa['2pm']/clean_ncaa['2pa']

In [18]:
np.corrcoef(clean_ncaa['2P%'], clean_ncaa['3PA%'])[0,1]

0.2906548436476493

In [19]:
np.mean(clean_ncaa['2P%'])*2

0.9822926892296282

In [20]:
np.mean(clean_ncaa['3P%'])*3

1.0358198198198205

In [21]:
clean_ncaa['true_shoot%_diff'] = clean_ncaa['2P%']*2 - clean_ncaa['3P%'] *3

In [22]:
np.mean(clean_ncaa['true_shoot%_diff'])

-0.05352713059019091

In [23]:
before = clean_ncaa[clean_ncaa['SC Change'] == 'Before']
after = clean_ncaa[clean_ncaa['SC Change'] == 'After']

In [24]:
print('True shoot diff before = ', np.mean(before['true_shoot%_diff']))
print('True shoot diff after = ', np.mean(after['true_shoot%_diff']))

True shoot diff before =  -0.062011857410067914
True shoot diff after =  -0.04504240377031386


In [25]:
pre_post_sc_pvalue('true_shoot%_diff')

Increase in: true_shoot%_diff after the shot clock change
p-value = 0.0


In [26]:
pre_post_sc_pvalue('FTA')

Decrease in: FTA after the shot clock change
p-value = 0.0


In [27]:
np.corrcoef(clean_ncaa['ORB'], clean_ncaa['true_shoot%_diff'])[0,1]

0.09733735334448884

In [28]:
np.corrcoef(clean_ncaa['FTA'], clean_ncaa['true_shoot%_diff'])[0,1]

0.09099912156235164