In [72]:
import math

import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats
from matplotlib import rc, rcParams
from scipy.special import factorial, gammaln, xlogy
# import csv

In [None]:
# Load event records from the data file into data1 (one list of fields per
# row). Leading lines that start with "#" are echoed as header lines. Column
# index 3 of every data row is converted to int and treated as the event time.
with open('DorothyFileC050.txt', mode ='r') as file: # "ElsieWiddowsonFileC051.txt"
    line = file.readline()
    # startswith() is False for the empty string readline() returns at EOF, so
    # a header-only (or empty) file leaves this loop instead of raising
    # IndexError on line[0].
    while line.startswith("#"):
        print("HEADER" + line)
        line = file.readline()
    if not line:
        raise ValueError("No data rows found after the header lines")

    # Check to make sure first line of data is correct
    # NOTE(review): this first row is split on tabs only, while the rows read
    # below are split on any whitespace. The two agree only if no field
    # contains spaces or is empty -- confirm against the file format.
    data1 = [line.split(sep="\t")]
    print("First line of data in file: " + str(data1))
    data1[0][3] = int(data1[0][3])
    # Check time is at this index
    time = data1[0][3]
    numIntervals = 1000
    intervalTime = 1 #Range in seconds

    # Read rows until one is timestamped at or past the end of the analysis
    # window; that final row is stored in data1 as well.
    window_end_ms = numIntervals * intervalTime * 1000 # Data is in ms
    while time < window_end_ms:
        row_text = file.readline()
        if not row_text:
            # EOF: stop here rather than indexing into an empty row.
            print("WARNING: file ended at " + str(time) + " ms, before the "
                  + str(window_end_ms) + " ms analysis window was filled")
            break
        fields = row_text.split()
        if not fields:
            # Blank line: nothing to record.
            continue
        fields[3] = int(fields[3])
        data1.append(fields)
        time = fields[3]

# Bin function graciously borrowed from pythonForJLAB https://github.mit.edu/juniorlab/Python-Intro/blob/master/pythonForJLAB.ipynb

# Take out zero bins. The discussion for why I do this is beyond the scope of this intro, ask the instructors
# It's not important to know what's going on in this function if you don't want to know.
# If you do want to know, you'll have to think about it
# Alex 18/09 For a time interval of 10s, it's really unlikely that we will get empty bins, but we should do this for robustness.
def delete_zeros(bins,counts,err):
    '''
    Remove every bin whose count is exactly zero.

    The three inputs are parallel one-dimensional sequences of equal length.
    Lists and tuples are accepted as well as numpy arrays; the outputs are
    always numpy arrays.

    Inputs:
    bins = the frequency bin centers
    counts = the frequency data
    err = the error data
    Output:
    new_bins = bin centers, with the entries whose count is zero removed
    new_counts = frequency data, with the zero entries removed
    new_err = error data, with the entries whose count is zero removed
    '''
    # asarray lets plain Python sequences be compared and mask-indexed
    # element-wise; a bare list would make `counts == 0` a single False.
    bins = np.asarray(bins)
    counts = np.asarray(counts)
    err = np.asarray(err)
    keep = counts != 0 # True for every bin that has at least one entry
    return bins[keep], counts[keep], err[keep]

print(len(data1))
print("First data point at : " + str(data1[0]))
print("Last data point at : " + str(data1[-1]))

# Count the events in each consecutive interval of intervalTime seconds.
# data1 is walked once from front to back, so the rows are assumed to be in
# increasing time order.
ind = 0
bin_counts = []
bin_edges_1 = np.arange(0,numIntervals + 1)
num_events = len(data1)
for interval in range(1, numIntervals + 1):
    interval_end_ms = interval * 1000 * intervalTime
    count = 0
    # The bounds check ends the scan cleanly when the records run out before
    # the last interval; the remaining intervals then get a count of zero
    # instead of the loop raising IndexError.
    while ind < num_events and data1[ind][3] < interval_end_ms:
        count += 1
        ind += 1

    bin_counts.append(count)

print(bin_counts)
plt.rcParams["figure.figsize"] = (15,3)
# plt.bar takes bin centers as its arg, not edges.
plt.bar(np.arange(numIntervals) + 0.5, bin_counts, width = 1)
# One x-axis unit is one interval, so the label is built from intervalTime.
plt.xlabel('Time (' + str(intervalTime) + 's)', fontsize=20, labelpad=20)
plt.ylabel(r'Count in bin', fontsize=20)
plt.show()

# lines = [line.strip() for line in lines]
# data = [int(line) for line in lines if line.isdigit()]
# print(data)

In [74]:
# Running mean of the per-interval counts, used to show how the error on the
# mean shrinks as more intervals are included.
n_bins_seen = np.arange(1, numIntervals + 1) # 1, 2, ..., numIntervals
running_total = np.cumsum(bin_counts)
cumulativeaverage = running_total / n_bins_seen
# For Poisson, Var = mean, so the standard error of a mean over n intervals
# is sqrt(mean / n).
stds = np.sqrt(cumulativeaverage / n_bins_seen)

In [None]:
# Cumulative mean count against the number of intervals included, with stds
# drawn as the error bars.
plt.errorbar(np.arange(1, numIntervals+1), cumulativeaverage, yerr = stds, lw=0.3)
# Label the axes so the figure can be read on its own.
plt.xlabel(r'Number of intervals averaged', fontsize=20, labelpad=20)
plt.ylabel(r'Cumulative mean count', fontsize=20)
plt.show()

In [None]:
# Histogram of the per-interval counts: 30 equally spaced edges from 0 to 50,
# i.e. 29 bins of width 50/29.
# NOTE(review): bin_counts holds integers, so bins of width 50/29 (about 1.72)
# cover unequal numbers of integer values. Integer-aligned edges would avoid
# that, but any scale factor derived from the bin width must change with them.
binedges = np.linspace(0, 50, num=30)
binvalues = plt.hist(bin_counts, bins=binedges)

# Also from PythonForJLAB
def poisson(x, lam, a):
    '''
    Poisson probability of x events for expected value lam, scaled by a.

    Evaluated in log space as a * exp(x*ln(lam) - lam - ln(Gamma(x + 1))), so
    large x or lam do not overflow to inf/inf = nan the way
    lam**x / factorial(x) does (factorial exceeds float64 range beyond
    x = 170). Gamma(x + 1) equals x! at integer x, so non-integer x gives the
    smooth curve used when plotting over a fine grid.

    Inputs:
    x = corresponding number of events; scalar or array-like
    lam = expected value, which is equal to the variance for poisson distribution
    a = normalization constant
    Outputs: probability of x number of events occurring, scaled by constant a;
             0 wherever x < 0
    '''
    x = np.asarray(x, dtype=float)
    # xlogy(x, lam) is x*log(lam) but returns 0 when x == 0, which keeps the
    # lam == 0, x == 0 case equal to 0**0 == 1.
    log_pmf = xlogy(x, lam) - lam - gammaln(x + 1)
    # Testing x < 0 (rather than x >= 0) lets NaN inputs propagate as NaN.
    return a * np.where(x < 0, 0.0, np.exp(log_pmf))

# Overlay the Poisson curve for the overall mean count on the histogram.
x = np.linspace(0, 45, 256)
# The curve is scaled by (number of intervals) * (bin width). The width is
# taken from binedges so the overlay follows any change to the binning
# instead of relying on a separately hard-coded 50 / 29.
bin_width = binedges[1] - binedges[0]
plt.plot(x, poisson(x, cumulativeaverage[-1], numIntervals * bin_width))
plt.show()

In [77]:
#np.sum((poisson(bincenters, cumulativeaverage[-1], numIntervals * 50 / 29) - binvalues[0]) ** 2 / ((np.sqrt(cumulativeaverage[-1])) ** 2))

In [None]:
# Number of histogram bins: entry 0 of the plt.hist result holds the per-bin counts.
binvalues[0].shape[0]