In [5]:
from math import sqrt
import numpy as np
import csv

In [6]:
# Defining the lists that we will import the data from the csv file to, can also use pandas and numpy
seq_100 = []
seq_1000 = []
seq_10000 = []

with open("data.csv", "r") as file:
    csv_reader = csv.reader(file)
    #Using for loops to loop through all data points in each column, using if row[x] is not empty to disregard empty cells
    for row in csv_reader:
        if row[0] != '':
            seq_100.append(float(row[0]))
        if row[1] != '':
            seq_1000.append(float(row[1]))
        if row[2] != '':
            seq_10000.append(float(row[2]))

In [16]:
#1 - Solve STD with only for loops
def std_loops(seq_100):
    #Defining variables
    sum_x = 0
    sum_x_squared = 0
    N = 0

    for i in seq_100:
        sum_x += i
        sum_x_squared += i**2
        N += 1

    mean = sum_x / N
    mean_x_squared = sum_x_squared/N

    return sqrt(mean_x_squared - mean**2)

data = [seq_100, seq_1000, seq_10000]
LoopStandardDeviations = []

#Printing the results
for n in data:
    LoopStandardDeviations.append(std_loops(n))
    if n == seq_100:
        x = 100
    if n== seq_1000:
        x = 1000
    if n== seq_10000: 
        x = 10000
    print(f"Standard Deviation for {x} floats: {std_loops(n)}")

Standard Deviation for 100 floats: 0.2823721097353601
Standard Deviation for 1000 floats: 0.28467443283850546
Standard Deviation for 10000 floats: 0.2854045269476155


In [12]:
# Compute standard deviation of the data using builtin functions
def std_builtin(seq_100): 

        N = len(seq_100)
        meanX = sum(seq_100)/N
        sumSquares = sum(i**2 for i in seq_100)
        meanSquares = sumSquares/N


        vSquared =  meanSquares - (meanX)**2
        BuiltinStandardDeviation = float(sqrt(vSquared))

        return BuiltinStandardDeviation

BuiltinSTDs = []

for n in data:
    BuiltinSTDs.append(std_builtin(n))
    if n == seq_100:
        x = 100
    if n== seq_1000:
        x = 1000
    if n== seq_10000: 
        x = 10000
    print(f"Standard Deviation for {x} floats: {std_builtin(n)}")



Standard Deviation for 100 floats: 0.2823721097353601
Standard Deviation for 1000 floats: 0.2846744328385061
Standard Deviation for 10000 floats: 0.28540452694761564


In [14]:
# Compute standard deviation of the data using numpy
numpySTDS = []

for n in data:
    numpySTDS.append(np.std(n))
    if n == seq_100:
        x = 100
    if n== seq_1000:
        x = 1000
    if n== seq_10000: 
        x = 10000
    print(f"Standard Deviation for {x} floats: {np.std(n)}")


Standard Deviation for 100 floats: 0.28237210973536014
Standard Deviation for 1000 floats: 0.28467443283850596
Standard Deviation for 10000 floats: 0.2854045269476156


In [9]:
#Timing the different functions for 100 data points
# Using loops
%timeit std_loops(seq_100)

#Using Built-in functions
%timeit std_builtin(seq_100)

#Using numpy
%timeit np.std(seq_100)

29.6 μs ± 1.79 μs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
38.3 μs ± 8.53 μs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
78.7 μs ± 15.9 μs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [7]:
#Timing the different functions for 1000 data points
%timeit std_loops(seq_1000)
%timeit std_builtin(seq_1000)
%timeit np.std(seq_1000)

372 μs ± 21.1 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
308 μs ± 11.4 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
62.1 μs ± 2.1 μs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [8]:
#Timing the different functions for 10000 data points
%timeit std_loops(seq_10000)
%timeit std_builtin(seq_10000)
%timeit np.std(seq_10000)

4.45 ms ± 130 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)
3.37 ms ± 490 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)
894 μs ± 83.9 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
