# Homework 1

We run our experiments and calculations on three machines: one running Ubuntu 24, one running Windows [TODO: insert type], and Bradley's laptop (Dell Inspiron 15).

In [140]:
import matplotlib.pyplot as pyplot
import numpy as np
import csv
from scipy import stats as st

## Visualization Functions

Here are the generic versions of the helper functions we will use for data visualization.

In [141]:
def load_csv(byte_size):
    """Load the clock-cycle samples recorded for one copy size.

    Reads ``memtest{byte_size}B.csv`` from the current working directory and
    parses the first column of every non-blank row as an integer.

    Parameters: byte_size - value substituted into the file name
    Returns: list(int) - one entry per non-blank row, in file order
    Raises: OSError (e.g. FileNotFoundError) if the file cannot be opened,
    ValueError if a first-column value is not an integer
    """
    filename = f"memtest{byte_size}B.csv"
    # newline='' lets the csv module do its own line-ending handling.
    with open(filename, mode='r', newline='') as file:
        # csv.reader yields [] for blank lines (a trailing empty line is
        # common); skip those instead of failing on row[0].
        return [int(row[0]) for row in csv.reader(file) if row]


In [142]:
def get_data_stats(data):
    """Compute summary statistics for a collection of samples.

    Parameters: data - non-empty sequence of numbers
    Returns: (mean, med, mode, std) where
    mean - arithmetic mean of the samples
    med - median; the middle element for an odd number of samples, the
    average of the two middle elements for an even number
    mode - the result object of scipy.stats.mode; index 0 is the modal value
    std - standard deviation as computed by numpy.std (default ddof=0)
    Raises: ValueError if data is empty
    """
    sample_count = len(data)
    if sample_count == 0:
        raise ValueError("get_data_stats() requires at least one sample")

    mean = sum(data)/sample_count
    std = np.std(data)
    mode = st.mode(data)

    # Index from the true middle: the earlier len/2 + 1 lookup returned the
    # element after the median and ran off the end for one or two samples.
    sorted_data = sorted(data)
    middle = sample_count // 2
    if sample_count % 2 == 1:
        med = sorted_data[middle]
    else:
        med = (sorted_data[middle - 1] + sorted_data[middle]) / 2
    return mean, med, mode, std

In [143]:
def remove_outliers(data, within_std = 2):
    """Split samples into those close to the mean and those far from it.

    A sample is kept when its distance from the mean is at most
    within_std standard deviations (inclusive); everything else is an outlier.

    Parameters: data - list(numbers)
    within_std (optional) - half-width of the accepted band, in standard
    deviations; may be fractional
    Returns: (cleaned_data, outliers) - two lists, each in input order;
    both are empty when data is empty
    """
    if len(data) == 0:
        return [], []

    # Only the mean and standard deviation are needed, so compute them
    # directly rather than paying for the mode and median as well.
    mean = sum(data)/len(data)
    # Keep the band width as a float: truncating it to an int collapsed
    # narrow bands (e.g. 0.4 std) to zero and rejected every sample.
    threshold = within_std * np.std(data)

    cleaned_data = []
    outliers = []
    for d in data:
        # Inclusive comparison so data with zero spread is kept, not rejected.
        if abs(d - mean) <= threshold:
            cleaned_data.append(d)
        else:
            outliers.append(d)
    return cleaned_data, outliers

In [144]:
# Scatter plot of clock cycles per trial; first_sample and sample_length select the window shown.
def create_linear_sample_plot(data, byte_sizes, clean_outliers = True, first_sample = 0, sample_length = None):
    """Creates a scatter plot of clock-cycle samples for a particular data block size.

    Parameters: data - list(ints)
    byte_sizes - int, the block size shown in the plot title
    clean_outliers (optional) - when True, outliers are removed with
    remove_outliers() before the window is selected
    first_sample (optional) - zero-based index of the first sample to plot
    sample_length (optional) - number of samples to plot; None plots
    everything from first_sample to the end
    Returns: (data, outliers) - the samples that were plotted and the
    outliers that were removed (an empty list when clean_outliers is False)
    """
    # Bind outliers up front so the return below works when cleaning is off.
    outliers = []
    if clean_outliers:
        data, outliers = remove_outliers(data)

    if sample_length is None:
        data = data[first_sample:]
    else:
        data = data[first_sample:first_sample+sample_length]
    # Derive the x values from what was actually sliced so x and y always
    # have the same length; trial numbers are 1-based.
    sample_count = range(first_sample+1, first_sample+len(data)+1)

    pyplot.scatter(sample_count, data)
    pyplot.title('Clock Cycles to Copy %i Bytes' % byte_sizes)
    pyplot.xlabel('Trial')
    pyplot.ylabel('Clock Cycles')
    pyplot.show()
    return data, outliers

In [145]:
def frequency_dictionary(data):
    """Tally how many times each value occurs in data.

    Parameters: data - iterable of hashable values
    Returns: dict mapping each distinct value to its number of occurrences,
    with keys in the order the values were first seen
    """
    tally = {}
    for value in data:
        # Unseen values start from zero, so first sight records a count of 1.
        tally[value] = tally.get(value, 0) + 1
    return tally

In [146]:
def get_top_most_freq_clock_counts(data, top_n = 10):
    """Return the top_n most frequent values in data with their counts.

    Parameters: data - iterable of hashable values
    top_n (optional) - maximum number of entries to keep
    Returns: dict of value -> count, ordered from most to least frequent;
    values with equal counts keep their first-seen order
    """
    counts = frequency_dictionary(data)
    # Rank the distinct values by their count, highest first (the sort is stable).
    ranked_values = sorted(counts, key=counts.get, reverse=True)
    return {value: counts[value] for value in ranked_values[0:top_n]}

In [172]:
# Histogram of clock-cycle counts, optionally with outliers removed first.
def create_histogram(data, byte_size, is_outliers = False, clean_outliers = True, within_std = 2):
    """Plots a histogram of clock-cycle samples for a particular data block size.

    Parameters: data - list(ints)
    byte_size - int, the block size shown in the plot title
    is_outliers (optional) - when True, the title labels the data as outliers
    clean_outliers (optional) - when True, outliers are removed with
    remove_outliers() before plotting
    within_std (optional) - number of standard deviations passed to
    remove_outliers() and shown in the title
    Returns: (cleaned_data, outliers) - the samples that were plotted and the
    outliers that were removed (an empty list when clean_outliers is False)
    """
    outliers = []
    cleaned_data = data
    if clean_outliers:
        cleaned_data, outliers = remove_outliers(data, within_std)

    if is_outliers:
        title = 'Histogram of Outlier Clock Cycles for '+str(byte_size)+' Bytes with '+str(within_std)+' STD of the Mean'
    else:
        title = 'Histogram of Clock Cycles for '+str(byte_size)+' Bytes with '+str(within_std)+' STD of the Mean'

    pyplot.hist(cleaned_data, bins='auto')
    pyplot.title(title)
    pyplot.xlabel('Clock Cycles')
    pyplot.ylabel('Frequency')
    pyplot.show()
    # Return what was plotted (not the unfiltered input), matching
    # create_linear_sample_plot and what callers name the result.
    return cleaned_data, outliers

In [173]:
def produce_stats(byte_size, within_stds = (2, 1, 0.4)):
    """Plots and prints the full report for one copy size.

    Loads the samples with load_csv(), draws the per-trial scatter plot, then
    for each threshold draws a histogram of the cleaned data followed by a
    histogram of the outliers that threshold removed. Finally prints the
    mean, median, mode, standard deviation and the ten most common
    clock-cycle counts of the unfiltered data.

    Parameters: byte_size - int, the block size whose CSV file is loaded
    within_stds (optional) - outlier thresholds, in standard deviations, to
    draw histogram pairs for
    Returns: None
    """
    data = load_csv(byte_size)
    create_linear_sample_plot(data, byte_size)

    for within_std in within_stds:
        _, histo_outliers = create_histogram(data, byte_size, is_outliers = False, clean_outliers = True, within_std = within_std)
        # The outliers are plotted as-is; cleaning them again would hide the tail.
        create_histogram(histo_outliers, byte_size, is_outliers = True, clean_outliers = False, within_std = within_std)

    # Statistics are reported for the unfiltered samples.
    avg, med, mode, std = get_data_stats(data)
    print("Stats for "+str(byte_size)+" ")
    print("Mean: "+str(avg))
    print("Median: "+str(med))
    print("Mode: "+str(mode[0]))
    print("Standard Deviation: "+str(std))
    top_counts = get_top_most_freq_clock_counts(data, 10)
    print("Top Ten Most Common Clock Cycles for "+str(byte_size)+" bytes copied: "+str(top_counts))

## Ubuntu 24
### Question 1
#### 64 Bytes of data copied

In [None]:
produce_stats(64)

#### 128 Bytes

In [None]:
produce_stats(128)

#### 256 Bytes

In [None]:
produce_stats(256)

#### 512 Bytes

In [None]:
produce_stats(512)

#### 1024 Bytes

In [None]:
produce_stats(1024)

#### 2048 Bytes

In [None]:
produce_stats(2048)

#### 4096 Bytes

In [None]:
produce_stats(4096)

#### 8192 Bytes

In [None]:
produce_stats(8192)

#### 16384 Bytes

In [None]:
produce_stats(16384)

#### 32768 Bytes

In [None]:
produce_stats(32768)

#### 65,536 Bytes

In [None]:
produce_stats(65536)

#### 1,048,576 Bytes

In [None]:
produce_stats(1048576)

#### 2,097,152 Bytes

In [None]:
produce_stats(2097152)

### Question 2

Machine 1 (CloudLab): <br>
    DRAM name: HP 855506-091 <br>
    Frequency = 2400MHz <br>
    tRCD = 17 <br>
    tRP = 17 <br>
    tCAS/tCL = 17 <br>
    tRC = tRAS + tRP = 56 <br>
    tRAS = 39 <br>
    tRRD = <br>
    tWR = <br>
    tWTR = <br>
    tCWD = <br>
    tRTP = <br>
    tCCD = <br>
    tBURST = <br>


Machine 2 (Bradley's Laptop - Dell Inspiron 15): <br>
    Frequency: 1600MHz <br>
    tRCD = 22 <br>
    tRP = 22 <br>
    tCAS/tCL = 22 <br>
    tRC = tRAS + tRP = 74 <br>
    tRAS = 52 <br>
    tRRD = <br>
    tWR = <br>
    tWTR = <br>
    tCWD = <br>
    tRTP = <br>
    tCCD = <br>
    tBURST = <br>


### Question 3