In [1]:
from funcs import *
import pandas as pd
import numpy as np

In [2]:
# Load the word/frequency records, then wrap three consecutive 60,000-entry
# slices of them in separate TestFrameWork instances so later cells can
# average every measurement over the three subsets.
path = "sampleData200k.txt"

wf_array = get_wordfrequencies(path)

# When the loader returns something falsy, nothing below is defined.
if wf_array:
    print("\n   WordFrequency Array built. Creating framework...\n")

    # Start sizes and the `incr` setting are shared by all three frameworks.
    start_values = [2_000, 5_000, 10_000, 20_000, 30_000, 40_000, 50_000]
    incr_percent = 0.10
    framework_options = {"starts": start_values, "incr": incr_percent}

    subset_1 = TestFrameWork(wf_array=wf_array[:60000], **framework_options)
    subset_2 = TestFrameWork(wf_array=wf_array[60000:120000], **framework_options)
    subset_3 = TestFrameWork(wf_array=wf_array[120000:180000], **framework_options)
    print("     Done!")

    

   


   WordFrequency Array built. Creating framework...

     Done!


In [3]:
# GROWTH ANALYSIS
# For every implementation, run grow_analysis on each of the three subsets,
# average the three results per start size, and tabulate the averages with
# one column per implementation.
implementations = ["list", "hashtable", "tst"]
growth_results = []

for implementation in implementations:
    print(f"Getting {implementation} results across all increments...")
    per_subset = [
        subset.grow_analysis(implementation)
        for subset in (subset_1, subset_2, subset_3)
    ]

    # Mean across the three subsets, keyed by start size.
    results_avg = {
        start: np.mean([result[start] for result in per_subset])
        for start in start_values
    }

    growth_results.append({str(implementation): results_avg})

# Assemble the table: start sizes first, then one column per implementation.
growth_df = pd.DataFrame()
growth_df["StartSize"] = start_values

for entry in growth_results:
    for implementation_type, averages in entry.items():
        growth_df[implementation_type] = list(averages.values())

Getting list results across all increments...
Getting hashtable results across all increments...
Getting tst results across all increments...


In [5]:
# Preview the first five rows of the averaged growth results.
growth_df.head(n=5)

Unnamed: 0,StartSize,list,hashtable,tst
0,2000,4e-05,0.0,7e-06
1,5000,0.00011,6.678899e-07,7e-06
2,10000,0.000221,3.337065e-07,7e-06
3,20000,0.000439,5.004009e-07,7e-06
4,30000,0.000673,1.11209e-07,7e-06


In [4]:
 # SHRINK ANALYSIS
# For every implementation, run shrink_analysis on each of the three subsets,
# average the three results per start size, and tabulate the averages with
# one column per implementation.
implementations = ["list", "hashtable", "tst"]
shrink_results = []

for implementation in implementations:
    print(f"Getting {implementation} results across all increments...")
    per_subset = [
        subset.shrink_analysis(implementation)
        for subset in (subset_1, subset_2, subset_3)
    ]

    # Mean across the three subsets, keyed by start size.
    results_avg = {
        start: np.mean([result[start] for result in per_subset])
        for start in start_values
    }

    shrink_results.append({str(implementation): results_avg})

# Assemble the table: start sizes first, then one column per implementation.
shrink_df = pd.DataFrame()
shrink_df["StartSize"] = start_values

for entry in shrink_results:
    for implementation_type, averages in entry.items():
        shrink_df[implementation_type] = list(averages.values())

Getting list results across all increments...
Getting hashtable results across all increments...
Getting tst results across all increments...


In [6]:
# Reorder shrink_df itself from the largest start size down to the smallest
# (in-place, so later cells see this row order), then preview five rows.
shrink_df.sort_values(
    by="StartSize",
    ascending=False,
    inplace=True,
)
shrink_df.head(n=5)

Unnamed: 0,StartSize,list,hashtable,tst
6,50000,0.001366,4.003684e-07,4e-06
5,40000,0.001074,3.336271e-07,3e-06
4,30000,0.000784,4.448891e-07,3e-06
3,20000,0.000518,3.336668e-07,3e-06
2,10000,0.000261,3.337065e-07,3e-06


In [25]:
 # STATIC ANALYSIS
# For every implementation, run static_analysis on each of the three subsets
# and average the results per size label. This notebook reads element [0] of
# each result as the search measurements and element [1] as the autocomplete
# measurements, both keyed by size label.
implementations = ["list", "hashtable", "tst"]
sizes = ["Small", "Medium", "Large"]
runs = 50

static_results_search = []
static_results_autocomplete = []

for implementation in implementations:
    print(f"Getting {implementation} results across all increments...")
    per_subset = [
        subset.static_analysis(implementation, runs=runs)
        for subset in (subset_1, subset_2, subset_3)
    ]

    # Mean across the three subsets for each size label.
    results_avg_search = {
        size: np.mean([result[0][size] for result in per_subset])
        for size in sizes
    }
    results_avg_autocomplete = {
        size: np.mean([result[1][size] for result in per_subset])
        for size in sizes
    }

    static_results_search.append({str(implementation): results_avg_search})
    static_results_autocomplete.append({str(implementation): results_avg_autocomplete})

# One table per operation, with one column per implementation.
static_df_search = pd.DataFrame()
for entry in static_results_search:
    for implementation_type, averages in entry.items():
        static_df_search[implementation_type] = list(averages.values())

static_df_autocomplete = pd.DataFrame()
for entry in static_results_autocomplete:
    for implementation_type, averages in entry.items():
        static_df_autocomplete[implementation_type] = list(averages.values())



Getting list results across all increments...
Getting hashtable results across all increments...
Getting tst results across all increments...


In [26]:
# Relabel the first three columns, by position, with "_search" names so they
# stay distinguishable once combined with the autocomplete table.
for position, search_label in enumerate(["list_search", "hashtable_search", "tst_search"]):
    static_df_search.rename(
        columns={static_df_search.columns[position]: search_label},
        inplace=True,
    )

static_df_search.head(n=5)

Unnamed: 0,list_search,hashtable_search,tst_search
0,0.00045,0.0,0.0
1,0.000653,0.0,0.0
2,0.001509,0.0,1.3e-05


In [27]:
# Relabel the first three columns, by position, with "_autocomplete" names so
# they stay distinguishable once combined with the search table.
for position, autocomplete_label in enumerate(
    ["list_autocomplete", "hashtable_autocomplete", "tst_autocomplete"]
):
    static_df_autocomplete.rename(
        columns={static_df_autocomplete.columns[position]: autocomplete_label},
        inplace=True,
    )

static_df_autocomplete.head(n=5)

Unnamed: 0,list_autocomplete,hashtable_autocomplete,tst_autocomplete
0,0.001256,0.001228,0.040947
1,0.001896,0.001852,0.063641
2,0.003875,0.003827,0.123173


In [28]:
# Place the search and autocomplete tables side by side, then put a "Size"
# label column in front so each row names its size bucket.
static_df = pd.concat(
    [static_df_search, static_df_autocomplete],
    axis="columns",
)
static_df.insert(loc=0, column="Size", value=["Small", "Medium", "Large"])

static_df.head(n=5)

Unnamed: 0,Size,list_search,hashtable_search,tst_search,list_autocomplete,hashtable_autocomplete,tst_autocomplete
0,Small,0.00045,0.0,0.0,0.001256,0.001228,0.040947
1,Medium,0.000653,0.0,0.0,0.001896,0.001852,0.063641
2,Large,0.001509,0.0,1.3e-05,0.003875,0.003827,0.123173


In [29]:
# Write each results table to its own CSV file using to_csv's default options.
for frame, filename in (
    (growth_df, "growth_results.csv"),
    (shrink_df, "shrink_results.csv"),
    (static_df, "static_results.csv"),
):
    frame.to_csv(filename)