# Ordenamiento

El ordenamiento de un arreglo consiste en encontrar una permutación de sus elementos tal que cada elemento sea menor o igual que su sucesor.

In [1]:
def selection_sort(collection):
    """Sort ``collection`` in place in ascending order and return it.

    Selection sort: for each position, find the smallest element in the
    not-yet-sorted tail and swap it into that position.
    """
    size = len(collection)
    for target in range(size):
        # min() keeps the first of several equal minima, matching a scan
        # that only replaces the candidate on a strict "<".
        smallest = min(range(target, size), key=lambda pos: collection[pos])
        collection[target], collection[smallest] = collection[smallest], collection[target]
    return collection

def insertion_sort(collection):
    """Sort ``collection`` in place in ascending order and return it.

    Insertion sort: each element is moved leftwards, one swap at a time,
    until the element on its left is no longer greater than it.
    """
    for start in range(1, len(collection)):
        pos = start
        while pos > 0 and collection[pos] < collection[pos - 1]:
            left = pos - 1
            collection[left], collection[pos] = collection[pos], collection[left]
            pos = left
    return collection

In [2]:
# Sanity check: a reversed three-element list should come back ascending.
insertion_sort([3,2,1])

[1, 2, 3]

In [19]:
import numpy as np
import time

def gen_random(size):
    """Return the integers 0 .. size-1 as an int32 array in random order."""
    ordered = np.arange(size, dtype=np.int32)
    return np.random.permutation(ordered)

def gen_worst_case(size):
    """Return the integers size-1 .. 0 as an int32 array in descending order.

    A fully reversed input is used as the worst case for the sorts above.
    The array has exactly ``size`` elements, matching gen_random and
    gen_best_case (the start is size-1, not size, to avoid an extra element).
    """
    return np.arange(size - 1, -1, -1, dtype=np.int32)

def gen_best_case(size):
    """Return the integers 0 .. size-1 as an int32 array, already ascending."""
    return np.arange(0, size, 1, dtype=np.int32)

# Time both quadratic sorts once on the same descending input.
arr=gen_worst_case(1000)

# time.clock() was removed in Python 3.8; time.process_time() is used here,
# the same timer the benchmark loop further down relies on.
# Each sort receives its own copy because both sorts work in place.
start=time.process_time()
insertion_sort(arr.copy())
end=time.process_time()
t1=end-start

start=time.process_time()
selection_sort(arr.copy())
end=time.process_time()
t2=end-start



# Note: in '%2f' the 2 is a minimum field width, not a precision, so six
# decimals are printed.
print('Insertion Sort: %2f'%t1)
print('Selection Sort : %2f'%t2)


Insertion Sort: 0.451467
Selection Sort : 0.167222


In [4]:
def merge_sort(A):
    """Return the elements of ``A`` in ascending order using merge sort.

    Inputs with fewer than two elements are returned as-is (the same
    object). Longer inputs are split in the middle, each half is sorted
    recursively, and the halves are combined with merge().
    """
    n = len(A)
    if n < 2:
        return A
    mid = n // 2
    left_sorted = merge_sort(A[:mid])
    right_sorted = merge_sort(A[mid:])
    return merge(left_sorted, right_sorted)

def merge(L, R):
    """Merge two ascending sequences into a single ascending list.

    Parameters: ``L`` and ``R`` are indexable, sliceable sequences that are
    each already sorted in ascending order.
    Returns: a new list holding every element of both inputs in ascending
    order. On ties the element from ``L`` is emitted first, so equal
    elements keep their relative order (stable merge).
    """
    result = []
    l_idx, r_idx = 0, 0
    while l_idx < len(L) and r_idx < len(R):
        # "<=" rather than "<": on a tie the left run must win, otherwise
        # equal elements from R would jump ahead of those from L.
        if L[l_idx] <= R[r_idx]:
            result.append(L[l_idx])
            l_idx += 1
        else:
            result.append(R[r_idx])
            r_idx += 1
    # At most one of the two runs still has elements left; append them.
    result.extend(L[l_idx:])
    result.extend(R[r_idx:])
    return result

In [5]:
# Preview the two halves that a ten-element input is split into.
arr=gen_best_case(10)

print(arr[:10 // 2])
print(arr[10 // 2:])
# Full sort of the same array, left disabled:
#sorted_arr=merge_sort(arr)

#print(sorted_arr)

[0 1 2 3 4]
[5 6 7 8 9]


In [6]:
import pandas as pd
import time 
import sys

# Benchmark setup: the sorts to compare, the input sizes, and how many
# random inputs are drawn per size.
method=[merge_sort,selection_sort,insertion_sort]
fun=[gen_random,gen_worst_case,gen_best_case]  # NOTE(review): not used in this cell
n_data=[1000,2000,3000,4000,5000]
n_samp=20

# s_n records the input size of every sample; ellapsed_time maps each
# function name to its measured times, in the same order as s_n.
s_n=[]
ellapsed_time={m.__name__:[] for m in method}

for i in n_data:
    for j in range(n_samp):
        s_n.append(i)
        arr=gen_random(i)
        for m in method:
            # Each method gets its own copy (two of the sorts work in place).
            # The copy is made before the timer starts so that only the sort
            # itself is measured.
            work=arr.copy()
            start=time.process_time()
            m(work)
            end=time.process_time()
            ellapsed_time[m.__name__].append(end-start)
        

In [7]:
# One frame per sorting method (columns: n, time, method), stacked row-wise
# into a single long-format frame.
df_list=[
    pd.DataFrame(dict(
        n=s_n,
        time=ellapsed_time[m.__name__],
        method=[m.__name__]*len(ellapsed_time[m.__name__]),
    ))
    for m in method
]
df=pd.concat(df_list,axis=0)


In [8]:
import warnings
warnings.filterwarnings("ignore")
import seaborn as sns
import matplotlib.pyplot as plt
# Use seaborn's "whitegrid" style for the figure below.
sns.set(style="whitegrid")

# Plot time against n with one colour per sorting method; order=4 requests
# a fourth-order polynomial regression fit for each method.
sns.lmplot(x="n", y="time", hue="method",order=4, data=df)
plt.show()

<Figure size 661.375x500 with 1 Axes>

In [9]:
# Mean measured time for each (method, n) pair.
df.groupby(['method','n']).mean()


Unnamed: 0_level_0,Unnamed: 1_level_0,time
method,n,Unnamed: 2_level_1
insertion_sort,1000,0.215581
insertion_sort,2000,0.863619
insertion_sort,3000,1.930357
insertion_sort,4000,3.487668
insertion_sort,5000,5.443871
merge_sort,1000,0.00567
merge_sort,2000,0.01156
merge_sort,3000,0.018059
merge_sort,4000,0.025665
merge_sort,5000,0.032219


In [10]:
# Variance of the measured times for each (method, n) pair.
df.groupby(['method','n']).var()

Unnamed: 0_level_0,Unnamed: 1_level_0,time
method,n,Unnamed: 2_level_1
insertion_sort,1000,0.0001348505
insertion_sort,2000,0.001152912
insertion_sort,3000,0.004467432
insertion_sort,4000,0.1215325
insertion_sort,5000,0.0963802
merge_sort,1000,7.301443e-07
merge_sort,2000,3.280403e-07
merge_sort,3000,1.001391e-06
merge_sort,4000,1.720958e-05
merge_sort,5000,6.736183e-06


In [11]:
import numpy as np
import statsmodels.formula.api as sm

def get_model(data, quadratic=True):
    """Fit an ordinary-least-squares model of ``time`` against a growth term.

    Parameters:
        data: DataFrame with the columns ``n`` and ``time``.
        quadratic: if True, regress time on n**2; otherwise regress time
            on n*log(n).

    Returns:
        The fitted result of ``sm.ols(...).fit()``.

    The regressor column is added to a copy made with ``assign``; the
    caller's frame is left untouched. Callers pass filtered slices of a
    larger frame, and writing a new column into such a slice is what
    triggers pandas' SettingWithCopyWarning.
    """
    if quadratic:
        formula = 'time ~ n_squared'
        design = data.assign(n_squared=data.n**2)
    else: # assume n log(n)
        formula = 'time ~ nlogn'
        design = data.assign(nlogn=data.n * np.log(data.n))
    return sm.ols(formula, data=design).fit()


In [12]:
# Fit both candidate growth models to the selection_sort timings:
# model1 uses the n**2 regressor, model2 the n*log(n) regressor.
is_selection = df.method == 'selection_sort'
model1 = get_model(df[is_selection], quadratic=True)
model2 = get_model(df[is_selection], quadratic=False)

In [13]:
# Regression summary for the quadratic (time ~ n_squared) fit.
model1.summary()

0,1,2,3
Dep. Variable:,time,R-squared:,0.993
Model:,OLS,Adj. R-squared:,0.993
Method:,Least Squares,F-statistic:,14060.0
Date:,"Thu, 29 Aug 2019",Prob (F-statistic):,1.19e-107
Time:,12:03:01,Log-Likelihood:,68.162
No. Observations:,100,AIC:,-132.3
Df Residuals:,98,BIC:,-127.1
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-0.0090,0.020,-0.448,0.655,-0.049,0.031
n_squared,1.695e-07,1.43e-09,118.579,0.000,1.67e-07,1.72e-07

0,1,2,3
Omnibus:,47.811,Durbin-Watson:,0.752
Prob(Omnibus):,0.0,Jarque-Bera (JB):,297.924
Skew:,1.334,Prob(JB):,2.0300000000000002e-65
Kurtosis:,11.024,Cond. No.,22600000.0


In [14]:
# Regression summary for the n*log(n) (time ~ nlogn) fit.
model2.summary()

0,1,2,3
Dep. Variable:,time,R-squared:,0.963
Model:,OLS,Adj. R-squared:,0.962
Method:,Least Squares,F-statistic:,2535.0
Date:,"Thu, 29 Aug 2019",Prob (F-statistic):,7.550000000000001e-72
Time:,12:03:01,Log-Likelihood:,-15.942
No. Observations:,100,AIC:,35.88
Df Residuals:,98,BIC:,41.09
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-0.9290,0.062,-14.914,0.000,-1.053,-0.805
nlogn,0.0001,2.27e-06,50.353,0.000,0.000,0.000

0,1,2,3
Omnibus:,7.947,Durbin-Watson:,0.165
Prob(Omnibus):,0.019,Jarque-Bera (JB):,8.426
Skew:,0.705,Prob(JB):,0.0148
Kurtosis:,2.808,Cond. No.,59700.0
