In [5]:
import pandas as pd

tdr = pd.read_csv('tourism_dynamic_routes_1345.csv')
tdr.head()



Unnamed: 0,Route_ID,User_ID,Sequence,Total_Duration,Total_Cost,Weather,Traffic_Level,Crowd_Density,Event_Impact,Optimal_Route_Preference,Satisfaction_Score,Age,Gender,Nationality,Travel_Companions,Budget_Category,Preferred_Theme,Preferred_Transport
0,1,23,42->32->48->20->29->9,466,1541,Sunny,Medium,Low,Festival,9->20->29->32->42->48,2,61,Other,USA,Family,Low,Relaxation,Taxi
1,2,124,7->44->43->50->30,151,3817,Rainy,High,High,Festival,7->30->43->44->50,4,18,Male,India,Group,High,Shopping,Bus
2,3,135,27->3->32->28->2->36->31,559,8087,Rainy,Low,High,,2->3->27->28->31->32->36,4,68,Male,USA,Group,Medium,Shopping,Walk
3,4,86,38->2->20->19->40->39->29,159,2218,Snowy,Medium,High,Festival,2->19->20->29->38->39->40,4,38,Female,USA,Friends,Low,Adventure,Car
4,5,137,27->24->3,466,1388,Snowy,Low,Medium,Holiday,3->24->27,2,35,Female,China,Solo,High,Cultural,Car


In [2]:
import numpy as np
def tourism_index(score, age, cost, duration):
    return (
        np.exp(np.sin(score) + np.cos(age)) * 
        ((score ** 3 + np.log1p(age) ** 2) / (age + 1))
    ) / (
        np.sqrt(cost + duration) + np.log1p(cost * duration) + 1
    )

def apply_tourism_index_python(tdr):
    return tdr.apply(lambda x: tourism_index(x["Satisfaction_Score"],
                                             x["Age"],
                                             x["Total_Cost"],
                                             x["Total_Duration"]), axis=1)

# тест часу
%timeit apply_tourism_index_python(tdr)



37.3 ms ± 174 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [3]:

import time

start_time_python = time.time()  # початок
result_python = apply_tourism_index_python(tdr)
end_time_python = time.time()    # кінець

print(f"Час виконання: {end_time_python - start_time_python:.6f} секунд")


Час виконання: 0.042079 секунд


In [9]:
%load_ext cython



The cython extension is already loaded. To reload it, use:
  %reload_ext cython


In [10]:
%%cython
import numpy as np
cimport numpy as np
from libc.math cimport exp, sin, cos, log, sqrt

cdef double tourism_index_c(double score, double age, double cost, double duration):
    return (
        exp(sin(score) + cos(age)) * 
        ((score ** 3 + (log(1 + age)) ** 2) / (age + 1))
    ) / (
        sqrt(cost + duration) + log(1 + cost * duration) + 1
    )

cpdef apply_tourism_index_cython(double[:] scores, double[:] ages, double[:] costs, double[:] durations):
    cdef Py_ssize_t n = scores.shape[0]
    cdef Py_ssize_t i
    cdef double[:] result = np.empty(n, dtype="float64")
    for i in range(n):
        result[i] = tourism_index_c(scores[i], ages[i], costs[i], durations[i])
    return np.asarray(result)




In [13]:
import time
import numpy as np


scores = tdr["Satisfaction_Score"].to_numpy(dtype=np.float64)
ages = tdr["Age"].to_numpy(dtype=np.float64)
costs = tdr["Total_Cost"].to_numpy(dtype=np.float64)
durations = tdr["Total_Duration"].to_numpy(dtype=np.float64)


start_time_cython = time.time()
result_cython = apply_tourism_index_cython(scores, ages, costs, durations)
end_time_cython = time.time()

print(f"Час виконання: {end_time_cython - start_time_cython:.5f} секунд")


Час виконання: 0.00027 секунд


In [3]:
import numba

@numba.jit(nopython=True)
def tourism_index_numba(score, age, cost, duration):
    return (
        np.exp(np.sin(score) + np.cos(age)) * 
        ((score ** 3 + np.log1p(age) ** 2) / (age + 1))
    ) / (
        np.sqrt(cost + duration) + np.log1p(cost * duration) + 1
    )

@numba.jit(nopython=True)
def apply_tourism_index_numba(scores, ages, costs, durations):
    n = len(scores)
    result = np.empty(n, dtype=np.float64)
    for i in range(n):
        result[i] = tourism_index_numba(scores[i], ages[i], costs[i], durations[i])
    return result




In [17]:
import time
import numpy as np

scores = tdr["Satisfaction_Score"].to_numpy(dtype=np.float64)
ages = tdr["Age"].to_numpy(dtype=np.float64)
costs = tdr["Total_Cost"].to_numpy(dtype=np.float64)
durations = tdr["Total_Duration"].to_numpy(dtype=np.float64)

start_time_numba = time.time()
result_numba = apply_tourism_index_numba(scores, ages, costs, durations)
end_time_numba = time.time()

print(f"Час виконання Numba: {end_time_numba - start_time_numba:.5f} секунд")


Час виконання Numba: 0.00016 секунд


In [18]:

times = {
    "Python": end_time_python - start_time_python,
    "Cython": end_time_cython - start_time_cython,
    "Numba": end_time_numba - start_time_numba
}


fastest = min(times, key=times.get)


print("Час виконання:")
for k, v in times.items():
    print(f"{k}: {v:.5f} секунд")

print(f"\nНайшвидша версія: {fastest} ({times[fastest]:.5f} секунд)")


Час виконання:
Python: 0.04208 секунд
Cython: 0.00027 секунд
Numba: 0.00016 секунд

Найшвидша версія: Numba (0.00016 секунд)


In [9]:
import numpy as np

In [10]:
%timeit
apply_tourism_index_numba(tdr["Satisfaction_Score"].to_numpy(),
  tdr["Age"].to_numpy(),
  tdr["Total_Cost"].to_numpy(),
  tdr["Total_Duration"].to_numpy()
)

TypingError: Failed in nopython mode pipeline (step: nopython frontend)
[1m[1mNameError: name 'np' is not defined[0m
[0m[1mDuring: Pass nopython_type_inference[0m