From 8f0c1cccc7df09e7f32d577d448b27ae0763f498 Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Thu, 4 Jan 2024 16:46:56 +0100 Subject: [PATCH 01/52] convert lib to nbdev --- .github/ISSUE_TEMPLATE/bug-report.yml | 62 - .github/ISSUE_TEMPLATE/config.yml | 5 - .../ISSUE_TEMPLATE/documentation-issue.yml | 26 - .github/ISSUE_TEMPLATE/feature-request.yml | 25 - .github/images/y_train.png | Bin 54277 -> 0 bytes .github/release-drafter.yml | 19 - .github/workflows/python-package.yml | 39 - .github/workflows/python-publish.yml | 31 - .gitignore | 158 +-- .pre-commit-config.yaml | 34 + .ruff.toml | 66 + LICENSE | 2 +- MANIFEST.in | 5 + README.md | 244 +++- nbs/.gitignore | 1 + nbs/00_utils.ipynb | 651 +++++++++ nbs/01_features.ipynb | 1228 +++++++++++++++++ nbs/02_tsfeatures_core.ipynb | 257 ++++ nbs/03_test_features.ipynb | 159 +++ nbs/04_m4_data.ipynb | 290 ++++ nbs/_quarto.yml | 20 + nbs/index.ipynb | 395 ++++++ nbs/nbdev.yml | 9 + nbs/styles.css | 37 + requirements.txt | 6 - settings.ini | 47 + setup.py | 28 - tsfeatures/__init__.py | 5 +- tsfeatures/_modidx.py | 41 + tsfeatures/compare_with_r.py | 71 - tsfeatures/features.py | 857 ++++++++++++ tsfeatures/m4_data.py | 160 ++- tsfeatures/metrics/__init__.py | 4 - tsfeatures/metrics/metrics.py | 345 ----- tsfeatures/tests/__init__.py | 1 - tsfeatures/tests/test_acf_features.py | 27 - tsfeatures/tests/test_arch_stat.py | 16 - tsfeatures/tests/test_holt_parameters.py | 18 - tsfeatures/tests/test_mutability.py | 31 - tsfeatures/tests/test_pacf_features.py | 10 - tsfeatures/tests/test_pipeline.py | 18 - tsfeatures/tests/test_small_ts.py | 36 - tsfeatures/tests/test_sparsity.py | 25 - tsfeatures/tests/test_statistics.py | 25 - tsfeatures/tsfeatures.py | 1067 ++------------ tsfeatures/tsfeatures_r.py | 155 --- tsfeatures/utils.py | 281 +++- 47 files changed, 4699 insertions(+), 2338 deletions(-) delete mode 100644 .github/ISSUE_TEMPLATE/bug-report.yml delete mode 100644 
.github/ISSUE_TEMPLATE/config.yml delete mode 100644 .github/ISSUE_TEMPLATE/documentation-issue.yml delete mode 100644 .github/ISSUE_TEMPLATE/feature-request.yml delete mode 100644 .github/images/y_train.png delete mode 100644 .github/release-drafter.yml delete mode 100644 .github/workflows/python-package.yml delete mode 100644 .github/workflows/python-publish.yml create mode 100644 .pre-commit-config.yaml create mode 100644 .ruff.toml create mode 100644 MANIFEST.in create mode 100644 nbs/.gitignore create mode 100644 nbs/00_utils.ipynb create mode 100644 nbs/01_features.ipynb create mode 100644 nbs/02_tsfeatures_core.ipynb create mode 100644 nbs/03_test_features.ipynb create mode 100644 nbs/04_m4_data.ipynb create mode 100644 nbs/_quarto.yml create mode 100644 nbs/index.ipynb create mode 100644 nbs/nbdev.yml create mode 100644 nbs/styles.css delete mode 100644 requirements.txt create mode 100644 settings.ini delete mode 100644 setup.py create mode 100644 tsfeatures/_modidx.py delete mode 100644 tsfeatures/compare_with_r.py create mode 100644 tsfeatures/features.py delete mode 100644 tsfeatures/metrics/__init__.py delete mode 100644 tsfeatures/metrics/metrics.py delete mode 100644 tsfeatures/tests/__init__.py delete mode 100644 tsfeatures/tests/test_acf_features.py delete mode 100644 tsfeatures/tests/test_arch_stat.py delete mode 100644 tsfeatures/tests/test_holt_parameters.py delete mode 100644 tsfeatures/tests/test_mutability.py delete mode 100644 tsfeatures/tests/test_pacf_features.py delete mode 100644 tsfeatures/tests/test_pipeline.py delete mode 100644 tsfeatures/tests/test_small_ts.py delete mode 100644 tsfeatures/tests/test_sparsity.py delete mode 100644 tsfeatures/tests/test_statistics.py delete mode 100644 tsfeatures/tsfeatures_r.py diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml deleted file mode 100644 index f54ec71..0000000 --- a/.github/ISSUE_TEMPLATE/bug-report.yml +++ /dev/null @@ -1,62 +0,0 @@ -name: Bug 
report -title: "[] " -description: Problems and issues with code of the library -labels: [bug] -body: - - type: markdown - attributes: - value: | - Thank you for reporting the problem! - Please make sure what you are reporting is a bug with reproducible steps. To ask questions - or share ideas, please post on our [Slack community](https://join.slack.com/t/nixtlacommunity/shared_invite/zt-1h77esh5y-iL1m8N0F7qV1HmH~0KYeAQ) instead. - - - type: textarea - attributes: - label: What happened + What you expected to happen - description: Describe 1. the bug 2. expected behavior 3. useful information (e.g., logs) - placeholder: > - Please provide the context in which the problem occurred and explain what happened. Further, - please also explain why you think the behaviour is erroneous. It is extremely helpful if you can - copy and paste the fragment of logs showing the exact error messages or wrong behaviour here. - - **NOTE**: please copy and paste texts instead of taking screenshots of them for easy future search. - validations: - required: true - - - type: textarea - attributes: - label: Versions / Dependencies - description: Please specify the versions of the library, Python, OS, and other libraries that are used. - placeholder: > - Please specify the versions of dependencies. - validations: - required: true - - - type: textarea - attributes: - label: Reproduction script - description: > - Please provide a reproducible script. Providing a narrow reproduction (minimal / no external dependencies) will - help us triage and address issues in the timely manner! - placeholder: > - Please provide a short code snippet (less than 50 lines if possible) that can be copy-pasted to - reproduce the issue. The snippet should have **no external library dependencies** - (i.e., use fake or mock data / environments). - - **NOTE**: If the code snippet cannot be run by itself, the issue will be marked as "needs-repro-script" - until the repro instruction is updated. 
- validations: - required: true - - - type: dropdown - attributes: - label: Issue Severity - description: | - How does this issue affect your experience as user? - multiple: false - options: - - "Low: It annoys or frustrates me." - - "Medium: It is a significant difficulty but I can work around it." - - "High: It blocks me from completing my task." - validations: - required: false diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml deleted file mode 100644 index 4a9684d..0000000 --- a/.github/ISSUE_TEMPLATE/config.yml +++ /dev/null @@ -1,5 +0,0 @@ -blank_issues_enabled: true -contact_links: - - name: Ask a question or get support - url: https://join.slack.com/t/nixtlacommunity/shared_invite/zt-1h77esh5y-iL1m8N0F7qV1HmH~0KYeAQ - about: Ask a question or request support for using a library of the nixtlaverse diff --git a/.github/ISSUE_TEMPLATE/documentation-issue.yml b/.github/ISSUE_TEMPLATE/documentation-issue.yml deleted file mode 100644 index 1015b68..0000000 --- a/.github/ISSUE_TEMPLATE/documentation-issue.yml +++ /dev/null @@ -1,26 +0,0 @@ -name: Documentation -title: "[] " -description: Report an issue with the library documentation -labels: [documentation] -body: - - type: markdown - attributes: - value: Thank you for helping us improve the library documentation! - - - type: textarea - attributes: - label: Description - description: | - Tell us about the change you'd like to see. For example, "I'd like to - see more examples of how to use `cross_validation`." - validations: - required: true - - - type: textarea - attributes: - label: Link - description: | - If the problem is related to an existing section, please add a link to - the section. 
- validations: - required: false diff --git a/.github/ISSUE_TEMPLATE/feature-request.yml b/.github/ISSUE_TEMPLATE/feature-request.yml deleted file mode 100644 index 71415a6..0000000 --- a/.github/ISSUE_TEMPLATE/feature-request.yml +++ /dev/null @@ -1,25 +0,0 @@ -name: Library feature request -description: Suggest an idea for a project -title: "[] " -labels: [enhancement, feature] -body: - - type: markdown - attributes: - value: | - Thank you for finding the time to propose a new feature! - We really appreciate the community efforts to improve the nixtlaverse. - - - type: textarea - attributes: - label: Description - description: A short description of your feature - - - type: textarea - attributes: - label: Use case - description: > - Describe the use case of your feature request. It will help us understand and - prioritize the feature request. - placeholder: > - Rather than telling us how you might implement this feature, try to take a - step back and describe what you are trying to achieve. 
diff --git a/.github/images/y_train.png b/.github/images/y_train.png deleted file mode 100644 index 18e0d3c1153469bb4de597ea0cdbe2b018b929db..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 54277 zcmdqIQ+Opo|EQaZZQIU76KAsGWMbQ#*v5)8v2EM-#I|kQPIl(|_TJCAJQwHrUz~N* zPj?r(R##X3>g~5fwE=e!{G|l_ z_49-_`t!Aq<0zu$sAyy2=%R0L3}R|+V`a?XU}$e_Z0+#d#_UW(*f8^5mU0-pKfZ{hP2XR}ru z?oQ|J>UdPV>sVAZ=vZ_tu7m++e@F`vBSV7&EIztct@!KqicnCo#r5=veY>;-eQ%xd z=IqcbuuBekn4+UoTBtik+mlDbkyTUHIbxM{8;cVBfVg}zWc8H{frWN^rgMqlopz>S zzWiTBa)bAtvuCTn48~^Ut`^?mWsI=Im45aD;Ex|t$&jHx`djMS7CU34 z&OO>9ntx&kQ!zO`oi_>+Bfn`OTa*4P>WLN-(Y`QnO>GLlZeV6FQsEI|lDr9Ga)h38 zf#LT?R!EY;lyvY=ouPF!?P zX;Y|e{dCXTIA(MLqr*@#S7c1r+^Fqd0J=(t>9tpm`W53Jzx@v7d@1bX(j+n*4p#s4 zI9S>Y8`SxpkHthL!yfD&AdcXW7fHaIJ^3!`tXe}Ibnpr@*OU5Aef32bm`(j-%xEa@ zTE0Rwq0S~1dF#EImGEm@NUQk${M0jAcr!)gN3$r&+hl zpJv>vx{?EH4H1M#!$DC=(VQGurxEa_OEL#>_fE`3rj&o~1G6Y#3oK&AS9bV{YxSQw zgTny{25ASEKTnpm6bg-_ZEWUtb?fzM8ue-T{(khrE#HXJ>YuAs0UNQ$`l67?ESzX) z^$E4pySwz)CeRV3OF@+;A+ty9vFpTuHY&GC4eY)sS*m`t7bT%NwW1Pz8x=Rod%3>lrs5hP_Tj$GqMILJ8B=m6Bw5keEK+sgw}3MfsBj znvGWcV)Tv#k(e@(VKS`Rmt*WZOX1j7nSH4_d_+#AayK68WHixm&ru{RzZ<*nVrP{= zY`_xxOt z55RUNYp$l;mq>Y;CK>hhr3p(*>doOjtfAH*R=Qg8`8l#N9DO zOfdiO=Z~$R#eAJ)_qk6+Wv?UQ^LZuRD#Cg3fN~t`2Be%4S6#UJY9)!(Qjki>3gYrMdiB zc8C{`<01bL56h#7iGG#l5Q^CW6h88Rm}^CG&6<$+{>BFf4_IsBpXg$q>oXINf%wlW z_ieS&J+0j~m)JXm=0v|Pwo(LK@+slu6h0SXZ;cde8XMY)h9q zzwi8gaH1E-@BP!~gwDk~h*giG0DiqY_F_|%(DSAMVwr`tY5Og8pbu?YX10#~l)bWK zafP-dZFbp89v4D3E<~e0xF#H}dE+Rx)SVE*DdE_J-zAk4rR4;BHS_nYNcurc^g5ki z>5uzu9pz^FiHv8r>0h~Z;LVV*ihyzf)E~9+Xu6uz z@1+yk`8(7bed&&9VytLKM;f0FJ-mFG)g1KkrF!r6AfuOlHRxGWZ3IG0c6Rz4lDYFw zysWlF%5ysAXP|a+&t6a$wwG49su^^B>e2-b*L4I+D(-f(;uot@1f00%Itr5-HEfvB zJ~EV29L|R&OA{NBA{sb`m+nc)?hBy}@?ff64fX?Z?|wjicl|T%N6mf{tCNKqZ%sD` z)hMGrUT)t#in8U}Q(_9>cM`@tZv=^FMLr0=iwCs&*v@Nmco&4r2~342ClnDXZJY1p znNPmwkH3lf`3H_v0UwT@>T7yMk?!0{Ym7WoWxwGK#W(R}5Bs*?5%~W8UdYVsU|N6) zG0%CDy>QTl5_`aJsWIOK5n}2EDtFZ_zFeb(u4%K&z;R9H$0TNUm#R8YztjY}-NXp! 
z>)(foZDQHy3-A%(yS<91c|B1+QFS9rErH$dIt?Da;yJHKptL}BSi&`6Mc=N_CgvAJ z-)P7Sbw`H>P*5_pTT4&QN@+|DV27k&M_7U=(Lva>3)O%UqhR7KRvy+pC)f97Lj{u_ zVjS_P(>kctR`Yau89>M8C1+IaXRlcW)ZG(Uv`P4POL?ZJl(S(f@zZDxrU$|fMbIZp@Y6j#(C`4&C)R1cC4G}-S>>a7yEw*UhdyAo8pAK^@UHH#y%Dqt z&+3+MN&>kVqq?@iIhVz76QeVxp#Va?;j;_Q!l)i1p1@_iVJC6%X07ZC0(UB?z|~r6 zIgsNY4(TOQZbMR4mDvj`)%asOLlldO7jR_dVsXK6E7)#Eyvze&dmqILsk+&^XWa2m zVyiE#bwN+%tOLzTg%|V&vx~IZ2Jdc99 z*AuChpReyGVj8$AO}^wK;cLfEW_!gau!)77A`0;7USVmN-5n5!F`n1%1e zgA|aZuhcx$DM>nwEQWU97w9T8umaO?$r+!;>MYl_#?Bj{WFz&rV}HwhpI9_y0$|1@ z=;W{3Frhx5XzgpImkn&RmQRYxk-> znGlu%(L!1#2!X6IUFrjspV|Y|%cFu!diY(L?TZQ_cm{$Ou#Rk~Z*ri=DQ@N7Kd_$% zT#y=yRq+PcezvCizp~dcF~?RDlhuMNsN&52h2F{ZPH6Yi6hGSRY^#ypU958$72NU0 z39l*3Uj^_+23GCYaM?k3SWBJGQ$E0bD9411E%i4ZqW(Fu5su|QF(W;jHqv?D)Xx7X zD%t2bdMoL!{4tx)fWLN23zzkWyu1shr7S-Se`;3zDq0*Bvwftym*g zjj~p^XfP0<-5KEteodh@W%Ft`4#O3zGUnlwPNJ_~np=^`{trFK!?m&!YR+xvU-7*Q zpA6mWqKWNaudCloD353MJ3L2p01`-0Uw+%wm?P`>m~`0EZgJZo+Z|H|awo?@ugiDF zHXr)0te~4Ha6@0|h>;EctrS52lZlxB&wrx)|9va~`|E@+$p70@AiY#sp7w24|B4gf zxz+u=*BuWEOZiWUeJBh6sf-ou-!5_AzqJH7H1oft-vr^mYT@@m{{Q$C2)1?8HvquW zh$2#7xn_l2_=J`WPx|iE#$;%S6LmmD@8m=%HZ(B308Y*6Z|Dj~N;}dlOxwAQN`GQk zMkk59}tXAMSgK3HS zX!%Sto0DW*L+7am`k#$b8_!SV`87WFgT3W-C!p{qf{u0*Fzb6r+pLq9udg z9KY#Jsih-u6TSp~HCe_QnSN^KqcvPg`I(fEKe5O2i%d`fa@G+f6=PX``}~wC zZh`5%Rs}Dx;yV7S;$w{xj`TEJ&CN<~2FR#OnlO*}9PiGme+W z`73Zk%dK(mSy_M0Rr#IwDVP)XrpNaSAnqTYPphVi( zYtPpON4ly+1qK{-vGXX$jW5VqE6OoA_8VPr0H#Jj*6x8;^O9U0<>E$7YHo`hYCw{7 zRS@(2@_havghxet7Gn==`Nsn-!UPbi(rLzNEy03n=&1403!*tfD+tmyj&(Ab!* z*Wj5b^9j$d70@EY86b0_a?EJVI#92DQEDmS09Co#%c@_%bPZ0nVvNGd1oGKb`N*97 zsWWG2!rXEb!)|Mp6wy=e?+L-z2(cOq&%=2mMGALQUJQw&I3!2wLwzH{jum1nS?G7z5dyrk6e9p0;&XYp(wonT2=-kY!g)TM^I)p%sC^o`5v>~kGG z5x+WTq+6I5eX^KP&ZAq#i8JPIZPAjlT4%65MG; zsn+b}DUQ@qhmJ&{BGW)(aj`jxMIqg!wsbNPwnUf3e0yx)dAtQi zemLyJ;)=IkPb=ZBublP;&O1Z|(FlxMuO?CK0MSX^IU5dhNo_aj7435h(F$kR4<-g; zVtzd2HYFZQhtXrCtlxCIkQU#*ebbneq~eYDbt!$ETnxI0n-YC{s8-Q*xhC3Q85o58 
z{gq;+dQa-SZ*z_G?JA`4hWTCH-}G{}UH$Z0y``?9`~B+z|Bzb^maM$^bY)FHQ@)g$frJA8{VgV}p*JE^ zRkKw-jft8c?ubKBQ++O$!?sLomJi4*QH2#Xw)mj7HgdnhWwlAF`s(Uh^Dw-*o|iGD z(t5JFgvB>K(b5ojGO1#Y1_|~18(40ALIJ|<%#C|l(yEUSh}BiD>U)SJuk)b)Qph|VhRbipqJYqX#X{Bp5Ya8{p8{Wvw z{;&ONY44T6;=t9SaU$Goh&#M8tGB|Bn?uHCVN$28g&!&v&QDFP{E_cMEm1+gE5>vy z8V`S|daa$5@afDVPA-ZV^+{5BHtnlCzv`>DL`)@mALQJ=zUo_jFlkFAd8+_h9;Q6C zmkbuo_X$@7o`Y)KTP6W(*=w3wHf+*7O-_F7|2oRG0batWE{oGP0lVhv;dYJbhIR>q z7lq=g%gEx$GD{!m4MCWc+DKV`RH8sbY@0D45^{STMsj)JbrlQMt|^J!8f@KvN3DY@ zYCdj^?>TvX7IUNu#7hi?b*dNPefJMi+?D||gzn1@tRjv3Inv&WG3h}*hwXTAM^Icl zal%}Eim?0Dw&c4%vlD1KB7P`R6zEfDw4Ve&p~xFHQru%Eb6zklJ`t_$y_ z2j)*mQEaOcnk%TX`d=jcEs^sgW2L2ng_DnF8XPcu*aGJO;0h&!k6e?b+d8l|)^C7P ztD-CMUu>|K(Jb_eRcn2lZBFU)xI%=Ck~Lk5;y+m?ST?l!f#O&N-;XGh)hJls@_Z*_dOfqM zlJ8sc_5q{lPfSA{R6@hNv8$t048erW{Iaw$XERz;OdN>Nb7ohg+w=C9IR7!UT8+LK zoQw;@ysA`>X=Z)YKfkm4?i5@^d0(xPzZwY-MkA?92mC%^Ym)ZimYulS4zZ-N1cF@3 zOWF~;HsvYbWTRnL;GUub-ecZ)z-<4{3<6$sj3nZAduM|r*_gUR>DTci@-ePYee(ur zLny4c!%15H0r6vl6#Zg^FyLohx8Lo!d`){3Q5fva5H_io<>tI|+ehzFRFNGUf7OQf z%kNt;D0Yx9kYN2%-1v~&MDu5aZA$DK%X2c&rF~zP)Ib5WJ2uq#k=q{6N?1SrFug1YONV7}u}Q7PkozRLE>Ng5U$npR z;(9+SRm-OmEjXKkw_auyVumZVyzxBo3M98c(CXgk?M zd`^eD;Hep+A6ww0RP|T0;#y23nGyYHcTmhx(IKilhed0tTp4IaE15^eFDJ1J#Oz$! 
zrkPCi_%hmT`_bVrPu!U045g|+n_Mfl51!+g5TdBOrLaZk zMUSCc_RdDO{|;i}2kDUx^r0+&BH{$Rxdwz&VMS}w=*wu0kH)Q~pUNa;orq41-*v{G zYQH;5=ACJvFXqc6yfEMIXLvpx!j^WHW$zuaCrbYy)2)f^@=fjx2JXP`Jd zx9ID#3zvPgHXMDqw_Ls_8GN88&n*f)_g8L@K5RC#uMqgtogtF}#5r2HZ}fbaoxlzO zI^)=RQN+ioONHPL_vNE2#8R@{00yh~WGM^zn3Zf+WA{2Dvc@S6zk2gv9Mj5Tq-6h# z3@%#Mif|0B)Y>SII?*CaSMwg~iN&}Red*}_?C|&t)IqJK4mJz>&FmoaPFE2VnK7i| zKoBb_P1uUvu<#>z^wC;@2xSXkspzeQii3F3ddYMzi|~&jQvVm&Yb>ztsUE3CKI2A!%?b2dvF>%t{7v*RtLJ%G=*pc(o3LJu-RcR*5> zV(O<2X&OjemWyimef0$rM??r!57oG%Ts>tVS4si3J$A`yQ}q=Ik|t;8xPgQOVxwqA z$B!Eb#7^vxH=t8m{OE?ZT4`=ou=vkb!}jSN@*V}}7_8*}0fJYW#`NqXF`PIzPKU(_ zqm$owlD9rG9ZV14ax~&ZgK!aDt?b~eYz~^ejOCiKfTDzQEOv$bA>pp}jQQQu%b54^ zKHd%pwB6(<^KWGf2dFI+zA9eXUSUgoLwYjqh>2yQq)C?B^F=+W+^*y)7pU0_xvmZs zTozxG6DZ62zNUq2Ezp)#hB76YW*VGmtmj1;r9ici9hk~bm2|FZ1j%>g_HFsDOxzEl zK&^9@gCTjv)snhz3sHvNlq-NqR+I5lnJI{w%@9wK6&I=)zob6r(&YG%qBh}; zOhw5*L_V+A(>2-O?@suj-6}AfQ>E*J1x7$eW8q%I{JgQyz*hY--sApSEEoMAif7)Z z$W3IOwC;{50xN~WFHcdOC}ZWv%&{P?a&{Bo5VliM z09RxTdvd+qZI4>2O1&Br%T7?5xX_=fxY0!+`Nop3@w-W3XJd--w2p}lBw8UKs+MDd zA`mG^r%9g&Nt{yZ z8QNa;Har`Mc%M!AmHw+C+^tK4%A+z~K3?Y?rFe!@o3gS?jbk=8=S>omp&4HzVNAYh zr-Xf-$C)(zkI}0Juu)YN>~sBqcPp9#SiK*t`Iij1hf4C_OT9fIH}cYd?Iwqh{c#5ONLe*>cZW_W z^2r%U7B_y(+1XNwd4nH+VCQzwpOxxnWNSej^VxX3uV+3?7+_j9{r$U_<`Unz1qhXY zl*~qV*aAa>^8j2N{w;zPHxYeTegC8K%n}w}kCWf7GB?-ZRS=)PUQi54#=J|aakrtF z4)}W6fSR2-a8q%p8E-RspAu7pHK0+#vuU;RA(w{}c8GDzczp!F+`3ao z?E2r>&(ECv2k2h9NFcZ{B7x4&0^zoPT1C~IrEGC)C$2x{Me)862;!rMS}11tgBrcP zH9(qb-8oW=-gtH=0{veAHwPKqJ6tPa=Kby2roFFq*P7J6U*ckhVo%BMZA&UvtnqKv zdg>bvS%`=Dry#fKIi=U!8u2*jLPyS|j6YF?%eVqJC>|HxqWsr*@ZBwB4$AYir65Kz z#kmVSfs0<<8GJQDM!Daa*dNPTJ;wr@73dJ=UzYuQhbVqa0_8MQU8FneqNB-g|Gb~x zZ4?%gYLIf1G+6O=frBYw&OggA@=a7F2%Zkl+rV?f4-HqZjN??{_}z_ zEe;If?^4~R6P(^!hG9a$xQ#Sd=2S z3ljkx-+c&xyL1Gd*F&6&V3YtstE_dUrQjYOnz(@gLj7Mep!QwLIi^e~U9i1fP+Da* z_O;FEX`nY(ftEnBgz89pW$%{Im=N`)Xy!ohHimYNg4<$8XFl!Ejd>e~p-!Z4papSIKLd zb>HNDI63ysR@FY6JSf(;XI#H2WP7B;9;3C*fTu?EX!9Y!vii@%ejklgtbvk5| 
zqAf%g4TiG$ul_M;k~_J;Wfm8=XBH8+YJTp;p9?q^@^LsIdnu zPOc8S#Pj#pQ2}!u&y)&}zSydkYBKCiT(Y^Z{U&~C#AqosVg5oa%gwA=JE01p>8Jyp zD1AS<16zJE6Gp~GLu=NDwQotWxEw0Jt;2*?N)CMpg-E9NtrhKfar@;;-?+HIm)-h> zY{Usě?#n@=DFFdZqk$Ylj$z062%ZM%GExB^1;P2EoGbT+6BLWB(&2vE7)f>N| zmGVWZ(Il1kOPugXs_l{%DXZFtxe@nO4&VFNG2zaHUf-xtn$HVx5t-fC|}%}If^O{|NNcvRa0c_KlG7!;lG>z2{8G; zNhe59nQGOBHQqn?IW^&UJX5f2%gp+Zi2D8)h5nD=(K90e&R{Cz93_bYR*JQ^7j&x) zaSqbp%~Ia>6UXNxwPQT2bJr8R1z)jXhbea(;&;-!#x8Y?S1~-b7Xx+*HdL}iuc5>c z?+u$S^TVap<+G0?R+-QKZsVc=W~TVyJStYN73ep-y`rFX*1_uBVly0e0a#=Cjqim} zt=fvToOcUpXAHa|{ajGAr~`Z?@ZOw>x}PSXIsYhWVCZA5+wEU@iB2xr2v~0~L2Q;2 z=0YlI;`-nuwW5dEj$33gS!LT!QIV7B;OJ^<%CTgbGmsXxpn4V{Y(lW}?Lv26e7x#* zGwt*_mM=UGTjDV*WmB!x3WzC!@#_Zp>lCw$w?pPAt#}tSxzQ0crPQ^K=*hF;*+ysD zW7tUXi-5272RI@u)yZ{HGU0L59?i_WMC65Q*t(cx@lzNVlQH`N)9ou$Ovhlg$(+BQd$t|e$Ntl>ui2P8=y-a zm!&?DyF@&hfK^PhJaWruxxd7vI_OIEr+Z!ku|jh!v>g_m>aTQP_5rqq0#=-MrO%#N zHWxG*A}xdRTrsg2y}|3ialjgN0B&R_XVzKhLAYGCW@+&;`b4h(YRZOKSEst=^y^@`hlAO~oE8~>n@dnSeAl5jMT(DPU4!F6$KNU>^)R*Z{L75(}1u9B~L|v^Jti?wm=3}A-u*CNs9863_<4>9cvOWzv%Uo z?9JlCrH{d3@Rp{sMtY8h0E9XYxtX=6gVKypR<`1Ilb9rqsA$Z_>$LX*RuV`8nG9_H zqaCob)Pqf?r4l`WUA&8nQJl6~p!pS<=k}>(3g(PBvN^TeIZoyY-DVfa2n}oDHvtR&BQPi^ynum-6CO}>Okfv za^recKbVfPjXc6rbf1jhUA{bxVHa^N=gRTZWX;CLF>&?QtTV(W{7P$s*26GJ{Y zqGq74O&LFznTDQ%W%V`+wJl}g#0rCA?ZhZ-)rY6OjF~JkS061+r=iJQ)(OM# zww`v7iQd_wVd$J=9%qaMtq%@bZU3wXvJnR|c`ZBK3xZ5}fQP9_Q7QSAKGr@T#YTy0 zagPK62y+iM^O89>DZoLf9&Rc{0tw}LcQ@o)GubX(qELQ=6dX}gghox;tJJz2%A*_Q z9<;%H<4+={2KXfea%4 z(N{RRivrlH%Cavn=M$%S)7|))*W+{C{v;*zn5)}In)xaXU98;Rc8krSa~&vT1k?-u zzCEU?!UzsB^HZMp^>y%yqd2~!qs@D7qkF$F(b`x<2SZ_r-un^2fCZ|HY+yS`ipG75 z;mskjKzE6EsW?mRIW)#_w$2h|t1A{s;De7m8ph*XP_+kyS@b1a@lG= z>&*k1@EWM;0$C@dd_Y0^I3(Y5l%ZLhS;Ax}8T@XhxrcvYUIw%0GW? 
zsKgHQC{=dj^8CXX_$^;fH2h|2SUe}301Z@zC(BD^;YWLi?F;KoiQAa_4Gksytz2Ad z0MivAOnOpDNy+Jg@(AB6JVZ!zvRagNd^EFsfJ8PrQNh@Lq;hDKyfa3APK~ge?XkGf z;LQ*a1o$3qh`HDwNzY*lvCMc`7>ZoTn7(FR_cdm?{O}HcccTA*l#}u7E4nb(gC==^ zu?f>?LgF$2t$Fu!Isj3{_#&|+v*U(3Z!qmec-eCD@=mvRox>*5Iv6uP!}R7q5)wi4 zl6f5nZ!@9qr@n!GUe@wyoEh~kjdR@2vlf-e37l|Djna#g8p%PU(qdq2W`lAv=4mN+ zPa!N%-hvi8+8_&L@K=;>;pt*xm>GXme&nyt{Tetjj^SaF)3tT-R193_P_!oC3Rp6$ zOHN|%uCrNF_uu_IHa=2S4Vs{(rA23QG~DtbMvC$A5QI5~)T+!qOZP6x-T6T~W%`$Y zvQj_@8cc}XmbPxQWheg_=*Vb*Ecfbfnkjm_fUb z_K)5wBvHmkY~Xg$WbF5^$KtDrEopqE%HQ2?#Y)^et9Wd+u6|-|T*r1 zqM05Z9v~@R-g=;m)4R4#7I;e6;tkttI=~ACp)*stdg_LnPz2#G9J!x&KjHqC>^zlK zMV;JZVbO{s@eb3=eor%zlVHDki-c9; zN>`_;;p1o>4dtslK0sxY71uVpVDWKZ-bUpG3JTs$>>KIBWNw&C+rePa+G-12+nG$^ z>)b9RTgvWclb&sd*5p(L=RUA5vY&yP~mPW=*n zXjPdfP=}OzKCT6&)^awMxH$0Qrgoa{wf^G*m7r<7p|Sx$^4&FMZp>Kk{?{yc7ZRL1 zYf+~U$FvADD0eYt{djnDh-DlLYCD%e?FyqyI6BuyPcM#BZFqFPmFMruL%b~e%zX7;_pY(j`8LLD9Nt$Vrd=K2R9sp%61=P2$#h-VcI`XL>ga|~D>>#-_MPvsZ924r zRw&YS0o$)Ktqbdd zJ2Z{<=X^F4T;mV2KTpPJf|51}yy(kQQ_Al%bv5Y-9 zKk&?(_^1rdqPToUaXc+qRXY5BbNBncwv&+bz3r@VixGv}frkgIcDeP`OQMq|Z?9_o zh?O{9y)4tnK0gEF@>@pu(>;hBS0ZyvLVtuwa0X~fyP4#F=_iN~a@=Qi*k5g%Xs_ac zS{sSt`ajRSd0V0;LYB8Jd_OVT>k-*`tBPFk*MEgy zaK!2brxn#5;J)JiT;=x_bk8<1adp`Fj7*Q*`Uq!vbZ`M_pO5lrmHYunQLy*+Q9Nw; zqpq>_?X=M0#?M5$MuOQ`5G|G2Z+e3pzvrlFUan@Ih`geyg*t2rp|;F)QIuKKehB6D45-+5b+k2^;&N$k%}eM2L`*Njh|;x>s0s4!Rv3GcQ0>G zhb02I-e2AunekgfLY8RcV`Pd278#l;$D2aiQY}Cw@tyZ(X9q|%&@P;oYRJD=TQ^R! 
z?;|IEDQ&<~|HJcFRkL6d@F*J51#RB5K0Y2R%HZ=6g?{CZWfqE!L-c{lh@XSHYTMBb zLpbP1$G#K}6(g88*gb@vt<`&jLuWfEPmVN?b%S51jS>?$D$C!+%Nd)BwRp4_VmWut zbP|F+WMsG@7kV~)kA0lR#4_SwR*!){t0z@ffHQpUruh10$z_jDv)|)TZCvi($Q{t5 zT-BG9M`ceu-u*ky?_E4rJS5#r#3104Hy{ZprVjN*lPTmRTt9nL3%9WluRly-#YqtO zWE3Ro)N7~%>lYCY0qk@`_I+}Qav>2BaTPe>p<LsxRMK;<$h2O{v%vE<;*EqZeF)k z4rCZGb*cmW*)3GA4UA816HIBapRR!W1|q=AzK`+-4?C#iky&xkChV*j=`z1<*M_1Tas{hW$9vTOm zO!-DN`X+Hz^|uohu7o!@;#HNd?FmZ112BDK2Mtj(u+H!7nU2$amkd84)dAycDeNf= z?fdiO7WXENffwt%_U27zrjTLY6y$5U8uZj#Ksz#W{vzOYt7|G5>w+~}a~KqNlj)tj z-W|BH2v5t3mQvpd-w*~Md4EeKunV48>&X?@(H6^7yyayX-_oIRUqy3gJ0eWWniz@=0Qrf%$1T?rM88nM*~?>P=Cbt483ukE zh(Z*<;0sPppHvw!Qw`@O$GkTHm)>D$3 zgNRbEynES5MQlY^W?zsZa_R$On~6L24(F&ED__hRZ3;{kR^8jSN_-ZSwX@tmpCN+! zkdE5HB?6l~-&$0V6cX*j7Cq`+7F|H+B`@o{vbYqGWaVT%e`76LX}{Lu zw1S)i7$m$NNmRK~!a2Y8?HlUNrv-b7=uo)$<^+#_-SW$WyqD>e6HJ#!Pe??{Sh<-z zIGb58YL40JGDh-v+9iHLpN43NVyhR-*TK9!kr(oWw%ri%nfR%cTj1~`^P+Zme2uUx}CZm=7wXF9qO7a>7 zpFjQOYowBy(}wj4PZX2nOpH5(h9WtJ`0i*h5>ZRJpHZIp(W8E{hpF!hr6vlrC8U)C zp-rP(%D`OW{;N>F!6S>Q!F|EOt-}9;!T%Xb_?A{^w!@eK0}k3fiT^*3C;$IsZx)+= z8JLZ6WI)wL;zzWXQ;!_T1{4u0$igcF^)9+i}H`S%v4EFV2N_$+Lwn} z;Fe-UNS{AvAS5r)%9AY6h2NilyfG^NCHU7lQN02HzVnqnmZVR<+kNDPvAin*gKPDr zI>k$ohtUnE(ESA}mZ72Kk26{K!nBGVKTl28g4h{8X4Hefh-7V7ik_jivb=m1t>5NiniP+cyrkratP(|eZ1*NcpPXO#R- zyYh{O%%`?59I56~$ZTlL$BywOq58i#AEtJs&(y(+caxWI_MaRj(!}lV!pF|oe`!`F zMIl@KXt5d*_dIl=uxkXvwFIllxN}UI_vh%SNe8>Oe)#AE*=;B z?>P%0ie)VZBy619Shx&VFb4z{;;`tTaSVRMV1U>LYW*h;jUxA1@)W zCiS=~;0*ix&MhTjET#%IR2k7jRj8OvJ!nGF_Bu`cE)(UfR65q8zht+u2N9I|{5@;U zv;}a2F)oJN#V^Ot=i5qv)nO{+XPyd=YQ30d8h!x+H1rhc3&JdFQ-Z}}>Y7@G*GI6h zmD-oNsi&6Pc!PA?fbvhH97D)?bWba?+3Nf-DGA(*J#eKl}sWHy`jwqv)kY=@9`bd0!`;xxXhW6 zsp$>D)m}_Uvpi)bK;WCkUmt3#k2ce_krox6nq}odLY;Cig)%4N#rjFz@R@Tjqt3>` zorf!n!+NhUt@GXO{uL^qOO&&q=5Z;|Q1?lg=WqeD6rfomja`1|U|gcPU0un; z_BXp`Wk%=_H(t{%@|55qY-SWfKRJ9ces=aOKjyHwlc=*gVsGp&>WH{R40Bas@$Cl=ra(s{3crd2(r2Z(n+%_#VT}V{fHBUJk?i zSTEDoWw3S+hm_`v`uqNh&}~Eam(k0Arctpb#6!Kwq{8_4Z0#SQG9PBH+T!XVHouZd 
zl?M0t*UEa<;M;Sg2qpc#@{HJMw#M*!Es>c2)kQ#o1iU+^#6nq)slFEuj$f};jNv_(=#a@@vWw!8;inHxVP zg;^8S<8GVp_ecYCFt*&b)O@_KTd#t!wBOv&uMK%Q!Q2OsSrs6^K2w%0uMA_C0V*Si z!xIr4cLx%qaq&UseJc+PR7(r+5cNK4I$LRouwo?FyFvbb&(?DDh*mo5G;Nl1rHF?u zB@Dpb-p)w*heq&X3*m&fXz zm97c;kKmXmvun#2PN>zSXLL?f@nVN4+_65{mWly4VQ*^Rr$pgqg|IK5zm3eJJc82i zA|Yp~`*>F6&%@l+8fQko&RQEIovFBJq@Uc``Y=T}4Kj<77{sla-DA<206>LPq>Q1xogvHD@2h?On+EJJe1!EUhKhJ+oK5>rfK?*A`i7v^Y@T@1&X_ zOm*e{AD$<=zj>SXr~cibX58icsw|P+=h0tCz3Af&X`=ZBojZ02`4-5R-*G0m>XD6vL50-e@^iP9g#Sx zmepv390d-q4y{au>2y%ou2jDt&BkdcGuq;lJSswAj|oS04uc#Z{zuKDLMzytk#AX&1SSyn%#((8&1ImSCx*11?PMvj-nbMqmJBK7(YPfHK!}spotvt$M)l2ad_ue_-_=15!edjwEKf_3d z90Y;bkXoWY0@<|eBt@?Ef}?Vd3Iw;4*aLUmN#q&*+pT=)Z(`;2;j0~WBmL)TldmZa@u(MT)ej>%p|%gY}=0ac>W*b-a zrZQ%O>fP(8=s;FGdcVmdm;eM$1=Yue1m0=CA-gfN!t_pHON z3|nS~6OG3om*mT2^&EX(k|chZQqK|+U9>b|^nd>RSnN3ah=T;9WV!Ed-0Itd7)!Qp=rVPyNnEdNXP2Nu2oU>H^q^UCoc+OQ`ee!I%X^fF6E z&hCx+B#I3C;Ru&~JX2!xjm8p6(Er_~U&;IAcoD(tH))MkXbPePi-Mc8DNQQwc>5SV z=10#UoX)1ntSrB_ANJ(8O=AWt{`2KyQ117Gu24P`F~(8VtTlcyYJl4yCvRcXxLP?i%do`_4Z5 z?lZ3XnF) z#hD#&o{oMy(@z;Q=zpX8nRZE51LR2lsZ)7Q)twhUDhClEcq}@k=S$U{UufcC6+S;?}QKxiu2 zR{;_$ZO-}ogKiPKEUFd!Ssto2U?h1ci1+QCZu{*Mi&wFny8d628kk%F{ykXq0Gi5P z0#FDJR?YFv|8<=AqqqaiXM^!FH-nrX5=P^gFO=)cnD11Taf~m5Ksh((Yr;Pgiu3Bq z#h$bLV_2NLZn9%+baHSeA6w3z4FOItUBCNWPjVV+R}2%+=Ip{926*^5SX#7LUM_YNPOkrm_#)jC zre`X2;D3~sBR_!T%>XaCAUtVfaFRE`=6|22)A$QkeLNp0HGvO6>vP7~#D~&8ntvhs zw9JF?g0yw?kGToIPyKcP_5rNI%XkWvdE3^5H@Wgc*!e;WbpXwTp)@a9ba5HQ(8(B`M}cL&3zAl(L!VliA2Gyn)BSojxAMW0?#t7H%2OVxr zR3bWw{!VgVzl>7mz%JlhVxQ`@tYA9EAjsbGIou}B;uZw$iqxmeF~Ljf>yAt$@{K5z zu$^q5Bf-v}Zqz1Y$eE4J$c?p?*u`m?-E!oB(?6&|SmpuR{35eNiT695`bAmDYC-zl zUyc3p;Qik_<7%fuRVKcs$jrhU3r{xqln(Z$^BFOw{u_1|E=>`Y+AC)-RoaNn9E6KTy;5HLNn?L4YUryj`3*C^s zUVr{)WRgRkH+9jwP%t*pGpD}IUNqqXhk)*>kJRhDG9vH+$sHvQDb!!pdFRC1H8_50 zbFbE>@trA!JgOiqz6PC@KUbLPPS{*AoikLDO?uh^mGYW;AfTc7(85JPCh%{>Ks@KL zz_UAS>%(!z0%AFHFt|Rw{Z0KQJX+pr04t%MPJll9%@!BE$six>3Y&~0fV!FLqHn6( 
zmO?pI=F6OoXCpwL&jp$4!;06$OVLa{{`pzF>}o=9hB-Yz{XzjWO!+KGU%EjwTuVZq zRa2Qaliy2J$1t`$e*o`X^5{%nkXT7!ntBu9-({vZ(ye#H@}XqWg%NB2s7y;Cv!lLbok58#^I zRuEx23guVsN?L3tKw&hkTdr`Kp5_kXWzx`C`;+-Qk&u(%4$jf;zIaVkMn{jOAUzK-TgZ!@Cd={iJBBwI!z4`q5DxQj zw%6eCnb)4NsISPdoj(NuRe1R2XxEp4Iv=gW67>n4e>q)a=n9yXx0exQjXx2xH<%{u zW1N|iXX!WbbFiMradPm~&qFx;tv>6xMy8ad6c_$`;w5m9mD3F0#1|$z}z^|*HliXmacj%FWNvqM$UFC1PwpF7`>-$Q{XwR z0=9v=_ghYPFlIWRkI-yJjG$YouI_AUl|#R^T2T^QYf4)Ipi^^cZM?Ij$n0wf+P48D z$ioj7v~QmRKf;iPp#8Tt1M;h@&&^9jKaqaN`HYB&*sP}w_w5rY1@0%(kY9$7;Xfe! z^ZNh33Dr9;-vuP*Z<&<+y8F4K{|n`hA_d1!+roI#bEV&@1POy_FEqrJoV12=zGHa+ftMxZT7k_Lq$Gkt&vm*(kQp+S8WKDT#iJNnZ6~WFH zxCh%#vVLr#eWc}+y?hBZ;AP|MjfsN8sM#g!tiuk~Xqb)>aUj&u>6otzZ(aL4T+4Hu z6Sy*18;pkGMBo@k z%WvSDXl!Uuk4feDgz0tHz5d^@yN)}C4efxc7=Hmp=1tM#H9JkFAPmAVuMj*YRU^U1 zIJ?29+ObJ88+Fy^4?W_a@my4f86R|WdD-Ax#PsbMic3v-d21-p&vz#-oXpB5q|bv| zOp|!J^9&1#WlG)`d8}Y3OppQ=e0542YIui2yfm@4TVYN##P?FZsOaVl=U_YOVW3gq zu5ln5DVYBO(W=bG>EYjFiu*pwGqp&L4(V_atqN~^J)w1}4-di|t%%afvg>{-z*Rng zbKz-&^5NDGWCo`LDmDKG2C<`Yk;e47@DDwcO`+q9Ggp0W(iOu?=#yFff=;6BUj&LA z)~pR#OF;Sp{38C=6Gp(3_K5BCNfqS&g~9xqji5M$f+}>*|L{*j+ZyJlAQH_!`V$p= z8z0Dak9Qc(3fOgKHqZqIQibElAFHC*uw|CqmtvhAFsy<{mr@*%eWUdU8u$5YbyZoY zEfQLq>+W_Lh7G%WbQ!ehm$=As#UC0u2`#ZKswtXuC5c$W$D%omkD0#->-2^Fr99=& z=pONqsI~BJUUU+yV>r^(*OQfqz@%h|$6-rhZ^_5bTcy=Z9f^#yJzZ^&sjIVzu(1^N zpfF(mQtRbLyih@>RIk62JjYYLI9;6@Wz5C}LVuS?&}cb4^_{J?m8UM~lXA4hud&T> z>F~AQZz-G{?@9m$+M)K>3KWak4Fs5u<(06OrWsQuHUdz2k6252>P(r;b*$>O!z&_x z3nz8|25C~5xBp8i63pwUt`6A3f&y4 zs_;|P!huW}5Xa>*EsU2p5p_dben(HbqQUFz@Z#w*8bwT5v#HsNThsBBEJfH3Zhav9 z;>+woVHW+@2TcBt{l6NES<#?qV0OK>n-W>@Tt_xWdw2A*yp6TW>CHExA-GKzeAYT3 z0+pBcLpNn$$#5%tl7`7C% zYG+~)BRzJy`Rg#5yOlL8>B%LHT{u)p2<)HylSQ^8zy^h|!S1mfPSl*r;WZ%3>?&ptE^lFxZasJFZ*;`TQw00Ey!G#6g|H2)k5b{&O1SR70r}y0xDwgEKNDC(m3CR>h)CK~!K2xh#AP;1~EE0V__(FuPLLleod7Ve{pIq-ptLJ%9^# z7DN{kn&*<3KlNxJdPOdih8zj_o-m}l^?oxJ_PNR~EqL_`%-_K^{u{d^3D#6sWl+F| z(TLq#KHMM#lxlw-cNT+WX!GB2kMB(5)202gb9tw?yT6KeybsmA^OJH}3c{wlr4^Gf_2L 
z=jV%7fpT2xDd9GsgbYPE_X1@|(Pt1dgBI-+@Sz(2bhBwc3;#YtTcn$(>(GKE^cR0j zfc$t%reLe9$t^RA^CN?!g}{{(a<033=kZe9nc;9TfQL1n=OMj1Q)~Z^5r0K6#TReq8`~>)6#Ie!rn*{tnu*W-{WgVf}l~DL^3HkFE)}>P~%Us6d zM}Cr3x6dic3h?Rr8ZvpYWF5DR&T-Z>4+6beb!R>qHYs+Mo`aFq11!{>b)&@>(25#S z#V{SJsz4JufRY+ic145T(>f=JGN&hPWw@qyww3)A?Ax$T5L8v_ zMu))re9hBlNj%Ek^Bsz!&ly1HRYU)TbXIcpL>K)8#9)*w*ZorARtcCpfnKM5(vEUp zh#WT`EQ9Xw4n9GQB_XsU8M^pc(V*)F?TzR4#J6kkOdblHBx5&WPj?{7yUgY=1|Kbb zZmQizolsX;M8`n#=F?!*&IS#r1>1uOv*RZ(Tu58?8T1U z1QGo7sy5vLn_bi$Z&2;{c?X_%(yto#5NRVqPmVuX=2%5$=vkusMU~2d!G>$Ill^)g zO?)K~5hCl-yxI!$(o>o6d8DoF4^!087uyi`254ya6f+o4mlju{-EHtzW)p=5h3)SK z>#?0_!2R-RU7@oDK?TR7sieB=uL;0lEVuKAJa4RZcNUmw0%->PEO5n0v?f-L5*6Qn zjiGd5Rf3V>)IxLK{APo9tyZyYsZ+*a3xR533)GRknFgec3zFPj>AJ-w`%1HwUzUC5 zKJV51JXs>>>3Ghg6R8S2a47zDq70xE5kD*Vz}H_Degi2-K-ySpK}2*vW@#qsQX(*6ukEfHDvYksKg4XW7QuyC5r@P})SLGHzCYwYx$!rX? z2KQKmJr+2T&o1u2y#9@JhgoL`gOr5DqN&LrsO~i+*#uODWk3iOX<=ubvW`x0NAdCl zK6uWS`u$gSDIIn#p#_B%{;?*7x4O5chXRyuH&JA*U;b*enK)K4U`0LUe z{5h#6K_rBmk16}W1isIM(wr=55qReoP&Egi87y2swWg-!Hn@L79$jcD1g$Fm2K{Pb zj7bx*nLv^ve^=Fp$GnI`xcMcdc)sV_f;{k8ijYIQYs!cwkW?B7g%|;(N9M6x6c~#R z%Fb}6cmJ(Dk-k`^37qam3-kJ+ffD9U4BAdEtfudIE8zKJ=O2+2;Bv~JP=lY6eKO3y zP4PHA&Y*Ca%q$Jyjz!UEwO*Updy*TmsRLNR8x-i{5b;z)160ATqH%T6c(s9``Q69Ny{HZD4R|iphVP(TmvgOqI!MWdECu&0ero$S+1NE;3 z$qasjh+@jxs2}wtNDl(1KN56-2ZNIuEKLtJ5hH6`zCPlZL|E^l)(pgwNx;6&i6YuE zHrYr%DD8cO>wos^t|{S79NocwuBBxyw41c0L=zdjRugY=jQni^sS*kvdq@+}!=oN` z6JRVyGw^W@3j2Ga`B~^T-o2KXcHrQH*-{76S&()6$6#W~L9N!fXszKwAm|AEOGkc8o>@>HU-efRKwK3T`M@=g{ z#H3w2_>O=-Lx%KpI@Qw#FdHrv#~@RLMD8j}3~ZE%r)Cc6LV)LK}6Cl;1+{MZDROv)$`8 z2><^2Pk;*Td~I==wSOmFh{Et*uo3LuK2aeA2+XN!*4`^45Tl8|{Pd)vq()z~O%P4p zTkZPJ^)3{WmZd8s;Ei#D!*%c>ruM@g^ZPD)Nc+4fgK>+b`CCKoQ3O zKi~-ZC3E#G1?Wl@g7&^s_%EUg3WU(9|0YoMKTs3P+UdualQJs)CsHorMIBoudkv+S ze55xkiien-t~p6#gpLm6k~zAij%zQj^P2BKAF3yRU5Q({cQd#0&rR2>$;(MRSOc~U z=8L-$?`;Ov2SYiw8;k>j8oo(;GNh4~#Bgd!5aV5L*YV7{F1O|gmbP4S zBu?W9ej2XB1*9NnA5VHDc7V_ZW$4QmT(KH})QoYHCfbs-&IP(C(l;J|{>lHPFQ0Z* 
z^ds!9%@yu4p{-BjQN8oh48Lv=+y7SJ{BHl}-SOwHftvH%pCIt=r{C|B-_6I<7w-QQ zl91hjCY3!~ke}VB1)2PhDpxt-VCJ!;LhAB0j}P7OL1*(k_=q8pm>P5Y&>sYJKZoyw zS7!`>ZO+)2hPUn|%%JXlX(7KWFZB+Cz=IE#l(;;xwN3{&rv6od#uPgi!AFx`kg|ml zV(@o3qf7NJRPl@K-QD@vWv00(NuOelYv`SBBh9p?+r~J&gyt*m6NG@utv1<+IC zh_FhaJ zDhqK)>t{{W^A=e$DPF`<>mL1X-Q!2GO-tl0c0njCG$C&k z2~%IQt#v5paa@}PgUWR7FO9#=I?u=Zo+-9Cs@+`u!DAS&`7YVb<0Z zwW^VF3kmwZ6AjjUTq>!~h;$%26wV$Dg!vqCdg`;r*t)FsFhL#G43CVzlhb};J39Uj z7zQ-%0UlD)ug*z>;acF4iMQo9)XqC+j0bPdh2NQ$1&uGiKIEYfpCbbyB~S?jh6F9g z;2t90&%;J9cSpD{+Hj(8bPZl=3|wj|feS}opjSet;n$-e^m9>2f8i9>uKwC*Zbkm@ zs1IQX53P`)_hL6)dnAb;J>GU<&(VY7TXy|k%bt~c!{7DJ{p@DY9`Zzk*T~J!U2`A9 z7oq>%P%7u5)#IFG?M?aQ8AHC=V?zUt@2EA3e@$#TGT`}U$2*(Q{HWIdkv-+ zOJaLoTTj(qohQRubX-3rwD~^hDzxAeUTi3@#1^jA(w__0L_O^7F2SBZ&@*}2@twT& z{supr%watEzob4G5e+S(q(s%XC9Xf9;O`N02rPFL_3_tF>MCsh8gy@+bRBv19DW7O zUqN=vg`2|U&DGlBM+~G^J^JpgR+)3m8tS$SB3%Pdpyj>d_Tv}uQD+t)O zem;YI{z#05G;u)H7QV*6(P8S)@*Me`@cf!y^)VO-+?6Nhf*hG4&xaHhp+^6k44>;? zka_olTHh~GNmc<*gOG_9K#p%r8suYdb_g+R9eLG0@h&DaAKy4G={}G!HLRU$^nZDH zBzYd30n8E{@#}t}}b9LPF;|Rh-SRM4tro2Yy(pD@9+mPU1 z!~`M}<1mzSToFb#m~E6;RkX&`VPTLb!}dbP^_U6efPvzEkefC_=oUG*E9wGtvttr( zcf2vBprPo8*Zd4UfP7cit3b`dsvY=wkoVe*VBpgft=FB$X+uSyP+N3_zE{Bm$?zBm z#L5i#VsdWjRd_x7>6;HU6Ff>4vcddUnuM|%BE!jYr;x@n1xZlfbb%$!cPjbY<+V6p zPr_+L%UqJ3?>gXk=(C%GtY)NN<$)-|Q8NJ{~8`Q!|)^rIdfp@$>bm6GDo+t7xuUX0uV_OKsh;dGaaH(5ew0naf3@rQ%v7-tWa ze5p*jAM$GI{7)hd8LTNVOW+@?^iU=Y=DH0bwaDV;yZ`y!E1jK&Mgp+h-vBhfHzR~1 zPUuvWQhgyIEs!hUT6&KVIq6YL9E64JVj|A(-&2Dk3au)juvT2U3~qqzUlb zR(>^fkwHESyt!YnE=IZUY)y=`k-*5w+~?d{`@)A!0+0+(QNx2akUs#~9d{4MEqz`N+ zIC_6#4fThCizeUh3{z+$Gk8tZ6!{g*BqH2BUC{ZVLSh(HlymLf(rGpgoqS;jGU5L&o4!~$T2KEktU_$MyH<5_ z)6fBrIj{;751OEs=V{csEK5kY!%Hwhl&odz`xSzU$2S*|-KkZdvhS3nW^{gh2)-m< zzVwqqsdAz8NeDLFtT!cRz-V`;BVD^vS~$xNn!9y#jaAi(xoz#)UEiFWMsQ90g0$8Q z{c@WrTm>Rgsj$J&D6^|w)!*?>;l6T<91Dl!Z>HB#?YC(f;hNMSn=WUgpQb$~bu@=b z?X4N7i5r391sotRu2-ds$rCOEJ(bkBU$6GG;IleepFi0u(flG1A%rh=poou1@zCX2 z&g-ls3NQV!2R}ATf8;H3Spd)AhwH6kG!KuSw)EYZN!o*_|DR1JXz?q5u@^Vyw)evB 
z04rHQZB-m&_6Taq<>|_+Dn@rms(U%3w zFHK}Q+_pZRQFP~(%+i(d!Q+B4t&h>&>L(s{BTPFK_!SOL_dT?gr73d|Z(; z%@v9q-mn^B^1I77Gs~}46oB@JVwe$jv)Q0L3i4zNkCfkK(_^D!#@o!kNNs+Z! zeR`O}n2fS-<6_5vvo-v@c#Yui!{2sK0t#sexeaw99 zb77cg)Ra4zn969=3O=$m7pRX|eQ^+`|7jl08CZq>MhFB5qBMmjGVFY9yXwm|V{ z)#1isK+sU2%c1x!BKwuBu3pF9xm_m|LRJ(#pDi!|{vkcgZl9zL*M`m`1|Tq&ts`i? zE%rC`KsXg!F+9h7+xmaeyu3J;<6K^t`97`>4ynWQo}}t~)?n;|ZaiOOLi@mJI)3Z_ zYF<4kR2#X9VG`w+4Z)E$blr;#*$y(a^H3%f59`SPYF%AcZN2Ask>5Dy4Kx(xTu0%5 z>C5+95h0-?5v?#Vzqh8|wh+)xf0J6OjT#$#VGf+hCJdEHulw`s*v~|>@y5)21G6-C zE#B8|YQlM%E+#=ZwV;6I1=PEmow|~mBb|3G!qD4RCME(Qvj2;})ObJsxX(u;j3;4A4_yxqrWD9kffloH??K*FLaalL z?Y0bvR1hluD^4y}V(L$t%)}BaZ|F(j?|Fw~@>5Ne`8DlT$lTn@e`S;u9xZ3;78W>K zWSa$y>~WD8s9X_OM@9JPn>l_XbFW<1_s0=*QuXmeGh`P2EmLKWhS*1C>`R)5Ch9Xy zZOZ^v7|s*~gfj<&7Q~$WW{g22#?xHyCz)rw0-&C+5K<+ zWMX|1NSdDYJhnhoJu^Sb5K}eqcRW(PFz7!POm_6)sL!#Nr0n8}$5SB_VKxKp6tmag z&bDb?@AwgxMyJ22CN{ayZ|L@)&+-jOE?i#ko~4J+CzmgG7|~N^jHDH1rY#PnRzdhO z5giWVYt1sRwr9w?4%c;^hPXr%iSc~TBF#KDSt5wFZg^I=U~fF8QoBC>bdqRXVddA? z@^bvvo7RhmUurUK*+4ZJ=+v-+;+1+b@&MM1bQ=TnbO`@COH`BH+`s#x-5VJe8UHgs zKL$qLBicl6VQj)+dkRMbcDbyA$2ET&hG=k;uC!ATh+z&Ku!+fv@68B5QL);TIj%`j ze(bPF9^Q`6zl@WX_tlHBXTf;RBy=CY2e$Xe_bZJJE&fc)o+L>HldzYLW%Q8a6&*H( zV?1G+bcNgA*7N2RiLB&N>`M7P@&g^rt zJ0r*DG@BonDJ%Wq?@jc>x^_26PJme)J$k*ax1rqG|X z%=H#G%>ye3`{adjIo)rc^TH%C(GL@z754fy+U$8ls5;k%4)<)!l>{4hp*Fo%VMaY4 zS_U>dI-XXxA+~dZpba8{@WYbSiwYmfpp>{-xaXc~5f58{;<) z(O2R5!HW|*v)y+{WvAqkxhyt)-#kV<_g&MmEvlJQ>?ChU+9(!QgK#{sIW8VG+T=LN z$WyjddEoZE7zG&25|9+_vo-{94m;zd=3XB*fef4j%j9y^?NW%~^2g=eZi z9;Z9Ilk$!`o{-W|5uT3tDkYN^D?=j4W<*tf$5k`jm%Tb|s_!!^`_^>waNaN)`4^#3 znVJ6n;|S6UU|hvcMX9nyuO!9E2W|n|q+EFu4L_dc8lUR6ol)t@)@M|C)8o@8*&A*N zjG?pXu~k4oKJV&s!~3UwgH%N;77X;{uDQVj_V>D>Vs@2ktGR`yfxqR-(lFW2=a#K! 
z4TGcjsk&QQk?lz$z!dzAzadTB2K>5ANuSKk_f1B;VmyR-<)2b1Rq5#Rg=GwP3bn@k z*9N0YxLSgWp~E_9)|U*Z8UNv4Z07%HQ~xrle?2CV|IQ{9lAD_wH$O5&ifYwVR8;Ib zn_+{%FVb!ZdkgvR^s}IG?cG1vTe-m&#_7u{-2hQdp(tr^ceAN_!uru349Ox)dX#ho z^J8Bxxzwvi`*&1->fA{FDgFpA&|@{cTeV42*gPgZE-Z?G(k(!iGdv6W#iQ9077+zR7;!KyZ)tI7>u=kNxAu$0>K9PPV>c_jd z_vp)mnIWPN3IkcWSA>HDc;$z%ipaqA-CnW{4JI4yI@X0hXhL304_o}AT~0q9_Qb>e zuRdLIEsccS52N!M<4-8Ai#F=l3yf5|m;|jrffIQqYBPV&sI?$tc|R4gDwL6IqqOir z)~8`CjPS>{3fEIfXRn(Fam`9DLN`PJTt+X2^(fTP0(6zfjp67xMwI>`)Xy5e%xblkNAw6MjF*m3ydwWJ3Sdjm`(yf+!THO8C#+g> zt&!6ABWQW;{}C#nVeIVeII1{$6{+_vSeMO{?YZ$a#JmAp#n)Hl^(3HCUHn8mA!9ZY(1zfgdFY`~)d+m70iA zwYJ_-4JMfyX&emcW&cDp!y>JIU@3{>b8c$Vc=q@D7y>9&iY7+1+S8dv`uR2o_Fn_^8bW z@2{S@U1%T8Nlq-(wwH3mwOwa>@^hC5BgF}5cwme$XMPAY1-CP6KgsJb(Hqe9EYnB# zxusWTPAS?f(5$9U1T8Y`Rd0Ngebe5t|0Yn2Q*ANaDll>^+b5X%!2zU9SevIaq}hrb zr9x`k^VzgI&9~#eIQjhR>yw@4hSqS(A*v>S@mo+5?##vt=2jtoO3bkA`#GJHf_%@Z z+RAEnXXDh!kX~JgUHBQz164d%bF|%)Id51%8FT_ZuKx*#@9%v=JW^=?w?)fSy# zCZ+bm5{`ZouRG^xTYnIu;kBmqFMr7rjwCg+X9=(WT)X*0**-0Hf4(20r787bc#)d$ zCK6~vw`K>ql~q0t4RnsL;u5r_@La(AbbEh+?0I(!fzhTa%Cha|s4r{>MF~E1sdpEB ziNcGz?_SWE8X%*NatzKBN`0vq^5$(TpX#(|17^|(RaU&ro1gyV5hwX2>=)exG^5$# z14ymPiY!V9E=w=Z9$G?NxPRH)8P>nmEx2+I;=mHVkwy=nJp2OEBDib`t05k0Zj2UG zqR7TEJ03g-8<|<;q8hn3{jqKn{DVDNAx|6L z2oKouZZ}c<3G83b5kx`8KXhgx0#;JsRr}?tR2mYW*~Pxm6$)Q9UF4f09Qnbo8$Xn& zo#@-nZV1h5HNGdR4eahK+^n`i`Un*n(Pw5AjST1a7`?+*v#YD_2dsyprbg2*Zm0!zFgWNoi=Gig@zm_~#|H6VnwyNio?g}@3~ zl@dKiXQHEglVBtRy~-;8UR_|QOV*hV*g|)b%eX)eBEeUPMg*4`GuX9!O-)`)y{xJ+Z-Ho^Gm}NjUZil`UR4J4fC)Jzy*dtpX|?k+*NM%a@~)_9H*{`N^w+> z+NH%^0_a!xQa07{ZyQHT& zpptHD!5EYfWV~a3A0$ZU*gPj))^6WiFYhKXNiOCJ*Ldb-cU1fkxdZ-FfU-v_Rx>l> z(KU>YC0%=;mZO0g`;L71u9q<{Je->Z7am#QGX~AVHD8)308OK>po`xYjpXPA&pI zM5Ar0zwF|tk7usLUXB#g)8u%sKiH0R9*pd6I!a808-pn`@3;HD@2pwiTU@#>3NFQ% zX=%f)A8iVb%RO_#yHmS9rp{YWLj|e6mmInfNHcF`NZ2uMS!nAjpmU8yMA979g!Pd!o@2&^pQgj%#s?p?jY?a7g~;^ImUMDKqlW?srnjRWwFAdUPBgRKfF z01ICBb5@rfa6M;f8BLVZ*I460TSI;@^hn*;*=D^G4%0X5bw@tL{H93{lLuV}P=vO~ 
zJ}7aAnX|H7gi`%*i+@XFeM;u+j%qfT2?Sjt(3??m;=)Uy3;UzjlaP$m!L|f^bF2k^ zcD`Pw?ze$dH+Z`ts?CANik|Qpi z7FoC55v@JB6v&)ohqiXHfG<^tQ%wzkXuYk5J6}3`WM&I|UphOKPHwt*7)C&2U&H#) zFxz1=Avb~H|KPq(^S?EiIMy9e-SxEYC4)qprfHwof*|BW6G_aW(D(~RSQ*X|(}H15 zFs=b*QL$gZ$po5g(+?IX#c+y0haM3O)}7sE`Ns}?xILeMqY4Mv&(XcquAlH_+G~z} za&JjzhZPKLR7OdY7BY^)yC$rx;Gc4iw|XD}rcOwBx=AIvVhaXhFOJml@A2XD3|lK~ z4$3`XpCymRH1$FRhh%3KWK-hqQr(lc_yxVTawAvZ9rg3?oe40}FeV?j321~)yPSU` z{_bsiBR=iqdm;?VMx5naFpnq%l>e%qh^i zvqok^{nUL^v-SmQ4NrXT>t>rJb*$IHa$Mwgr+l?^dV&j$-1(YIW8Al$3uzd6Q7%;W z0>hHxwMZfoPLuV>I|2m}PSm5=g|QeHVtuLOR6Q$>0<%oXuc8t(4bz*5LEJYq)xP`Lp-^O2SDxoixU0|g z%7rXbl*Vaqao?{-!TNa-D19%-9WfN}5X|d$))fB_!fYleFp5e{S-%g{LWAO7+6mP) z28Nx96r##IZ#ttcx-*qhC~;jWQ!#C}QDzcL>I+nFkPNGGgIh4Q0ZMVgqFIO&3XQdF z!1B81p8Z146KQ9KBbs39_yk9pkPn9?zmvZxy$lkWwyPbuxmam#YW08>vd{=8*Y;?H zsU3}on#Mnc0DTKPZ#sZ7e3PVXWk7%=JM}v{4!)J$s z{&}Ga)QkVslOdu2|Dq&zJs3YF3u(b&>}ylsrq3{U3B&v9{%Gp@`n}!ztgb;%>r;7A z-ibV+#Tn1ZbJ9F%;?sB(GfT@;LL(D>%jX53aoef?>JmM03upH`*94g{YmP-@!!Ef^ zb{;?@qGV!csytNoUQ4L2I{cn+btj23c>I-dGHbzkip`b91Url7vb)0E@h$DI5qsjv z=E{6*C56|vkgfMC;xE!1?60E})_D@~3F-Z0x!Lsr717a>iNg(LvVOZfHj-|BU~QA( ztcgZu{9KCG*~kWoa6hB($hNXgY-BN6QujH zQY&e&Q+0PGXPv3N?XA3dk535f9u5kSFm?^jG{*)$4X62AA9sf6oXTPBM^ONHvu( zG>IuHZqY^ycKSKmKz1{&%}Spt*4!40v9ZZdFH1Hl1asBgmMSA*6$!1FI7W-Ar0fpA zZ%I*d771w>+&6)=VMM$n&W|D3qnn}~?9t7%yHXk1-*t!NGK5O`>i}v81#OapZ*p3@ zmpP);6BS>IW3@(gmd!W3pTCBWR%%*{AfS-jOCw6LEB_?GJ@j^+l+l1o-l(ejv(SCp z;H>;NgKb3qiD&C~3|oy=wDixg4N5Dh|C4I;Z?=$26jM|Mj-94La)YnG*uS1sx!Bm7 z#e{3)Xv3XMi_OyL=m%7Ua88+IW2*fIK-#8%aC?wR7DK&Z0qOv$6Ge;+>AnW*_(H7m zr`&{L_+G@JQT`v0HFsHAt>IY_hQQazNAxRMX?ZfHu|dS|dd{eg1q>c!R(PLC`+xxp z+Hn5`!~D0ZLH~9Z;Q#!--1gpFdhC0(Tl4%&au9wac9A%GUZWVBPzIO1hcQ8iLMP;e znajU6T{$f6w|T}QNqj^`yt#e#lZhM!<~Ui^r6j%8=Y$xgMRaZnXQCmufpN2DZ{BJl zf%PlaO<{Aa5IkJ$A@|M6!Cpx&&?3lp0+jUt@)gRxnclq``VaqUZ~Q;G&%e6W*MB$= z#3q0MsQ;@b+O-Pxmc^MaX*#NG$)Rq+rkDr<=hU4$d2!qGv@maIlB8@YJ=v%+9>G6~Dr29wwZmFCiSpnLxm6R!tb>4&k4W`=^eVcQAj`tiL;Ey!z 
zpi?ZS9Qk{AE)EYz$p&EV@S&G@^IG2h&!P)GqB4H{!2Mk{O(FX3Mss#8)JDRfmB@1j z^W{_CJz_^Mf6IKVlW=ub`yDn0CPjNzqs}I3q7y>Gm+}C!=g-Tpr`}wX5e{goYO(-h zCp}B^5)jc+Z0i(u(eQQ+7odB()-Gf4mU6WY9=%Xo5V?eLPk=pk&B5)^w9D5+AiHdP zsi}XDna2F#4c1maG3+Rk-=OyoPqvhoItO>@`gocgVry^g-OY!}+zWG$1M6$^0eGc+ zD{+wRo_TR%{U$6a;sF2nEz-P`>S~=W^2T3cARnGpT6#aKT|$aCpJ03)s4e_jM?eGp>^b{k6aZsTHrabZ73)({CO_3sl%k`(kG#m zll*&XKz)MUR? zsgFXFbG#J1-*Y;Ps4nZyP6T>=Bt6NzDZO&u57R(B`><{~!4;A|=NEX}5uTQfAO$(p zMJD;v3=rN1H)JgO?!?X=H3hIBdd^S%U_pA|=h|-J&f77W=*#_X_#Bh>QUle8JL$>8 zXL12_K}>=A2mNt`d9Ti_+`WiHuQ+PJXvB$(fnqrSp%yo5_bfeGkIQ7OS57iB%Ki86 z-#xuME+v5#O@8>t0{-5t-I(a;+by7BZUF->8X=s{_X3N?nrLJLpVN^lcfkur<_>tD zgEKxSajo*9+(W6uBl_J18@a`X=B=meWD18jL2x?=Mkn~hZ#x8(m@bv0kKW1l7DwMy zNqd_;DTXOXKw$qMY~Za=bTA&=O;490=)JzNoD-6Rp7VSG-27*7DbM{;s*|iClCu_S zGRX8j{(_KYddGy^c>x^~)i;FCz^#eq9gpCSRSuy|`f9NtklJWyz;iS4jKx$6AKp(Q zp7j0I#y?+|p)!)y*qvz>s&%UlD$cN84awYw)!i9ndI^vHJ?>l?GOz1$V0M2#F?h8b z-9$*+9@xlwi9lPBo;0;-zy8%OHhKLEm6;*CFhKPG(Ds%=aeZIDK7;_lgS#ZSyL)hV zhv4q+?j9V1ySux)yVJP4LnF7pzqxa3rmp;@?!4~mKHcY3@4f2T&sv{~>2u5+^SrF4 zYQMfcfhXlddg;zx*594BOt!Le!p__j6VwC9R#aUYqi;_jtS7TGh&*&AsVko`h?cyd z@8iMBy`|MNb14H5U1LGuN2~J*h2R=w-dn^TU@u{y-h{5v`%N|U9sjJzi{`bElnxf~ z)TQ%x9Chxapz_4rs_LcU+nWn2EU~qwe7qFO_Ftww)L`;b85sLYRD{LKAY|m)ycN~+Z@I*C?ge8dkTd&)0ShoTwV7+2Xe+YpAk+E$4UFR+r2gRzb zt-J5CK!Bj9ymSGrhO`C8HqE|kdUZ!?rYF4d4YZa9&m5jMwg0;4C?9kXmIMLTl_@SU z1k_M|j-#C}Bfkg-17F7Aq1@6RdtE#FhDUxvK?r!H`{L^CW!L_m+eL^e~T9;|_2s|yx0pcIE2Vj?hvDaUjEV9cA6Bov}p zlH$ej{p#=sbL!0oJQ=^uW4#(g5qGD5!;s26)d~EV2yu9V34{Baks{%BLR#j;{YUT@ zn`j)hz_rEPUrlQMC`poQfilfXuLW_eu29&cMPN^VtKE+i{r5jT!V~Gh=ZaTrRKX>6 zU-=EJ+O)$-jdNCk*L0jKw=W^7;#y7NGPcz>eD~=L-?t{UWSw}!wjB@(rgJ(=!i}O> zl&*^qfR{hd)5xS#FnCiX#}?1WjrT^7{7|A-0q<~FdHIYXXoma-d#UM5r7uu2KU|E~ zRIKd<%OP?cQ>s2}=j2g~)^khBzN{;b5aa@kFDKqI4*_2NC76l+2} zL5>|ni_SvE?2iEfH6d??M#4J9y^yM>hjuSt9fXTJyhGpJp6iu#8m|qo^e+`|zn{7^ zf;OUl2=p*Kd=u*xl`%#~+`U0%!jWg(TA$#u;TDTA^OL;SS#~g?O<)gnG4jYE9Qr;} zPF-V8TTM7a3C)V>Pp8B9)k3jtGprWEZY})IBW6*St1J_|gj;F(Ey4!j2#Gf1kLpBO 
zGp$)O36r~`0no+c!-yqA7D4{l!!sweDd485$}eUwln8fSyPb07DBwHuXe`T}osH?M zS}@u>dW$vK=+o~H^w7=qG)pGz*_ca{#VAIe$81Jnk-_gNECCaPkp5Sl^Mm^xntu6G zOd9lsEV=-BmBjBo-WbbiVCEgiz$+v~Z9zoXn|8vKqyYDM4R>*|PWI>!uemSsv~e3OxfjC`uz8{x=&5h zk+2@0)D(`t&WDW_Y;29ebm}kec((4EuwM87I@#B8DQyDKGzSK_bE36^T45oFp3I}2 zn!YUC)M4)L^$6TLDus1#RFv$MJ)jc+x#oPi!<|Fv^g62fN0I&>AJ-|~KjMkLv8C^l zJpl@5g`nb(XslW{g8qiRL*@_`6b$I0D5khR5;%clXi|2Vx~cSyECw zj^90RknQ#?-@d)xY#L2#4i{dIABx61RHbgnsNT!o6hw!=n-2|rk~m%Km=>my2^JDR z7}ueP$oQ>3As?&e(M6`8mlf60o#U;BU zd6L~!=kwx_T)jTl#hX6Tq9w@xcRFLR-CSL)%$8v6mJO+qWME<@Zp(*rT*J*I8 z=1}ISahgTzRS5R!X^SN7k`$$LVlx+FKDQZnQn^3H+-~2Jw6YZg21bg z&zoP{$jEIz?rC#7TrrEkLF+UzIfNrQJ`kATcCIFbm5?^tG8>EhbS8oJ;kkBz*c-#{ z4BvvgYzleq67YI0Zfp0FlIusWb#Fq2^9tE>qpe4T>-H}*$NaH3B|=>xV`tXIq=R#y z5w8+~GDiz*sD1x%oM(acmw>NZ{#LT9>Eqa7;JBGR=z? zw?p#yQU?!_wnz;1XmmDR5HI=tI;TebK!woFqY{=gSTKW74yuDfNB&n@;lj^9;FX#Q z&fck`{V+)EFU%Kvk2vLpN*NF8^DJ)`c&(sTSo$ZzOzR?@nyKcBGujVatcQF^X-@~d zazwM#5IPgbEh7V4zuUcMmh{=_eR(fQQhQl~^dquYcQfjeuAIe2xr)!Jm1CcKX0SGV zPjo-g8{DnK++2FdS)K*?u<(W?tyLECqycVjB_;D6zecD+Ih;tqdT%^5ngv*)lv>;G zGcZJv!4>kjsEo-9pRuoWaMS<_s|4f#roSbd$9OcHeutp<@N`xezyI6_s)bmY=CIF# z?v~>%u1QD|MUXWhXD%bdfXEqTXvBPFd@b$19uazz* z6C&pDu00n2yG0&ogG+lXre5X)zB4-Im;fx8EaBwaNSKJZ3;AX!M0c}wq~GdgV)xgS z4>|$F9t{lf6j-n~LJ5r44TjB{9H17boTPODQmlrQJ;;i!9Wdorq#L#JR#=@0Vb{>z zGFU6$rQYclqv7n>ZM#5=n8GLieic3kKZs(#b%WOC&`*^H+lbD+bKvlNa}q?(&54XM z-)0-IgA10DnmQdnV541*1r`wxux7ViY*L}7$GwYCGCLbBxqb7cNM@op4_m^=nRi?a z3MUlMr7hx!#a{hQEJe6(sZ&BU=S16!?m$qc5g)Tw=BhrDwM7=fX~+&PNB8=%)ZB$8 z8|oufs=7{I8Pe%$Oz9FWigrs0Cz+mlM&TGZ-bH(4tOCAji=p6LOGw|9cuVq{lUX4BO){=e)}Iw<<&LI=sIKoqHz2#wM{UtL|7b}ii~eH6z1~bHELI3G zWEUUSmGYr5QBft^D>^W+oI9+n1#W&hw_!k#{=wH1t!_U8-1*Bm<|MRWwx0f~_w$D8 zSoJDYE!sRAvt}LgUfe{$DG>6gVqbc+2vkW7oIg~w3Vs0>b4=rF2Wv-}j&d&7g8w>0 zgw#cuGb2ZE3`n%whMytDaYV|QhUWuHu`Vutd?wP~`z)AC1NPr9qphDWh=pzif`R zX>h9Ju+W@|-KKTAanmWI%xAad9h!S`r@s+%h5UeXy9>u9y~5k?ZiA03isv|DC1ZcF zV*nF^O%O|!pVH8H`Gq*hzd%11iy8lJ0glL?O5htYaCVBOcI+`LA)&zdV!JQDqW6n? 
zvo-x3o4WQCv}+_twEn~}Q{S5U;VFpN3dGgUS%+r0WrS$C*q=_9>5U5k7aeZ&#is8v z78L8{aq|0Q4?VlLC5RoKe{Fxqw0DO+8>&wZsA?~HDThd@meGioCAD1rqKGR;ebE$` zo~FZJ2adn2Yt13+X&TGoT&fG;p^Jq?Ah?XVn8#xFW1zr`zO4lG=LR zw#{2|>hLqKhqe1npjf=~yc9EDsTaj&*pgU)xKq&8^>$kkvv%Z&#aIZ|ewiM;c$^u$ z=%Ec#{i0aijUM2>Z)~VK`0xg7P$eyNIAA>4zTYSgA7IH}9CBurO*EoVmkD9gxooT1T^5Luc7%m>n~e7ptT)8|_aig>1oJ(l#$2}Y_z2hq> zPuz{APx#7Fb};;9g|j5yw}=1>2INmY{?~V|qy~Qzaj&1jj0tn%@SowoADuB(4pjLkdAVYUmtHSB zzh@3W6&LW;rB32bR*X{@j9BzLfT0D350FDK9InEnKse(;*!2SKb;L~*lVyQ8^UHm* zv$6yO9wXCG#TV*w%b(Q-3p48^2m;0GWJgy-T(;^45B;qSI;u39FNQ989tzM=VBE@-9;>;LcK`W!`q#`h zocyd4|NfI9`;9-@ZoectP{nTzeLnF0gnyqh%Y&zBvM4!@{T~}`&h)?1M*B6D&Ht(N zulZfLa0m$2F0CCP0Q+yFyZ)~#aA3cDPQSiYWC8{oLQQLnZ@Q7tzDSy3g=qDMpW}=5 z5eH%=u0S*n;-7LnRq1U}<5HBXh^t>?TZ#s;wu@M=Vr;qFk{2E%WxVOi_q8EA`=a#E zlQWbQJK&QqOKvnKcHS}FEhX-fM3fR*8ULC#S>jh2N_K6oas2#ICYLFq2JaLZVR#)@ zjGev_NIfME=x=~7W}Wo)y~G+n#AgmS*WLvoGQqjn3xksl5v$S}`4GC{S53w3BrR@) zqwk%vh9IoW>?r7o(fX>wt1)$ddiI4Wc&X~38F30Ia)aZ&M z-iJ9^A$;eJq;OP6>V^EOMqiCGw2OOG13yuVIQ%rd=bF(Fdx82NHc7M9vwn4vTTzFz z>>iwc754KoPC*(gMHq?yb1Kp&FOl}jMt^!)tb|V^a!YW~P7Ljdhb4?Wn?P1N#bmBj zrS2d}gXX;fhgV($E<*I63ZdgJJo}8nfF_^7qWz4hH%Emc=ppLon*;d#>~tH)i#KQC z@EGO08!fd5g6V0%r-AYnSn!h68a#d!ZV_ry48HC(8p-MiYS2>X{0xsc@z2XFcssMM zs5v*1d&Rjm8DRZZMitj-7&e=}dg4x=G;A^ZKe%Lx7w8B&`67=ijA-%HoI`17E2N$7dRvY!-p=wy%6j5)WiNwlhCIHlw9_>kuMLa1yFYqz=)>`@s z6qp$WJt7(psa#JiOaSqOk zZr;Y2*d}}fTPooso?&+V>&=ksbyt7Ayx5^+;lf?nZ)%|%d|*5h$4Fj5=vk(N0mSozL(8o!dGS> z&`+BT{w5#e%}-sc&p_9rhN#+lHJ_o-;fqB;=Q@2yRqt}5vhO-7@G(HDmd%`OiZ7g4 zcjHf=_tf4hyS+%!&{T{>;S5Vjo-zhqzDe`|c$fOPMnDBfA71-dPs3jReFe>BY&&UC zIw;rXMBe^;bpIf>eD(1amLOVJ{8v9Tuu5dYqZsdEiR7?KeWXNcJ(JS#WM7&MZ>uLS zTEZ$5>j3DDSv?K-br4I%xR-JbocP*Dmw8yKnB2vXju0xP@%oAp|G^OPI-?}Ds#Tww zq$PUcJHY5-faU4vy3;YlT=l-Mg5iBC+-=A)4$u&vvwj$F=(il5P#1g|aMU)(VmMRJqpTE;4 zW`|x6AF+M=3I_UGm`pc5kg}IX{l$%yKAN=ioZ9!uN$li|%uhl~Mg#iCGjAAcnCmOf zUs?>Y96<@!V5}}*s`*p=PF?C84X@eLZFI)-Di;LAq+M0k_SHE%O+PC2Tol|3JUEkGQ)SQ_ug6$?gE3u+jGikDVy3X9iVXqx1&Eou 
z2VQI>bu`;xHU=?Ue8j&BfX$G`lxi?z0~gex(rtDXF&2`~jcjQtK*>W!nt=jfc3NMQ zvI#}qz8yo4Q&_rPwUw{jNpNoP`*$GSrl#p_7g_S@M|*pY&&Y>iznr6k#@}|D(ij5E zw_WO7eYatU9{#WgKIYu(El^j>Yov`fC2(up^v;t+Rad8#A_uw|@9F8{c4N5lr?gO0 zzW2fW*x`9q&&YP3MomLjRVxTHcn2g7B@K zeY8&N!b@YPEM~ zVb0>3`3z#Bq7Ol0C{&!2*!+PcpqJLXt&y!fm z8(ah0W`9f*)JX#RbV%8j zl8TDp6$Sy)w=9lsBtftZ6=l^dNaeiC`+2=B)ddPDUi5!xr=|USHR2Ebpy0*jkMgl` z`_|s?xMHzATDpR*v=OQ6UkIhjD+*43+KG|$%SnO%y9>Z;V^c#!5+Nw})%_0!cGz3+ z7ghWtK*kKo16P$Mg8a8nk>(|?WAuWw68wi2{wZLahR&$#GU~?X<$yEtF^Z^q4q4%P z8}b%7ha<|->KTVBw}SE&LS?4;55o;p&X*^QrCCXL?rQi-sV~zppteof>FlVGyL}CT zUc2<}zodS@nza!8rbd-$*_%!p53D?m*({OHN}i$&&Uz=;aS)xcVruZHh zIkR%X#_{&e#$4!MM4rIA7ZBFgzPq`Nm=)2kiyF^7WyRb@Hct3hN0e~^S$9-NO-7vD!p zCLc|a_5#il+9MFAr%l$w>&+TvEu51^y86>V$&!Y9>rJ&P;hAH%DLNBA-k5hGYPYjO z_QKAsFc-?4#?VxpxERAhFH%@rqQ;t_Ll4;2(eIq50Emu4F*(?2Xt+*`<)=O674=;*a-rab-EHUKS zVK4t8qo?BwGLsvN?9K&U@UQFRtD0r8D!NmK==kqZaLXE*6c11%lS-cIEbeUUW$1aL z-FbJ4Qwix^8B~Q3$Yw`!cOlzJSn(+>tlyDsV@mA5!ym|oeEpZ$`@wOs;{dp5WpXze`sizi7H*8U_Ur&&8 z9LOqtTrB@Fs>qZ*Vbt$>vEqLo>f#2GCO2s^?h30QM|*^%5uFak{kv+5>d;huh2c9` z&>-X8L9KAe^VldhKQv6Z{wX3ZTROs7*TsG(PsX*9=PH9MOTI2;A?@(tN$7I?%er*3 zPu1OZ1y9pey#cq?L651H245BQBH@JLy)E_d`hVDxV%e;l1kiwp6e99-6VS*t;OF3n8qC#bG0*f&MVU=++;`r<-3w^uOg0MlQXy$ft!W)^g zE&-MYkPutAn&axvI$=4e)%?YfN}o3jmn(_2l|F7pq}b!Eyu+|_iy5XzXD}qAF)>p` z!s7;s1`l5>mI3k?+JcP&iH!Ett!cizY|`z}RdeI0^E2lH_NT;g;uXD+W8fTj%<-A~ z-X(^F8*~XDXvuO9yK~jezISER$5#$Wg)N^W0quH8r8fibF&9>yL0dvzR(CWKCe0Fly2G!_!6!$Vl zkntJUGod1dg=@xznJwkM{n{0wY4CT;6jNs*3tS%N>`RV9lMS*K;u6Z^^Vx*VqpK;^ z47flj95}8?^|bpW?gID5fQ&yMUk6KJ#7BO!Z9!6?KMQIjbY%uQePxdHozFc}udRPO z><J>ow*5Dud-J2PcqC>VAC0*l?>%X<1(!$LR=>B(v7R`e?N!+KBrz z0U8yPs6~a~(WhmpQZ3fybL3JqXNX@o1NL<%^Za(csv^nCP>%L{oUkH2AU4RGPV9r8z$hLzB&hxoa?F%#wb1Fv{^r1i&_CclCBusys|$UTR9FOd9s< z;};$=43T}(`~r7Un62;x4SA%M0#Mq1-)5m=pE8*(Zv37+Ffhvq?jkSs@RagbVC`nF z1Xp53Z|H*z<=%m9LPt^~kUafk6XTF}U4)hjMvho1cNS{@W>k5A_f)Q&pWijdaF?$wq%`7=DxXEjkZcmJ@INj#S{<{N02ke1<4Wr6 zS&e8;ofyk7 
zp{D%4L66_Yc)qxdeF?Nsqs|&W#$GR3@~*FOxtr?K^!WwB8d5vH%rI{Y4YN&Wda{?l zDqqR;h6JttEPqy9O=>(ouW{r2o*-OZS#Vo`2!aBw`FxvW?@FV%hgJzJcA&e5%A}#c zNW>B~@|&08%`JZHD#Oescw{)`dMdAg8jvxSSnG7l!iKj*H?6sr4`)_fR?3%~3!#rR zxsTLM2%1ieqn;A~HUX7U67&>7QjzAKo*0z2FJmPyG2>mI>7xLbyCXa(Y+%RD>Cp05 z6TffQ`)F<(7*tkSpO#n;U3{6|$dZS9DuSspT-NzyXNH#l8yM&s!$??ZhMacAmP&qn z3*V|Q{0Lj^OMDeSumRAu2q-tWNFdg*$1*7L9yx@XpR=<-x+9x1S!c9O3vd7Y-Phg| zyherSs{?8+c=#x-&|vMoN~EQQL9q78_YGOv$MuuFsJ*4uWkiQ$rHOBO=GY~`ohsR< z03p)K9UfKL7YTkT5Xtb_y+u@*(q8?I&3Ej~2On!_y(MwC~F`_#THZWSAT(Q)^F`W+_wD^N4_Z_6K7hkUoA&CGM!>Vk6;3jH{eD6H`k zdqatq{MNhKPC}jFl8psLr6Ja$>fp@0Oq$}inrwtAWu+}Wto%6{A=?BNuv|vzk@r6( z(YE#a%Y0PU#>%eWi0kA~`a@*&`t}rL(h=GR81FKXB%a;iv?3%+c~m7{o_Z#se(n+e z)Zb3v;Up>YE@t6$U1Cpa52?@Vc%j=F5nVJRji{w=Mt=_WanXX8cp_)Cx~D+-Sxw{1pH*6P zYv(?H_4{7442j(h$0-ulWQehaHaz%BVJh|dgsP-OP>h?@>x0Dng45=e1qN=V4gL&)x-}W<|(B+OXlAg%!SiGv^pp5HA($>f+&x zA$#x?P{f^t;wJm`O5&|jtuYCfq&Aqz&W>*OB8-R=6VFD_30D>-?}Rk_)zigx+2}w@ z1f7!z#x_DqIio*}GXv@~Btg&$4`QvReDn`78awfX5M6ly*NL&}RwAlM>ROz5 zOD8c0n|56EH``*Ixg~4X@XWX87?6%EJ$WC+D;5+A1OD6Q|TE7P5>#wuKo=7zrBK;I^^uUqT8gVzl zYXzKwj+kC9-MEzcYi%P(b>$~6-{Dp|$PkpITt8;D4?B!Sx~KNFQykeGnCC}VuL8b9 z?q!;Igq(7eU8py$)x6b)LD*WE*gNbMH-s3Qb0sWk%*r#0J``ScFYnZ7tb8N~$G<0x zu5$#gK!FQZQ|YZeTSdfp-c350>MFY;#B{5#_$>52%0+5)*dvr~;^Ldyb0yqXnAym; zr!%IbIfde%9YLz|112vg7WXHkkn(C@ z4I+P-jMKs6aHl-bYO#+S0Omf_n;EO=?7sw6IvFdh z)65;VplhiGkb4i7XWe`LKP~)C=_WI!Q~x+8GZ-;u-!>=q|ZDOUjyk`bQQe zb2GW>{EJ$4=f%lCiaL8oAorS#x}4ILgXlU_Z(|TptR@_El%;>iL#>N<9v&8Ueebao z>!W7-&a&YCac>Yn{_`L0Gr`ua%Szsh&g)Zb+DaE2x9d^;uKlIkFw2?P)BUFh;HtIz zpWC3;b`)q5VX*%HEOmNqseO=pu(54i`gYxSa80B`2dXJAe17<%XC?J}xc~h5ljq5_ z;tuaE8;Tpap3PH@c(>L4B>grp&wzXC@)9%eS!23Yg{rm@i2j1g3&ZO@4P|L(s$EaI zspVqwLD_7&QNfFqtc;%s_zu+hl$oFWByk1nP@>eFGsPohI6pL@){uw^0&GF{}7^ zZs8>-1#)`xgzHo>%PgO2GeTys{1_0octlaeWrL;^RjktmIjb2|Mf>s~{K>{vwHgCQ zY|c3@VcD`tdowXI^^=%4Fp)dywK>Zcmf~48R4aZ|GhXnV7U9v=^C$CRfdMSI^`{L@ z%9Rdg@RcWEZ1N;hgJH&Pkk{-#7^HBgiS%a9U{0#s&?#b-pg~6e+2g!Wqq*ELq=@8S 
z8QYCJ+}4EIV3U00BpMLE?Q!E@$HHb{Ab`va1V+42_W9rJ(6IT|g+u`ZEQ{(sU%uh! zx#jSCzc(D=Kp_@p!?raBLG)()Sr@&`^Eyu*P2FtJ*teL+Oq6FXrFO01^aL9sqBF8A z7F^fu>vhc8U{3_!Os7-3l={Jm$D#6TG#lc4V3|i)847Mlq*-=+wXwyF#C{H*EH+#E z6v9Vwc2xRGmtuRV#GbW82cOEw6P-e#w19WTi5sN7uNuMVtC`Zt)X>Vj<|ng`%vi#|6zSmwEo?!?FUXI?VC< zkJ#5STrQ_8--bj~=~+V!60oUalye}8DC$4YVHgc#RJYv_#9~^uyF@KpFZSuQLl&(~ zZr>;eLI?_KRFvG%m6<6|z(LK*>mrlb_`l$tSQns`fIp~NZH_&3yT_l@q!I}r#K*0Q z0zVr>@dd*CUxh$!bef=5PqfpiEO4wL*HZqZWZp?2*ReReC|fQ-kei8j2FWlCLChJF z4MBR@V^WT@r1cwCo58n*0;hIlL+F}pm)!+0e%35p39&}z?Yv8Aojd9_)(n2L8SRve ze|I(GSt%JyrrNXl5sH-{(Au^Lj(76)Ix4BRdu{>)hmaSu*t3lRew>crN|!1`BmVIG zkniQcB0gX@*!=cA=|3a-ZN$WSjhOQAH)TH7(<$D{z6_ho_sb0M#g8RQl%@vKp>Z|E z!quMS@G=0#jkZUFsUT68#cSb#j1s+rYAa}uO_XLx@vM_EK~ z!@^^0YEqz8#a4^o08z-81#~ha`E*Ft5c6JZ;~f zLRz$=MIF%pv~#8GuiDmmN&lG~Rg-vyUsDRJ7P`sv@oud8@9riY8vZFa{X^ukeT7RN zUxuV4hh2x?3!KZInH~A@uxkRZC4NfV&R<-Ku~q{Ic8yu?!34HLDBqGsDtxbH4fCRi zVkZ!x{JqE{$9xgGX~fT*89)VjcRWagy?Pz~JG2VaGHpv&uoqOB z_+d_cmO^j((pGuCULPLSj((s2GNI9dOWX5?ST=+ke>w_T5pH4N=jS!wgwV8 z<&M1m!Ilx-vA+WrdtLiK-F{qi*y(h4NT1l@4_nY@?xw{h__rp+yuVBEJeGrIQ%o3b z91n*9A4p(7c`qD7^@jh!meFVRUtG8az1<4fTi(Xyds1jUn9&j5zic_o=6;0RPNz?g z**Z;}bRFj6Cc03h*mq~jejr0SzFOSY;qset|L+w}2z-g*j~y0Ko_5$s8-wF`85%+h z_yz~wlwjkN{It7T{6+x$tpmC44Xw~!Q~}Luuc;?rzS9v|o#>EZn~48+NTFEax!-_^ z-QnnM7x@;tXJltJp}*m4aleg+eURn*PwaVh9bF#TkXq>=F@2{jGn^ruiqcdAaU)*! 
zFw*%M>K&ZXizj~SR&iYv243#U0haV3gi>8y zS9ozcySzK4D5$8ga{1Hqt;esK&cd)m#Afng4 z%kLr3+3VA;#h?cdd}@nRPp%`Qsm3>%dWW5>U+|ZrMpZd#(8m8e^F1~n>FcaYjkQ&R7F8*U-z8Rx} z4CDBb-8zf1a%v6!I((`5S?!`}x}K^5X2Z=>3m6!Xo)xwDn?yJ@z3ytm3pH8CCjduh(`B)>xOQr3MVB>lRiVwkV1Kf3BecD|WsHP#| zPulI~-{*QyAKtrZ==J$z)z0c`yQu&*q|;e^oc0`8rYPqtjcPXeEey>|wp;I;n6P85 z=Ip(ePXgnlU73sM9?sH-4{CFI`!{R7aSTmthjTrbFTC4-MQ?^|7J44(k7^PA11n=N z8LU;or^baeSwXV0eJgs@?BS~hCU;Q6>+9_}v!F%xwEjTim-9AT4Ap#B4AnDxgavh` zrTw@?dI38}tjBl6aum06-;~|r!>}kPu{|i5JYn46U zV@}0YUg1Q*kvdhhWAtzt`s>r-e$$`#a>o63p8(2=?$Mo{*;WquPFs)3uhlW{^(*(< zfsz52=@$(HkeN}bSFE!Sd4aIy_$i1X(Dfr)3HGfn*1F6AKcp&;|9P2(MXNnFGf`}b zm@0$XWUYU1#b-@RHT~TWls6CwXk#Ib$W-YMtvUZbFtyRUGyx^J?*=|USHJ3`4J|T= zwTWw347Z@NTJygBzQ*;@%C7OQZC9W3yArp{=lU*ZkISOd0g*^VkTo^Tw}$ZDCsT4w zfVm8?QPpq15V5%G6z;MS^6-3sKX-GVlo>jdJHDzPWqPmWWc*`8aVG#fe^w-C5q4~GY94rVeIgM zn0KdI>*L9Y4$!O##bOvFN#JsTRTBN{OE4L7#B#c^jS#oF35pHwv)UI2R8<|7Z(+!qXVSxDgfxj?Zv1tj46dgm%4; z`|6^gp(8%w6HZc{J?~P^!0q)b)Pr)>-o9pddvcxYZs#bYbC zml!UW{K5~V^~VwZHVXFe1UD>(37&R?*e9N;kJ}AJFIUs}hN~ThWQ_mL5Nma4GTXyw z(Q(FR4%Bs}VE@Gsz0f_`y2g`vjg@__gwJw7hvNN0Lc>fq&Gtd8QxM}k1XCNLmF;QX zRE!gEM$Cp+Eiq0yaIUlm*&Y2MI)!+zmn5dn@;1>VWwu}Xhog+;tN%fX^==Th-OZtO zV;0&+=r&!%5iMe(jQSg&wpVY4yByOzAEg3WVW{)!n)@Xc+Q#NoLydH}Vz*_<9p=tf z-YJcFI5>;AvXeuc(D_9{+|U~f+eLl}%me6T(G20i?3c(W*yrs*52MZE63Jt;>UFjy z8Bxv{hUgcnO<1F|IXO{{6i>r+H9ePgG2p|Oz!x}^L+__FSHRJG*VWyJI-zlz&(Yu< zOt*JNZ>;F-FnLWJ1*M9cTX}YIR#ji-AiCNBG;QX`k|@^{*2?PXB@^+0p2iJ|L*EFu#Mb>{lEx|Lv2aUF)hxdYGA+%CbGm`ql`A)2?s9d&W^fWt|k7h`X&N?Mz;+8rTmrNwo zF)luTsvL!cU7Y9JmlMd!n5v>}He6z)iw%y|q^%@0?{AtdbbZv1ubO7vIHKZ0#cA6S z*p;~+7f{-agyJ9$pb5~(uz)O*pVSIJkNzI>qU6v_-d&8+r%8iZ>a;AImeNBSCswq| ztlK0}X3oHQ9&?!f$_={ngf$jnyD5h{HOGIj`uecA*Pu*eRYC|tcezUe7_}%yD@5QC2$~?ll$JDhjOU`lZ~HypiZs=H(*EV zHKXil=F=K_>{b=g`Sy)b9T8Tl)Z#4PW-s7OiEM3z=&A$zroA`Eca&syB(-Uda>R=J%j({R4>Fr9BWS^gLu zt^!B~rt2%qYq4wx1Hf{k4*Z{g>t|I@o}DqvD0e&z_%kO}>O0xb43Hi^*MdEE!K$sf zB*tTv=tmEazdEL?Ot!=JLL73M5}wCgEI$)Y9hggX6<1)&fNyaltYIi}uT 
zFV;0-E(AnQqoP~dLZ)K)S<2kzv0@zR&9S|)TpV@=%--!|CQ350vR{ZkOZ>aot}d`@ zqE5)=8Y;UOw0l7Z~m4YE?qO)|nMHl{l zt_-{|^e^iuH$E_4TY!;1$t8VZJ8geAQ`+8`O;m*eH`-{|TB+9vc>bQtd|21LG`A9! z#T)$lJb!0JqV01~NJ8)uZE1vP$6n8yRL1-l7?EvRE+{i`UpOik^@TgsyEe259Ku`) zf&OE`UF-2+P&XewTAgAsGlOR<=(9aijF{S@zmXxAnUUgF{;8ew2iTt_;L4{W>C`XW zi%p*emf4p%NmcRI=c(wQn2hMg*ANh}naW)YT;yND;!!_sPfP5;mifub3Jax-d`c*R}B5&&L z3hBkbD7w$jx}q&~<=M&NCZ#^voBBO)QGkkgW=&Fs@;hPPS!b<#^8xgGL)LM=acI15 zB2g|Xez;(6LPWM3Q-S^N`k`Q-DDF!t_YdeoE!<=8>V)m0Hyo_J_!vkIHqRpjfT`V^ z5jO$$`7DJ6AARBs4F2h`z>gMJANBpSl@v@7Sd~z@IUp|K6i?U4K~wK5Ndz0@T)vMN zwgGT1f4%e>(at;QxY%)-yt}5xf7jL*;xN@joWuRRmCGW_J*I3D`3yhK>8Lui$3pu=eYjKdi_T>wQ zl$ZZCDvewjgoHu$?0RT^4@)$lhfcYE-m6i9%Irj^CI9Hm9kgsHEe(MP9b@&Q5E>+p z`Pmc8TA}b@d+-2ANHn0ND{xgWp-~eCspUM_!Ig~s;ki;%lcWP4ZZ zOt#5;9QX@fHQ|bblGbnK{wrfb#)XD#g|b|s_*9n2jkh>tPs89o$u&ZCSXDGE6V&0$ zx3Y`g`kISY91(oEk2I@}I?SyV0VE-NyNfb?AU>820Gme9n#;ldk)SK_^nE(KYH&Xh zVa2Rk?XJL_$rcefL6plb^!(W5Xl9zFKan&PWp};kW598}c-foK6k*#Zt-jQ@`sOD>^(&M%3EAxI;GJJ*e||R7>rIed zh8u04GH|5)0S`~ZV4(>|J90*~K2}iZ*NZzfE%%(3Q)D`upUi#dj{+UmP;5Q{PwPaWbQ zpX%VYerVEUYO|HmNxq2yaEJ?SO9?ci51N5Kb>EePy^NM6v4po?@C&v4_QoK5O#EkQ zBZU>$@fj_SV`N=%_m(}7rCn^tBqrqOFhznf3D(4DF1-7KB;ESCDL<;=nj0}CMIaq_ z_&#Bwo6xX?eZo--f`i$(16Do%$0Mr~a% zqPKz$KZ+~PJJTuQEO7+L#1^)3gF6cSxA<*xjwW5Px^nU}BbDm5qOV?{vesMu<1l~u z1@*^W1WWVrp$<|d-thmft#E`qJn<-N$GpOJdw$_t%Wlb}XV$gK>wd?E=GU0HM{D^8 z+VN*dm6+=34VgGy|G-c;7WdS$S)ctjuiCs}>dHiKnF?Z{D~*zcoMTg~BB-smUg+M4 z`iz)NOh&YpMm8EAkS)Mh`5MdrQn})J$%(G^sFW>hsy!+ed z^Ev0nfV(g>gJN8 z&dQ)Dla%udk4lM;q)^5ve2p=E{NYra{Lv|exR#2{NpTqFrXdNsp#eG+SeX2iyC6_Z z6v*4Cp+63gbwoB>aZKzHy#U}7g6Z2?)4ULD9MsFA1MKRH$$K0gqf^CT2y(W#pH^;J zU`2BF-CubDbCZa&LbX&#r0BC+6B7$W1P(W!aLHCT11Q}X^(lCppyd*tZi|4%L4r~k zpETr!CCJ-ig>`9J`VLtqtcv-tN{^Ru%Ct9#qIueBxI_~?oxJn1%GY^jW>?X4(I9Ev zYR%my7=zMneX5E|ePIiYo?Q+c2vJ`@*))_d?i#$0(Qqa#?!7{|8C+5f{P{Vq1=HS^ z$k#r+JNfHi3&8(FEm*}c#m&Syv;1X!<*#0co_Ux1N~_w8&)N_ihiuC&FXXee`>zh; zD3)2u=Q!F134ax5{-mTbm{8`PTLLB0PiE%eURHRbg7^{-Vh2I$nA)t(a~15T??0f6 
zaqGkT#V?g;-2{8Jaf5V`;d#QE6J_<{Ta|W67nmDpmmYu^DaS`&KW+wi z;47=)1t}ycEXki|4X26K)h4*>@8_jq67J-rP_wod0@p5)396V# zYtsQ>SB&u1p+<$7k_ot_rHw|1D-o%z4~B&s+WEM}aCqkw(F6PwC3|T>Y+QzlIE=^a@1Ud4>k{#vB7i-&f$?c#9j#gU~`-!Ih64rzZ3aD~>!$Tx8 zYOuOjXCj9uk=ULdvCLg%j`3hs=oc7zL1<}D@6xAcru1ReO#Edw;~EB^Fe^k3Ft)(? zVw6fn;Hd}|F{{(V6+WP@V`Z#f=$+VxT zXqL$w*49NllhcBtT~3oi7XRVU)WTva!j!&<8l~Z)D#-EFd^TP-#Ed)w3Kg77FEY1KkL(Cc8J{~T9)Scbr#%JNrPV@BWDZhZ9O$1e$Vjq zy9}p%VAGJ*m{`R~m}5FfmYv^nyUJT0=X6yfm&?*YCkui zE+HX`)`-GB#qI5*>`6}N72%rI8i^U|2V;d1);D97cRQ_oA}@aK zbT`PH(RMS~$~sfHi67P16Wq2|&%mB#wgYw{t}Cl@7a7eZ#9Y$-nQbGvud84E zipCB^^DW=GQ2NBbUsb&VtB*HB7jX?m@jyfAG<)zCD5$O=jc5ft2#`tx?lZuHkbNNO zps>f-r~b=4B diff --git a/.github/release-drafter.yml b/.github/release-drafter.yml deleted file mode 100644 index 7edd7ef..0000000 --- a/.github/release-drafter.yml +++ /dev/null @@ -1,19 +0,0 @@ -name-template: 'v$NEXT_PATCH_VERSION' -tag-template: 'v$NEXT_PATCH_VERSION' -categories: - - title: 'New Features' - label: 'feature' - - title: 'Breaking Change' - label: 'breaking change' - - title: 'Bug Fixes' - label: 'fix' - - title: 'Documentation' - label: 'documentation' - - title: 'Dependencies' - label: 'dependencies' - - title: 'Enhancement' - label: 'enhancement' -change-template: '- $TITLE @$AUTHOR (#$NUMBER)' -template: | - ## Changes - $CHANGES diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml deleted file mode 100644 index 1fd42a8..0000000 --- a/.github/workflows/python-package.yml +++ /dev/null @@ -1,39 +0,0 @@ -# This workflow will install Python dependencies, run tests and lint with a variety of Python versions -# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions - -name: Python package - -on: - push: - branches: [ master ] - pull_request: - branches: [ master ] - -jobs: - build: - - runs-on: ubuntu-latest - strategy: - matrix: - python-version: [3.7, 3.8, 3.9] - - steps: - - uses: actions/checkout@v2 
- - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install flake8 pytest - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - - name: Lint with flake8 - run: | - # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - - name: Test with pytest - run: | - pytest diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml deleted file mode 100644 index b1f2f0b..0000000 --- a/.github/workflows/python-publish.yml +++ /dev/null @@ -1,31 +0,0 @@ -# This workflows will upload a Python Package using Twine when a release is created -# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries - -name: Upload Python Package - -on: - release: - types: [created] - -jobs: - deploy: - - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v2 - - name: Set up Python - uses: actions/setup-python@v2 - with: - python-version: '3.x' - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install build - - name: Build package - run: python -m build - - name: Publish package - uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 - with: - user: __token__ - password: ${{ secrets.PYPI_API_TOKEN }} diff --git a/.gitignore b/.gitignore index d722dc0..23c7bc4 100644 --- a/.gitignore +++ b/.gitignore @@ -1,137 +1,29 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ 
-develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -pip-wheel-metadata/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ - -# Translations -*.mo -*.pot -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook +__pycache__/ +_docs/ +_proc/ +.DS_Store +.gitattributes +.gitconfig +.idea +.idea .ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env +.luarc.json +.ruff_cache/ .venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ -*.png -Untitle*.ipynb - -.idea/ - -#files +.vscode *.csv -*.ipynb +*.egg-info +*.gif +*.icloud +*.parquet +build +data +dist +docs/_site +Gemfile.lock +Gemfile* +mlruns/ +nbs/.last_checked +nbs/data +tmp \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..1d115a9 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,34 @@ +# Configures pre-commit hooks for this repository +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files + + # specific nbdev hooks, cleans notebooks + - repo: https://github.com/fastai/nbdev + rev: 2.3.13 + hooks: + - id: nbdev_clean + + - repo: https://github.com/astral-sh/ruff-pre-commit + # Ruff version. + rev: v0.1.10 + hooks: + # run the linter with the .ruff.toml file for config + # fix all fixable errors + # - id: ruff + # args: [".", "--config", ".ruff.toml", "--fix"] + + # # run the formatter with the .ruff.toml file for config + - id: ruff-format + args: [".", "--config", ".ruff.toml"] + + # export the relevant files as modified by ruff + - repo: https://github.com/fastai/nbdev + rev: 2.3.13 + hooks: + - id: nbdev_export diff --git a/.ruff.toml b/.ruff.toml new file mode 100644 index 0000000..99a99d3 --- /dev/null +++ b/.ruff.toml @@ -0,0 +1,66 @@ +# Exclude a variety of commonly ignored directories. 
+exclude = [ + ".bzr", + ".direnv", + ".eggs", + ".git", + ".git-rewrite", + ".hg", + ".mypy_cache", + ".nox", + ".pants.d", + ".pytype", + ".ruff_cache", + ".svn", + ".tox", + ".venv", + "__pypackages__", + "_build", + "buck-out", + "build", + "dist", + "node_modules", + "venv", + # custom added below + "tsfeatures/", + "setup.py", + "_modidx.py", +] + +# extende the inclusion list to also include jupyternotebooks +extend-include = ["*.ipynb"] + + +# I -> enable isort +# N -> pep8 naming +# PD -> pandas-vet + +extend-select = ['I', 'N', 'PD'] + +# Same as Black. +line-length = 88 +indent-width = 4 + + +[lint] +# Allow fix for all enabled rules (when `--fix`) is provided. +fixable = ["ALL"] + +ignore = ["F403", "F405"] + +# Allow unused variables when underscore-prefixed. +dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" + +[format] + +# Like Black, use double quotes for strings. +quote-style = "double" + +# Like Black, indent with spaces, rather than tabs. +indent-style = "space" + +# Like Black, respect magic trailing commas. +skip-magic-trailing-comma = false + +# Like Black, automatically detect the appropriate line ending. +line-ending = "auto" diff --git a/LICENSE b/LICENSE index c9ab762..3b106e8 100644 --- a/LICENSE +++ b/LICENSE @@ -186,7 +186,7 @@ same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright 2022 Nixtla + Copyright 2022, fastai Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..5c0e7ce --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,5 @@ +include settings.ini +include LICENSE +include CONTRIBUTING.md +include README.md +recursive-exclude * __pycache__ diff --git a/README.md b/README.md index 0520204..eadb806 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,17 @@ -[![Build](https://github.com/FedericoGarza/tsfeatures/workflows/Python%20package/badge.svg)](https://github.com/FedericoGarza/tsfeatures/tree/master) -[![PyPI version fury.io](https://badge.fury.io/py/tsfeatures.svg)](https://pypi.python.org/pypi/tsfeatures/) -[![Downloads](https://pepy.tech/badge/tsfeatures)](https://pepy.tech/project/tsfeatures) -[![Python 3.6+](https://img.shields.io/badge/python-3.7+-blue.svg)](https://www.python.org/downloads/release/python-370+/) -[![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://github.com/FedericoGarza/tsfeatures/blob/master/LICENSE) +# tsfeatures + + # tsfeatures -Calculates various features from time series data. Python implementation of the R package _[tsfeatures](https://github.com/robjhyndman/tsfeatures)_. +Calculates various features from time series data. Python implementation +of the R package +*[tsfeatures](https://github.com/robjhyndman/tsfeatures)*. # Installation -You can install the *released* version of `tsfeatures` from the [Python package index](pypi.org) with: +You can install the *released* version of `tsfeatures` from the [Python +package index](pypi.org) with: ``` python pip install tsfeatures @@ -18,23 +19,30 @@ pip install tsfeatures # Usage -The `tsfeatures` main function calculates by default the features used by Montero-Manso, Talagala, Hyndman and Athanasopoulos in [their implementation of the FFORMA model](https://htmlpreview.github.io/?https://github.com/robjhyndman/M4metalearning/blob/master/docs/M4_methodology.html#features). 
+The `tsfeatures` main function calculates by default the features used +by Montero-Manso, Talagala, Hyndman and Athanasopoulos in [their +implementation of the FFORMA +model](https://htmlpreview.github.io/?https://github.com/robjhyndman/M4metalearning/blob/master/docs/M4_methodology.html#features). -```python +``` python from tsfeatures import tsfeatures ``` -This function receives a panel pandas df with columns `unique_id`, `ds`, `y` and optionally the frequency of the data. +This function receives a panel pandas df with columns `unique_id`, `ds`, +`y` and optionally the frequency of the data. -```python +``` python tsfeatures(panel, freq=7) ``` -By default (`freq=None`) the function will try to infer the frequency of each time series (using `infer_freq` from `pandas` on the `ds` column) and assign a seasonal period according to the built-in dictionary `FREQS`: +By default (`freq=None`) the function will try to infer the frequency of +each time series (using `infer_freq` from `pandas` on the `ds` column) +and assign a seasonal period according to the built-in dictionary +`FREQS`: -```python +``` python FREQS = {'H': 24, 'D': 1, 'M': 12, 'Q': 4, 'W':1, 'Y': 1} @@ -42,34 +50,36 @@ FREQS = {'H': 24, 'D': 1, You can use your own dictionary using the `dict_freqs` argument: -```python +``` python tsfeatures(panel, dict_freqs={'D': 7, 'W': 52}) ``` ## List of available features -| Features ||| -|:--------|:------|:-------------| -|acf_features|heterogeneity|series_length| -|arch_stat|holt_parameters|sparsity| -|count_entropy|hurst|stability| -|crossing_points|hw_parameters|stl_features| -|entropy|intervals|unitroot_kpss| -|flat_spots|lumpiness|unitroot_pp| -|frequency|nonlinearity|| -|guerrero|pacf_features|| +| Features | | | +|:----------------|:----------------|:--------------| +| acf_features | heterogeneity | series_length | +| arch_stat | holt_parameters | sparsity | +| count_entropy | hurst | stability | +| crossing_points | hw_parameters | stl_features | +| 
entropy | intervals | unitroot_kpss | +| flat_spots | lumpiness | unitroot_pp | +| frequency | nonlinearity | | +| guerrero | pacf_features | | -See the docs for a description of the features. To use a particular feature included in the package you need to import it: +See the docs for a description of the features. To use a particular +feature included in the package you need to import it: -```python +``` python from tsfeatures import acf_features tsfeatures(panel, freq=7, features=[acf_features]) ``` -You can also define your own function and use it together with the included features: +You can also define your own function and use it together with the +included features: -```python +``` python def number_zeros(x, freq): number = (x == 0).sum() @@ -78,36 +88,41 @@ def number_zeros(x, freq): tsfeatures(panel, freq=7, features=[acf_features, number_zeros]) ``` -`tsfeatures` can handle functions that receives a numpy array `x` and a frequency `freq` (this parameter is needed even if you don't use it) and returns a dictionary with the feature name as a key and its value. +`tsfeatures` can handle functions that receives a numpy array `x` and a +frequency `freq` (this parameter is needed even if you don’t use it) and +returns a dictionary with the feature name as a key and its value. ## R implementation -You can use this package to call `tsfeatures` from R inside python (you need to have installed R, the packages `forecast` and `tsfeatures`; also the python package `rpy2`): +You can use this package to call `tsfeatures` from R inside python (you +need to have installed R, the packages `forecast` and `tsfeatures`; also +the python package `rpy2`): -```python +``` python from tsfeatures.tsfeatures_r import tsfeatures_r tsfeatures_r(panel, freq=7, features=["acf_features"]) ``` -Observe that this function receives a list of strings instead of a list of functions. +Observe that this function receives a list of strings instead of a list +of functions. 
## Comparison with the R implementation (sum of absolute differences) ### Non-seasonal data (100 Daily M4 time series) -| feature | diff | feature | diff | feature | diff | feature | diff | -|:----------------|-------:|:----------------|-------:|:----------------|-------:|:----------------|-------:| -| e_acf10 | 0 | e_acf1 | 0 | diff2_acf1 | 0 | alpha | 3.2 | -| seasonal_period | 0 | spike | 0 | diff1_acf10 | 0 | arch_acf | 3.3 | -| nperiods | 0 | curvature | 0 | x_acf1 | 0 | beta | 4.04 | -| linearity | 0 | crossing_points | 0 | nonlinearity | 0 | garch_r2 | 4.74 | -| hw_gamma | 0 | lumpiness | 0 | diff2x_pacf5 | 0 | hurst | 5.45 | -| hw_beta | 0 | diff1x_pacf5 | 0 | unitroot_kpss | 0 | garch_acf | 5.53 | -| hw_alpha | 0 | diff1_acf10 | 0 | x_pacf5 | 0 | entropy | 11.65 | -| trend | 0 | arch_lm | 0 | x_acf10 | 0 | -| flat_spots | 0 | diff1_acf1 | 0 | unitroot_pp | 0 | -| series_length | 0 | stability | 0 | arch_r2 | 1.37 | +| feature | diff | feature | diff | feature | diff | feature | diff | +|:----------------|-----:|:----------------|-----:|:--------------|-----:|:----------|------:| +| e_acf10 | 0 | e_acf1 | 0 | diff2_acf1 | 0 | alpha | 3.2 | +| seasonal_period | 0 | spike | 0 | diff1_acf10 | 0 | arch_acf | 3.3 | +| nperiods | 0 | curvature | 0 | x_acf1 | 0 | beta | 4.04 | +| linearity | 0 | crossing_points | 0 | nonlinearity | 0 | garch_r2 | 4.74 | +| hw_gamma | 0 | lumpiness | 0 | diff2x_pacf5 | 0 | hurst | 5.45 | +| hw_beta | 0 | diff1x_pacf5 | 0 | unitroot_kpss | 0 | garch_acf | 5.53 | +| hw_alpha | 0 | diff1_acf10 | 0 | x_pacf5 | 0 | entropy | 11.65 | +| trend | 0 | arch_lm | 0 | x_acf10 | 0 | | | +| flat_spots | 0 | diff1_acf1 | 0 | unitroot_pp | 0 | | | +| series_length | 0 | stability | 0 | arch_r2 | 1.37 | | | To replicate this results use: @@ -118,33 +133,126 @@ python -m tsfeatures.compare_with_r --results_directory /some/path ### Sesonal data (100 Hourly M4 time series) -| feature | diff | feature | diff | feature | diff | feature | diff | 
-|:------------------|-------:|:-------------|-----:|:----------|--------:|:-----------|--------:| -| series_length | 0 |seas_acf1 | 0 | trend | 2.28 | hurst | 26.02 | -| flat_spots | 0 |x_acf1|0| arch_r2 | 2.29 | hw_beta | 32.39 | -| nperiods | 0 |unitroot_kpss|0| alpha | 2.52 | trough | 35 | -| crossing_points | 0 |nonlinearity|0| beta | 3.67 | peak | 69 | -| seasonal_period | 0 |diff1_acf10|0| linearity | 3.97 | -| lumpiness | 0 |x_acf10|0| curvature | 4.8 | -| stability | 0 |seas_pacf|0| e_acf10 | 7.05 | -| arch_lm | 0 |unitroot_pp|0| garch_r2 | 7.32 | -| diff2_acf1 | 0 |spike|0| hw_gamma | 7.32 | -| diff2_acf10 | 0 |seasonal_strength|0.79| hw_alpha | 7.47 | -| diff1_acf1 | 0 |e_acf1|1.67| garch_acf | 7.53 | -| diff2x_pacf5 | 0 |arch_acf|2.18| entropy | 9.45 | +| feature | diff | feature | diff | feature | diff | feature | diff | +|:----------------|-----:|:------------------|-----:|:----------|-----:|:--------|------:| +| series_length | 0 | seas_acf1 | 0 | trend | 2.28 | hurst | 26.02 | +| flat_spots | 0 | x_acf1 | 0 | arch_r2 | 2.29 | hw_beta | 32.39 | +| nperiods | 0 | unitroot_kpss | 0 | alpha | 2.52 | trough | 35 | +| crossing_points | 0 | nonlinearity | 0 | beta | 3.67 | peak | 69 | +| seasonal_period | 0 | diff1_acf10 | 0 | linearity | 3.97 | | | +| lumpiness | 0 | x_acf10 | 0 | curvature | 4.8 | | | +| stability | 0 | seas_pacf | 0 | e_acf10 | 7.05 | | | +| arch_lm | 0 | unitroot_pp | 0 | garch_r2 | 7.32 | | | +| diff2_acf1 | 0 | spike | 0 | hw_gamma | 7.32 | | | +| diff2_acf10 | 0 | seasonal_strength | 0.79 | hw_alpha | 7.47 | | | +| diff1_acf1 | 0 | e_acf1 | 1.67 | garch_acf | 7.53 | | | +| diff2x_pacf5 | 0 | arch_acf | 2.18 | entropy | 9.45 | | | -To replicate this results use: +[![Build](https://github.com/FedericoGarza/tsfeatures/workflows/Python%20package/badge.svg)](https://github.com/FedericoGarza/tsfeatures/tree/master) +[![PyPI version +fury.io](https://badge.fury.io/py/tsfeatures.svg)](https://pypi.python.org/pypi/tsfeatures/) 
+[![Downloads](https://pepy.tech/badge/tsfeatures.png)](https://pepy.tech/project/tsfeatures) +[![Python +3.6+](https://img.shields.io/badge/python-3.7+-blue.svg)](https://www.python.org/downloads/release/python-370+/) +[![License: +MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://github.com/FedericoGarza/tsfeatures/blob/master/LICENSE) -``` console -python -m tsfeatures.compare_with_r --results_directory /some/path \ - --dataset_name Hourly --num_obs 100 +# tsfeatures + +Calculates various features from time series data. Python implementation +of the R package +*[tsfeatures](https://github.com/robjhyndman/tsfeatures)*. + +# Installation + +You can install the *released* version of `tsfeatures` from the [Python +package index](pypi.org) with: + +``` python +pip install tsfeatures +``` + +# Usage + +The `tsfeatures` main function calculates by default the features used +by Montero-Manso, Talagala, Hyndman and Athanasopoulos in [their +implementation of the FFORMA +model](https://htmlpreview.github.io/?https://github.com/robjhyndman/M4metalearning/blob/master/docs/M4_methodology.html#features). + +``` python +from tsfeatures import tsfeatures +``` + +This function receives a panel pandas df with columns `unique_id`, `ds`, +`y` and optionally the frequency of the data. 
+ + + +``` python +tsfeatures(panel, freq=7) ``` +By default (`freq=None`) the function will try to infer the frequency of +each time series (using `infer_freq` from `pandas` on the `ds` column) +and assign a seasonal period according to the built-in dictionary +`FREQS`: + +``` python +FREQS = {'H': 24, 'D': 1, + 'M': 12, 'Q': 4, + 'W':1, 'Y': 1} +``` + +You can use your own dictionary using the `dict_freqs` argument: + +``` python +tsfeatures(panel, dict_freqs={'D': 7, 'W': 52}) +``` + +## List of available features + +| Features | | | +|:----------------|:----------------|:--------------| +| acf_features | heterogeneity | series_length | +| arch_stat | holt_parameters | sparsity | +| count_entropy | hurst | stability | +| crossing_points | hw_parameters | stl_features | +| entropy | intervals | unitroot_kpss | +| flat_spots | lumpiness | unitroot_pp | +| frequency | nonlinearity | | +| guerrero | pacf_features | | + +See the docs for a description of the features. To use a particular +feature included in the package you need to import it: + +``` python +from tsfeatures import acf_features + +tsfeatures(panel, freq=7, features=[acf_features]) +``` + +You can also define your own function and use it together with the +included features: + +``` python +def number_zeros(x, freq): + + number = (x == 0).sum() + return {'number_zeros': number} + +tsfeatures(panel, freq=7, features=[acf_features, number_zeros]) +``` + +`tsfeatures` can handle functions that receives a numpy array `x` and a +frequency `freq` (this parameter is needed even if you don’t use it) and +returns a dictionary with the feature name as a key and its value. 
+ # Authors -* **Federico Garza** - [FedericoGarza](https://github.com/FedericoGarza) -* **Kin Gutierrez** - [kdgutier](https://github.com/kdgutier) -* **Cristian Challu** - [cristianchallu](https://github.com/cristianchallu) -* **Jose Moralez** - [jose-moralez](https://github.com/jose-moralez) -* **Ricardo Olivares** - [rolivaresar](https://github.com/rolivaresar) -* **Max Mergenthaler** - [mergenthaler](https://github.com/mergenthaler) +- **Federico Garza** - [FedericoGarza](https://github.com/FedericoGarza) +- **Kin Gutierrez** - [kdgutier](https://github.com/kdgutier) +- **Cristian Challu** - + [cristianchallu](https://github.com/cristianchallu) +- **Jose Moralez** - [jose-moralez](https://github.com/jose-moralez) +- **Ricardo Olivares** - [rolivaresar](https://github.com/rolivaresar) +- **Max Mergenthaler** - [mergenthaler](https://github.com/mergenthaler) diff --git a/nbs/.gitignore b/nbs/.gitignore new file mode 100644 index 0000000..075b254 --- /dev/null +++ b/nbs/.gitignore @@ -0,0 +1 @@ +/.quarto/ diff --git a/nbs/00_utils.ipynb b/nbs/00_utils.ipynb new file mode 100644 index 0000000..57ae6e8 --- /dev/null +++ b/nbs/00_utils.ipynb @@ -0,0 +1,651 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# utils\n", + "\n", + "> supporting utils for tsfeatures" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# |default_exp utils" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. 
To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# |export\n", + "import numpy as np\n", + "import statsmodels.api as sm\n", + "from fastcore.test import *\n", + "\n", + "\n", + "# from scipy.signal import periodogram, welch" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'divide': 'ignore', 'over': 'warn', 'under': 'ignore', 'invalid': 'ignore'}" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.seterr(divide=\"ignore\", invalid=\"ignore\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# |export\n", + "\n", + "FREQS = {\"H\": 24, \"D\": 1, \"M\": 12, \"Q\": 4, \"W\": 1, \"Y\": 1}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# |export\n", + "def scalets(x: np.array) -> np.array:\n", + " \"\"\"Mean-std scale a time series.\n", + "\n", + " Scales the time series x by removing the mean and dividing by the standard deviation.\n", + "\n", + " Parameters\n", + " ----------\n", + " x : np.array\n", + " The input time series data.\n", + "\n", + " Returns\n", + " -------\n", + " np.array\n", + " The scaled time series values.\n", + " \"\"\"\n", + " return (x - x.mean()) / x.std(ddof=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# |export\n", + "def poly(x: np.array, p: int) -> np.array:\n", + " \"\"\"Returns or evaluates orthogonal polynomials of degree 1 to degree over the\n", + " specified set of points x:\n", + " these are all orthogonal to the constant polynomial of degree 0.\n", + "\n", + " Parameters\n", + " 
----------\n", + " x: numpy array\n", + " Time series.\n", + " p: int\n", + " Degree of the polynomial.\n", + "\n", + " References\n", + " ----------\n", + " https://www.rdocumentation.org/packages/stats/versions/3.6.2/topics/poly\n", + " \"\"\"\n", + " X = np.transpose(np.vstack([x**k for k in range(p + 1)]))\n", + "\n", + " return np.linalg.qr(X)[0][:, 1:]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# |export\n", + "def embed(x: np.array, p: int) -> np.array:\n", + " \"\"\"Embeds the time series x into a low-dimensional Euclidean space.\n", + "\n", + " Parameters\n", + " ----------\n", + " x: numpy array\n", + " Time series.\n", + " p: int\n", + " Embedding dimension.\n", + "\n", + " References\n", + " ----------\n", + " https://www.rdocumentation.org/packages/stats/versions/3.6.2/topics/embed\n", + " \"\"\"\n", + " x = np.transpose(np.vstack([np.roll(x, k) for k in range(p)]))\n", + " return x[p - 1 :]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# |export\n", + "def terasvirta_test(x: np.array, lag: int = 1, scale: bool = True) -> float:\n", + " \"\"\"Generically computes Teraesvirta's neural network test for neglected\n", + " nonlinearity either for the time series x or the regression y~x.\n", + "\n", + " Parameters\n", + " ----------\n", + " x: numpy array\n", + " Time series.\n", + " lag: int\n", + " Specifies the model order in terms of lags.\n", + " scale: bool\n", + " Whether the data should be scaled before computing the test.\n", + "\n", + " Returns\n", + " -------\n", + " float\n", + " Terasvirta statistic.\n", + "\n", + " References\n", + " ----------\n", + " https://www.rdocumentation.org/packages/tseries/versions/0.10-47/topics/terasvirta.test\n", + " \"\"\"\n", + " if scale:\n", + " x = scalets(x)\n", + "\n", + " size_x = len(x)\n", + " y = embed(x, lag + 1)\n", + "\n", + " X = y[:, 1:]\n", + " X = 
sm.add_constant(X)\n", + "\n", + " y = y[:, 0]\n", + "\n", + " ols = sm.OLS(y, X).fit()\n", + "\n", + " u = ols.resid\n", + " ssr0 = (u**2).sum()\n", + "\n", + " X_nn_list = []\n", + "\n", + " for i in range(lag):\n", + " for j in range(i, lag):\n", + " element = X[:, i + 1] * X[:, j + 1]\n", + " element = np.vstack(element)\n", + " X_nn_list.append(element)\n", + "\n", + " for i in range(lag):\n", + " for j in range(i, lag):\n", + " for k in range(j, lag):\n", + " element = X[:, i + 1] * X[:, j + 1] * X[:, k + 1]\n", + " element = np.vstack(element)\n", + " X_nn_list.append(element)\n", + "\n", + " X_nn = np.concatenate(X_nn_list, axis=1)\n", + " X_nn = np.concatenate([X, X_nn], axis=1)\n", + " ols_nn = sm.OLS(u, X_nn).fit()\n", + "\n", + " v = ols_nn.resid\n", + " ssr = (v**2).sum()\n", + "\n", + " stat = size_x * np.log(ssr0 / ssr)\n", + "\n", + " return stat" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# |export\n", + "def hurst_exponent(x: np.array) -> float:\n", + " \"\"\"Computes hurst exponent.\n", + "\n", + " Parameters\n", + " ----------\n", + " x: numpy array\n", + " Time series.\n", + "\n", + " References\n", + " ----------\n", + " [1] Taken from https://gist.github.com/alexvorndran/aad69fa741e579aad093608ccaab4fe1\n", + " [2] Based on https://codereview.stackexchange.com/questions/224360/hurst-exponent-calculator\n", + " \"\"\"\n", + " n = x.size # num timesteps\n", + " t = np.arange(1, n + 1)\n", + " y = x.cumsum() # marginally more efficient than: np.cumsum(sig)\n", + " mean_t = y / t # running mean\n", + "\n", + " s_t = np.sqrt(np.array([np.mean((x[: i + 1] - mean_t[i]) ** 2) for i in range(n)]))\n", + " r_t = np.array([np.ptp(y[: i + 1] - t[: i + 1] * mean_t[i]) for i in range(n)])\n", + "\n", + " with np.errstate(invalid=\"ignore\"):\n", + " r_s = r_t / s_t\n", + "\n", + " r_s = np.log(r_s)[1:]\n", + " n = np.log(t)[1:]\n", + " a = np.column_stack((n, np.ones(n.size)))\n", + " 
hurst_exponent, _ = np.linalg.lstsq(a, r_s, rcond=-1)[0]\n", + "\n", + " return hurst_exponent" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# |export\n", + "def ur_pp(x: np.array) -> float:\n", + " \"\"\"Performs the Phillips and Perron unit root test.\n", + "\n", + " Parameters\n", + " ----------\n", + " x: numpy array\n", + " Time series.\n", + "\n", + " References\n", + " ----------\n", + " https://www.rdocumentation.org/packages/urca/versions/1.3-0/topics/ur.pp\n", + " \"\"\"\n", + " n = len(x)\n", + " lmax = 4 * (n / 100) ** (1 / 4)\n", + "\n", + " lmax, _ = divmod(lmax, 1)\n", + " lmax = int(lmax)\n", + "\n", + " y, y_l1 = x[1:], x[: n - 1]\n", + "\n", + " n -= 1\n", + "\n", + " y_l1 = sm.add_constant(y_l1)\n", + "\n", + " model = sm.OLS(y, y_l1).fit()\n", + " my_tstat, res = model.tvalues[0], model.resid\n", + " s = 1 / (n * np.sum(res**2))\n", + " myybar = (1 / n**2) * (((y - y.mean()) ** 2).sum())\n", + " myy = (1 / n**2) * ((y**2).sum())\n", + " my = (n ** (-3 / 2)) * (y.sum())\n", + "\n", + " idx = np.arange(lmax)\n", + " coprods = []\n", + " for i in idx:\n", + " first_del = res[i + 1 :]\n", + " sec_del = res[: n - i - 1]\n", + " prod = first_del * sec_del\n", + " coprods.append(prod.sum())\n", + " coprods = np.array(coprods)\n", + "\n", + " weights = 1 - (idx + 1) / (lmax + 1)\n", + " sig = s + (2 / n) * ((weights * coprods).sum())\n", + " lambda_ = 0.5 * (sig - s)\n", + " lambda_prime = lambda_ / sig\n", + "\n", + " alpha = model.params[1]\n", + "\n", + " test_stat = n * (alpha - 1) - lambda_ / myybar\n", + "\n", + " return test_stat" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# |export\n", + "def lambda_coef_var(lambda_par: float, x: np.array, period: int = 2):\n", + " \"\"\"Calculates coefficient of variation for subseries of x.\n", + "\n", + " Parameters\n", + " ----------\n", + " lambda_par: float\n", + " 
Lambda Box-cox transformation parameter.\n", + " Must be greater than zero.\n", + " x: numpy array\n", + " Time series.\n", + " period: int\n", + " The length of each subseries (usually the length of seasonal period).\n", + "\n", + " Returns\n", + " -------\n", + " float\n", + " Coefficient of variation.\n", + " \"\"\"\n", + " if len(np.unique(x)) == 1:\n", + " return 1\n", + "\n", + " split_size = divmod(len(x) - 1, period)\n", + " split_size, _ = split_size\n", + "\n", + " split = np.array_split(x, split_size)\n", + "\n", + " mu_h = np.array([np.nanmean(sub) for sub in split])\n", + " sig_h = np.array([np.nanstd(sub, ddof=1) for sub in split])\n", + "\n", + " rat = sig_h / mu_h ** (1 - lambda_par)\n", + "\n", + " value = np.nanstd(rat, ddof=1) / np.nanmean(rat)\n", + "\n", + " return value" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# |export\n", + "WWWusage = [\n", + " 88,\n", + " 84,\n", + " 85,\n", + " 85,\n", + " 84,\n", + " 85,\n", + " 83,\n", + " 85,\n", + " 88,\n", + " 89,\n", + " 91,\n", + " 99,\n", + " 104,\n", + " 112,\n", + " 126,\n", + " 138,\n", + " 146,\n", + " 151,\n", + " 150,\n", + " 148,\n", + " 147,\n", + " 149,\n", + " 143,\n", + " 132,\n", + " 131,\n", + " 139,\n", + " 147,\n", + " 150,\n", + " 148,\n", + " 145,\n", + " 140,\n", + " 134,\n", + " 131,\n", + " 131,\n", + " 129,\n", + " 126,\n", + " 126,\n", + " 132,\n", + " 137,\n", + " 140,\n", + " 142,\n", + " 150,\n", + " 159,\n", + " 167,\n", + " 170,\n", + " 171,\n", + " 172,\n", + " 172,\n", + " 174,\n", + " 175,\n", + " 172,\n", + " 172,\n", + " 174,\n", + " 174,\n", + " 169,\n", + " 165,\n", + " 156,\n", + " 142,\n", + " 131,\n", + " 121,\n", + " 112,\n", + " 104,\n", + " 102,\n", + " 99,\n", + " 99,\n", + " 95,\n", + " 88,\n", + " 84,\n", + " 84,\n", + " 87,\n", + " 89,\n", + " 88,\n", + " 85,\n", + " 
86,\n", + " 89,\n", + " 91,\n", + " 91,\n", + " 94,\n", + " 101,\n", + " 110,\n", + " 121,\n", + " 135,\n", + " 145,\n", + " 149,\n", + " 156,\n", + " 165,\n", + " 171,\n", + " 175,\n", + " 177,\n", + " 182,\n", + " 193,\n", + " 204,\n", + " 208,\n", + " 210,\n", + " 215,\n", + " 222,\n", + " 228,\n", + " 226,\n", + " 222,\n", + " 220,\n", + "]\n", + "\n", + "USAccDeaths = [\n", + " 9007,\n", + " 8106,\n", + " 8928,\n", + " 9137,\n", + " 10017,\n", + " 10826,\n", + " 11317,\n", + " 10744,\n", + " 9713,\n", + " 9938,\n", + " 9161,\n", + " 8927,\n", + " 7750,\n", + " 6981,\n", + " 8038,\n", + " 8422,\n", + " 8714,\n", + " 9512,\n", + " 10120,\n", + " 9823,\n", + " 8743,\n", + " 9129,\n", + " 8710,\n", + " 8680,\n", + " 8162,\n", + " 7306,\n", + " 8124,\n", + " 7870,\n", + " 9387,\n", + " 9556,\n", + " 10093,\n", + " 9620,\n", + " 8285,\n", + " 8466,\n", + " 8160,\n", + " 8034,\n", + " 7717,\n", + " 7461,\n", + " 7767,\n", + " 7925,\n", + " 8623,\n", + " 8945,\n", + " 10078,\n", + " 9179,\n", + " 8037,\n", + " 8488,\n", + " 7874,\n", + " 8647,\n", + " 7792,\n", + " 6957,\n", + " 7726,\n", + " 8106,\n", + " 8890,\n", + " 9299,\n", + " 10625,\n", + " 9302,\n", + " 8314,\n", + " 8850,\n", + " 8265,\n", + " 8796,\n", + " 7836,\n", + " 6892,\n", + " 7791,\n", + " 8192,\n", + " 9115,\n", + " 9434,\n", + " 10484,\n", + " 9827,\n", + " 9110,\n", + " 9070,\n", + " 8633,\n", + " 9240,\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# |hide\n", + "from nbdev.showdoc import *" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# |hide\n", + "import nbdev\n", + "\n", + "nbdev.nbdev_export()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "python3", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/nbs/01_features.ipynb b/nbs/01_features.ipynb new file mode 100644 index 0000000..d33d05f --- /dev/null +++ b/nbs/01_features.ipynb @@ -0,0 +1,1228 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# features\n", + "\n", + "> Fill in a module description here\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# |default_exp features\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# |export\n", + "import warnings" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# |export\n", + "warnings.warn = lambda *a, **kw: False" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# |export\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# |export\n", + "os.environ[\"MKL_NUM_THREADS\"] = \"1\"\n", + "os.environ[\"NUMEXPR_NUM_THREADS\"] = \"1\"\n", + 
"os.environ[\"OMP_NUM_THREADS\"] = \"1\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# |export\n", + "from itertools import groupby\n", + "from math import e # maybe change with numpy e\n", + "from typing import Dict\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "from antropy import spectral_entropy\n", + "from arch import arch_model\n", + "from scipy.optimize import minimize_scalar\n", + "from sklearn.linear_model import LinearRegression\n", + "from statsmodels.api import OLS, add_constant\n", + "from statsmodels.tsa.ar_model import AR\n", + "from statsmodels.tsa.holtwinters import ExponentialSmoothing\n", + "from statsmodels.tsa.seasonal import STL\n", + "from statsmodels.tsa.stattools import acf, kpss, pacf\n", + "from supersmoother import SuperSmoother\n", + "\n", + "from tsfeatures.utils import (\n", + " embed,\n", + " hurst_exponent,\n", + " lambda_coef_var,\n", + " poly,\n", + " terasvirta_test,\n", + " ur_pp,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# |export\n", + "\n", + "\n", + "def acf_features(x: np.array, freq: int = 1) -> Dict[str, float]:\n", + " \"\"\"Calculates autocorrelation function features.\n", + "\n", + " Parameters\n", + " ----------\n", + " x: numpy array\n", + " The time series.\n", + " freq: int\n", + " Frequency of the time series\n", + "\n", + " Returns\n", + " -------\n", + " dict\n", + " 'x_acf1': First autocorrelation coefficient.\n", + " 'x_acf10': Sum of squares of first 10 autocorrelation coefficients.\n", + " 'diff1_acf1': First autocorrelation ciefficient of differenced series.\n", + " 'diff1_acf10': Sum of squared of first 10 autocorrelation coefficients\n", + " of differenced series.\n", + " 'diff2_acf1': First autocorrelation coefficient of twice-differenced series.\n", + " 'diff2_acf10': Sum of squared of first 10 autocorrelation coefficients of\n", 
+ " twice-differenced series.\n", + "\n", + " Only for seasonal data (freq > 1).\n", + " 'seas_acf1': Autocorrelation coefficient at the first seasonal lag.\n", + " \"\"\"\n", + " m = freq\n", + " size_x = len(x)\n", + "\n", + " acfx = acf(x, nlags=max(m, 10), fft=False)\n", + " if size_x > 10:\n", + " acfdiff1x = acf(np.diff(x, n=1), nlags=10, fft=False)\n", + " else:\n", + " acfdiff1x = [np.nan] * 2\n", + "\n", + " if size_x > 11:\n", + " acfdiff2x = acf(np.diff(x, n=2), nlags=10, fft=False)\n", + " else:\n", + " acfdiff2x = [np.nan] * 2\n", + " # first autocorrelation coefficient\n", + " acf_1 = acfx[1]\n", + " # sum of squares of first 10 autocorrelation coefficients\n", + " sum_of_sq_acf10 = np.sum((acfx[1:11]) ** 2) if size_x > 10 else np.nan\n", + " # first autocorrelation ciefficient of differenced series\n", + " diff1_acf1 = acfdiff1x[1]\n", + " # sum of squared of first 10 autocorrelation coefficients of differenced series\n", + " diff1_acf10 = np.sum((acfdiff1x[1:11]) ** 2) if size_x > 10 else np.nan\n", + " # first autocorrelation coefficient of twice-differenced series\n", + " diff2_acf1 = acfdiff2x[1]\n", + " # Sum of squared of first 10 autocorrelation coefficients of twice-differenced series\n", + " diff2_acf10 = np.sum((acfdiff2x[1:11]) ** 2) if size_x > 11 else np.nan\n", + "\n", + " output = {\n", + " \"x_acf1\": acf_1,\n", + " \"x_acf10\": sum_of_sq_acf10,\n", + " \"diff1_acf1\": diff1_acf1,\n", + " \"diff1_acf10\": diff1_acf10,\n", + " \"diff2_acf1\": diff2_acf1,\n", + " \"diff2_acf10\": diff2_acf10,\n", + " }\n", + "\n", + " if m > 1:\n", + " output[\"seas_acf1\"] = acfx[m] if len(acfx) > m else np.nan\n", + "\n", + " return output" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from math import isclose\n", + "\n", + "from tsfeatures.utils import USAccDeaths, WWWusage" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": 
[ + "def test_acf_features_seasonal():\n", + " z = acf_features(USAccDeaths, 12)\n", + " assert isclose(len(z), 7)\n", + " assert isclose(z[\"x_acf1\"], 0.70, abs_tol=0.01)\n", + " assert isclose(z[\"x_acf10\"], 1.20, abs_tol=0.01)\n", + " assert isclose(z[\"diff1_acf1\"], 0.023, abs_tol=0.01)\n", + " assert isclose(z[\"diff1_acf10\"], 0.27, abs_tol=0.01)\n", + " assert isclose(z[\"diff2_acf1\"], -0.48, abs_tol=0.01)\n", + " assert isclose(z[\"diff2_acf10\"], 0.74, abs_tol=0.01)\n", + " assert isclose(z[\"seas_acf1\"], 0.62, abs_tol=0.01)\n", + "\n", + "\n", + "test_acf_features_seasonal()\n", + "\n", + "\n", + "def test_acf_features_non_seasonal():\n", + " z = acf_features(WWWusage, 1)\n", + " assert isclose(len(z), 6)\n", + " assert isclose(z[\"x_acf1\"], 0.96, abs_tol=0.01)\n", + " assert isclose(z[\"x_acf10\"], 4.19, abs_tol=0.01)\n", + " assert isclose(z[\"diff1_acf1\"], 0.79, abs_tol=0.01)\n", + " assert isclose(z[\"diff1_acf10\"], 1.40, abs_tol=0.01)\n", + " assert isclose(z[\"diff2_acf1\"], 0.17, abs_tol=0.01)\n", + " assert isclose(z[\"diff2_acf10\"], 0.33, abs_tol=0.01)\n", + "\n", + "\n", + "test_acf_features_non_seasonal()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# |export\n", + "\n", + "\n", + "def arch_stat(\n", + " x: np.array, freq: int = 1, lags: int = 12, demean: bool = True\n", + ") -> Dict[str, float]:\n", + " \"\"\"Arch model features.\n", + "\n", + " Parameters\n", + " ----------\n", + " x: numpy array\n", + " The time series.\n", + " freq: int\n", + " Frequency of the time series\n", + "\n", + " Returns\n", + " -------\n", + " dict\n", + " 'arch_lm': R^2 value of an autoregressive model of order lags applied to x**2.\n", + " \"\"\"\n", + " if len(x) <= lags + 1:\n", + " return {\"arch_lm\": np.nan}\n", + " if demean:\n", + " x -= np.mean(x)\n", + "\n", + " size_x = len(x)\n", + " mat = embed(x**2, lags + 1)\n", + " X = mat[:, 1:]\n", + " y = np.vstack(mat[:, 0])\n", + 
"\n", + " try:\n", + " r_squared = LinearRegression().fit(X, y).score(X, y)\n", + " except:\n", + " r_squared = np.nan\n", + "\n", + " return {\"arch_lm\": r_squared}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from fastcore.test import *\n", + "\n", + "from tsfeatures.utils import USAccDeaths, WWWusage" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def test_arch_stat_seasonal():\n", + " z = arch_stat(USAccDeaths, 12)\n", + " test_close(len(z), 1)\n", + " test_close(z[\"arch_lm\"], 0.54, eps=0.01)\n", + "\n", + "\n", + "test_arch_stat_seasonal()\n", + "\n", + "\n", + "def test_arch_stat_non_seasonal():\n", + " z = arch_stat(WWWusage, 12)\n", + " test_close(len(z), 1)\n", + " test_close(z[\"arch_lm\"], 0.98, eps=0.01)\n", + "\n", + "\n", + "test_arch_stat_non_seasonal()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# |export\n", + "\n", + "\n", + "def count_entropy(x: np.array, freq: int = 1) -> Dict[str, float]:\n", + " \"\"\"Count entropy.\n", + "\n", + " Parameters\n", + " ----------\n", + " x: numpy array\n", + " The time series.\n", + " freq: int\n", + " Frequency of the time series\n", + "\n", + " Returns\n", + " -------\n", + " dict\n", + " 'count_entropy': Entropy using only positive data.\n", + " \"\"\"\n", + " entropy = x[x > 0] * np.log(x[x > 0])\n", + " entropy = -entropy.sum()\n", + "\n", + " return {\"count_entropy\": entropy}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# |export\n", + "\n", + "\n", + "def crossing_points(x: np.array, freq: int = 1) -> Dict[str, float]:\n", + " \"\"\"Crossing points.\n", + "\n", + " Parameters\n", + " ----------\n", + " x: numpy array\n", + " The time series.\n", + " freq: int\n", + " Frequency of the time series\n", + "\n", + " Returns\n", 
+ " -------\n", + " dict\n", + " 'crossing_points': Number of times that x crosses the median.\n", + " \"\"\"\n", + " midline = np.median(x)\n", + " ab = x <= midline\n", + " lenx = len(x)\n", + " p1 = ab[: (lenx - 1)]\n", + " p2 = ab[1:]\n", + " cross = (p1 & (~p2)) | (p2 & (~p1))\n", + "\n", + " return {\"crossing_points\": cross.sum()}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# |export\n", + "\n", + "\n", + "def entropy(x: np.array, freq: int = 1, base: float = e) -> Dict[str, float]:\n", + " \"\"\"Calculates sample entropy.\n", + "\n", + " Parameters\n", + " ----------\n", + " x: numpy array\n", + " The time series.\n", + " freq: int\n", + " Frequency of the time series\n", + "\n", + " Returns\n", + " -------\n", + " dict\n", + " 'entropy': Wrapper of the function spectral_entropy.\n", + " \"\"\"\n", + " try:\n", + " with np.errstate(divide=\"ignore\"):\n", + " entropy = spectral_entropy(x, 1, normalize=True)\n", + " except:\n", + " entropy = np.nan\n", + "\n", + " return {\"entropy\": entropy}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# |export\n", + "\n", + "\n", + "def flat_spots(x: np.array, freq: int = 1) -> Dict[str, float]:\n", + " \"\"\"Flat spots.\n", + "\n", + " Parameters\n", + " ----------\n", + " x: numpy array\n", + " The time series.\n", + " freq: int\n", + " Frequency of the time series\n", + "\n", + " Returns\n", + " -------\n", + " dict\n", + " 'flat_spots': Number of flat spots in x.\n", + " \"\"\"\n", + " try:\n", + " cutx = pd.cut(x, bins=10, include_lowest=True, labels=False) + 1\n", + " except:\n", + " return {\"flat_spots\": np.nan}\n", + "\n", + " rlex = np.array([sum(1 for i in g) for k, g in groupby(cutx)]).max()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# |export\n", + "\n", + "\n", + "def frequency(x: np.array, 
# |export


def guerrero(
    x: np.array, freq: int = 1, lower: int = -1, upper: int = 2
) -> Dict[str, float]:
    """Applies Guerrero's (1993) method to select the lambda which minimises the
    coefficient of variation for subseries of x.

    Parameters
    ----------
    x: numpy array
        The time series.
    freq: int
        Frequency of the time series.
    lower: float
        The lower bound for lambda.
    upper: float
        The upper bound for lambda.

    Returns
    -------
    dict
        'guerrero': Minimum coefficient of variation for subseries of x.

    References
    ----------
    [1] Guerrero, V.M. (1993) Time-series analysis supported by power transformations.
        Journal of Forecasting, 12, 37-48.
    """
    func_to_min = lambda lambda_par: lambda_coef_var(lambda_par, x=x, period=freq)

    min_ = minimize_scalar(func_to_min, bounds=[lower, upper])

    return {"guerrero": min_["fun"]}


# |export


def heterogeneity(x: np.array, freq: int = 1) -> Dict[str, float]:
    """Heterogeneity.

    Parameters
    ----------
    x: numpy array
        The time series.
    freq: int
        Frequency of the time series

    Returns
    -------
    dict
        'arch_acf': Sum of squares of the first 12 autocorrelations of the
                    residuals of the AR model applied to x
        'garch_acf': Sum of squares of the first 12 autocorrelations of the
                     residuals of the GARCH model applied to x
        'arch_r2': Function arch_stat applied to the residuals of the
                   AR model applied to x.
        'garch_r2': Function arch_stat applied to the residuals of the GARCH
                    model applied to x.
    """
    m = freq

    size_x = len(x)
    # AR order rule of thumb: min(n - 1, floor(10 * log10(n))).
    order_ar = int(min(size_x - 1, np.floor(10 * np.log10(size_x))))

    # Whiten the series with an AR fit; fall back to no-constant trend,
    # then to all-NaN output if both fits fail.
    try:
        x_whitened = AR(x).fit(maxlag=order_ar, ic="aic", trend="c").resid
    except Exception:
        try:
            x_whitened = AR(x).fit(maxlag=order_ar, ic="aic", trend="nc").resid
        except Exception:
            return {
                "arch_acf": np.nan,
                "garch_acf": np.nan,
                "arch_r2": np.nan,
                "garch_r2": np.nan,
            }
    # ARCH and Box tests on the whitened residuals
    x_archtest = arch_stat(x_whitened, m)["arch_lm"]
    LBstat = (acf(x_whitened**2, nlags=12, fft=False)[1:] ** 2).sum()
    # Fit GARCH model
    garch_fit = arch_model(x_whitened, vol="GARCH", rescale=False).fit(disp="off")
    # compare arch test before and after fitting garch
    garch_fit_std = garch_fit.resid
    x_garch_archtest = arch_stat(garch_fit_std, m)["arch_lm"]
    # compare Box test of squared residuals before and after fitting garch
    LBstat2 = (acf(garch_fit_std**2, nlags=12, fft=False)[1:] ** 2).sum()

    return {
        "arch_acf": LBstat,
        "garch_acf": LBstat2,
        "arch_r2": x_archtest,
        "garch_r2": x_garch_archtest,
    }


# |export


def holt_parameters(x: np.array, freq: int = 1) -> Dict[str, float]:
    """Fitted parameters of a Holt model.

    Parameters
    ----------
    x: numpy array
        The time series.
    freq: int
        Frequency of the time series

    Returns
    -------
    dict
        'alpha': Level parameter of the Holt model.
        'beta': Trend parameter of the Holt model.
    """
    try:
        fit = ExponentialSmoothing(x, trend="add", seasonal=None).fit()
        params = {
            "alpha": fit.params["smoothing_level"],
            "beta": fit.params["smoothing_trend"],
        }
    except Exception:
        params = {"alpha": np.nan, "beta": np.nan}

    return params


# |export


def hurst(x: np.array, freq: int = 1) -> Dict[str, float]:
    """Hurst index.

    Parameters
    ----------
    x: numpy array
        The time series.
    freq: int
        Frequency of the time series

    Returns
    -------
    dict
        'hurst': Hurst exponent.
    """
    try:
        hurst_index = hurst_exponent(x)
    except Exception:
        hurst_index = np.nan

    return {"hurst": hurst_index}


# |export


def hw_parameters(x: np.array, freq: int = 1) -> Dict[str, float]:
    """Fitted parameters of a Holt-Winters model.

    Parameters
    ----------
    x: numpy array
        The time series.
    freq: int
        Frequency of the time series

    Returns
    -------
    dict
        'hw_alpha': Level parameter of the HW model.
        'hw_beta': Trend parameter of the HW model.
        'hw_gamma': Seasonal parameter of the HW model.
    """
    try:
        fit = ExponentialSmoothing(
            x, seasonal_periods=freq, trend="add", seasonal="add"
        ).fit()
        params = {
            "hw_alpha": fit.params["smoothing_level"],
            "hw_beta": fit.params["smoothing_trend"],
            "hw_gamma": fit.params["smoothing_seasonal"],
        }
    except Exception:
        params = {"hw_alpha": np.nan, "hw_beta": np.nan, "hw_gamma": np.nan}

    return params


# |export


def intervals(x: np.array, freq: int = 1) -> Dict[str, float]:
    """Intervals with demand.

    Parameters
    ----------
    x: numpy array
        The time series.
    freq: int
        Frequency of the time series

    Returns
    -------
    dict
        'intervals_mean': Mean of intervals with positive values.
        'intervals_sd': SD of intervals with positive values.
    """
    # Fix: the original wrote `x[x > 0] = 1`, mutating the caller's array
    # in place. np.where builds a new array with the same semantics
    # (positive values -> 1, everything else unchanged).
    x = np.where(x > 0, 1, x)

    # Lengths of consecutive runs of nonzero (demand) observations.
    y = [sum(val) for keys, val in groupby(x, key=lambda k: k != 0) if keys != 0]
    y = np.array(y)

    return {"intervals_mean": np.mean(y), "intervals_sd": np.std(y, ddof=1)}


# |export


def lumpiness(x: np.array, freq: int = 1) -> Dict[str, float]:
    """lumpiness.

    Parameters
    ----------
    x: numpy array
        The time series.
    freq: int
        Frequency of the time series

    Returns
    -------
    dict
        'lumpiness': Variance of the variances of tiled windows.
    """
    # Non-seasonal series use a fixed window of 10 observations.
    if freq == 1:
        width = 10
    else:
        width = freq

    nr = len(x)
    lo = np.arange(0, nr, width)
    up = lo + width
    nsegs = nr / width
    varx = [np.nanvar(x[lo[idx] : up[idx]], ddof=1) for idx in np.arange(int(nsegs))]

    # Need at least two full windows for a meaningful variance of variances.
    if len(x) < 2 * width:
        lumpiness = 0
    else:
        lumpiness = np.nanvar(varx, ddof=1)

    return {"lumpiness": lumpiness}


# |export


def nonlinearity(x: np.array, freq: int = 1) -> Dict[str, float]:
    """Nonlinearity.

    Parameters
    ----------
    x: numpy array
        The time series.
    freq: int
        Frequency of the time series

    Returns
    -------
    dict
        'nonlinearity': 10 t**2/len(x) where t is the statistic used in
                        Terasvirta's test.
    """
    try:
        test = terasvirta_test(x)
        test = 10 * test / len(x)
    except Exception:
        test = np.nan

    return {"nonlinearity": test}


# |export


def pacf_features(x: np.array, freq: int = 1) -> Dict[str, float]:
    """Calculates partial autocorrelation function features.

    Parameters
    ----------
    x: numpy array
        The time series.
    freq: int
        Frequency of the time series

    Returns
    -------
    dict
        'x_pacf5': Sum of squares of the first 5 partial autocorrelation
                   coefficients.
        'diff1x_pacf5': Sum of squares of the first 5 partial autocorrelation
                        coefficients of differenced series.
        'diff2x_pacf5': Sum of squares of the first 5 partial autocorrelation
                        coefficients of twice-differenced series.

        Only for seasonal data (freq > 1).
        'seas_pacf': Partial autocorrelation
                     coefficient at the first seasonal lag.
    """
    m = freq

    nlags_ = max(m, 5)

    if len(x) > 1:
        try:
            pacfx = pacf(x, nlags=nlags_, method="ldb")
        except Exception:
            pacfx = np.nan
    else:
        pacfx = np.nan
    # Sum of first 5 PACs squared
    if len(x) > 5 and not np.all(np.isnan(pacfx)):
        pacf_5 = np.sum(pacfx[1:6] ** 2)
    else:
        pacf_5 = np.nan
    # Sum of first 5 PACs of difference series squared
    if len(x) > 6:
        try:
            diff1_pacf = pacf(np.diff(x, n=1), nlags=5, method="ldb")[1:6]
            diff1_pacf_5 = np.sum(diff1_pacf**2)
        except Exception:
            diff1_pacf_5 = np.nan
    else:
        diff1_pacf_5 = np.nan
    # Sum of first 5 PACs of twice differenced series squared
    if len(x) > 7:
        try:
            diff2_pacf = pacf(np.diff(x, n=2), nlags=5, method="ldb")[1:6]
            diff2_pacf_5 = np.sum(diff2_pacf**2)
        except Exception:
            diff2_pacf_5 = np.nan
    else:
        diff2_pacf_5 = np.nan

    output = {
        "x_pacf5": pacf_5,
        "diff1x_pacf5": diff1_pacf_5,
        "diff2x_pacf5": diff2_pacf_5,
    }

    if m > 1:
        # Fix: `pacfx` may be the scalar np.nan fallback, on which
        # len() raised TypeError in the original. Guard on ndarray first.
        if isinstance(pacfx, np.ndarray) and len(pacfx) > m:
            output["seas_pacf"] = pacfx[m]
        else:
            output["seas_pacf"] = np.nan

    return output


# |export


def series_length(x: np.array, freq: int = 1) -> Dict[str, float]:
    """Series length.

    Parameters
    ----------
    x: numpy array
        The time series.
    freq: int
        Frequency of the time series

    Returns
    -------
    dict
        'series_length': Wrapper of len(x).
    """

    return {"series_length": len(x)}


# |export


def sparsity(x: np.array, freq: int = 1) -> Dict[str, float]:
    """Sparsity.

    Parameters
    ----------
    x: numpy array
        The time series.
    freq: int
        Frequency of the time series

    Returns
    -------
    dict
        'sparsity': Average obs with zero values.
    """

    return {"sparsity": np.mean(x == 0)}


# |export


def stability(x: np.array, freq: int = 1) -> Dict[str, float]:
    """Stability.

    Parameters
    ----------
    x: numpy array
        The time series.
    freq: int
        Frequency of the time series

    Returns
    -------
    dict
        'stability': Variance of the means of tiled windows.
    """
    if freq == 1:
        width = 10
    else:
        width = freq

    nr = len(x)
    lo = np.arange(0, nr, width)
    up = lo + width
    nsegs = nr / width
    meanx = [np.nanmean(x[lo[idx] : up[idx]]) for idx in np.arange(int(nsegs))]

    if len(x) < 2 * width:
        stability = 0
    else:
        stability = np.nanvar(meanx, ddof=1)

    return {"stability": stability}


# |export


def stl_features(x: np.array, freq: int = 1) -> Dict[str, float]:
    """Calculates seasonal trend using loess decomposition.

    Parameters
    ----------
    x: numpy array
        The time series.
    freq: int
        Frequency of the time series

    Returns
    -------
    dict
        'nperiods': Number of seasonal periods in x.
        'seasonal_period': Frequency of the time series.
        'trend': Strength of trend.
        'spike': Measures "spikiness" of x.
        'linearity': Linearity of x based on the coefficients of an
                     orthogonal quadratic regression.
        'curvature': Curvature of x based on the coefficients of an
                     orthogonal quadratic regression.
        'e_acf1': acfremainder['x_acf1']
        'e_acf10': acfremainder['x_acf10']

        Only for seasonal data (freq > 1).
        'seasonal_strength': Strength of seasonality.
        'peak': Strength of peaks.
        'trough': Strength of trough.
    """
    m = freq
    nperiods = int(m > 1)
    # STL fit for seasonal data; SuperSmoother trend for non-seasonal data.
    if m > 1:
        try:
            stlfit = STL(x, m, 13).fit()
        except Exception:
            return {
                "nperiods": nperiods,
                "seasonal_period": m,
                "trend": np.nan,
                "spike": np.nan,
                "linearity": np.nan,
                "curvature": np.nan,
                "e_acf1": np.nan,
                "e_acf10": np.nan,
                "seasonal_strength": np.nan,
                "peak": np.nan,
                "trough": np.nan,
            }

        trend0 = stlfit.trend
        remainder = stlfit.resid
        seasonal = stlfit.seasonal
    else:
        deseas = x
        t = np.arange(len(x)) + 1
        try:
            trend0 = SuperSmoother().fit(t, deseas).predict(t)
        except Exception:
            # Note: the non-seasonal failure path intentionally omits the
            # seasonal_strength/peak/trough keys, matching the success path.
            return {
                "nperiods": nperiods,
                "seasonal_period": m,
                "trend": np.nan,
                "spike": np.nan,
                "linearity": np.nan,
                "curvature": np.nan,
                "e_acf1": np.nan,
                "e_acf10": np.nan,
            }

        remainder = deseas - trend0
        seasonal = np.zeros(len(x))
    # De-seasonalized data
    deseason = x - seasonal
    # Summary stats
    n = len(x)
    varx = np.nanvar(x, ddof=1)
    vare = np.nanvar(remainder, ddof=1)
    vardeseason = np.nanvar(deseason, ddof=1)
    # Measure of trend strength
    if varx < np.finfo(float).eps:
        trend = 0
    elif vardeseason / varx < 1e-10:
        trend = 0
    else:
        trend = max(0, min(1, 1 - vare / vardeseason))
    # Measure of seasonal strength
    if m > 1:
        if varx < np.finfo(float).eps:
            season = 0
        elif np.nanvar(remainder + seasonal, ddof=1) < np.finfo(float).eps:
            season = 0
        else:
            season = max(0, min(1, 1 - vare / np.nanvar(remainder + seasonal, ddof=1)))

        # Positions of the seasonal extremes, mapped into 1..m.
        peak = (np.argmax(seasonal) + 1) % m
        peak = m if peak == 0 else peak

        trough = (np.argmin(seasonal) + 1) % m
        trough = m if trough == 0 else trough
    # Compute measure of spikiness (variance of leave-one-out variances)
    d = (remainder - np.nanmean(remainder)) ** 2
    varloo = (vare * (n - 1) - d) / (n - 2)
    spike = np.nanvar(varloo, ddof=1)
    # Compute measures of linearity and curvature via orthogonal
    # quadratic regression on the trend component.
    time = np.arange(n) + 1
    poly_m = poly(time, 2)
    time_x = add_constant(poly_m)
    coefs = OLS(trend0, time_x).fit().params

    linearity = coefs[1]
    curvature = -coefs[2]
    # ACF features of the remainder
    acfremainder = acf_features(remainder, m)
    # Assemble features
    output = {
        "nperiods": nperiods,
        "seasonal_period": m,
        "trend": trend,
        "spike": spike,
        "linearity": linearity,
        "curvature": curvature,
        "e_acf1": acfremainder["x_acf1"],
        "e_acf10": acfremainder["x_acf10"],
    }

    if m > 1:
        output["seasonal_strength"] = season
        output["peak"] = peak
        output["trough"] = trough

    return output


# |export


def unitroot_kpss(x: np.array, freq: int = 1) -> Dict[str, float]:
    """Unit root kpss.

    Parameters
    ----------
    x: numpy array
        The time series.
    freq: int
        Frequency of the time series

    Returns
    -------
    dict
        'unitroot_kpss': Statistic for the Kwiatowski et al unit root test.
    """
    n = len(x)
    # Standard KPSS lag truncation rule: 4 * (n/100)^(1/4).
    nlags = int(4 * (n / 100) ** (1 / 4))

    try:
        test_kpss, _, _, _ = kpss(x, nlags=nlags)
    except Exception:
        test_kpss = np.nan

    return {"unitroot_kpss": test_kpss}


# |export


def unitroot_pp(x: np.array, freq: int = 1) -> Dict[str, float]:
    """Unit root pp.

    Parameters
    ----------
    x: numpy array
        The time series.
    freq: int
        Frequency of the time series

    Returns
    -------
    dict
        'unitroot_pp': Statistic for the Phillips-Perron unit root test.
    """
    try:
        test_pp = ur_pp(x)
    except Exception:
        test_pp = np.nan

    return {"unitroot_pp": test_pp}
# |export
import os
import warnings

# Deliberately silence *everything* routed through warnings.warn: the
# feature functions call many statsmodels routines that are noisy on
# short/degenerate series.
warnings.warn = lambda *a, **kw: False

# Pin numeric-library threading to 1 so the multiprocessing Pool below
# does not oversubscribe cores.
os.environ["MKL_NUM_THREADS"] = "1"
os.environ["NUMEXPR_NUM_THREADS"] = "1"
os.environ["OMP_NUM_THREADS"] = "1"

from collections import ChainMap
from functools import partial
from multiprocessing import Pool
from typing import Callable, Dict, List, Optional

import pandas as pd

from tsfeatures.features import *
from tsfeatures.utils import *


def _get_feats(
    index,
    ts,
    freq,
    scale=True,
    # NOTE: mutable default, but treated as read-only below.
    features=[
        acf_features,
        arch_stat,
        crossing_points,
        entropy,
        flat_spots,
        heterogeneity,
        holt_parameters,
        lumpiness,
        nonlinearity,
        pacf_features,
        stl_features,
        stability,
        hw_parameters,
        unitroot_kpss,
        unitroot_pp,
        series_length,
        hurst,
    ],
    dict_freqs=FREQS,
):
    """Compute every function in `features` for a single series.

    Parameters
    ----------
    index:
        Identifier used as the row index of the returned frame
        (the `unique_id` of the series).
    ts: pandas DataFrame, pandas Series or numpy array
        The series. A DataFrame must have a `y` column (and a `ds`
        column when `freq` is None, so the frequency can be inferred).
    freq: int or None
        Seasonal period. When None it is inferred from `ts['ds']` and
        mapped to an integer via `dict_freqs`.
    scale: bool
        Whether to (mean-std) scale the series before computing features.
    features: iterable of callables
        Feature functions, each taking (x, freq) and returning a dict.
    dict_freqs: dict
        Maps pandas frequency strings to integer seasonal periods.

    Returns
    -------
    pandas DataFrame
        Single row indexed by `index`, one column per feature.
    """
    # Fix: removed stray debug `print("dict_freq")` left in library code.
    if freq is None:
        inf_freq = pd.infer_freq(ts["ds"])
        if inf_freq is None:
            raise Exception(
                "Failed to infer frequency from the `ds` column, "
                "please provide the frequency using the `freq` argument."
            )

        freq = dict_freqs.get(inf_freq)
        if freq is None:
            raise Exception(
                "Error trying to convert infered frequency from the `ds` column "
                "to integer. Please provide a dictionary with that frequency "
                "as key and the integer frequency as value. "
                f"Infered frequency: {inf_freq}"
            )

    if isinstance(ts, pd.DataFrame):
        assert "y" in ts.columns
        ts = ts["y"].values

    if isinstance(ts, pd.Series):
        ts = ts.values

    if scale:
        ts = scalets(ts)

    # Merge the per-feature dicts; ChainMap gives earlier features
    # precedence on (unexpected) duplicate keys.
    c_map = ChainMap(
        *[dict_feat for dict_feat in [func(ts, freq) for func in features]]
    )

    return pd.DataFrame(dict(c_map), index=[index])


# |export
def tsfeatures(
    ts: pd.DataFrame,
    freq: Optional[int] = None,
    features: List[Callable] = [
        acf_features,
        arch_stat,
        crossing_points,
        entropy,
        flat_spots,
        heterogeneity,
        holt_parameters,
        lumpiness,
        nonlinearity,
        pacf_features,
        stl_features,
        stability,
        hw_parameters,
        unitroot_kpss,
        unitroot_pp,
        series_length,
        hurst,
    ],
    dict_freqs: Dict[str, int] = FREQS,
    scale: bool = True,
    threads: Optional[int] = None,
) -> pd.DataFrame:
    """Calculates features for time series.

    Parameters
    ----------
    ts: pandas df
        Pandas DataFrame with columns ['unique_id', 'ds', 'y'].
        Long panel of time series.
    freq: int
        Frequency of the time series. If None the frequency of
        each time series is infered and assigns the seasonal periods according to
        dict_freqs.
    features: iterable
        Iterable of features functions.
    scale: bool
        Whether (mean-std) scale data.
    dict_freqs: dict
        Dictionary that maps string frequency to int. Ex: {'D': 7, 'W': 1}
    threads: int
        Number of processes to use. Use None (default) to let
        multiprocessing pick the pool size.

    Returns
    -------
    pandas df
        Pandas DataFrame where each column is a feature and each row
        a time series.
    """
    partial_get_feats = partial(
        _get_feats, freq=freq, scale=scale, features=features, dict_freqs=dict_freqs
    )

    # One task per series; starmap feeds (unique_id, group) pairs as
    # (index, ts) to _get_feats.
    with Pool(threads) as pool:
        ts_features = pool.starmap(partial_get_feats, ts.groupby("unique_id"))

    ts_features = pd.concat(ts_features).rename_axis("unique_id")
    ts_features = ts_features.reset_index()

    return ts_features
from tsfeatures.features import *
from tsfeatures.m4_data import *
from tsfeatures.tsfeatures import *
from tsfeatures.utils import *


def test_pipeline():
    """Smoke-test the full pipeline: download/prepare M4 subsets and run
    `tsfeatures` with a single feature function over each of them."""

    def run_dataset(dataset_name, directory, num_obs=1000000):
        # Obtain the training panel for this M4 frequency.
        _, y_train_df, _, _ = prepare_m4_data(
            dataset_name=dataset_name, directory=directory, num_obs=num_obs
        )

        # Seasonal period is keyed by the dataset's first letter (H, D, ...).
        seasonal_period = FREQS[dataset_name[0]]

        features_df = tsfeatures(
            y_train_df, freq=seasonal_period, features=[count_entropy]
        )
        features_df.set_index("unique_id")

    for dataset in ("Hourly", "Daily"):
        run_dataset(dataset, "data", 100)
Falling back to standard exception\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Traceback (most recent call last):\n", + " File \"/Users/JdeTheije/mambaforge/envs/nbdev/lib/python3.10/site-packages/IPython/core/interactiveshell.py\", line 3526, in run_code\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + " File \"/var/folders/ny/_85npg2s1j3_clzbjzxsshn80000gp/T/ipykernel_28927/253115070.py\", line 1, in \n", + " test_pipeline()\n", + " File \"/var/folders/ny/_85npg2s1j3_clzbjzxsshn80000gp/T/ipykernel_28927/632637266.py\", line 13, in test_pipeline\n", + " calculate_features_m4(\"Hourly\", \"data\", 100)\n", + " File \"/var/folders/ny/_85npg2s1j3_clzbjzxsshn80000gp/T/ipykernel_28927/632637266.py\", line 9, in calculate_features_m4\n", + " py_feats = tsfeatures(\n", + " File \"/Users/JdeTheije/Developer/github_juicetea/nbdev_port/tsfeatures_nbdev/tsfeatures/tsfeatures.py\", line 147, in tsfeatures\n", + " ts_features = pool.starmap(partial_get_feats, ts.groupby(\"unique_id\"))\n", + " File \"/Users/JdeTheije/mambaforge/envs/nbdev/lib/python3.10/multiprocessing/pool.py\", line 375, in starmap\n", + " return self._map_async(func, iterable, starmapstar, chunksize).get()\n", + " File \"/Users/JdeTheije/mambaforge/envs/nbdev/lib/python3.10/multiprocessing/pool.py\", line 774, in get\n", + " raise self._value\n", + " File \"/Users/JdeTheije/mambaforge/envs/nbdev/lib/python3.10/multiprocessing/pool.py\", line 540, in _handle_tasks\n", + " put(task)\n", + " File \"/Users/JdeTheije/mambaforge/envs/nbdev/lib/python3.10/multiprocessing/connection.py\", line 206, in send\n", + " self._send_bytes(_ForkingPickler.dumps(obj))\n", + " File \"/Users/JdeTheije/mambaforge/envs/nbdev/lib/python3.10/multiprocessing/reduction.py\", line 51, in dumps\n", + " cls(buf, protocol).dump(obj)\n", + "_pickle.PicklingError: Can't pickle : it's not the same object as tsfeatures.features.count_entropy\n", + "\n", + "During handling of the above 
exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/Users/JdeTheije/mambaforge/envs/nbdev/lib/python3.10/site-packages/IPython/core/interactiveshell.py\", line 2120, in showtraceback\n", + " stb = self.InteractiveTB.structured_traceback(\n", + " File \"/Users/JdeTheije/mambaforge/envs/nbdev/lib/python3.10/site-packages/IPython/core/ultratb.py\", line 1435, in structured_traceback\n", + " return FormattedTB.structured_traceback(\n", + " File \"/Users/JdeTheije/mambaforge/envs/nbdev/lib/python3.10/site-packages/IPython/core/ultratb.py\", line 1326, in structured_traceback\n", + " return VerboseTB.structured_traceback(\n", + " File \"/Users/JdeTheije/mambaforge/envs/nbdev/lib/python3.10/site-packages/IPython/core/ultratb.py\", line 1173, in structured_traceback\n", + " formatted_exception = self.format_exception_as_a_whole(etype, evalue, etb, number_of_lines_of_context,\n", + " File \"/Users/JdeTheije/mambaforge/envs/nbdev/lib/python3.10/site-packages/IPython/core/ultratb.py\", line 1088, in format_exception_as_a_whole\n", + " frames.append(self.format_record(record))\n", + " File \"/Users/JdeTheije/mambaforge/envs/nbdev/lib/python3.10/site-packages/IPython/core/ultratb.py\", line 970, in format_record\n", + " frame_info.lines, Colors, self.has_colors, lvals\n", + " File \"/Users/JdeTheije/mambaforge/envs/nbdev/lib/python3.10/site-packages/IPython/core/ultratb.py\", line 792, in lines\n", + " return self._sd.lines\n", + " File \"/Users/JdeTheije/mambaforge/envs/nbdev/lib/python3.10/site-packages/stack_data/utils.py\", line 145, in cached_property_wrapper\n", + " value = obj.__dict__[self.func.__name__] = self.func(obj)\n", + " File \"/Users/JdeTheije/mambaforge/envs/nbdev/lib/python3.10/site-packages/stack_data/core.py\", line 698, in lines\n", + " pieces = self.included_pieces\n", + " File \"/Users/JdeTheije/mambaforge/envs/nbdev/lib/python3.10/site-packages/stack_data/utils.py\", line 145, in 
cached_property_wrapper\n", + " value = obj.__dict__[self.func.__name__] = self.func(obj)\n", + " File \"/Users/JdeTheije/mambaforge/envs/nbdev/lib/python3.10/site-packages/stack_data/core.py\", line 649, in included_pieces\n", + " pos = scope_pieces.index(self.executing_piece)\n", + " File \"/Users/JdeTheije/mambaforge/envs/nbdev/lib/python3.10/site-packages/stack_data/utils.py\", line 145, in cached_property_wrapper\n", + " value = obj.__dict__[self.func.__name__] = self.func(obj)\n", + " File \"/Users/JdeTheije/mambaforge/envs/nbdev/lib/python3.10/site-packages/stack_data/core.py\", line 628, in executing_piece\n", + " return only(\n", + " File \"/Users/JdeTheije/mambaforge/envs/nbdev/lib/python3.10/site-packages/executing/executing.py\", line 164, in only\n", + " raise NotOneValueFound('Expected one value, found 0')\n", + "executing.executing.NotOneValueFound: Expected one value, found 0\n" + ] + } + ], + "source": [ + "test_pipeline()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "python3", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/nbs/04_m4_data.ipynb b/nbs/04_m4_data.ipynb new file mode 100644 index 0000000..ac2994e --- /dev/null +++ b/nbs/04_m4_data.ipynb @@ -0,0 +1,290 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# features\n", + "\n", + "> Fill in a module description here\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# |default_exp m4_data\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. 
To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# | export\n", + "import os\n", + "import urllib\n", + "\n", + "import pandas as pd\n", + "\n", + "seas_dict = {\n", + " \"Hourly\": {\"seasonality\": 24, \"input_size\": 24, \"output_size\": 48, \"freq\": \"H\"},\n", + " \"Daily\": {\"seasonality\": 7, \"input_size\": 7, \"output_size\": 14, \"freq\": \"D\"},\n", + " \"Weekly\": {\"seasonality\": 52, \"input_size\": 52, \"output_size\": 13, \"freq\": \"W\"},\n", + " \"Monthly\": {\"seasonality\": 12, \"input_size\": 12, \"output_size\": 18, \"freq\": \"M\"},\n", + " \"Quarterly\": {\"seasonality\": 4, \"input_size\": 4, \"output_size\": 8, \"freq\": \"Q\"},\n", + " \"Yearly\": {\"seasonality\": 1, \"input_size\": 4, \"output_size\": 6, \"freq\": \"D\"},\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# | export\n", + "\n", + "SOURCE_URL = (\n", + " \"https://raw.githubusercontent.com/Mcompetitions/M4-methods/master/Dataset/\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# | export\n", + "\n", + "\n", + "def maybe_download(filename, directory):\n", + " \"\"\"Download the data from M4's website, unless it's already here.\n", + "\n", + " Parameters\n", + " ----------\n", + " filename: str\n", + " Filename of M4 data with format /Type/Frequency.csv. 
Example: /Test/Daily-train.csv\n", + " directory: str\n", + " Custom directory where data will be downloaded.\n", + " \"\"\"\n", + " data_directory = directory + \"/m4\"\n", + " train_directory = data_directory + \"/Train/\"\n", + " test_directory = data_directory + \"/Test/\"\n", + "\n", + " os.makedirs(data_directory, exist_ok=True)\n", + " os.makedirs(train_directory, exist_ok=True)\n", + " os.makedirs(test_directory, exist_ok=True)\n", + "\n", + " filepath = os.path.join(data_directory, filename)\n", + " if not os.path.exists(filepath):\n", + " filepath, _ = urllib.request.urlretrieve(SOURCE_URL + filename, filepath)\n", + "\n", + " size = os.path.getsize(filepath)\n", + " print(\"Successfully downloaded\", filename, size, \"bytes.\")\n", + "\n", + " return filepath" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# | export\n", + "\n", + "\n", + "def m4_parser(dataset_name, directory, num_obs=1000000):\n", + " \"\"\"Transform M4 data into a panel.\n", + "\n", + " Parameters\n", + " ----------\n", + " dataset_name: str\n", + " Frequency of the data. 
Example: 'Yearly'.\n", + " directory: str\n", + " Custom directory where data will be saved.\n", + " num_obs: int\n", + " Number of time series to return.\n", + " \"\"\"\n", + " data_directory = directory + \"/m4\"\n", + " train_directory = data_directory + \"/Train/\"\n", + " test_directory = data_directory + \"/Test/\"\n", + " freq = seas_dict[dataset_name][\"freq\"]\n", + "\n", + " m4_info = pd.read_csv(data_directory + \"/M4-info.csv\", usecols=[\"M4id\", \"category\"])\n", + " m4_info = m4_info[m4_info[\"M4id\"].str.startswith(dataset_name[0])].reset_index(\n", + " drop=True\n", + " )\n", + "\n", + " # Train data\n", + " train_path = \"{}{}-train.csv\".format(train_directory, dataset_name)\n", + "\n", + " train_df = pd.read_csv(train_path, nrows=num_obs)\n", + " train_df = train_df.rename(columns={\"V1\": \"unique_id\"})\n", + "\n", + " train_df = pd.wide_to_long(\n", + " train_df, stubnames=[\"V\"], i=\"unique_id\", j=\"ds\"\n", + " ).reset_index()\n", + " train_df = train_df.rename(columns={\"V\": \"y\"})\n", + " train_df = train_df.dropna()\n", + " train_df[\"split\"] = \"train\"\n", + " train_df[\"ds\"] = train_df[\"ds\"] - 1\n", + " # Get len of series per unique_id\n", + " len_series = train_df.groupby(\"unique_id\").agg({\"ds\": \"max\"}).reset_index()\n", + " len_series.columns = [\"unique_id\", \"len_serie\"]\n", + "\n", + " # Test data\n", + " test_path = \"{}{}-test.csv\".format(test_directory, dataset_name)\n", + "\n", + " test_df = pd.read_csv(test_path, nrows=num_obs)\n", + " test_df = test_df.rename(columns={\"V1\": \"unique_id\"})\n", + "\n", + " test_df = pd.wide_to_long(\n", + " test_df, stubnames=[\"V\"], i=\"unique_id\", j=\"ds\"\n", + " ).reset_index()\n", + " test_df = test_df.rename(columns={\"V\": \"y\"})\n", + " test_df = test_df.dropna()\n", + " test_df[\"split\"] = \"test\"\n", + " test_df = test_df.merge(len_series, on=\"unique_id\")\n", + " test_df[\"ds\"] = test_df[\"ds\"] + test_df[\"len_serie\"] - 1\n", + " test_df = 
test_df[[\"unique_id\", \"ds\", \"y\", \"split\"]]\n", + "\n", + " df = pd.concat((train_df, test_df))\n", + " df = df.sort_values(by=[\"unique_id\", \"ds\"]).reset_index(drop=True)\n", + "\n", + " # Create column with dates with freq of dataset\n", + " len_series = df.groupby(\"unique_id\").agg({\"ds\": \"max\"}).reset_index()\n", + " dates = []\n", + " for i in range(len(len_series)):\n", + " len_serie = len_series.iloc[i, 1]\n", + " ranges = pd.date_range(start=\"1970/01/01\", periods=len_serie, freq=freq)\n", + " dates += list(ranges)\n", + " df.loc[:, \"ds\"] = dates\n", + "\n", + " df = df.merge(m4_info, left_on=[\"unique_id\"], right_on=[\"M4id\"])\n", + " df.drop(columns=[\"M4id\"], inplace=True)\n", + " df = df.rename(columns={\"category\": \"x\"})\n", + "\n", + " X_train_df = df[df[\"split\"] == \"train\"].filter(items=[\"unique_id\", \"ds\", \"x\"])\n", + " y_train_df = df[df[\"split\"] == \"train\"].filter(items=[\"unique_id\", \"ds\", \"y\"])\n", + " X_test_df = df[df[\"split\"] == \"test\"].filter(items=[\"unique_id\", \"ds\", \"x\"])\n", + " y_test_df = df[df[\"split\"] == \"test\"].filter(items=[\"unique_id\", \"ds\", \"y\"])\n", + "\n", + " X_train_df = X_train_df.reset_index(drop=True)\n", + " y_train_df = y_train_df.reset_index(drop=True)\n", + " X_test_df = X_test_df.reset_index(drop=True)\n", + " y_test_df = y_test_df.reset_index(drop=True)\n", + "\n", + " return X_train_df, y_train_df, X_test_df, y_test_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# | export\n", + "\n", + "\n", + "def prepare_m4_data(dataset_name, directory, num_obs):\n", + " \"\"\"Pipeline that obtains M4 time series, transforms it and\n", + " gets naive2 predictions.\n", + "\n", + " Parameters\n", + " ----------\n", + " dataset_name: str\n", + " Frequency of the data. 
Example: 'Yearly'.\n", + " directory: str\n", + " Custom directory where data will be saved.\n", + " num_obs: int\n", + " Number of time series to return.\n", + " \"\"\"\n", + " m4info_filename = maybe_download(\"M4-info.csv\", directory)\n", + "\n", + " dailytrain_filename = maybe_download(\"Train/Daily-train.csv\", directory)\n", + " hourlytrain_filename = maybe_download(\"Train/Hourly-train.csv\", directory)\n", + " monthlytrain_filename = maybe_download(\"Train/Monthly-train.csv\", directory)\n", + " quarterlytrain_filename = maybe_download(\"Train/Quarterly-train.csv\", directory)\n", + " weeklytrain_filename = maybe_download(\"Train/Weekly-train.csv\", directory)\n", + " yearlytrain_filename = maybe_download(\"Train/Yearly-train.csv\", directory)\n", + "\n", + " dailytest_filename = maybe_download(\"Test/Daily-test.csv\", directory)\n", + " hourlytest_filename = maybe_download(\"Test/Hourly-test.csv\", directory)\n", + " monthlytest_filename = maybe_download(\"Test/Monthly-test.csv\", directory)\n", + " quarterlytest_filename = maybe_download(\"Test/Quarterly-test.csv\", directory)\n", + " weeklytest_filename = maybe_download(\"Test/Weekly-test.csv\", directory)\n", + " yearlytest_filename = maybe_download(\"Test/Yearly-test.csv\", directory)\n", + " print(\"\\n\")\n", + "\n", + " X_train_df, y_train_df, X_test_df, y_test_df = m4_parser(\n", + " dataset_name, directory, num_obs\n", + " )\n", + "\n", + " return X_train_df, y_train_df, X_test_df, y_test_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# | hide\n", + "from nbdev.showdoc import *" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# | hide\n", + "import nbdev\n", + "\n", + "nbdev.nbdev_export()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + 
"display_name": "python3", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/nbs/_quarto.yml b/nbs/_quarto.yml new file mode 100644 index 0000000..006b406 --- /dev/null +++ b/nbs/_quarto.yml @@ -0,0 +1,20 @@ +project: + type: website + +format: + html: + theme: cosmo + css: styles.css + toc: true + +website: + twitter-card: true + open-graph: true + repo-actions: [issue] + navbar: + background: primary + search: true + sidebar: + style: floating + +metadata-files: [nbdev.yml, sidebar.yml] diff --git a/nbs/index.ipynb b/nbs/index.ipynb new file mode 100644 index 0000000..3491c78 --- /dev/null +++ b/nbs/index.ipynb @@ -0,0 +1,395 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "# tsfeatures\n", + "\n", + "Calculates various features from time series data. Python implementation of the R package _[tsfeatures](https://github.com/robjhyndman/tsfeatures)_.\n", + "\n", + "# Installation\n", + "\n", + "You can install the *released* version of `tsfeatures` from the [Python package index](pypi.org) with:\n", + "\n", + "``` python\n", + "pip install tsfeatures\n", + "```\n", + "\n", + "# Usage\n", + "\n", + "The `tsfeatures` main function calculates by default the features used by Montero-Manso, Talagala, Hyndman and Athanasopoulos in [their implementation of the FFORMA model](https://htmlpreview.github.io/?https://github.com/robjhyndman/M4metalearning/blob/master/docs/M4_methodology.html#features).\n", + "\n", + "```python\n", + "from tsfeatures import tsfeatures\n", + "```\n", + "\n", + "This function receives a panel pandas df with columns `unique_id`, `ds`, `y` and optionally the frequency of the data.\n", + "\n", + "\n", + "\n", + "```python\n", + "tsfeatures(panel, freq=7)\n", + "```\n", + "\n", + "By default (`freq=None`) the function will try to infer the frequency of each time series (using `infer_freq` from `pandas` on the `ds` column) and assign a seasonal period 
according to the built-in dictionary `FREQS`:\n", + "\n", + "```python\n", + "FREQS = {'H': 24, 'D': 1,\n", + " 'M': 12, 'Q': 4,\n", + " 'W':1, 'Y': 1}\n", + "```\n", + "\n", + "You can use your own dictionary using the `dict_freqs` argument:\n", + "\n", + "```python\n", + "tsfeatures(panel, dict_freqs={'D': 7, 'W': 52})\n", + "```\n", + "\n", + "## List of available features\n", + "\n", + "| Features |||\n", + "|:--------|:------|:-------------|\n", + "|acf_features|heterogeneity|series_length|\n", + "|arch_stat|holt_parameters|sparsity|\n", + "|count_entropy|hurst|stability|\n", + "|crossing_points|hw_parameters|stl_features|\n", + "|entropy|intervals|unitroot_kpss|\n", + "|flat_spots|lumpiness|unitroot_pp|\n", + "|frequency|nonlinearity||\n", + "|guerrero|pacf_features||\n", + "\n", + "See the docs for a description of the features. To use a particular feature included in the package you need to import it:\n", + "\n", + "```python\n", + "from tsfeatures import acf_features\n", + "\n", + "tsfeatures(panel, freq=7, features=[acf_features])\n", + "```\n", + "\n", + "You can also define your own function and use it together with the included features:\n", + "\n", + "```python\n", + "def number_zeros(x, freq):\n", + "\n", + " number = (x == 0).sum()\n", + " return {'number_zeros': number}\n", + "\n", + "tsfeatures(panel, freq=7, features=[acf_features, number_zeros])\n", + "```\n", + "\n", + "`tsfeatures` can handle functions that receives a numpy array `x` and a frequency `freq` (this parameter is needed even if you don't use it) and returns a dictionary with the feature name as a key and its value.\n", + "\n", + "## R implementation\n", + "\n", + "You can use this package to call `tsfeatures` from R inside python (you need to have installed R, the packages `forecast` and `tsfeatures`; also the python package `rpy2`):\n", + "\n", + "```python\n", + "from tsfeatures.tsfeatures_r import tsfeatures_r\n", + "\n", + "tsfeatures_r(panel, freq=7, 
features=[\"acf_features\"])\n", + "```\n", + "\n", + "Observe that this function receives a list of strings instead of a list of functions.\n", + "\n", + "## Comparison with the R implementation (sum of absolute differences)\n", + "\n", + "### Non-seasonal data (100 Daily M4 time series)\n", + "\n", + "| feature | diff | feature | diff | feature | diff | feature | diff |\n", + "|:----------------|-------:|:----------------|-------:|:----------------|-------:|:----------------|-------:|\n", + "| e_acf10 | 0 | e_acf1 | 0 | diff2_acf1 | 0 | alpha | 3.2 |\n", + "| seasonal_period | 0 | spike | 0 | diff1_acf10 | 0 | arch_acf | 3.3 |\n", + "| nperiods | 0 | curvature | 0 | x_acf1 | 0 | beta | 4.04 |\n", + "| linearity | 0 | crossing_points | 0 | nonlinearity | 0 | garch_r2 | 4.74 |\n", + "| hw_gamma | 0 | lumpiness | 0 | diff2x_pacf5 | 0 | hurst | 5.45 |\n", + "| hw_beta | 0 | diff1x_pacf5 | 0 | unitroot_kpss | 0 | garch_acf | 5.53 |\n", + "| hw_alpha | 0 | diff1_acf10 | 0 | x_pacf5 | 0 | entropy | 11.65 |\n", + "| trend | 0 | arch_lm | 0 | x_acf10 | 0 |\n", + "| flat_spots | 0 | diff1_acf1 | 0 | unitroot_pp | 0 |\n", + "| series_length | 0 | stability | 0 | arch_r2 | 1.37 |\n", + "\n", + "To replicate these results use:\n", + "\n", + "``` console\n", + "python -m tsfeatures.compare_with_r --results_directory /some/path\n", + " --dataset_name Daily --num_obs 100\n", + "```\n", + "\n", + "### Seasonal data (100 Hourly M4 time series)\n", + "\n", + "| feature | diff | feature | diff | feature | diff | feature | diff |\n", + "|:------------------|-------:|:-------------|-----:|:----------|--------:|:-----------|--------:|\n", + "| series_length | 0 |seas_acf1 | 0 | trend | 2.28 | hurst | 26.02 |\n", + "| flat_spots | 0 |x_acf1|0| arch_r2 | 2.29 | hw_beta | 32.39 |\n", + "| nperiods | 0 |unitroot_kpss|0| alpha | 2.52 | trough | 35 |\n", + "| crossing_points | 0 |nonlinearity|0| beta | 3.67 | peak | 69 |\n", + "| seasonal_period | 0 |diff1_acf10|0| linearity | 3.97 |\n", + "| 
lumpiness | 0 |x_acf10|0| curvature | 4.8 |\n", + "| stability | 0 |seas_pacf|0| e_acf10 | 7.05 |\n", + "| arch_lm | 0 |unitroot_pp|0| garch_r2 | 7.32 |\n", + "| diff2_acf1 | 0 |spike|0| hw_gamma | 7.32 |\n", + "| diff2_acf10 | 0 |seasonal_strength|0.79| hw_alpha | 7.47 |\n", + "| diff1_acf1 | 0 |e_acf1|1.67| garch_acf | 7.53 |\n", + "| diff2x_pacf5 | 0 |arch_acf|2.18| entropy | 9.45 |\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# | hide\n", + "import nbdev\n", + "\n", + "nbdev.nbdev_export()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "# tsfeatures\n", + "\n", + "Calculates various features from time series data. Python implementation of the R package _[tsfeatures](https://github.com/robjhyndman/tsfeatures)_.\n", + "\n", + "# Installation\n", + "\n", + "You can install the *released* version of `tsfeatures` from the [Python package index](pypi.org) with:\n", + "\n", + "``` python\n", + "pip install tsfeatures\n", + "```\n", + "\n", + "# Usage\n", + "\n", + "The `tsfeatures` main function calculates by default the features used by Montero-Manso, Talagala, Hyndman and Athanasopoulos in [their implementation of the FFORMA model](https://htmlpreview.github.io/?https://github.com/robjhyndman/M4metalearning/blob/master/docs/M4_methodology.html#features).\n", + "\n", + "```python\n", + "from tsfeatures import tsfeatures\n", + "```\n", + "\n", + "This function receives a panel pandas df with columns `unique_id`, `ds`, `y` and optionally the frequency of the data.\n", + "\n", + "\n", + "\n", + "```python\n", + "tsfeatures(panel, freq=7)\n", + "```\n", + "\n", + "By default (`freq=None`) the function will try to infer the frequency of each time series (using `infer_freq` from `pandas` on the `ds` column) and assign a seasonal period according to the built-in dictionary `FREQS`:\n", + "\n", + "```python\n", + "FREQS = {'H': 24, 'D': 1,\n", + " 'M': 12, 'Q': 4,\n", 
+ " 'W':1, 'Y': 1}\n", + "```\n", + "\n", + "You can use your own dictionary using the `dict_freqs` argument:\n", + "\n", + "```python\n", + "tsfeatures(panel, dict_freqs={'D': 7, 'W': 52})\n", + "```\n", + "\n", + "## List of available features\n", + "\n", + "| Features |||\n", + "|:--------|:------|:-------------|\n", + "|acf_features|heterogeneity|series_length|\n", + "|arch_stat|holt_parameters|sparsity|\n", + "|count_entropy|hurst|stability|\n", + "|crossing_points|hw_parameters|stl_features|\n", + "|entropy|intervals|unitroot_kpss|\n", + "|flat_spots|lumpiness|unitroot_pp|\n", + "|frequency|nonlinearity||\n", + "|guerrero|pacf_features||\n", + "\n", + "See the docs for a description of the features. To use a particular feature included in the package you need to import it:\n", + "\n", + "```python\n", + "from tsfeatures import acf_features\n", + "\n", + "tsfeatures(panel, freq=7, features=[acf_features])\n", + "```\n", + "\n", + "You can also define your own function and use it together with the included features:\n", + "\n", + "```python\n", + "def number_zeros(x, freq):\n", + "\n", + " number = (x == 0).sum()\n", + " return {'number_zeros': number}\n", + "\n", + "tsfeatures(panel, freq=7, features=[acf_features, number_zeros])\n", + "```\n", + "\n", + "`tsfeatures` can handle functions that receives a numpy array `x` and a frequency `freq` (this parameter is needed even if you don't use it) and returns a dictionary with the feature name as a key and its value.\n", + "\n", + "## R implementation\n", + "\n", + "You can use this package to call `tsfeatures` from R inside python (you need to have installed R, the packages `forecast` and `tsfeatures`; also the python package `rpy2`):\n", + "\n", + "```python\n", + "from tsfeatures.tsfeatures_r import tsfeatures_r\n", + "\n", + "tsfeatures_r(panel, freq=7, features=[\"acf_features\"])\n", + "```\n", + "\n", + "Observe that this function receives a list of strings instead of a list of functions.\n", + "\n", + "## 
Comparison with the R implementation (sum of absolute differences)\n", + "\n", + "### Non-seasonal data (100 Daily M4 time series)\n", + "\n", + "| feature | diff | feature | diff | feature | diff | feature | diff |\n", + "|:----------------|-------:|:----------------|-------:|:----------------|-------:|:----------------|-------:|\n", + "| e_acf10 | 0 | e_acf1 | 0 | diff2_acf1 | 0 | alpha | 3.2 |\n", + "| seasonal_period | 0 | spike | 0 | diff1_acf10 | 0 | arch_acf | 3.3 |\n", + "| nperiods | 0 | curvature | 0 | x_acf1 | 0 | beta | 4.04 |\n", + "| linearity | 0 | crossing_points | 0 | nonlinearity | 0 | garch_r2 | 4.74 |\n", + "| hw_gamma | 0 | lumpiness | 0 | diff2x_pacf5 | 0 | hurst | 5.45 |\n", + "| hw_beta | 0 | diff1x_pacf5 | 0 | unitroot_kpss | 0 | garch_acf | 5.53 |\n", + "| hw_alpha | 0 | diff1_acf10 | 0 | x_pacf5 | 0 | entropy | 11.65 |\n", + "| trend | 0 | arch_lm | 0 | x_acf10 | 0 |\n", + "| flat_spots | 0 | diff1_acf1 | 0 | unitroot_pp | 0 |\n", + "| series_length | 0 | stability | 0 | arch_r2 | 1.37 |\n", + "\n", + "To replicate these results use:\n", + "\n", + "``` console\n", + "python -m tsfeatures.compare_with_r --results_directory /some/path\n", + " --dataset_name Daily --num_obs 100\n", + "```\n", + "\n", + "### Seasonal data (100 Hourly M4 time series)\n", + "\n", + "| feature | diff | feature | diff | feature | diff | feature | diff |\n", + "|:------------------|-------:|:-------------|-----:|:----------|--------:|:-----------|--------:|\n", + "| series_length | 0 |seas_acf1 | 0 | trend | 2.28 | hurst | 26.02 |\n", + "| flat_spots | 0 |x_acf1|0| arch_r2 | 2.29 | hw_beta | 32.39 |\n", + "| nperiods | 0 |unitroot_kpss|0| alpha | 2.52 | trough | 35 |\n", + "| crossing_points | 0 |nonlinearity|0| beta | 3.67 | peak | 69 |\n", + "| seasonal_period | 0 |diff1_acf10|0| linearity | 3.97 |\n", + "| lumpiness | 0 |x_acf10|0| curvature | 4.8 |\n", + "| stability | 0 |seas_pacf|0| e_acf10 | 7.05 |\n", + "| arch_lm | 0 |unitroot_pp|0| garch_r2 | 7.32 |\n", + 
"| diff2_acf1 | 0 |spike|0| hw_gamma | 7.32 |\n", + "| diff2_acf10 | 0 |seasonal_strength|0.79| hw_alpha | 7.47 |\n", + "| diff1_acf1 | 0 |e_acf1|1.67| garch_acf | 7.53 |\n", + "| diff2x_pacf5 | 0 |arch_acf|2.18| entropy | 9.45 |\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![Build](https://github.com/FedericoGarza/tsfeatures/workflows/Python%20package/badge.svg)](https://github.com/FedericoGarza/tsfeatures/tree/master)\n", + "[![PyPI version fury.io](https://badge.fury.io/py/tsfeatures.svg)](https://pypi.python.org/pypi/tsfeatures/)\n", + "[![Downloads](https://pepy.tech/badge/tsfeatures)](https://pepy.tech/project/tsfeatures)\n", + "[![Python 3.6+](https://img.shields.io/badge/python-3.7+-blue.svg)](https://www.python.org/downloads/release/python-370+/)\n", + "[![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://github.com/FedericoGarza/tsfeatures/blob/master/LICENSE)\n", + "\n", + "# tsfeatures\n", + "\n", + "Calculates various features from time series data. 
Python implementation of the R package _[tsfeatures](https://github.com/robjhyndman/tsfeatures)_.\n", + "\n", + "# Installation\n", + "\n", + "You can install the *released* version of `tsfeatures` from the [Python package index](pypi.org) with:\n", + "\n", + "``` python\n", + "pip install tsfeatures\n", + "```\n", + "\n", + "# Usage\n", + "\n", + "The `tsfeatures` main function calculates by default the features used by Montero-Manso, Talagala, Hyndman and Athanasopoulos in [their implementation of the FFORMA model](https://htmlpreview.github.io/?https://github.com/robjhyndman/M4metalearning/blob/master/docs/M4_methodology.html#features).\n", + "\n", + "```python\n", + "from tsfeatures import tsfeatures\n", + "```\n", + "\n", + "This function receives a panel pandas df with columns `unique_id`, `ds`, `y` and optionally the frequency of the data.\n", + "\n", + "\n", + "\n", + "```python\n", + "tsfeatures(panel, freq=7)\n", + "```\n", + "\n", + "By default (`freq=None`) the function will try to infer the frequency of each time series (using `infer_freq` from `pandas` on the `ds` column) and assign a seasonal period according to the built-in dictionary `FREQS`:\n", + "\n", + "```python\n", + "FREQS = {'H': 24, 'D': 1,\n", + " 'M': 12, 'Q': 4,\n", + " 'W':1, 'Y': 1}\n", + "```\n", + "\n", + "You can use your own dictionary using the `dict_freqs` argument:\n", + "\n", + "```python\n", + "tsfeatures(panel, dict_freqs={'D': 7, 'W': 52})\n", + "```\n", + "\n", + "## List of available features\n", + "\n", + "| Features |||\n", + "|:--------|:------|:-------------|\n", + "|acf_features|heterogeneity|series_length|\n", + "|arch_stat|holt_parameters|sparsity|\n", + "|count_entropy|hurst|stability|\n", + "|crossing_points|hw_parameters|stl_features|\n", + "|entropy|intervals|unitroot_kpss|\n", + "|flat_spots|lumpiness|unitroot_pp|\n", + "|frequency|nonlinearity||\n", + "|guerrero|pacf_features||\n", + "\n", + "See the docs for a description of the features. 
To use a particular feature included in the package you need to import it:\n", + "\n", + "```python\n", + "from tsfeatures import acf_features\n", + "\n", + "tsfeatures(panel, freq=7, features=[acf_features])\n", + "```\n", + "\n", + "You can also define your own function and use it together with the included features:\n", + "\n", + "```python\n", + "def number_zeros(x, freq):\n", + "\n", + " number = (x == 0).sum()\n", + " return {'number_zeros': number}\n", + "\n", + "tsfeatures(panel, freq=7, features=[acf_features, number_zeros])\n", + "```\n", + "\n", + "`tsfeatures` can handle functions that receives a numpy array `x` and a frequency `freq` (this parameter is needed even if you don't use it) and returns a dictionary with the feature name as a key and its value.\n", + "\n", + " \n", + "\n", + "# Authors\n", + "\n", + "* **Federico Garza** - [FedericoGarza](https://github.com/FedericoGarza)\n", + "* **Kin Gutierrez** - [kdgutier](https://github.com/kdgutier)\n", + "* **Cristian Challu** - [cristianchallu](https://github.com/cristianchallu)\n", + "* **Jose Moralez** - [jose-moralez](https://github.com/jose-moralez)\n", + "* **Ricardo Olivares** - [rolivaresar](https://github.com/rolivaresar)\n", + "* **Max Mergenthaler** - [mergenthaler](https://github.com/mergenthaler)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "python3", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/nbs/nbdev.yml b/nbs/nbdev.yml new file mode 100644 index 0000000..eb58aae --- /dev/null +++ b/nbs/nbdev.yml @@ -0,0 +1,9 @@ +project: + output-dir: _docs + +website: + title: "tsfeatures" + site-url: "https://jope35.github.io/tsfeatures" + description: "porting tsfeature to nbdev" + repo-branch: main + repo-url: "https://github.com/jope35/tsfeatures" diff --git a/nbs/styles.css b/nbs/styles.css new file mode 100644 index 0000000..66ccc49 --- /dev/null +++ b/nbs/styles.css @@ -0,0 +1,37 @@ +.cell { + 
margin-bottom: 1rem; +} + +.cell > .sourceCode { + margin-bottom: 0; +} + +.cell-output > pre { + margin-bottom: 0; +} + +.cell-output > pre, .cell-output > .sourceCode > pre, .cell-output-stdout > pre { + margin-left: 0.8rem; + margin-top: 0; + background: none; + border-left: 2px solid lightsalmon; + border-top-left-radius: 0; + border-top-right-radius: 0; +} + +.cell-output > .sourceCode { + border: none; +} + +.cell-output > .sourceCode { + background: none; + margin-top: 0; +} + +div.description { + padding-left: 2px; + padding-top: 5px; + font-style: italic; + font-size: 135%; + opacity: 70%; +} diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index c2e88d9..0000000 --- a/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -antropy>=0.1.4 -arch>=4.14 -pandas>=1.0.5 -scikit-learn>=0.23.1 -statsmodels>=0.12.2 -supersmoother>=0.4 diff --git a/settings.ini b/settings.ini new file mode 100644 index 0000000..ba87868 --- /dev/null +++ b/settings.ini @@ -0,0 +1,47 @@ +[DEFAULT] +# All sections below are required unless otherwise specified. +# See https://github.com/fastai/nbdev/blob/master/settings.ini for examples. 
+ +### Python library ### +repo = tsfeatures +lib_name = %(repo)s +version = 1.0.0 +min_python = 3.9 +license = apache2 + +### nbdev ### +doc_path = _docs +lib_path = tsfeatures +nbs_path = nbs +recursive = True +tst_flags = notest +put_version_in_init = True + +### Docs ### +branch = main +custom_sidebar = False +doc_host = https://%(user)s.github.io +doc_baseurl = /%(repo)s +git_url = https://github.com/%(user)s/%(repo)s +title = %(lib_name)s + +### PyPI ### +audience = Developers +author = Joost de Theije +author_email = info@example.com +copyright = 2023 onwards, %(author)s +description = porting tsfeature to nbdev +keywords = nbdev jupyter notebook python +language = English +status = 3 +user = jope35 + +### Optional ### +requirements = antropy>=0.1.4 arch>=4.14 pandas>=1.0.5 scikit-learn>=0.23.1 statsmodels>=0.12.2 supersmoother>=0.4 tqdm +dev_requirements = nbdev ruff pre-commit +# console_scripts = + +black_formatting = False +jupyter_hooks = True +clean_ids = True +clear_all = False diff --git a/setup.py b/setup.py deleted file mode 100644 index a847a2b..0000000 --- a/setup.py +++ /dev/null @@ -1,28 +0,0 @@ -import setuptools - -with open("README.md", "r") as fh: - long_description = fh.read() - -setuptools.setup( - name="tsfeatures", - version="0.4.5", - description="Calculates various features from time series data.", - long_description=long_description, - long_description_content_type="text/markdown", - url="https://github.com/Nixtla/tsfeatures", - packages=setuptools.find_packages(), - classifiers=[ - "Programming Language :: Python :: 3", - "License :: OSI Approved :: MIT License", - "Operating System :: OS Independent", - ], - python_requires='>=3.7', - install_requires=[ - "antropy>=0.1.4", - "arch>=4.11", - "pandas>=1.0.5", - "scikit-learn>=0.23.1", - "statsmodels>=0.13.2", - "supersmoother>=0.4" - ] -) diff --git a/tsfeatures/__init__.py b/tsfeatures/__init__.py index 26574de..5becc17 100644 --- a/tsfeatures/__init__.py +++ 
b/tsfeatures/__init__.py @@ -1,4 +1 @@ -#!/usr/bin/env python -# coding: utf-8 - -from .tsfeatures import * +__version__ = "1.0.0" diff --git a/tsfeatures/_modidx.py b/tsfeatures/_modidx.py new file mode 100644 index 0000000..b0b9d4c --- /dev/null +++ b/tsfeatures/_modidx.py @@ -0,0 +1,41 @@ +# Autogenerated by nbdev + +d = { 'settings': { 'branch': 'main', + 'doc_baseurl': '/tsfeatures', + 'doc_host': 'https://jope35.github.io', + 'git_url': 'https://github.com/jope35/tsfeatures', + 'lib_path': 'tsfeatures'}, + 'syms': { 'tsfeatures.features': { 'tsfeatures.features.acf_features': ('features.html#acf_features', 'tsfeatures/features.py'), + 'tsfeatures.features.arch_stat': ('features.html#arch_stat', 'tsfeatures/features.py'), + 'tsfeatures.features.count_entropy': ('features.html#count_entropy', 'tsfeatures/features.py'), + 'tsfeatures.features.crossing_points': ('features.html#crossing_points', 'tsfeatures/features.py'), + 'tsfeatures.features.entropy': ('features.html#entropy', 'tsfeatures/features.py'), + 'tsfeatures.features.flat_spots': ('features.html#flat_spots', 'tsfeatures/features.py'), + 'tsfeatures.features.frequency': ('features.html#frequency', 'tsfeatures/features.py'), + 'tsfeatures.features.guerrero': ('features.html#guerrero', 'tsfeatures/features.py'), + 'tsfeatures.features.heterogeneity': ('features.html#heterogeneity', 'tsfeatures/features.py'), + 'tsfeatures.features.holt_parameters': ('features.html#holt_parameters', 'tsfeatures/features.py'), + 'tsfeatures.features.hurst': ('features.html#hurst', 'tsfeatures/features.py'), + 'tsfeatures.features.hw_parameters': ('features.html#hw_parameters', 'tsfeatures/features.py'), + 'tsfeatures.features.intervals': ('features.html#intervals', 'tsfeatures/features.py'), + 'tsfeatures.features.lumpiness': ('features.html#lumpiness', 'tsfeatures/features.py'), + 'tsfeatures.features.nonlinearity': ('features.html#nonlinearity', 'tsfeatures/features.py'), + 'tsfeatures.features.pacf_features': 
('features.html#pacf_features', 'tsfeatures/features.py'), + 'tsfeatures.features.series_length': ('features.html#series_length', 'tsfeatures/features.py'), + 'tsfeatures.features.sparsity': ('features.html#sparsity', 'tsfeatures/features.py'), + 'tsfeatures.features.stability': ('features.html#stability', 'tsfeatures/features.py'), + 'tsfeatures.features.stl_features': ('features.html#stl_features', 'tsfeatures/features.py'), + 'tsfeatures.features.unitroot_kpss': ('features.html#unitroot_kpss', 'tsfeatures/features.py'), + 'tsfeatures.features.unitroot_pp': ('features.html#unitroot_pp', 'tsfeatures/features.py')}, + 'tsfeatures.m4_data': { 'tsfeatures.m4_data.m4_parser': ('m4_data.html#m4_parser', 'tsfeatures/m4_data.py'), + 'tsfeatures.m4_data.maybe_download': ('m4_data.html#maybe_download', 'tsfeatures/m4_data.py'), + 'tsfeatures.m4_data.prepare_m4_data': ('m4_data.html#prepare_m4_data', 'tsfeatures/m4_data.py')}, + 'tsfeatures.tsfeatures': { 'tsfeatures.tsfeatures._get_feats': ('tsfeatures_core.html#_get_feats', 'tsfeatures/tsfeatures.py'), + 'tsfeatures.tsfeatures.tsfeatures': ('tsfeatures_core.html#tsfeatures', 'tsfeatures/tsfeatures.py')}, + 'tsfeatures.utils': { 'tsfeatures.utils.embed': ('utils.html#embed', 'tsfeatures/utils.py'), + 'tsfeatures.utils.hurst_exponent': ('utils.html#hurst_exponent', 'tsfeatures/utils.py'), + 'tsfeatures.utils.lambda_coef_var': ('utils.html#lambda_coef_var', 'tsfeatures/utils.py'), + 'tsfeatures.utils.poly': ('utils.html#poly', 'tsfeatures/utils.py'), + 'tsfeatures.utils.scalets': ('utils.html#scalets', 'tsfeatures/utils.py'), + 'tsfeatures.utils.terasvirta_test': ('utils.html#terasvirta_test', 'tsfeatures/utils.py'), + 'tsfeatures.utils.ur_pp': ('utils.html#ur_pp', 'tsfeatures/utils.py')}}} diff --git a/tsfeatures/compare_with_r.py b/tsfeatures/compare_with_r.py deleted file mode 100644 index 3937a42..0000000 --- a/tsfeatures/compare_with_r.py +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -import 
argparse -import sys -import time - -from tsfeatures import tsfeatures -from .tsfeatures_r import tsfeatures_r -from .m4_data import prepare_m4_data -from .utils import FREQS - - -def compare_features_m4(dataset_name, directory, num_obs=1000000): - _, y_train_df, _, _ = prepare_m4_data(dataset_name=dataset_name, - directory = directory, - num_obs=num_obs) - - freq = FREQS[dataset_name[0]] - - print('Calculating python features...') - init = time.time() - py_feats = tsfeatures(y_train_df, freq=freq).set_index('unique_id') - print('Total time: ', time.time() - init) - - print('Calculating r features...') - init = time.time() - r_feats = tsfeatures_r(y_train_df, freq=freq, parallel=True).set_index('unique_id') - print('Total time: ', time.time() - init) - - diff = py_feats.sub(r_feats, 1).abs().sum(0).sort_values() - - return diff - -def main(args): - if args.num_obs: - num_obs = args.num_obs - else: - num_obs = 100000 - - if args.dataset_name: - datasets = [args.dataset_name] - else: - datasets = ['Daily', 'Hourly', 'Yearly', 'Quarterly', 'Weekly', 'Monthly'] - - for dataset_name in datasets: - diff = compare_features_m4(dataset_name, args.results_directory, num_obs) - diff.name = 'diff' - diff = diff.rename_axis('feature') - diff = diff.reset_index() - diff['diff'] = diff['diff'].map('{:.2f}'.format) - save_dir = args.results_directory + '/' + dataset_name + '_comparison_' - save_dir += str(num_obs) + '.csv' - diff.to_csv(save_dir, index=False) - - print('Comparison saved at: ', save_dir) - -if __name__=='__main__': - - parser = argparse.ArgumentParser(description='Get features for M4 data') - - parser.add_argument("--results_directory", required=True, type=str, - help="directory where M4 data will be downloaded") - parser.add_argument("--num_obs", required=False, type=int, - help="number of M4 time series to be tested (uses all data by default)") - parser.add_argument("--dataset_name", required=False, type=str, - help="type of dataset to get features") - - args = 
parser.parse_args() - - main(args) diff --git a/tsfeatures/features.py b/tsfeatures/features.py new file mode 100644 index 0000000..ea7ee33 --- /dev/null +++ b/tsfeatures/features.py @@ -0,0 +1,857 @@ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/01_features.ipynb. + +# %% auto 0 +__all__ = ['acf_features', 'arch_stat', 'count_entropy', 'crossing_points', 'entropy', 'flat_spots', 'frequency', 'guerrero', + 'heterogeneity', 'holt_parameters', 'hurst', 'hw_parameters', 'intervals', 'lumpiness', 'nonlinearity', + 'pacf_features', 'series_length', 'sparsity', 'stability', 'stl_features', 'unitroot_kpss', 'unitroot_pp'] + +# %% ../nbs/01_features.ipynb 3 +import warnings + +# %% ../nbs/01_features.ipynb 4 +warnings.warn = lambda *a, **kw: False + +# %% ../nbs/01_features.ipynb 5 +import os + +# %% ../nbs/01_features.ipynb 6 +os.environ["MKL_NUM_THREADS"] = "1" +os.environ["NUMEXPR_NUM_THREADS"] = "1" +os.environ["OMP_NUM_THREADS"] = "1" + +# %% ../nbs/01_features.ipynb 7 +from itertools import groupby +from math import e # maybe change with numpy e +from typing import Dict + +import numpy as np +import pandas as pd +from antropy import spectral_entropy +from arch import arch_model +from scipy.optimize import minimize_scalar +from sklearn.linear_model import LinearRegression +from statsmodels.api import OLS, add_constant +from statsmodels.tsa.ar_model import AR +from statsmodels.tsa.holtwinters import ExponentialSmoothing +from statsmodels.tsa.seasonal import STL +from statsmodels.tsa.stattools import acf, kpss, pacf +from supersmoother import SuperSmoother + +from tsfeatures.utils import ( + embed, + hurst_exponent, + lambda_coef_var, + poly, + terasvirta_test, + ur_pp, +) + +# %% ../nbs/01_features.ipynb 8 +def acf_features(x: np.array, freq: int = 1) -> Dict[str, float]: + """Calculates autocorrelation function features. + + Parameters + ---------- + x: numpy array + The time series. 
+ freq: int + Frequency of the time series + + Returns + ------- + dict + 'x_acf1': First autocorrelation coefficient. + 'x_acf10': Sum of squares of first 10 autocorrelation coefficients. + 'diff1_acf1': First autocorrelation ciefficient of differenced series. + 'diff1_acf10': Sum of squared of first 10 autocorrelation coefficients + of differenced series. + 'diff2_acf1': First autocorrelation coefficient of twice-differenced series. + 'diff2_acf10': Sum of squared of first 10 autocorrelation coefficients of + twice-differenced series. + + Only for seasonal data (freq > 1). + 'seas_acf1': Autocorrelation coefficient at the first seasonal lag. + """ + m = freq + size_x = len(x) + + acfx = acf(x, nlags=max(m, 10), fft=False) + if size_x > 10: + acfdiff1x = acf(np.diff(x, n=1), nlags=10, fft=False) + else: + acfdiff1x = [np.nan] * 2 + + if size_x > 11: + acfdiff2x = acf(np.diff(x, n=2), nlags=10, fft=False) + else: + acfdiff2x = [np.nan] * 2 + # first autocorrelation coefficient + acf_1 = acfx[1] + # sum of squares of first 10 autocorrelation coefficients + sum_of_sq_acf10 = np.sum((acfx[1:11]) ** 2) if size_x > 10 else np.nan + # first autocorrelation ciefficient of differenced series + diff1_acf1 = acfdiff1x[1] + # sum of squared of first 10 autocorrelation coefficients of differenced series + diff1_acf10 = np.sum((acfdiff1x[1:11]) ** 2) if size_x > 10 else np.nan + # first autocorrelation coefficient of twice-differenced series + diff2_acf1 = acfdiff2x[1] + # Sum of squared of first 10 autocorrelation coefficients of twice-differenced series + diff2_acf10 = np.sum((acfdiff2x[1:11]) ** 2) if size_x > 11 else np.nan + + output = { + "x_acf1": acf_1, + "x_acf10": sum_of_sq_acf10, + "diff1_acf1": diff1_acf1, + "diff1_acf10": diff1_acf10, + "diff2_acf1": diff2_acf1, + "diff2_acf10": diff2_acf10, + } + + if m > 1: + output["seas_acf1"] = acfx[m] if len(acfx) > m else np.nan + + return output + +# %% ../nbs/01_features.ipynb 11 +def arch_stat( + x: np.array, freq: int = 
1, lags: int = 12, demean: bool = True +) -> Dict[str, float]: + """Arch model features. + + Parameters + ---------- + x: numpy array + The time series. + freq: int + Frequency of the time series + + Returns + ------- + dict + 'arch_lm': R^2 value of an autoregressive model of order lags applied to x**2. + """ + if len(x) <= lags + 1: + return {"arch_lm": np.nan} + if demean: + x -= np.mean(x) + + size_x = len(x) + mat = embed(x**2, lags + 1) + X = mat[:, 1:] + y = np.vstack(mat[:, 0]) + + try: + r_squared = LinearRegression().fit(X, y).score(X, y) + except: + r_squared = np.nan + + return {"arch_lm": r_squared} + +# %% ../nbs/01_features.ipynb 14 +def count_entropy(x: np.array, freq: int = 1) -> Dict[str, float]: + """Count entropy. + + Parameters + ---------- + x: numpy array + The time series. + freq: int + Frequency of the time series + + Returns + ------- + dict + 'count_entropy': Entropy using only positive data. + """ + entropy = x[x > 0] * np.log(x[x > 0]) + entropy = -entropy.sum() + + return {"count_entropy": entropy} + +# %% ../nbs/01_features.ipynb 15 +def crossing_points(x: np.array, freq: int = 1) -> Dict[str, float]: + """Crossing points. + + Parameters + ---------- + x: numpy array + The time series. + freq: int + Frequency of the time series + + Returns + ------- + dict + 'crossing_points': Number of times that x crosses the median. + """ + midline = np.median(x) + ab = x <= midline + lenx = len(x) + p1 = ab[: (lenx - 1)] + p2 = ab[1:] + cross = (p1 & (~p2)) | (p2 & (~p1)) + + return {"crossing_points": cross.sum()} + +# %% ../nbs/01_features.ipynb 16 +def entropy(x: np.array, freq: int = 1, base: float = e) -> Dict[str, float]: + """Calculates sample entropy. + + Parameters + ---------- + x: numpy array + The time series. + freq: int + Frequency of the time series + + Returns + ------- + dict + 'entropy': Wrapper of the function spectral_entropy. 
+ """ + try: + with np.errstate(divide="ignore"): + entropy = spectral_entropy(x, 1, normalize=True) + except: + entropy = np.nan + + return {"entropy": entropy} + +# %% ../nbs/01_features.ipynb 17 +def flat_spots(x: np.array, freq: int = 1) -> Dict[str, float]: + """Flat spots. + + Parameters + ---------- + x: numpy array + The time series. + freq: int + Frequency of the time series + + Returns + ------- + dict + 'flat_spots': Number of flat spots in x. + """ + try: + cutx = pd.cut(x, bins=10, include_lowest=True, labels=False) + 1 + except: + return {"flat_spots": np.nan} + + rlex = np.array([sum(1 for i in g) for k, g in groupby(cutx)]).max() + +# %% ../nbs/01_features.ipynb 18 +def frequency(x: np.array, freq: int = 1) -> Dict[str, float]: + """Frequency. + + Parameters + ---------- + x: numpy array + The time series. + freq: int + Frequency of the time series + + Returns + ------- + dict + 'frequency': Wrapper of freq. + """ + + return {"frequency": freq} + +# %% ../nbs/01_features.ipynb 19 +def guerrero( + x: np.array, freq: int = 1, lower: int = -1, upper: int = 2 +) -> Dict[str, float]: + """Applies Guerrero's (1993) method to select the lambda which minimises the + coefficient of variation for subseries of x. + + Parameters + ---------- + x: numpy array + The time series. + freq: int + Frequency of the time series. + lower: float + The lower bound for lambda. + upper: float + The upper bound for lambda. + + Returns + ------- + dict + 'guerrero': Minimum coefficient of variation for subseries of x. + + References + ---------- + [1] Guerrero, V.M. (1993) Time-series analysis supported by power transformations. + Journal of Forecasting, 12, 37–48. 
+ """ + func_to_min = lambda lambda_par: lambda_coef_var(lambda_par, x=x, period=freq) + + min_ = minimize_scalar(func_to_min, bounds=[lower, upper]) + min_ = min_["fun"] + + return {"guerrero": min_} + +# %% ../nbs/01_features.ipynb 20 +def heterogeneity(x: np.array, freq: int = 1) -> Dict[str, float]: + """Heterogeneity. + + Parameters + ---------- + x: numpy array + The time series. + freq: int + Frequency of the time series + + Returns + ------- + dict + 'arch_acf': Sum of squares of the first 12 autocorrelations of the + residuals of the AR model applied to x + 'garch_acf': Sum of squares of the first 12 autocorrelations of the + residuals of the GARCH model applied to x + 'arch_r2': Function arch_stat applied to the residuals of the + AR model applied to x. + 'garch_r2': Function arch_stat applied to the residuals of the GARCH + model applied to x. + """ + m = freq + + size_x = len(x) + order_ar = min(size_x - 1, np.floor(10 * np.log10(size_x))) + order_ar = int(order_ar) + + try: + x_whitened = AR(x).fit(maxlag=order_ar, ic="aic", trend="c").resid + except: + try: + x_whitened = AR(x).fit(maxlag=order_ar, ic="aic", trend="nc").resid + except: + output = { + "arch_acf": np.nan, + "garch_acf": np.nan, + "arch_r2": np.nan, + "garch_r2": np.nan, + } + + return output + # arch and box test + x_archtest = arch_stat(x_whitened, m)["arch_lm"] + LBstat = (acf(x_whitened**2, nlags=12, fft=False)[1:] ** 2).sum() + # Fit garch model + garch_fit = arch_model(x_whitened, vol="GARCH", rescale=False).fit(disp="off") + # compare arch test before and after fitting garch + garch_fit_std = garch_fit.resid + x_garch_archtest = arch_stat(garch_fit_std, m)["arch_lm"] + # compare Box test of squared residuals before and after fittig.garch + LBstat2 = (acf(garch_fit_std**2, nlags=12, fft=False)[1:] ** 2).sum() + + output = { + "arch_acf": LBstat, + "garch_acf": LBstat2, + "arch_r2": x_archtest, + "garch_r2": x_garch_archtest, + } + + return output + +# %% ../nbs/01_features.ipynb 21 
+def holt_parameters(x: np.array, freq: int = 1) -> Dict[str, float]:
+    """Fitted parameters of a Holt model.
+
+    Parameters
+    ----------
+    x: numpy array
+        The time series.
+    freq: int
+        Frequency of the time series
+
+    Returns
+    -------
+    dict
+        'alpha': Level parameter of the Holt model.
+        'beta': Trend parameter of the Holt model.
+    """
+    try:
+        fit = ExponentialSmoothing(x, trend="add", seasonal=None).fit()
+        params = {
+            "alpha": fit.params["smoothing_level"],
+            "beta": fit.params["smoothing_trend"],
+        }
+    except:
+        params = {"alpha": np.nan, "beta": np.nan}
+
+    return params
+
+# %% ../nbs/01_features.ipynb 22
+def hurst(x: np.array, freq: int = 1) -> Dict[str, float]:
+    """Hurst index.
+
+    Parameters
+    ----------
+    x: numpy array
+        The time series.
+    freq: int
+        Frequency of the time series
+
+    Returns
+    -------
+    dict
+        'hurst': Hurst exponent.
+    """
+    try:
+        hurst_index = hurst_exponent(x)
+    except:
+        hurst_index = np.nan
+
+    return {"hurst": hurst_index}
+
+# %% ../nbs/01_features.ipynb 23
+def hw_parameters(x: np.array, freq: int = 1) -> Dict[str, float]:
+    """Fitted parameters of a Holt-Winters model.
+
+    Parameters
+    ----------
+    x: numpy array
+        The time series.
+    freq: int
+        Frequency of the time series
+
+    Returns
+    -------
+    dict
+        'hw_alpha': Level parameter of the HW model.
+        'hw_beta': Trend parameter of the HW model.
+        'hw_gamma': Seasonal parameter of the HW model.
+    """
+    try:
+        fit = ExponentialSmoothing(
+            x, seasonal_periods=freq, trend="add", seasonal="add"
+        ).fit()
+        params = {
+            "hw_alpha": fit.params["smoothing_level"],
+            "hw_beta": fit.params["smoothing_trend"],
+            "hw_gamma": fit.params["smoothing_seasonal"],
+        }
+    except:
+        params = {"hw_alpha": np.nan, "hw_beta": np.nan, "hw_gamma": np.nan}
+
+    return params
+
+# %% ../nbs/01_features.ipynb 24
+def intervals(x: np.array, freq: int = 1) -> Dict[str, float]:
+    """Intervals with demand.
+
+    Parameters
+    ----------
+    x: numpy array
+        The time series.
+    freq: int
+        Frequency of the time series
+
+    Returns
+    -------
+    dict
+        'intervals_mean': Mean of intervals with positive values.
+        'intervals_sd': SD of intervals with positive values.
+    """
+    x = np.where(x > 0, 1, x)
+
+    y = [sum(val) for keys, val in groupby(x, key=lambda k: k != 0) if keys != 0]
+    y = np.array(y)
+
+    return {"intervals_mean": np.mean(y), "intervals_sd": np.std(y, ddof=1)}
+
+# %% ../nbs/01_features.ipynb 25
+def lumpiness(x: np.array, freq: int = 1) -> Dict[str, float]:
+    """lumpiness.
+
+    Parameters
+    ----------
+    x: numpy array
+        The time series.
+    freq: int
+        Frequency of the time series
+
+    Returns
+    -------
+    dict
+        'lumpiness': Variance of the variances of tiled windows.
+    """
+    if freq == 1:
+        width = 10
+    else:
+        width = freq
+
+    nr = len(x)
+    lo = np.arange(0, nr, width)
+    up = lo + width
+    nsegs = nr / width
+    varx = [np.nanvar(x[lo[idx] : up[idx]], ddof=1) for idx in np.arange(int(nsegs))]
+
+    if len(x) < 2 * width:
+        lumpiness = 0
+    else:
+        lumpiness = np.nanvar(varx, ddof=1)
+
+    return {"lumpiness": lumpiness}
+
+# %% ../nbs/01_features.ipynb 26
+def nonlinearity(x: np.array, freq: int = 1) -> Dict[str, float]:
+    """Nonlinearity.
+
+    Parameters
+    ----------
+    x: numpy array
+        The time series.
+    freq: int
+        Frequency of the time series
+
+    Returns
+    -------
+    dict
+        'nonlinearity': 10 t**2/len(x) where t is the statistic used in
+        Terasvirta's test.
+    """
+    try:
+        test = terasvirta_test(x)
+        test = 10 * test / len(x)
+    except:
+        test = np.nan
+
+    return {"nonlinearity": test}
+
+# %% ../nbs/01_features.ipynb 27
+def pacf_features(x: np.array, freq: int = 1) -> Dict[str, float]:
+    """Calculates partial autocorrelation function features.
+
+    Parameters
+    ----------
+    x: numpy array
+        The time series.
+    freq: int
+        Frequency of the time series
+
+    Returns
+    -------
+    dict
+        'x_pacf5': Sum of squares of the first 5 partial autocorrelation
+        coefficients.
+ 'diff1x_pacf5': Sum of squares of the first 5 partial autocorrelation + coefficients of differenced series. + 'diff2x_pacf5': Sum of squares of the first 5 partial autocorrelation + coefficients of twice-differenced series. + + Only for seasonal data (freq > 1). + 'seas_pacf': Partial autocorrelation + coefficient at the first seasonal lag. + """ + m = freq + + nlags_ = max(m, 5) + + if len(x) > 1: + try: + pacfx = pacf(x, nlags=nlags_, method="ldb") + except: + pacfx = np.nan + else: + pacfx = np.nan + # Sum of first 6 PACs squared + if len(x) > 5 and not np.all(np.isnan(pacfx)): + pacf_5 = np.sum(pacfx[1:6] ** 2) + else: + pacf_5 = np.nan + # Sum of first 5 PACs of difference series squared + if len(x) > 6: + try: + diff1_pacf = pacf(np.diff(x, n=1), nlags=5, method="ldb")[1:6] + diff1_pacf_5 = np.sum(diff1_pacf**2) + except: + diff1_pacf_5 = np.nan + else: + diff1_pacf_5 = np.nan + # Sum of first 5 PACs of twice differenced series squared + if len(x) > 7: + try: + diff2_pacf = pacf(np.diff(x, n=2), nlags=5, method="ldb")[1:6] + diff2_pacf_5 = np.sum(diff2_pacf**2) + except: + diff2_pacf_5 = np.nan + else: + diff2_pacf_5 = np.nan + + output = { + "x_pacf5": pacf_5, + "diff1x_pacf5": diff1_pacf_5, + "diff2x_pacf5": diff2_pacf_5, + } + + if m > 1: + output["seas_pacf"] = pacfx[m] if len(pacfx) > m else np.nan + + return output + +# %% ../nbs/01_features.ipynb 28 +def series_length(x: np.array, freq: int = 1) -> Dict[str, float]: + """Series length. + + Parameters + ---------- + x: numpy array + The time series. + freq: int + Frequency of the time series + + Returns + ------- + dict + 'series_length': Wrapper of len(x). + """ + + return {"series_length": len(x)} + +# %% ../nbs/01_features.ipynb 29 +def sparsity(x: np.array, freq: int = 1) -> Dict[str, float]: + """Sparsity. + + Parameters + ---------- + x: numpy array + The time series. + freq: int + Frequency of the time series + + Returns + ------- + dict + 'sparsity': Average obs with zero values. 
+ """ + + return {"sparsity": np.mean(x == 0)} + +# %% ../nbs/01_features.ipynb 30 +def stability(x: np.array, freq: int = 1) -> Dict[str, float]: + """Stability. + + Parameters + ---------- + x: numpy array + The time series. + freq: int + Frequency of the time series + + Returns + ------- + dict + 'stability': Variance of the means of tiled windows. + """ + if freq == 1: + width = 10 + else: + width = freq + + nr = len(x) + lo = np.arange(0, nr, width) + up = lo + width + nsegs = nr / width + meanx = [np.nanmean(x[lo[idx] : up[idx]]) for idx in np.arange(int(nsegs))] + + if len(x) < 2 * width: + stability = 0 + else: + stability = np.nanvar(meanx, ddof=1) + + return {"stability": stability} + +# %% ../nbs/01_features.ipynb 31 +def stl_features(x: np.array, freq: int = 1) -> Dict[str, float]: + """Calculates seasonal trend using loess decomposition. + + Parameters + ---------- + x: numpy array + The time series. + freq: int + Frequency of the time series + + Returns + ------- + dict + 'nperiods': Number of seasonal periods in x. + 'seasonal_period': Frequency of the time series. + 'trend': Strength of trend. + 'spike': Measures "spikiness" of x. + 'linearity': Linearity of x based on the coefficients of an + orthogonal quadratic regression. + 'curvature': Curvature of x based on the coefficients of an + orthogonal quadratic regression. + 'e_acf1': acfremainder['x_acf1'] + 'e_acf10': acfremainder['x_acf10'] + + Only for sesonal data (freq > 0). + 'seasonal_strength': Strength of seasonality. + 'peak': Strength of peaks. + 'trough': Strength of trough. 
+ """ + m = freq + nperiods = int(m > 1) + # STL fits + if m > 1: + try: + stlfit = STL(x, m, 13).fit() + except: + output = { + "nperiods": nperiods, + "seasonal_period": m, + "trend": np.nan, + "spike": np.nan, + "linearity": np.nan, + "curvature": np.nan, + "e_acf1": np.nan, + "e_acf10": np.nan, + "seasonal_strength": np.nan, + "peak": np.nan, + "trough": np.nan, + } + + return output + + trend0 = stlfit.trend + remainder = stlfit.resid + seasonal = stlfit.seasonal + else: + deseas = x + t = np.arange(len(x)) + 1 + try: + trend0 = SuperSmoother().fit(t, deseas).predict(t) + except: + output = { + "nperiods": nperiods, + "seasonal_period": m, + "trend": np.nan, + "spike": np.nan, + "linearity": np.nan, + "curvature": np.nan, + "e_acf1": np.nan, + "e_acf10": np.nan, + } + + return output + + remainder = deseas - trend0 + seasonal = np.zeros(len(x)) + # De-trended and de-seasonalized data + detrend = x - trend0 + deseason = x - seasonal + fits = x - remainder + # Summay stats + n = len(x) + varx = np.nanvar(x, ddof=1) + vare = np.nanvar(remainder, ddof=1) + vardetrend = np.nanvar(detrend, ddof=1) + vardeseason = np.nanvar(deseason, ddof=1) + # Measure of trend strength + if varx < np.finfo(float).eps: + trend = 0 + elif vardeseason / varx < 1e-10: + trend = 0 + else: + trend = max(0, min(1, 1 - vare / vardeseason)) + # Measure of seasonal strength + if m > 1: + if varx < np.finfo(float).eps: + season = 0 + elif np.nanvar(remainder + seasonal, ddof=1) < np.finfo(float).eps: + season = 0 + else: + season = max(0, min(1, 1 - vare / np.nanvar(remainder + seasonal, ddof=1))) + + peak = (np.argmax(seasonal) + 1) % m + peak = m if peak == 0 else peak + + trough = (np.argmin(seasonal) + 1) % m + trough = m if trough == 0 else trough + # Compute measure of spikiness + d = (remainder - np.nanmean(remainder)) ** 2 + varloo = (vare * (n - 1) - d) / (n - 2) + spike = np.nanvar(varloo, ddof=1) + # Compute measures of linearity and curvature + time = np.arange(n) + 1 + poly_m = 
poly(time, 2) + time_x = add_constant(poly_m) + coefs = OLS(trend0, time_x).fit().params + + linearity = coefs[1] + curvature = -coefs[2] + # ACF features + acfremainder = acf_features(remainder, m) + # Assemble features + output = { + "nperiods": nperiods, + "seasonal_period": m, + "trend": trend, + "spike": spike, + "linearity": linearity, + "curvature": curvature, + "e_acf1": acfremainder["x_acf1"], + "e_acf10": acfremainder["x_acf10"], + } + + if m > 1: + output["seasonal_strength"] = season + output["peak"] = peak + output["trough"] = trough + + return output + +# %% ../nbs/01_features.ipynb 32 +def unitroot_kpss(x: np.array, freq: int = 1) -> Dict[str, float]: + """Unit root kpss. + + Parameters + ---------- + x: numpy array + The time series. + freq: int + Frequency of the time series + + Returns + ------- + dict + 'unitroot_kpss': Statistic for the Kwiatowski et al unit root test. + """ + n = len(x) + nlags = int(4 * (n / 100) ** (1 / 4)) + + try: + test_kpss, _, _, _ = kpss(x, nlags=nlags) + except: + test_kpss = np.nan + + return {"unitroot_kpss": test_kpss} + +# %% ../nbs/01_features.ipynb 33 +def unitroot_pp(x: np.array, freq: int = 1) -> Dict[str, float]: + """Unit root pp. + + Parameters + ---------- + x: numpy array + The time series. + freq: int + Frequency of the time series + + Returns + ------- + dict + 'unitroot_pp': Statistic for the Phillips-Perron unit root test. + """ + try: + test_pp = ur_pp(x) + except: + test_pp = np.nan + + return {"unitroot_pp": test_pp} diff --git a/tsfeatures/m4_data.py b/tsfeatures/m4_data.py index ac3ce68..746f202 100644 --- a/tsfeatures/m4_data.py +++ b/tsfeatures/m4_data.py @@ -1,33 +1,31 @@ -#!/usr/bin/env python -# coding: utf-8 +# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/04_m4_data.ipynb. 
+# %% auto 0 +__all__ = ['seas_dict', 'SOURCE_URL', 'maybe_download', 'm4_parser', 'prepare_m4_data'] + +# %% ../nbs/04_m4_data.ipynb 3 import os -import subprocess import urllib -import numpy as np import pandas as pd -#from six.moves import urllib - -seas_dict = {'Hourly': {'seasonality': 24, 'input_size': 24, - 'output_size': 48, 'freq': 'H'}, - 'Daily': {'seasonality': 7, 'input_size': 7, - 'output_size': 14, 'freq': 'D'}, - 'Weekly': {'seasonality': 52, 'input_size': 52, - 'output_size': 13, 'freq': 'W'}, - 'Monthly': {'seasonality': 12, 'input_size': 12, - 'output_size':18, 'freq': 'M'}, - 'Quarterly': {'seasonality': 4, 'input_size': 4, - 'output_size': 8, 'freq': 'Q'}, - 'Yearly': {'seasonality': 1, 'input_size': 4, - 'output_size': 6, 'freq': 'D'}} - -SOURCE_URL = 'https://raw.githubusercontent.com/Mcompetitions/M4-methods/master/Dataset/' - - +seas_dict = { + "Hourly": {"seasonality": 24, "input_size": 24, "output_size": 48, "freq": "H"}, + "Daily": {"seasonality": 7, "input_size": 7, "output_size": 14, "freq": "D"}, + "Weekly": {"seasonality": 52, "input_size": 52, "output_size": 13, "freq": "W"}, + "Monthly": {"seasonality": 12, "input_size": 12, "output_size": 18, "freq": "M"}, + "Quarterly": {"seasonality": 4, "input_size": 4, "output_size": 8, "freq": "Q"}, + "Yearly": {"seasonality": 1, "input_size": 4, "output_size": 6, "freq": "D"}, +} + +# %% ../nbs/04_m4_data.ipynb 4 +SOURCE_URL = ( + "https://raw.githubusercontent.com/Mcompetitions/M4-methods/master/Dataset/" +) + +# %% ../nbs/04_m4_data.ipynb 5 def maybe_download(filename, directory): - """ Download the data from M4's website, unless it's already here. + """Download the data from M4's website, unless it's already here. 
Parameters ---------- @@ -49,12 +47,13 @@ def maybe_download(filename, directory): filepath, _ = urllib.request.urlretrieve(SOURCE_URL + filename, filepath) size = os.path.getsize(filepath) - print('Successfully downloaded', filename, size, 'bytes.') + print("Successfully downloaded", filename, size, "bytes.") return filepath +# %% ../nbs/04_m4_data.ipynb 6 def m4_parser(dataset_name, directory, num_obs=1000000): - """ Transform M4 data into a panel. + """Transform M4 data into a panel. Parameters ---------- @@ -68,60 +67,66 @@ def m4_parser(dataset_name, directory, num_obs=1000000): data_directory = directory + "/m4" train_directory = data_directory + "/Train/" test_directory = data_directory + "/Test/" - freq = seas_dict[dataset_name]['freq'] + freq = seas_dict[dataset_name]["freq"] - m4_info = pd.read_csv(data_directory+'/M4-info.csv', usecols=['M4id','category']) - m4_info = m4_info[m4_info['M4id'].str.startswith(dataset_name[0])].reset_index(drop=True) + m4_info = pd.read_csv(data_directory + "/M4-info.csv", usecols=["M4id", "category"]) + m4_info = m4_info[m4_info["M4id"].str.startswith(dataset_name[0])].reset_index( + drop=True + ) # Train data - train_path='{}{}-train.csv'.format(train_directory, dataset_name) + train_path = "{}{}-train.csv".format(train_directory, dataset_name) train_df = pd.read_csv(train_path, nrows=num_obs) - train_df = train_df.rename(columns={'V1':'unique_id'}) + train_df = train_df.rename(columns={"V1": "unique_id"}) - train_df = pd.wide_to_long(train_df, stubnames=["V"], i="unique_id", j="ds").reset_index() - train_df = train_df.rename(columns={'V':'y'}) + train_df = pd.wide_to_long( + train_df, stubnames=["V"], i="unique_id", j="ds" + ).reset_index() + train_df = train_df.rename(columns={"V": "y"}) train_df = train_df.dropna() - train_df['split'] = 'train' - train_df['ds'] = train_df['ds']-1 + train_df["split"] = "train" + train_df["ds"] = train_df["ds"] - 1 # Get len of series per unique_id - len_series = 
train_df.groupby('unique_id').agg({'ds': 'max'}).reset_index() - len_series.columns = ['unique_id', 'len_serie'] + len_series = train_df.groupby("unique_id").agg({"ds": "max"}).reset_index() + len_series.columns = ["unique_id", "len_serie"] # Test data - test_path='{}{}-test.csv'.format(test_directory, dataset_name) + test_path = "{}{}-test.csv".format(test_directory, dataset_name) test_df = pd.read_csv(test_path, nrows=num_obs) - test_df = test_df.rename(columns={'V1':'unique_id'}) + test_df = test_df.rename(columns={"V1": "unique_id"}) - test_df = pd.wide_to_long(test_df, stubnames=["V"], i="unique_id", j="ds").reset_index() - test_df = test_df.rename(columns={'V':'y'}) + test_df = pd.wide_to_long( + test_df, stubnames=["V"], i="unique_id", j="ds" + ).reset_index() + test_df = test_df.rename(columns={"V": "y"}) test_df = test_df.dropna() - test_df['split'] = 'test' - test_df = test_df.merge(len_series, on='unique_id') - test_df['ds'] = test_df['ds'] + test_df['len_serie'] - 1 - test_df = test_df[['unique_id','ds','y','split']] + test_df["split"] = "test" + test_df = test_df.merge(len_series, on="unique_id") + test_df["ds"] = test_df["ds"] + test_df["len_serie"] - 1 + test_df = test_df[["unique_id", "ds", "y", "split"]] - df = pd.concat((train_df,test_df)) - df = df.sort_values(by=['unique_id', 'ds']).reset_index(drop=True) + df = pd.concat((train_df, test_df)) + df = df.sort_values(by=["unique_id", "ds"]).reset_index(drop=True) # Create column with dates with freq of dataset - len_series = df.groupby('unique_id').agg({'ds': 'max'}).reset_index() + len_series = df.groupby("unique_id").agg({"ds": "max"}).reset_index() dates = [] for i in range(len(len_series)): - len_serie = len_series.iloc[i,1] - ranges = pd.date_range(start='1970/01/01', periods=len_serie, freq=freq) - dates += list(ranges) - df.loc[:,'ds'] = dates + len_serie = len_series.iloc[i, 1] + ranges = pd.date_range(start="1970/01/01", periods=len_serie, freq=freq) + dates += list(ranges) + df.loc[:, 
"ds"] = dates - df = df.merge(m4_info, left_on=['unique_id'], right_on=['M4id']) - df.drop(columns=['M4id'], inplace=True) - df = df.rename(columns={'category': 'x'}) + df = df.merge(m4_info, left_on=["unique_id"], right_on=["M4id"]) + df.drop(columns=["M4id"], inplace=True) + df = df.rename(columns={"category": "x"}) - X_train_df = df[df['split']=='train'].filter(items=['unique_id', 'ds', 'x']) - y_train_df = df[df['split']=='train'].filter(items=['unique_id', 'ds', 'y']) - X_test_df = df[df['split']=='test'].filter(items=['unique_id', 'ds', 'x']) - y_test_df = df[df['split']=='test'].filter(items=['unique_id', 'ds', 'y']) + X_train_df = df[df["split"] == "train"].filter(items=["unique_id", "ds", "x"]) + y_train_df = df[df["split"] == "train"].filter(items=["unique_id", "ds", "y"]) + X_test_df = df[df["split"] == "test"].filter(items=["unique_id", "ds", "x"]) + y_test_df = df[df["split"] == "test"].filter(items=["unique_id", "ds", "y"]) X_train_df = X_train_df.reset_index(drop=True) y_train_df = y_train_df.reset_index(drop=True) @@ -130,6 +135,7 @@ def m4_parser(dataset_name, directory, num_obs=1000000): return X_train_df, y_train_df, X_test_df, y_test_df +# %% ../nbs/04_m4_data.ipynb 7 def prepare_m4_data(dataset_name, directory, num_obs): """Pipeline that obtains M4 times series, tranforms it and gets naive2 predictions. @@ -143,23 +149,25 @@ def prepare_m4_data(dataset_name, directory, num_obs): num_obs: int Number of time series to return. 
""" - m4info_filename = maybe_download('M4-info.csv', directory) - - dailytrain_filename = maybe_download('Train/Daily-train.csv', directory) - hourlytrain_filename = maybe_download('Train/Hourly-train.csv', directory) - monthlytrain_filename = maybe_download('Train/Monthly-train.csv', directory) - quarterlytrain_filename = maybe_download('Train/Quarterly-train.csv', directory) - weeklytrain_filename = maybe_download('Train/Weekly-train.csv', directory) - yearlytrain_filename = maybe_download('Train/Yearly-train.csv', directory) - - dailytest_filename = maybe_download('Test/Daily-test.csv', directory) - hourlytest_filename = maybe_download('Test/Hourly-test.csv', directory) - monthlytest_filename = maybe_download('Test/Monthly-test.csv', directory) - quarterlytest_filename = maybe_download('Test/Quarterly-test.csv', directory) - weeklytest_filename = maybe_download('Test/Weekly-test.csv', directory) - yearlytest_filename = maybe_download('Test/Yearly-test.csv', directory) - print('\n') - - X_train_df, y_train_df, X_test_df, y_test_df = m4_parser(dataset_name, directory, num_obs) + m4info_filename = maybe_download("M4-info.csv", directory) + + dailytrain_filename = maybe_download("Train/Daily-train.csv", directory) + hourlytrain_filename = maybe_download("Train/Hourly-train.csv", directory) + monthlytrain_filename = maybe_download("Train/Monthly-train.csv", directory) + quarterlytrain_filename = maybe_download("Train/Quarterly-train.csv", directory) + weeklytrain_filename = maybe_download("Train/Weekly-train.csv", directory) + yearlytrain_filename = maybe_download("Train/Yearly-train.csv", directory) + + dailytest_filename = maybe_download("Test/Daily-test.csv", directory) + hourlytest_filename = maybe_download("Test/Hourly-test.csv", directory) + monthlytest_filename = maybe_download("Test/Monthly-test.csv", directory) + quarterlytest_filename = maybe_download("Test/Quarterly-test.csv", directory) + weeklytest_filename = maybe_download("Test/Weekly-test.csv", 
directory) + yearlytest_filename = maybe_download("Test/Yearly-test.csv", directory) + print("\n") + + X_train_df, y_train_df, X_test_df, y_test_df = m4_parser( + dataset_name, directory, num_obs + ) return X_train_df, y_train_df, X_test_df, y_test_df diff --git a/tsfeatures/metrics/__init__.py b/tsfeatures/metrics/__init__.py deleted file mode 100644 index 91847eb..0000000 --- a/tsfeatures/metrics/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -from .metrics import * diff --git a/tsfeatures/metrics/metrics.py b/tsfeatures/metrics/metrics.py deleted file mode 100644 index f27ea75..0000000 --- a/tsfeatures/metrics/metrics.py +++ /dev/null @@ -1,345 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -import numpy as np -import pandas as pd - -from functools import partial -from math import sqrt -from multiprocessing import Pool -from typing import Callable, Optional - -AVAILABLE_METRICS = ['mse', 'rmse', 'mape', 'smape', 'mase', 'rmsse', - 'mini_owa', 'pinball_loss'] - -###################################################################### -# METRICS -###################################################################### - -def mse(y: np.array, y_hat:np.array) -> float: - """Calculates Mean Squared Error. - - MSE measures the prediction accuracy of a - forecasting method by calculating the squared deviation - of the prediction and the true value at a given time and - averages these devations over the length of the series. - - Parameters - ---------- - y: numpy array - actual test values - y_hat: numpy array - predicted values - - Returns - ------- - scalar: - MSE - """ - mse = np.mean(np.square(y - y_hat)) - - return mse - -def rmse(y: np.array, y_hat:np.array) -> float: - """Calculates Root Mean Squared Error. - - RMSE measures the prediction accuracy of a - forecasting method by calculating the squared deviation - of the prediction and the true value at a given time and - averages these devations over the length of the series. 
- Finally the RMSE will be in the same scale - as the original time series so its comparison with other - series is possible only if they share a common scale. - - Parameters - ---------- - y: numpy array - actual test values - y_hat: numpy array - predicted values - - Returns - ------- - scalar: RMSE - """ - rmse = sqrt(np.mean(np.square(y - y_hat))) - - return rmse - -def mape(y: np.array, y_hat:np.array) -> float: - """Calculates Mean Absolute Percentage Error. - - MAPE measures the relative prediction accuracy of a - forecasting method by calculating the percentual deviation - of the prediction and the true value at a given time and - averages these devations over the length of the series. - - Parameters - ---------- - y: numpy array - actual test values - y_hat: numpy array - predicted values - - Returns - ------- - scalar: MAPE - """ - mape = np.mean(np.abs(y - y_hat) / np.abs(y)) - mape = 100 * mape - - return mape - -def smape(y: np.array, y_hat:np.array) -> float: - """Calculates Symmetric Mean Absolute Percentage Error. - - SMAPE measures the relative prediction accuracy of a - forecasting method by calculating the relative deviation - of the prediction and the true value scaled by the sum of the - absolute values for the prediction and true value at a - given time, then averages these devations over the length - of the series. This allows the SMAPE to have bounds between - 0% and 200% which is desireble compared to normal MAPE that - may be undetermined. - - Parameters - ---------- - y: numpy array - actual test values - y_hat: numpy array - predicted values - - Returns - ------- - scalar: SMAPE - """ - scale = np.abs(y) + np.abs(y_hat) - scale[scale == 0] = 1e-3 - smape = np.mean(np.abs(y - y_hat) / scale) - smape = 200 * smape - - return smape - -def mase(y: np.array, y_hat: np.array, - y_train: np.array, seasonality: int = 1) -> float: - """Calculates the M4 Mean Absolute Scaled Error. 
- - MASE measures the relative prediction accuracy of a - forecasting method by comparinng the mean absolute errors - of the prediction and the true value against the mean - absolute errors of the seasonal naive model. - - Parameters - ---------- - y: numpy array - actual test values - y_hat: numpy array - predicted values - y_train: numpy array - actual train values for Naive1 predictions - seasonality: int - main frequency of the time series - Hourly 24, Daily 7, Weekly 52, - Monthly 12, Quarterly 4, Yearly 1 - - Returns - ------- - scalar: MASE - """ - scale = np.mean(abs(y_train[seasonality:] - y_train[:-seasonality])) - mase = np.mean(abs(y - y_hat)) / scale - mase = 100 * mase - - return mase - -def rmsse(y: np.array, y_hat: np.array, - y_train: np.array, seasonality: int = 1) -> float: - """Calculates the M5 Root Mean Squared Scaled Error. - - Parameters - ---------- - y: numpy array - actual test values - y_hat: numpy array of len h (forecasting horizon) - predicted values - y_train: numpy array - actual train values - seasonality: int - main frequency of the time series - Hourly 24, Daily 7, Weekly 52, - Monthly 12, Quarterly 4, Yearly 1 - - Returns - ------- - scalar: RMSSE - """ - scale = np.mean(np.square(y_train[seasonality:] - y_train[:-seasonality])) - rmsse = sqrt(mse(y, y_hat) / scale) - rmsse = 100 * rmsse - - return rmsse - -def mini_owa(y: np.array, y_hat: np.array, - y_train: np.array, - seasonality: int, - y_bench: np.array): - """Calculates the Overall Weighted Average for a single series. - - MASE, sMAPE for Naive2 and current model - then calculatess Overall Weighted Average. 
- - Parameters - ---------- - y: numpy array - actual test values - y_hat: numpy array of len h (forecasting horizon) - predicted values - y_train: numpy array - insample values of the series for scale - seasonality: int - main frequency of the time series - Hourly 24, Daily 7, Weekly 52, - Monthly 12, Quarterly 4, Yearly 1 - y_bench: numpy array of len h (forecasting horizon) - predicted values of the benchmark model - - Returns - ------- - return: mini_OWA - """ - mase_y = mase(y, y_hat, y_train, seasonality) - mase_bench = mase(y, y_bench, y_train, seasonality) - - smape_y = smape(y, y_hat) - smape_bench = smape(y, y_bench) - - mini_owa = ((mase_y/mase_bench) + (smape_y/smape_bench))/2 - - return mini_owa - -def pinball_loss(y: np.array, y_hat: np.array, tau: int = 0.5): - """Calculates the Pinball Loss. - - The Pinball loss measures the deviation of a quantile forecast. - By weighting the absolute deviation in a non symmetric way, the - loss pays more attention to under or over estimation. - A common value for tau is 0.5 for the deviation from the median. - - Parameters - ---------- - y: numpy array - actual test values - y_hat: numpy array of len h (forecasting horizon) - predicted values - tau: float - Fixes the quantile against which the predictions are compared. 
- - Returns - ------- - return: pinball_loss - """ - delta_y = y - y_hat - pinball = np.maximum(tau * delta_y, (tau-1) * delta_y) - pinball = pinball.mean() - - return pinball - -###################################################################### -# PANEL EVALUATION -###################################################################### - -def _evaluate_ts(uid, y_test, y_hat, - y_train, metric, - seasonality, y_bench, metric_name): - y_test_uid = y_test.loc[uid].y.values - y_hat_uid = y_hat.loc[uid].y_hat.values - - if metric_name in ['mase', 'rmsse']: - y_train_uid = y_train.loc[uid].y.values - evaluation_uid = metric(y=y_test_uid, y_hat=y_hat_uid, - y_train=y_train_uid, - seasonality=seasonality) - elif metric_name in ['mini_owa']: - y_train_uid = y_train.loc[uid].y.values - y_bench_uid = y_bench.loc[uid].y_hat.values - evaluation_uid = metric(y=y_test_uid, y_hat=y_hat_uid, - y_train=y_train_uid, - seasonality=seasonality, - y_bench=y_bench_uid) - - else: - evaluation_uid = metric(y=y_test_uid, y_hat=y_hat_uid) - - return uid, evaluation_uid - -def evaluate_panel(y_test: pd.DataFrame, - y_hat: pd.DataFrame, - y_train: pd.DataFrame, - metric: Callable, - seasonality: Optional[int] = None, - y_bench: Optional[pd.DataFrame] = None, - threads: Optional[int] = None): - """Calculates a specific metric for y and y_hat (and y_train, if needed). - - Parameters - ---------- - y_test: pandas df - df with columns ['unique_id', 'ds', 'y'] - y_hat: pandas df - df with columns ['unique_id', 'ds', 'y_hat'] - y_train: pandas df - df with columns ['unique_id', 'ds', 'y'] (train) - This is used in the scaled metrics ('mase', 'rmsse'). - metric: callable - loss function - seasonality: int - Main frequency of the time series. - Used in ('mase', 'rmsse'). - Commonly used seasonalities: - Hourly: 24, - Daily: 7, - Weekly: 52, - Monthly: 12, - Quarterly: 4, - Yearly: 1. 
- y_bench: pandas df - df with columns ['unique_id', 'ds', 'y_hat'] - predicted values of the benchmark model - This is used in 'mini_owa'. - threads: int - Number of threads to use. Use None (default) for parallel processing. - - Returns - ------ - pandas dataframe: - loss ofr each unique_id in the panel data - """ - metric_name = metric.__code__.co_name - uids = y_test['unique_id'].unique() - y_hat_uids = y_hat['unique_id'].unique() - - assert len(y_test)==len(y_hat), "not same length" - assert all(uids == y_hat_uids), "not same u_ids" - - y_test = y_test.set_index(['unique_id', 'ds']) - y_hat = y_hat.set_index(['unique_id', 'ds']) - - if metric_name in ['mase', 'rmsse']: - y_train = y_train.set_index(['unique_id', 'ds']) - - elif metric_name in ['mini_owa']: - y_train = y_train.set_index(['unique_id', 'ds']) - y_bench = y_bench.set_index(['unique_id', 'ds']) - - partial_evaluation = partial(_evaluate_ts, y_test=y_test, y_hat=y_hat, - y_train=y_train, metric=metric, - seasonality=seasonality, - y_bench=y_bench, - metric_name=metric_name) - - with Pool(threads) as pool: - evaluations = pool.map(partial_evaluation, uids) - - evaluations = pd.DataFrame(evaluations, columns=['unique_id', 'error']) - - return evaluations diff --git a/tsfeatures/tests/__init__.py b/tsfeatures/tests/__init__.py deleted file mode 100644 index 8b13789..0000000 --- a/tsfeatures/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/tsfeatures/tests/test_acf_features.py b/tsfeatures/tests/test_acf_features.py deleted file mode 100644 index 833f328..0000000 --- a/tsfeatures/tests/test_acf_features.py +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -from math import isclose -from tsfeatures import acf_features -from tsfeatures.utils import WWWusage, USAccDeaths - -def test_acf_features_seasonal(): - z = acf_features(USAccDeaths, 12) - assert isclose(len(z), 7) - assert isclose(z['x_acf1'], 0.70, abs_tol=0.01) - assert isclose(z['x_acf10'], 1.20, abs_tol=0.01) - 
assert isclose(z['diff1_acf1'], 0.023, abs_tol=0.01) - assert isclose(z['diff1_acf10'], 0.27, abs_tol=0.01) - assert isclose(z['diff2_acf1'], -0.48, abs_tol=0.01) - assert isclose(z['diff2_acf10'], 0.74, abs_tol=0.01) - assert isclose(z['seas_acf1'], 0.62, abs_tol=0.01) - -def test_acf_features_non_seasonal(): - z = acf_features(WWWusage, 1) - assert isclose(len(z), 6) - assert isclose(z['x_acf1'], 0.96, abs_tol=0.01) - assert isclose(z['x_acf10'], 4.19, abs_tol=0.01) - assert isclose(z['diff1_acf1'], 0.79, abs_tol=0.01) - assert isclose(z['diff1_acf10'], 1.40, abs_tol=0.01) - assert isclose(z['diff2_acf1'], 0.17, abs_tol=0.01) - assert isclose(z['diff2_acf10'], 0.33, abs_tol=0.01) diff --git a/tsfeatures/tests/test_arch_stat.py b/tsfeatures/tests/test_arch_stat.py deleted file mode 100644 index 18a95d2..0000000 --- a/tsfeatures/tests/test_arch_stat.py +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -from math import isclose -from tsfeatures import arch_stat -from tsfeatures.utils import WWWusage, USAccDeaths - -def test_arch_stat_seasonal(): - z = arch_stat(USAccDeaths, 12) - assert isclose(len(z), 1) - assert isclose(z['arch_lm'], 0.54, abs_tol=0.01) - -def test_arch_stat_non_seasonal(): - z = arch_stat(WWWusage, 12) - assert isclose(len(z), 1) - assert isclose(z['arch_lm'], 0.98, abs_tol=0.01) diff --git a/tsfeatures/tests/test_holt_parameters.py b/tsfeatures/tests/test_holt_parameters.py deleted file mode 100644 index 2454564..0000000 --- a/tsfeatures/tests/test_holt_parameters.py +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -from math import isclose -from tsfeatures import holt_parameters -from tsfeatures.utils import WWWusage, USAccDeaths - -def test_holt_parameters_seasonal(): - z = holt_parameters(USAccDeaths, 12) - assert isclose(len(z), 2) - assert isclose(z['alpha'], 0.96, abs_tol=0.07) - assert isclose(z['beta'], 0.00, abs_tol=0.1) - -def test_holt_parameters_non_seasonal(): - z = 
holt_parameters(WWWusage, 1) - assert isclose(len(z), 2) - assert isclose(z['alpha'], 0.99, abs_tol=0.02) - assert isclose(z['beta'], 0.99, abs_tol=0.02) diff --git a/tsfeatures/tests/test_mutability.py b/tsfeatures/tests/test_mutability.py deleted file mode 100644 index 5dbcc55..0000000 --- a/tsfeatures/tests/test_mutability.py +++ /dev/null @@ -1,31 +0,0 @@ -import numpy as np -import pandas as pd -from tsfeatures import ( - tsfeatures, acf_features, arch_stat, crossing_points, - entropy, flat_spots, heterogeneity, holt_parameters, - lumpiness, nonlinearity, pacf_features, stl_features, - stability, hw_parameters, unitroot_kpss, unitroot_pp, - series_length, sparsity, hurst -) - - -def test_mutability(): - z = np.zeros(100) - z[-1] = 1 - z_df = pd.DataFrame({'unique_id': 1, 'ds': range(1, 101), 'y': z}) - feats=[sparsity, acf_features, arch_stat, crossing_points, - entropy, flat_spots, holt_parameters, - lumpiness, nonlinearity, pacf_features, stl_features, - stability, hw_parameters, unitroot_kpss, unitroot_pp, - series_length, hurst] - feats_2=[acf_features, arch_stat, crossing_points, - entropy, flat_spots, holt_parameters, - lumpiness, nonlinearity, pacf_features, stl_features, - stability, hw_parameters, unitroot_kpss, unitroot_pp, - series_length, hurst, sparsity] - feats_df = tsfeatures(z_df, freq=7, features=feats, scale=False) - feats_2_df = tsfeatures(z_df, freq=7, features=feats_2, scale=False) - pd.testing.assert_frame_equal(feats_df, feats_2_df[feats_df.columns]) - -if __name__=="__main__": - test_mutability() diff --git a/tsfeatures/tests/test_pacf_features.py b/tsfeatures/tests/test_pacf_features.py deleted file mode 100644 index 40d4d37..0000000 --- a/tsfeatures/tests/test_pacf_features.py +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -import numpy as np -from tsfeatures import pacf_features - - -def test_pacf_features_seasonal_short(): - z = np.random.normal(size=15) - pacf_features(z, freq=7) diff --git 
a/tsfeatures/tests/test_pipeline.py b/tsfeatures/tests/test_pipeline.py deleted file mode 100644 index 58a07b0..0000000 --- a/tsfeatures/tests/test_pipeline.py +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -from tsfeatures import tsfeatures -from tsfeatures.m4_data import prepare_m4_data -from tsfeatures.utils import FREQS - -def test_pipeline(): - def calculate_features_m4(dataset_name, directory, num_obs=1000000): - _, y_train_df, _, _ = prepare_m4_data(dataset_name=dataset_name, - directory = directory, - num_obs=num_obs) - - freq = FREQS[dataset_name[0]] - py_feats = tsfeatures(y_train_df, freq=freq).set_index('unique_id') - - calculate_features_m4('Hourly', 'data', 100) - calculate_features_m4('Daily', 'data', 100) diff --git a/tsfeatures/tests/test_small_ts.py b/tsfeatures/tests/test_small_ts.py deleted file mode 100644 index 8cefeb4..0000000 --- a/tsfeatures/tests/test_small_ts.py +++ /dev/null @@ -1,36 +0,0 @@ -import numpy as np -import pandas as pd -from tsfeatures import ( - tsfeatures, acf_features, arch_stat, crossing_points, - entropy, flat_spots, heterogeneity, holt_parameters, - lumpiness, nonlinearity, pacf_features, stl_features, - stability, hw_parameters, unitroot_kpss, unitroot_pp, - series_length, sparsity, hurst, statistics -) - - -def test_small(): - z = np.zeros(2) - z[-1] = 1 - z_df = pd.DataFrame({'unique_id': 1, 'ds': range(1, 3), 'y': z}) - feats=[sparsity, acf_features, arch_stat, crossing_points, - entropy, flat_spots, holt_parameters, - lumpiness, nonlinearity, pacf_features, stl_features, - stability, hw_parameters, unitroot_kpss, unitroot_pp, - series_length, hurst, statistics] - feats_df = tsfeatures(z_df, freq=12, features=feats, scale=False) - -def test_small_1(): - z = np.zeros(1) - z[-1] = 1 - z_df = pd.DataFrame({'unique_id': 1, 'ds': range(1, 2), 'y': z}) - feats=[sparsity, acf_features, arch_stat, crossing_points, - entropy, flat_spots, holt_parameters, - lumpiness, nonlinearity, pacf_features, 
stl_features, - stability, hw_parameters, unitroot_kpss, unitroot_pp, - series_length, hurst, statistics] - feats_df = tsfeatures(z_df, freq=12, features=feats, scale=False) - -if __name__=="__main__": - test_small() - test_small_1() diff --git a/tsfeatures/tests/test_sparsity.py b/tsfeatures/tests/test_sparsity.py deleted file mode 100644 index 59706bd..0000000 --- a/tsfeatures/tests/test_sparsity.py +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -import numpy as np -import pandas as pd -from tsfeatures import sparsity, tsfeatures - -def test_non_zero_sparsity(): - # if we scale the data, the sparsity should be zero - z = np.zeros(10) - z[-1] = 1 - df = pd.DataFrame({'unique_id': 1, 'ds': range(1, 11), 'y': z}) - features = tsfeatures(df, freq=7, scale=True, features=[sparsity]) - z_sparsity = features['sparsity'].values[0] - assert z_sparsity == 0. - - -def test_sparsity(): - z = np.zeros(10) - z[-1] = 1 - df = pd.DataFrame({'unique_id': 1, 'ds': range(1, 11), 'y': z}) - features = tsfeatures(df, freq=7, scale=False, features=[sparsity]) - print(features) - z_sparsity = features['sparsity'].values[0] - assert z_sparsity == 0.9 diff --git a/tsfeatures/tests/test_statistics.py b/tsfeatures/tests/test_statistics.py deleted file mode 100644 index cc452c9..0000000 --- a/tsfeatures/tests/test_statistics.py +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -import numpy as np -import pandas as pd -from tsfeatures import statistics, tsfeatures - -def test_scale(): - z = np.zeros(10) - z[-1] = 1 - df = pd.DataFrame({'unique_id': 1, 'ds': range(1, 11), 'y': z}) - features = tsfeatures(df, freq=7, scale=True, features=[statistics]) - print(features) - -def test_no_scale(): - z = np.zeros(10) - z[-1] = 1 - df = pd.DataFrame({'unique_id': 1, 'ds': range(1, 11), 'y': z}) - features = tsfeatures(df, freq=7, scale=False, features=[statistics]) - print(features) - - -if __name__=="__main__": - test_scale() - test_no_scale() diff 
--git a/tsfeatures/tsfeatures.py b/tsfeatures/tsfeatures.py index d762075..6a1e0fb 100644 --- a/tsfeatures/tsfeatures.py +++ b/tsfeatures/tsfeatures.py @@ -1,921 +1,78 @@ -#!/usr/bin/env python -# coding: utf-8 +# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/02_tsfeatures_core.ipynb. +# %% auto 0 +__all__ = ['tsfeatures'] + +# %% ../nbs/02_tsfeatures_core.ipynb 3 +import os import warnings + +# %% ../nbs/02_tsfeatures_core.ipynb 4 warnings.warn = lambda *a, **kw: False -import os + os.environ["MKL_NUM_THREADS"] = "1" os.environ["NUMEXPR_NUM_THREADS"] = "1" os.environ["OMP_NUM_THREADS"] = "1" + +# %% ../nbs/02_tsfeatures_core.ipynb 5 from collections import ChainMap from functools import partial -from itertools import groupby -from math import log, e -from multiprocessing import cpu_count, Pool -from typing import List, Dict, Optional, Callable +from multiprocessing import Pool +from typing import Callable, Dict, List, Optional -import numpy as np import pandas as pd -from antropy import spectral_entropy -from arch import arch_model -from scipy.optimize import minimize_scalar -from sklearn.linear_model import LinearRegression -from statsmodels.api import add_constant, OLS -from statsmodels.tsa.ar_model import AR -from statsmodels.tsa.holtwinters import ExponentialSmoothing -from statsmodels.tsa.seasonal import STL -from statsmodels.tsa.stattools import acf, pacf, kpss -from supersmoother import SuperSmoother - -from .utils import (embed, FREQS, hurst_exponent, - lambda_coef_var, poly, - scalets, terasvirta_test, ur_pp) - - -def acf_features(x: np.array, freq: int = 1) -> Dict[str, float]: - """Calculates autocorrelation function features. - - Parameters - ---------- - x: numpy array - The time series. - freq: int - Frequency of the time series - - Returns - ------- - dict - 'x_acf1': First autocorrelation coefficient. - 'x_acf10': Sum of squares of first 10 autocorrelation coefficients. - 'diff1_acf1': First autocorrelation ciefficient of differenced series. 
- 'diff1_acf10': Sum of squared of first 10 autocorrelation coefficients - of differenced series. - 'diff2_acf1': First autocorrelation coefficient of twice-differenced series. - 'diff2_acf10': Sum of squared of first 10 autocorrelation coefficients of - twice-differenced series. - - Only for seasonal data (freq > 1). - 'seas_acf1': Autocorrelation coefficient at the first seasonal lag. - """ - m = freq - size_x = len(x) - - acfx = acf(x, nlags=max(m, 10), fft=False) - if size_x > 10: - acfdiff1x = acf(np.diff(x, n=1), nlags=10, fft=False) - else: - acfdiff1x = [np.nan]*2 - - if size_x > 11: - acfdiff2x = acf(np.diff(x, n=2), nlags=10, fft=False) - else: - acfdiff2x = [np.nan] * 2 - # first autocorrelation coefficient - try: - acf_1 = acfx[1] - except: - acf_1 = np.nan - # sum of squares of first 10 autocorrelation coefficients - sum_of_sq_acf10 = np.sum((acfx[1:11]) ** 2) if size_x > 10 else np.nan - # first autocorrelation ciefficient of differenced series - diff1_acf1 = acfdiff1x[1] - # sum of squared of first 10 autocorrelation coefficients of differenced series - diff1_acf10 = np.sum((acfdiff1x[1:11]) ** 2) if size_x > 10 else np.nan - # first autocorrelation coefficient of twice-differenced series - diff2_acf1 = acfdiff2x[1] - # Sum of squared of first 10 autocorrelation coefficients of twice-differenced series - diff2_acf10 = np.sum((acfdiff2x[1:11]) ** 2) if size_x > 11 else np.nan - - output = { - 'x_acf1': acf_1, - 'x_acf10': sum_of_sq_acf10, - 'diff1_acf1': diff1_acf1, - 'diff1_acf10': diff1_acf10, - 'diff2_acf1': diff2_acf1, - 'diff2_acf10': diff2_acf10 - } - - if m > 1: - output['seas_acf1'] = acfx[m] if len(acfx) > m else np.nan - - return output - -def arch_stat(x: np.array, freq: int = 1, - lags: int = 12, demean: bool = True) -> Dict[str, float]: - """Arch model features. - - Parameters - ---------- - x: numpy array - The time series. 
- freq: int - Frequency of the time series - - Returns - ------- - dict - 'arch_lm': R^2 value of an autoregressive model of order lags applied to x**2. - """ - if len(x) <= lags + 1: - return {'arch_lm': np.nan} - if demean: - x = x - np.mean(x) - - size_x = len(x) - mat = embed(x ** 2, lags + 1) - X = mat[:, 1:] - y = np.vstack(mat[:, 0]) - - try: - r_squared = LinearRegression().fit(X, y).score(X, y) - except: - r_squared = np.nan - - return {'arch_lm': r_squared} - -def count_entropy(x: np.array, freq: int = 1) -> Dict[str, float]: - """Count entropy. - - Parameters - ---------- - x: numpy array - The time series. - freq: int - Frequency of the time series - - Returns - ------- - dict - 'count_entropy': Entropy using only positive data. - """ - entropy = x[x > 0] * np.log(x[x > 0]) - entropy = -entropy.sum() - - return {'count_entropy': entropy} - -def crossing_points(x: np.array, freq: int = 1) -> Dict[str, float]: - """Crossing points. - - Parameters - ---------- - x: numpy array - The time series. - freq: int - Frequency of the time series - - Returns - ------- - dict - 'crossing_points': Number of times that x crosses the median. - """ - midline = np.median(x) - ab = x <= midline - lenx = len(x) - p1 = ab[:(lenx - 1)] - p2 = ab[1:] - cross = (p1 & (~p2)) | (p2 & (~p1)) - - return {'crossing_points': cross.sum()} - -def entropy(x: np.array, freq: int = 1, base: float = e) -> Dict[str, float]: - """Calculates sample entropy. - - Parameters - ---------- - x: numpy array - The time series. - freq: int - Frequency of the time series - - Returns - ------- - dict - 'entropy': Wrapper of the function spectral_entropy. - """ - try: - with np.errstate(divide='ignore'): - entropy = spectral_entropy(x, 1, normalize=True) - except: - entropy = np.nan - - return {'entropy': entropy} - -def flat_spots(x: np.array, freq: int = 1) -> Dict[str, float]: - """Flat spots. - - Parameters - ---------- - x: numpy array - The time series. 
- freq: int - Frequency of the time series - - Returns - ------- - dict - 'flat_spots': Number of flat spots in x. - """ - try: - cutx = pd.cut(x, bins=10, include_lowest=True, labels=False) + 1 - except: - return {'flat_spots': np.nan} - - rlex = np.array([sum(1 for i in g) for k,g in groupby(cutx)]).max() - - return {'flat_spots': rlex} - -def frequency(x: np.array, freq: int = 1) -> Dict[str, float]: - """Frequency. - - Parameters - ---------- - x: numpy array - The time series. - freq: int - Frequency of the time series - - Returns - ------- - dict - 'frequency': Wrapper of freq. - """ - - return {'frequency': freq} - -def guerrero(x: np.array, freq: int = 1, - lower: int = -1, upper: int = 2) -> Dict[str, float]: - """Applies Guerrero's (1993) method to select the lambda which minimises the - coefficient of variation for subseries of x. - - Parameters - ---------- - x: numpy array - The time series. - freq: int - Frequency of the time series. - lower: float - The lower bound for lambda. - upper: float - The upper bound for lambda. - - Returns - ------- - dict - 'guerrero': Minimum coefficient of variation for subseries of x. - - References - ---------- - [1] Guerrero, V.M. (1993) Time-series analysis supported by power transformations. - Journal of Forecasting, 12, 37–48. - """ - func_to_min = lambda lambda_par: lambda_coef_var(lambda_par, x=x, period=freq) - - min_ = minimize_scalar(func_to_min, bounds=[lower, upper]) - min_ = min_['fun'] - - return {'guerrero': min_} - -def heterogeneity(x: np.array, freq: int = 1) -> Dict[str, float]: - """Heterogeneity. - - Parameters - ---------- - x: numpy array - The time series. 
- freq: int - Frequency of the time series - - Returns - ------- - dict - 'arch_acf': Sum of squares of the first 12 autocorrelations of the - residuals of the AR model applied to x - 'garch_acf': Sum of squares of the first 12 autocorrelations of the - residuals of the GARCH model applied to x - 'arch_r2': Function arch_stat applied to the residuals of the - AR model applied to x. - 'garch_r2': Function arch_stat applied to the residuals of the GARCH - model applied to x. - """ - m = freq - - size_x = len(x) - order_ar = min(size_x - 1, np.floor(10 * np.log10(size_x))) - order_ar = int(order_ar) - - try: - x_whitened = AR(x).fit(maxlag=order_ar, ic='aic', trend='c').resid - except: - try: - x_whitened = AR(x).fit(maxlag=order_ar, ic='aic', trend='nc').resid - except: - output = { - 'arch_acf': np.nan, - 'garch_acf': np.nan, - 'arch_r2': np.nan, - 'garch_r2': np.nan - } - - return output - # arch and box test - x_archtest = arch_stat(x_whitened, m)['arch_lm'] - LBstat = (acf(x_whitened ** 2, nlags=12, fft=False)[1:] ** 2).sum() - #Fit garch model - garch_fit = arch_model(x_whitened, vol='GARCH', rescale=False).fit(disp='off') - # compare arch test before and after fitting garch - garch_fit_std = garch_fit.resid - x_garch_archtest = arch_stat(garch_fit_std, m)['arch_lm'] - # compare Box test of squared residuals before and after fittig.garch - LBstat2 = (acf(garch_fit_std ** 2, nlags=12, fft=False)[1:] ** 2).sum() - - output = { - 'arch_acf': LBstat, - 'garch_acf': LBstat2, - 'arch_r2': x_archtest, - 'garch_r2': x_garch_archtest - } - - return output - -def holt_parameters(x: np.array, freq: int = 1) -> Dict[str, float]: - """Fitted parameters of a Holt model. - - Parameters - ---------- - x: numpy array - The time series. - freq: int - Frequency of the time series - - Returns - ------- - dict - 'alpha': Level paramater of the Holt model. - 'beta': Trend parameter of the Hold model. 
- """ - try : - fit = ExponentialSmoothing(x, trend='add', seasonal=None).fit() - params = { - 'alpha': fit.params['smoothing_level'], - 'beta': fit.params['smoothing_trend'] - } - except: - params = { - 'alpha': np.nan, - 'beta': np.nan - } - - return params - -def hurst(x: np.array, freq: int = 1) -> Dict[str, float]: - """Hurst index. - - Parameters - ---------- - x: numpy array - The time series. - freq: int - Frequency of the time series - - Returns - ------- - dict - 'hurst': Hurst exponent. - """ - try: - hurst_index = hurst_exponent(x) - except: - hurst_index = np.nan - - return {'hurst': hurst_index} - -def hw_parameters(x: np.array, freq: int = 1) -> Dict[str, float]: - """Fitted parameters of a Holt-Winters model. - - Parameters - ---------- - x: numpy array - The time series. - freq: int - Frequency of the time series - - Returns - ------- - dict - 'hw_alpha': Level parameter of the HW model. - 'hw_beta': Trend parameter of the HW model. - 'hw_gamma': Seasonal parameter of the HW model. - """ - try: - fit = ExponentialSmoothing(x, seasonal_periods=freq, trend='add', seasonal='add').fit() - params = { - 'hw_alpha': fit.params['smoothing_level'], - 'hw_beta': fit.params['smoothing_trend'], - 'hw_gamma': fit.params['smoothing_seasonal'] - } - except: - params = { - 'hw_alpha': np.nan, - 'hw_beta': np.nan, - 'hw_gamma': np.nan - } - - return params - -def intervals(x: np.array, freq: int = 1) -> Dict[str, float]: - """Intervals with demand. - - Parameters - ---------- - x: numpy array - The time series. - freq: int - Frequency of the time series - - Returns - ------- - dict - 'intervals_mean': Mean of intervals with positive values. - 'intervals_sd': SD of intervals with positive values. 
- """ - x[x > 0] = 1 - - y = [sum(val) for keys, val in groupby(x, key=lambda k: k != 0) if keys != 0] - y = np.array(y) - - return {'intervals_mean': np.mean(y), 'intervals_sd': np.std(y, ddof=1)} - -def lumpiness(x: np.array, freq: int = 1) -> Dict[str, float]: - """lumpiness. - - Parameters - ---------- - x: numpy array - The time series. - freq: int - Frequency of the time series - - Returns - ------- - dict - 'lumpiness': Variance of the variances of tiled windows. - """ - if freq == 1: - width = 10 - else: - width = freq - - nr = len(x) - lo = np.arange(0, nr, width) - up = lo + width - nsegs = nr / width - varx = [np.nanvar(x[lo[idx]:up[idx]], ddof=1) for idx in np.arange(int(nsegs))] - - if len(x) < 2 * width: - lumpiness = 0 - else: - lumpiness = np.nanvar(varx, ddof=1) - - return {'lumpiness': lumpiness} - -def nonlinearity(x: np.array, freq: int = 1) -> Dict[str, float]: - """Nonlinearity. - - Parameters - ---------- - x: numpy array - The time series. - freq: int - Frequency of the time series - - Returns - ------- - dict - 'nonlinearity': 10 t**2/len(x) where t is the statistic used in - Terasvirta's test. - """ - try: - test = terasvirta_test(x) - test = 10 * test / len(x) - except: - test = np.nan - - return {'nonlinearity': test} - -def pacf_features(x: np.array, freq: int = 1) -> Dict[str, float]: - """Calculates partial autocorrelation function features. - - Parameters - ---------- - x: numpy array - The time series. - freq: int - Frequency of the time series - - Returns - ------- - dict - 'x_pacf5': Sum of squares of the first 5 partial autocorrelation - coefficients. - 'diff1x_pacf5': Sum of squares of the first 5 partial autocorrelation - coefficients of differenced series. - 'diff2x_pacf5': Sum of squares of the first 5 partial autocorrelation - coefficients of twice-differenced series. - - Only for seasonal data (freq > 1). - 'seas_pacf': Partial autocorrelation - coefficient at the first seasonal lag. 
- """ - m = freq - - nlags_ = max(m, 5) - - if len(x) > 1: - try: - pacfx = pacf(x, nlags=nlags_, method='ldb') - except: - pacfx = np.nan - else: - pacfx = np.nan - # Sum of first 6 PACs squared - if len(x) > 5 and not np.all(np.isnan(pacfx)): - pacf_5 = np.sum(pacfx[1:6] ** 2) - else: - pacf_5 = np.nan - # Sum of first 5 PACs of difference series squared - if len(x) > 6: - try: - diff1_pacf = pacf(np.diff(x, n=1), nlags=5, method='ldb')[1:6] - diff1_pacf_5 = np.sum(diff1_pacf ** 2) - except: - diff1_pacf_5 = np.nan - else: - diff1_pacf_5 = np.nan - # Sum of first 5 PACs of twice differenced series squared - if len(x) > 7: - try: - diff2_pacf = pacf(np.diff(x, n = 2), nlags = 5, method='ldb')[1:6] - diff2_pacf_5 = np.sum(diff2_pacf ** 2) - except: - diff2_pacf_5 = np.nan - else: - diff2_pacf_5 = np.nan - - output = { - 'x_pacf5': pacf_5, - 'diff1x_pacf5': diff1_pacf_5, - 'diff2x_pacf5': diff2_pacf_5 - } - - if m > 1: - try: - output['seas_pacf'] = pacfx[m] if len(pacfx) > m else np.nan - except: - output['seas_pacf'] = np.nan - - return output - -def series_length(x: np.array, freq: int = 1) -> Dict[str, float]: - """Series length. - - Parameters - ---------- - x: numpy array - The time series. - freq: int - Frequency of the time series - - Returns - ------- - dict - 'series_length': Wrapper of len(x). - """ - - return {'series_length': len(x)} - -def sparsity(x: np.array, freq: int = 1) -> Dict[str, float]: - """Sparsity. - - Parameters - ---------- - x: numpy array - The time series. - freq: int - Frequency of the time series - - Returns - ------- - dict - 'sparsity': Average obs with zero values. - """ - - return {'sparsity': np.mean(x == 0)} - -def stability(x: np.array, freq: int = 1) -> Dict[str, float]: - """Stability. - - Parameters - ---------- - x: numpy array - The time series. - freq: int - Frequency of the time series - - Returns - ------- - dict - 'stability': Variance of the means of tiled windows. 
- """ - if freq == 1: - width = 10 - else: - width = freq - - nr = len(x) - lo = np.arange(0, nr, width) - up = lo + width - nsegs = nr / width - meanx = [np.nanmean(x[lo[idx]:up[idx]]) for idx in np.arange(int(nsegs))] - - if len(x) < 2 * width: - stability = 0 - else: - stability = np.nanvar(meanx, ddof=1) - - return {'stability': stability} - -def stl_features(x: np.array, freq: int = 1) -> Dict[str, float]: - """Calculates seasonal trend using loess decomposition. - - Parameters - ---------- - x: numpy array - The time series. - freq: int - Frequency of the time series - - Returns - ------- - dict - 'nperiods': Number of seasonal periods in x. - 'seasonal_period': Frequency of the time series. - 'trend': Strength of trend. - 'spike': Measures "spikiness" of x. - 'linearity': Linearity of x based on the coefficients of an - orthogonal quadratic regression. - 'curvature': Curvature of x based on the coefficients of an - orthogonal quadratic regression. - 'e_acf1': acfremainder['x_acf1'] - 'e_acf10': acfremainder['x_acf10'] - - Only for sesonal data (freq > 0). - 'seasonal_strength': Strength of seasonality. - 'peak': Strength of peaks. - 'trough': Strength of trough. 
- """ - m = freq - nperiods = int(m > 1) - # STL fits - if m > 1: - try: - stlfit = STL(x, m, 13).fit() - except: - output = { - 'nperiods': nperiods, - 'seasonal_period': m, - 'trend': np.nan, - 'spike': np.nan, - 'linearity': np.nan, - 'curvature': np.nan, - 'e_acf1': np.nan, - 'e_acf10': np.nan, - 'seasonal_strength': np.nan, - 'peak': np.nan, - 'trough': np.nan - } - - return output - - trend0 = stlfit.trend - remainder = stlfit.resid - seasonal = stlfit.seasonal - else: - deseas = x - t = np.arange(len(x)) + 1 - try: - trend0 = SuperSmoother().fit(t, deseas).predict(t) - except: - output = { - 'nperiods': nperiods, - 'seasonal_period': m, - 'trend': np.nan, - 'spike': np.nan, - 'linearity': np.nan, - 'curvature': np.nan, - 'e_acf1': np.nan, - 'e_acf10': np.nan - } - - return output - - remainder = deseas - trend0 - seasonal = np.zeros(len(x)) - # De-trended and de-seasonalized data - detrend = x - trend0 - deseason = x - seasonal - fits = x - remainder - # Summay stats - n = len(x) - varx = np.nanvar(x, ddof=1) - vare = np.nanvar(remainder, ddof=1) - vardetrend = np.nanvar(detrend, ddof=1) - vardeseason = np.nanvar(deseason, ddof=1) - #Measure of trend strength - if varx < np.finfo(float).eps: - trend = 0 - elif (vardeseason/varx < 1e-10): - trend = 0 - else: - trend = max(0, min(1, 1 - vare/vardeseason)) - # Measure of seasonal strength - if m > 1: - if varx < np.finfo(float).eps: - season = 0 - elif np.nanvar(remainder + seasonal, ddof=1) < np.finfo(float).eps: - season = 0 - else: - season = max(0, min(1, 1 - vare / np.nanvar(remainder + seasonal, ddof=1))) - - peak = (np.argmax(seasonal) + 1) % m - peak = m if peak == 0 else peak - - trough = (np.argmin(seasonal) + 1) % m - trough = m if trough == 0 else trough - # Compute measure of spikiness - d = (remainder - np.nanmean(remainder)) ** 2 - varloo = (vare * (n-1) - d) / (n - 2) - spike = np.nanvar(varloo, ddof=1) - # Compute measures of linearity and curvature - time = np.arange(n) + 1 - poly_m = 
poly(time, 2) - time_x = add_constant(poly_m) - coefs = OLS(trend0, time_x).fit().params - - try: - linearity = coefs[1] - except: - linearity = np.nan - try: - curvature = -coefs[2] - except: - curvature = np.nan - # ACF features - acfremainder = acf_features(remainder, m) - # Assemble features - output = { - 'nperiods': nperiods, - 'seasonal_period': m, - 'trend': trend, - 'spike': spike, - 'linearity': linearity, - 'curvature': curvature, - 'e_acf1': acfremainder['x_acf1'], - 'e_acf10': acfremainder['x_acf10'] - } - - if m > 1: - output['seasonal_strength'] = season - output['peak'] = peak - output['trough'] = trough - - return output - -def unitroot_kpss(x: np.array, freq: int = 1) -> Dict[str, float]: - """Unit root kpss. - - Parameters - ---------- - x: numpy array - The time series. - freq: int - Frequency of the time series - - Returns - ------- - dict - 'unitroot_kpss': Statistic for the Kwiatowski et al unit root test. - """ - n = len(x) - nlags = int(4 * (n / 100) ** (1 / 4)) - - try: - test_kpss, _, _, _ = kpss(x, nlags=nlags) - except: - test_kpss = np.nan - - return {'unitroot_kpss': test_kpss} - -def unitroot_pp(x: np.array, freq: int = 1) -> Dict[str, float]: - """Unit root pp. - - Parameters - ---------- - x: numpy array - The time series. - freq: int - Frequency of the time series - - Returns - ------- - dict - 'unitroot_pp': Statistic for the Phillips-Perron unit root test. - """ - try: - test_pp = ur_pp(x) - except: - test_pp = np.nan - - return {'unitroot_pp': test_pp} - -def statistics(x: np.array, freq: int = 1) -> Dict[str, float]: - """Computes basic statistics of x. - - Parameters - ---------- - x: numpy array - The time series. - freq: int - Frequency of the time series - - Returns - ------- - dict - 'total_sum': Total sum of the series. - 'mean': Mean value. - 'variance': variance of the time series. - 'median': Median value. - 'p2point5': 2.5 Percentile. - 'p5': 5 percentile. - 'p25': 25 percentile. - 'p75': 75 percentile. 
- 'p95': 95 percentile. - 'p97point5': 97.5 percentile. - 'max': Max value. - 'min': Min value. - """ - res = dict( - total_sum=np.sum(x), - mean=np.mean(x), - variance=np.var(x, ddof=1), - median=np.median(x), - p2point5=np.quantile(x, q=0.025), - p5=np.quantile(x, q=0.05), - p25=np.quantile(x, q=0.25), - p75=np.quantile(x, q=0.75), - p95=np.quantile(x, q=0.95), - p97point5=np.quantile(x, q=0.975), - max=np.max(x), - min=np.min(x), - ) - - return res - -############################################################################### -#### MAIN FUNCTIONS ########################################################### -############################################################################### - -def _get_feats(index, - ts, - freq, - scale = True, - features = [acf_features, arch_stat, crossing_points, - entropy, flat_spots, heterogeneity, holt_parameters, - lumpiness, nonlinearity, pacf_features, stl_features, - stability, hw_parameters, unitroot_kpss, unitroot_pp, - series_length, hurst], - dict_freqs = FREQS): +from .features import * +from .utils import * + +# %% ../nbs/02_tsfeatures_core.ipynb 6 +def _get_feats( + index, + ts, + freq, + scale=True, + features=[ + acf_features, + arch_stat, + crossing_points, + entropy, + flat_spots, + heterogeneity, + holt_parameters, + lumpiness, + nonlinearity, + pacf_features, + stl_features, + stability, + hw_parameters, + unitroot_kpss, + unitroot_pp, + series_length, + hurst, + ], + dict_freqs=FREQS, +): + print("dict_freq") if freq is None: - inf_freq = pd.infer_freq(ts['ds']) + inf_freq = pd.infer_freq(ts["ds"]) if inf_freq is None: raise Exception( - 'Failed to infer frequency from the `ds` column, ' - 'please provide the frequency using the `freq` argument.' + "Failed to infer frequency from the `ds` column, " + "please provide the frequency using the `freq` argument." ) freq = dict_freqs.get(inf_freq) if freq is None: raise Exception( - 'Error trying to convert infered frequency from the `ds` column ' - 'to integer. 
Please provide a dictionary with that frequency ' - 'as key and the integer frequency as value. ' - f'Infered frequency: {inf_freq}' + "Error trying to convert infered frequency from the `ds` column " + "to integer. Please provide a dictionary with that frequency " + "as key and the integer frequency as value. " + f"Infered frequency: {inf_freq}" ) - if isinstance(ts, pd.DataFrame): - assert 'y' in ts.columns - ts = ts['y'].values + assert "y" in ts.columns + ts = ts["y"].values if isinstance(ts, pd.Series): ts = ts.values @@ -923,21 +80,39 @@ def _get_feats(index, if scale: ts = scalets(ts) - c_map = ChainMap(*[dict_feat for dict_feat in [func(ts, freq) for func in features]]) - - return pd.DataFrame(dict(c_map), index = [index]) + c_map = ChainMap( + *[dict_feat for dict_feat in [func(ts, freq) for func in features]] + ) -def tsfeatures(ts: pd.DataFrame, - freq: Optional[int] = None, - features: List[Callable] = [acf_features, arch_stat, crossing_points, - entropy, flat_spots, heterogeneity, - holt_parameters, lumpiness, nonlinearity, - pacf_features, stl_features, stability, - hw_parameters, unitroot_kpss, unitroot_pp, - series_length, hurst], - dict_freqs: Dict[str, int] = FREQS, - scale: bool = True, - threads: Optional[int] = None) -> pd.DataFrame: + return pd.DataFrame(dict(c_map), index=[index]) + +# %% ../nbs/02_tsfeatures_core.ipynb 7 +def tsfeatures( + ts: pd.DataFrame, + freq: Optional[int] = None, + features: List[Callable] = [ + acf_features, + arch_stat, + crossing_points, + entropy, + flat_spots, + heterogeneity, + holt_parameters, + lumpiness, + nonlinearity, + pacf_features, + stl_features, + stability, + hw_parameters, + unitroot_kpss, + unitroot_pp, + series_length, + hurst, + ], + dict_freqs: Dict[str, int] = FREQS, + scale: bool = True, + threads: Optional[int] = None, +) -> pd.DataFrame: """Calculates features for time series. 
Parameters @@ -964,76 +139,14 @@ def tsfeatures(ts: pd.DataFrame, Pandas DataFrame where each column is a feature and each row a time series. """ - partial_get_feats = partial(_get_feats, freq=freq, scale=scale, - features=features, dict_freqs=dict_freqs) - - with Pool(threads) as pool: - ts_features = pool.starmap(partial_get_feats, ts.groupby('unique_id')) - - ts_features = pd.concat(ts_features).rename_axis('unique_id') - ts_features = ts_features.reset_index() - - return ts_features - -################################################################################ -#### MAIN WIDE FUNCTION ######################################################## -################################################################################ - -def _get_feats_wide(index, - ts, - scale = True, - features = [acf_features, arch_stat, crossing_points, - entropy, flat_spots, heterogeneity, holt_parameters, - lumpiness, nonlinearity, pacf_features, stl_features, - stability, hw_parameters, unitroot_kpss, unitroot_pp, - series_length, hurst]): - seasonality = ts['seasonality'].item() - y = ts['y'].item() - y = np.array(y) - - if scale: - y = scalets(y) - - c_map = ChainMap(*[dict_feat for dict_feat in [func(y, seasonality) for func in features]]) - - return pd.DataFrame(dict(c_map), index = [index]) - -def tsfeatures_wide(ts: pd.DataFrame, - features: List[Callable] = [acf_features, arch_stat, crossing_points, - entropy, flat_spots, heterogeneity, - holt_parameters, lumpiness, nonlinearity, - pacf_features, stl_features, stability, - hw_parameters, unitroot_kpss, unitroot_pp, - series_length, hurst], - scale: bool = True, - threads: Optional[int] = None) -> pd.DataFrame: - """Calculates features for time series. - - Parameters - ---------- - ts: pandas df - Pandas DataFrame with columns ['unique_id', 'seasonality', 'y']. - Wide panel of time series. - features: iterable - Iterable of features functions. - scale: bool - Whether (mean-std)scale data. 
- threads: int - Number of threads to use. Use None (default) for parallel processing. - - Returns - ------- - pandas df - Pandas DataFrame where each column is a feature and each row - a time series. - """ - partial_get_feats = partial(_get_feats_wide, scale=scale, - features=features) + partial_get_feats = partial( + _get_feats, freq=freq, scale=scale, features=features, dict_freqs=dict_freqs + ) with Pool(threads) as pool: - ts_features = pool.starmap(partial_get_feats, ts.groupby('unique_id')) + ts_features = pool.starmap(partial_get_feats, ts.groupby("unique_id")) - ts_features = pd.concat(ts_features).rename_axis('unique_id') + ts_features = pd.concat(ts_features).rename_axis("unique_id") ts_features = ts_features.reset_index() return ts_features diff --git a/tsfeatures/tsfeatures_r.py b/tsfeatures/tsfeatures_r.py deleted file mode 100644 index 669f4cf..0000000 --- a/tsfeatures/tsfeatures_r.py +++ /dev/null @@ -1,155 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -from typing import List - -import pandas as pd -import rpy2.robjects as robjects -from rpy2.robjects import pandas2ri - -def tsfeatures_r(ts: pd.DataFrame, - freq: int, - features: List[str] = ["length", "acf_features", "arch_stat", - "crossing_points", "entropy", "flat_spots", - "heterogeneity", "holt_parameters", - "hurst", "hw_parameters", "lumpiness", - "nonlinearity", "pacf_features", "stability", - "stl_features", "unitroot_kpss", "unitroot_pp"], - **kwargs) -> pd.DataFrame: - """tsfeatures wrapper using r. - - Parameters - ---------- - ts: pandas df - Pandas DataFrame with columns ['unique_id', 'ds', 'y']. - Long panel of time series. - freq: int - Frequency of the time series. - features: List[str] - String list of features to calculate. - **kwargs: - Arguments used by the original tsfeatures function. 
- - References - ---------- - https://pkg.robjhyndman.com/tsfeatures/reference/tsfeatures.html - """ - rstring = """ - function(df, freq, features, ...){ - suppressMessages(library(data.table)) - suppressMessages(library(tsfeatures)) - - dt <- as.data.table(df) - setkey(dt, unique_id) - - series_list <- split(dt, by = "unique_id", keep.by = FALSE) - series_list <- lapply(series_list, - function(serie) serie[, ts(y, frequency = freq)]) - - if("hw_parameters" %in% features){ - features <- setdiff(features, "hw_parameters") - - if(length(features)>0){ - hw_series_features <- suppressMessages(tsfeatures(series_list, "hw_parameters", ...)) - names(hw_series_features) <- paste0("hw_", names(hw_series_features)) - - series_features <- suppressMessages(tsfeatures(series_list, features, ...)) - series_features <- cbind(series_features, hw_series_features) - } else { - series_features <- suppressMessages(tsfeatures(series_list, "hw_parameters", ...)) - names(series_features) <- paste0("hw_", names(series_features)) - } - } else { - series_features <- suppressMessages(tsfeatures(series_list, features, ...)) - } - - setDT(series_features) - - series_features[, unique_id := names(series_list)] - - } - """ - pandas2ri.activate() - rfunc = robjects.r(rstring) - - feats = rfunc(ts, freq, features, **kwargs) - pandas2ri.deactivate() - - renamer={'ARCH.LM': 'arch_lm', 'length': 'series_length'} - feats = feats.rename(columns=renamer) - - return feats - -def tsfeatures_r_wide(ts: pd.DataFrame, - features: List[str] = ["length", "acf_features", "arch_stat", - "crossing_points", "entropy", "flat_spots", - "heterogeneity", "holt_parameters", - "hurst", "hw_parameters", "lumpiness", - "nonlinearity", "pacf_features", "stability", - "stl_features", "unitroot_kpss", "unitroot_pp"], - **kwargs) -> pd.DataFrame: - """tsfeatures wrapper using r. - - Parameters - ---------- - ts: pandas df - Pandas DataFrame with columns ['unique_id', 'seasonality', 'y']. - Wide panel of time series. 
- features: List[str] - String list of features to calculate. - **kwargs: - Arguments used by the original tsfeatures function. - - References - ---------- - https://pkg.robjhyndman.com/tsfeatures/reference/tsfeatures.html - """ - rstring = """ - function(uids, seasonalities, ys, features, ...){ - suppressMessages(library(data.table)) - suppressMessages(library(tsfeatures)) - suppressMessages(library(purrr)) - - series_list <- pmap( - list(uids, seasonalities, ys), - function(uid, seasonality, y) ts(y, frequency=seasonality) - ) - names(series_list) <- uids - - if("hw_parameters" %in% features){ - features <- setdiff(features, "hw_parameters") - - if(length(features)>0){ - hw_series_features <- suppressMessages(tsfeatures(series_list, "hw_parameters", ...)) - names(hw_series_features) <- paste0("hw_", names(hw_series_features)) - - series_features <- suppressMessages(tsfeatures(series_list, features, ...)) - series_features <- cbind(series_features, hw_series_features) - } else { - series_features <- suppressMessages(tsfeatures(series_list, "hw_parameters", ...)) - names(series_features) <- paste0("hw_", names(series_features)) - } - } else { - series_features <- suppressMessages(tsfeatures(series_list, features, ...)) - } - - setDT(series_features) - - series_features[, unique_id := names(series_list)] - - } - """ - pandas2ri.activate() - rfunc = robjects.r(rstring) - - uids = ts['unique_id'].to_list() - seasonalities = ts['seasonality'].to_list() - ys = ts['y'].to_list() - - feats = rfunc(uids, seasonalities, ys, features, **kwargs) - pandas2ri.deactivate() - - renamer={'ARCH.LM': 'arch_lm', 'length': 'series_length'} - feats = feats.rename(columns=renamer) - - return feats diff --git a/tsfeatures/utils.py b/tsfeatures/utils.py index 40c8e6f..7bb7e37 100644 --- a/tsfeatures/utils.py +++ b/tsfeatures/utils.py @@ -1,27 +1,39 @@ -#!/usr/bin/env python -# coding: utf-8 +# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/00_utils.ipynb. 
+# %% auto 0 +__all__ = ['FREQS', 'WWWusage', 'USAccDeaths', 'scalets', 'poly', 'embed', 'terasvirta_test', 'hurst_exponent', 'ur_pp', + 'lambda_coef_var'] + +# %% ../nbs/00_utils.ipynb 3 import numpy as np import statsmodels.api as sm +from fastcore.test import * -from scipy.signal import periodogram, welch -np.seterr(divide='ignore', invalid='ignore') +# from scipy.signal import periodogram, welch -################################################################################ -########### GENERAL UTILS ###################################################### -################################################################################ +# %% ../nbs/00_utils.ipynb 5 +FREQS = {"H": 24, "D": 1, "M": 12, "Q": 4, "W": 1, "Y": 1} -FREQS = {'H': 24, 'D': 1, - 'M': 12, 'Q': 4, - 'W': 1, 'Y': 1} +# %% ../nbs/00_utils.ipynb 6 +def scalets(x: np.array) -> np.array: + """Mean-std scale a time series. -def scalets(x: np.array) -> float: - """Mean-std scale.""" - scaledx = (x - x.mean()) / x.std(ddof=1) + Scales the time series x by removing the mean and dividing by the standard deviation. - return scaledx + Parameters + ---------- + x : np.array + The input time series data. + Returns + ------- + np.array + The scaled time series values. + """ + return (x - x.mean()) / x.std(ddof=1) + +# %% ../nbs/00_utils.ipynb 7 def poly(x: np.array, p: int) -> np.array: """Returns or evaluates orthogonal polynomials of degree 1 to degree over the specified set of points x: @@ -38,10 +50,11 @@ def poly(x: np.array, p: int) -> np.array: ---------- https://www.rdocumentation.org/packages/stats/versions/3.6.2/topics/poly """ - X = np.transpose(np.vstack(list((x ** k for k in range(p + 1))))) + X = np.transpose(np.vstack([x**k for k in range(p + 1)])) return np.linalg.qr(X)[0][:, 1:] +# %% ../nbs/00_utils.ipynb 8 def embed(x: np.array, p: int) -> np.array: """Embeds the time series x into a low-dimensional Euclidean space. 
@@ -56,15 +69,10 @@ def embed(x: np.array, p: int) -> np.array: ---------- https://www.rdocumentation.org/packages/stats/versions/3.6.2/topics/embed """ - x = np.transpose(np.vstack(list((np.roll(x, k) for k in range(p))))) - x = x[p - 1:] - - return x - -################################################################################ -####### CUSTOM FUNCS ########################################################### -################################################################################ + x = np.transpose(np.vstack([np.roll(x, k) for k in range(p)])) + return x[p - 1 :] +# %% ../nbs/00_utils.ipynb 9 def terasvirta_test(x: np.array, lag: int = 1, scale: bool = True) -> float: """Generically computes Teraesvirta's neural network test for neglected nonlinearity either for the time series x or the regression y~x. @@ -87,7 +95,8 @@ def terasvirta_test(x: np.array, lag: int = 1, scale: bool = True) -> float: ---------- https://www.rdocumentation.org/packages/tseries/versions/0.10-47/topics/terasvirta.test """ - if scale: x = scalets(x) + if scale: + x = scalets(x) size_x = len(x) y = embed(x, lag + 1) @@ -100,7 +109,7 @@ def terasvirta_test(x: np.array, lag: int = 1, scale: bool = True) -> float: ols = sm.OLS(y, X).fit() u = ols.resid - ssr0 = (u ** 2).sum() + ssr0 = (u**2).sum() X_nn_list = [] @@ -122,12 +131,13 @@ def terasvirta_test(x: np.array, lag: int = 1, scale: bool = True) -> float: ols_nn = sm.OLS(u, X_nn).fit() v = ols_nn.resid - ssr = (v ** 2).sum() + ssr = (v**2).sum() stat = size_x * np.log(ssr0 / ssr) return stat +# %% ../nbs/00_utils.ipynb 10 def hurst_exponent(x: np.array) -> float: """Computes hurst exponent. 
@@ -146,12 +156,10 @@ def hurst_exponent(x: np.array) -> float: y = x.cumsum() # marginally more efficient than: np.cumsum(sig) mean_t = y / t # running mean - s_t = np.sqrt( - np.array([np.mean((x[:i + 1] - mean_t[i]) ** 2) for i in range(n)]) - ) - r_t = np.array([np.ptp(y[:i + 1] - t[:i + 1] * mean_t[i]) for i in range(n)]) + s_t = np.sqrt(np.array([np.mean((x[: i + 1] - mean_t[i]) ** 2) for i in range(n)])) + r_t = np.array([np.ptp(y[: i + 1] - t[: i + 1] * mean_t[i]) for i in range(n)]) - with np.errstate(invalid='ignore'): + with np.errstate(invalid="ignore"): r_s = r_t / s_t r_s = np.log(r_s)[1:] @@ -161,6 +169,7 @@ def hurst_exponent(x: np.array) -> float: return hurst_exponent +# %% ../nbs/00_utils.ipynb 11 def ur_pp(x: np.array) -> float: """Performs the Phillips and Perron unit root test. @@ -179,7 +188,7 @@ def ur_pp(x: np.array) -> float: lmax, _ = divmod(lmax, 1) lmax = int(lmax) - y, y_l1 = x[1:], x[:n - 1] + y, y_l1 = x[1:], x[: n - 1] n -= 1 @@ -187,16 +196,16 @@ def ur_pp(x: np.array) -> float: model = sm.OLS(y, y_l1).fit() my_tstat, res = model.tvalues[0], model.resid - s = 1 / (n * np.sum(res ** 2)) - myybar = (1 / n ** 2) * (((y - y.mean()) ** 2).sum()) - myy = (1 / n ** 2) * ((y ** 2).sum()) + s = 1 / (n * np.sum(res**2)) + myybar = (1 / n**2) * (((y - y.mean()) ** 2).sum()) + myy = (1 / n**2) * ((y**2).sum()) my = (n ** (-3 / 2)) * (y.sum()) idx = np.arange(lmax) coprods = [] for i in idx: - first_del = res[i + 1:] - sec_del = res[:n - i - 1] + first_del = res[i + 1 :] + sec_del = res[: n - i - 1] prod = first_del * sec_del coprods.append(prod.sum()) coprods = np.array(coprods) @@ -212,6 +221,7 @@ def ur_pp(x: np.array) -> float: return test_stat +# %% ../nbs/00_utils.ipynb 12 def lambda_coef_var(lambda_par: float, x: np.array, period: int = 2): """Calculates coefficient of variation for subseries of x. 
@@ -247,22 +257,181 @@ def lambda_coef_var(lambda_par: float, x: np.array, period: int = 2): return value -################################################################################ -####### TS ##################################################################### -################################################################################ - -WWWusage = [88,84,85,85,84,85,83,85,88,89,91,99,104,112,126, - 138,146,151,150,148,147,149,143,132,131,139,147,150, - 148,145,140,134,131,131,129,126,126,132,137,140,142,150,159, - 167,170,171,172,172,174,175,172,172,174,174,169,165,156,142, - 131,121,112,104,102,99,99,95,88,84,84,87,89,88,85,86,89,91, - 91,94,101,110,121,135,145,149,156,165,171,175,177, - 182,193,204,208,210,215,222,228,226,222,220] - -USAccDeaths = [9007,8106,8928,9137,10017,10826,11317,10744,9713,9938,9161, - 8927,7750,6981,8038,8422,8714,9512,10120,9823,8743,9129,8710, - 8680,8162,7306,8124,7870,9387,9556,10093,9620,8285,8466,8160, - 8034,7717,7461,7767,7925,8623,8945,10078,9179,8037,8488,7874, - 8647,7792,6957,7726,8106,8890,9299,10625,9302,8314, - 8850,8265,8796,7836,6892,7791,8192,9115,9434,10484, - 9827,9110,9070,8633,9240] +# %% ../nbs/00_utils.ipynb 14 +WWWusage = [ + 88, + 84, + 85, + 85, + 84, + 85, + 83, + 85, + 88, + 89, + 91, + 99, + 104, + 112, + 126, + 138, + 146, + 151, + 150, + 148, + 147, + 149, + 143, + 132, + 131, + 139, + 147, + 150, + 148, + 145, + 140, + 134, + 131, + 131, + 129, + 126, + 126, + 132, + 137, + 140, + 142, + 150, + 159, + 167, + 170, + 171, + 172, + 172, + 174, + 175, + 172, + 172, + 174, + 174, + 169, + 165, + 156, + 142, + 131, + 121, + 112, + 104, + 102, + 99, + 99, + 95, + 88, + 84, + 84, + 87, + 89, + 88, + 85, + 86, + 89, + 91, + 91, + 94, + 101, + 110, + 121, + 135, + 145, + 149, + 156, + 165, + 171, + 175, + 177, + 182, + 193, + 204, + 208, + 210, + 215, + 222, + 228, + 226, + 222, + 220, +] + +USAccDeaths = [ + 9007, + 8106, + 8928, + 9137, + 10017, + 10826, + 11317, + 10744, + 9713, + 9938, + 9161, + 
8927, + 7750, + 6981, + 8038, + 8422, + 8714, + 9512, + 10120, + 9823, + 8743, + 9129, + 8710, + 8680, + 8162, + 7306, + 8124, + 7870, + 9387, + 9556, + 10093, + 9620, + 8285, + 8466, + 8160, + 8034, + 7717, + 7461, + 7767, + 7925, + 8623, + 8945, + 10078, + 9179, + 8037, + 8488, + 7874, + 8647, + 7792, + 6957, + 7726, + 8106, + 8890, + 9299, + 10625, + 9302, + 8314, + 8850, + 8265, + 8796, + 7836, + 6892, + 7791, + 8192, + 9115, + 9434, + 10484, + 9827, + 9110, + 9070, + 8633, + 9240, +] From 61112fce6dd3e367002bd551afa9240fa3b4a668 Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Wed, 10 Jan 2024 17:02:42 +0100 Subject: [PATCH 02/52] remove pre-commit and .ruff --- .pre-commit-config.yaml | 34 --------------------- .ruff.toml | 66 ----------------------------------------- 2 files changed, 100 deletions(-) delete mode 100644 .pre-commit-config.yaml delete mode 100644 .ruff.toml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml deleted file mode 100644 index 1d115a9..0000000 --- a/.pre-commit-config.yaml +++ /dev/null @@ -1,34 +0,0 @@ -# Configures pre-commit hooks for this repository -repos: - - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 - hooks: - - id: trailing-whitespace - - id: end-of-file-fixer - - id: check-yaml - - id: check-added-large-files - - # specific nbdev hooks, cleans notebooks - - repo: https://github.com/fastai/nbdev - rev: 2.3.13 - hooks: - - id: nbdev_clean - - - repo: https://github.com/astral-sh/ruff-pre-commit - # Ruff version. 
- rev: v0.1.10 - hooks: - # run the linter with the .ruff.toml file for config - # fix all fixable errors - # - id: ruff - # args: [".", "--config", ".ruff.toml", "--fix"] - - # # run the formatter with the .ruff.toml file for config - - id: ruff-format - args: [".", "--config", ".ruff.toml"] - - # export the relevant files as modified by ruff - - repo: https://github.com/fastai/nbdev - rev: 2.3.13 - hooks: - - id: nbdev_export diff --git a/.ruff.toml b/.ruff.toml deleted file mode 100644 index 99a99d3..0000000 --- a/.ruff.toml +++ /dev/null @@ -1,66 +0,0 @@ -# Exclude a variety of commonly ignored directories. -exclude = [ - ".bzr", - ".direnv", - ".eggs", - ".git", - ".git-rewrite", - ".hg", - ".mypy_cache", - ".nox", - ".pants.d", - ".pytype", - ".ruff_cache", - ".svn", - ".tox", - ".venv", - "__pypackages__", - "_build", - "buck-out", - "build", - "dist", - "node_modules", - "venv", - # custom added below - "tsfeatures/", - "setup.py", - "_modidx.py", -] - -# extende the inclusion list to also include jupyternotebooks -extend-include = ["*.ipynb"] - - -# I -> enable isort -# N -> pep8 naming -# PD -> pandas-vet - -extend-select = ['I', 'N', 'PD'] - -# Same as Black. -line-length = 88 -indent-width = 4 - - -[lint] -# Allow fix for all enabled rules (when `--fix`) is provided. -fixable = ["ALL"] - -ignore = ["F403", "F405"] - -# Allow unused variables when underscore-prefixed. -dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" - -[format] - -# Like Black, use double quotes for strings. -quote-style = "double" - -# Like Black, indent with spaces, rather than tabs. -indent-style = "space" - -# Like Black, respect magic trailing commas. -skip-magic-trailing-comma = false - -# Like Black, automatically detect the appropriate line ending. 
-line-ending = "auto" From a8d3b1e81dbe44c8a37615885656c182ba9a6387 Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Wed, 10 Jan 2024 17:02:49 +0100 Subject: [PATCH 03/52] update lincense --- LICENSE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LICENSE b/LICENSE index 3b106e8..c9ab762 100644 --- a/LICENSE +++ b/LICENSE @@ -186,7 +186,7 @@ same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright 2022, fastai + Copyright 2022 Nixtla Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. From 527f3d9835d5419de16fbc7e5f4963dc5f2119fa Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Wed, 10 Jan 2024 17:02:59 +0100 Subject: [PATCH 04/52] restore .github folder --- .github/ISSUE_TEMPLATE/bug-report.yml | 62 ++++++++++++++++++ .github/ISSUE_TEMPLATE/config.yml | 5 ++ .../ISSUE_TEMPLATE/documentation-issue.yml | 26 ++++++++ .github/ISSUE_TEMPLATE/feature-request.yml | 25 +++++++ .github/images/y_train.png | Bin 0 -> 54277 bytes .github/release-drafter.yml | 19 ++++++ .github/workflows/python-package.yml | 39 +++++++++++ .github/workflows/python-publish.yml | 31 +++++++++ 8 files changed, 207 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/bug-report.yml create mode 100644 .github/ISSUE_TEMPLATE/config.yml create mode 100644 .github/ISSUE_TEMPLATE/documentation-issue.yml create mode 100644 .github/ISSUE_TEMPLATE/feature-request.yml create mode 100644 .github/images/y_train.png create mode 100644 .github/release-drafter.yml create mode 100644 .github/workflows/python-package.yml create mode 100644 .github/workflows/python-publish.yml diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml new file mode 100644 index 0000000..f54ec71 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -0,0 +1,62 @@ +name: 
Bug report +title: "[] " +description: Problems and issues with code of the library +labels: [bug] +body: + - type: markdown + attributes: + value: | + Thank you for reporting the problem! + Please make sure what you are reporting is a bug with reproducible steps. To ask questions + or share ideas, please post on our [Slack community](https://join.slack.com/t/nixtlacommunity/shared_invite/zt-1h77esh5y-iL1m8N0F7qV1HmH~0KYeAQ) instead. + + - type: textarea + attributes: + label: What happened + What you expected to happen + description: Describe 1. the bug 2. expected behavior 3. useful information (e.g., logs) + placeholder: > + Please provide the context in which the problem occurred and explain what happened. Further, + please also explain why you think the behaviour is erroneous. It is extremely helpful if you can + copy and paste the fragment of logs showing the exact error messages or wrong behaviour here. + + **NOTE**: please copy and paste texts instead of taking screenshots of them for easy future search. + validations: + required: true + + - type: textarea + attributes: + label: Versions / Dependencies + description: Please specify the versions of the library, Python, OS, and other libraries that are used. + placeholder: > + Please specify the versions of dependencies. + validations: + required: true + + - type: textarea + attributes: + label: Reproduction script + description: > + Please provide a reproducible script. Providing a narrow reproduction (minimal / no external dependencies) will + help us triage and address issues in the timely manner! + placeholder: > + Please provide a short code snippet (less than 50 lines if possible) that can be copy-pasted to + reproduce the issue. The snippet should have **no external library dependencies** + (i.e., use fake or mock data / environments). + + **NOTE**: If the code snippet cannot be run by itself, the issue will be marked as "needs-repro-script" + until the repro instruction is updated. 
+ validations: + required: true + + - type: dropdown + attributes: + label: Issue Severity + description: | + How does this issue affect your experience as user? + multiple: false + options: + - "Low: It annoys or frustrates me." + - "Medium: It is a significant difficulty but I can work around it." + - "High: It blocks me from completing my task." + validations: + required: false diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000..4a9684d --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,5 @@ +blank_issues_enabled: true +contact_links: + - name: Ask a question or get support + url: https://join.slack.com/t/nixtlacommunity/shared_invite/zt-1h77esh5y-iL1m8N0F7qV1HmH~0KYeAQ + about: Ask a question or request support for using a library of the nixtlaverse diff --git a/.github/ISSUE_TEMPLATE/documentation-issue.yml b/.github/ISSUE_TEMPLATE/documentation-issue.yml new file mode 100644 index 0000000..1015b68 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/documentation-issue.yml @@ -0,0 +1,26 @@ +name: Documentation +title: "[] " +description: Report an issue with the library documentation +labels: [documentation] +body: + - type: markdown + attributes: + value: Thank you for helping us improve the library documentation! + + - type: textarea + attributes: + label: Description + description: | + Tell us about the change you'd like to see. For example, "I'd like to + see more examples of how to use `cross_validation`." + validations: + required: true + + - type: textarea + attributes: + label: Link + description: | + If the problem is related to an existing section, please add a link to + the section. 
+ validations: + required: false diff --git a/.github/ISSUE_TEMPLATE/feature-request.yml b/.github/ISSUE_TEMPLATE/feature-request.yml new file mode 100644 index 0000000..71415a6 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature-request.yml @@ -0,0 +1,25 @@ +name: Library feature request +description: Suggest an idea for a project +title: "[] " +labels: [enhancement, feature] +body: + - type: markdown + attributes: + value: | + Thank you for finding the time to propose a new feature! + We really appreciate the community efforts to improve the nixtlaverse. + + - type: textarea + attributes: + label: Description + description: A short description of your feature + + - type: textarea + attributes: + label: Use case + description: > + Describe the use case of your feature request. It will help us understand and + prioritize the feature request. + placeholder: > + Rather than telling us how you might implement this feature, try to take a + step back and describe what you are trying to achieve. 
diff --git a/.github/images/y_train.png b/.github/images/y_train.png new file mode 100644 index 0000000000000000000000000000000000000000..18e0d3c1153469bb4de597ea0cdbe2b018b929db GIT binary patch literal 54277 zcmdqIQ+Opo|EQaZZQIU76KAsGWMbQ#*v5)8v2EM-#I|kQPIl(|_TJCAJQwHrUz~N* zPj?r(R##X3>g~5fwE=e!{G|l_ z_49-_`t!Aq<0zu$sAyy2=%R0L3}R|+V`a?XU}$e_Z0+#d#_UW(*f8^5mU0-pKfZ{hP2XR}ru z?oQ|J>UdPV>sVAZ=vZ_tu7m++e@F`vBSV7&EIztct@!KqicnCo#r5=veY>;-eQ%xd z=IqcbuuBekn4+UoTBtik+mlDbkyTUHIbxM{8;cVBfVg}zWc8H{frWN^rgMqlopz>S zzWiTBa)bAtvuCTn48~^Ut`^?mWsI=Im45aD;Ex|t$&jHx`djMS7CU34 z&OO>9ntx&kQ!zO`oi_>+Bfn`OTa*4P>WLN-(Y`QnO>GLlZeV6FQsEI|lDr9Ga)h38 zf#LT?R!EY;lyvY=ouPF!?P zX;Y|e{dCXTIA(MLqr*@#S7c1r+^Fqd0J=(t>9tpm`W53Jzx@v7d@1bX(j+n*4p#s4 zI9S>Y8`SxpkHthL!yfD&AdcXW7fHaIJ^3!`tXe}Ibnpr@*OU5Aef32bm`(j-%xEa@ zTE0Rwq0S~1dF#EImGEm@NUQk${M0jAcr!)gN3$r&+hl zpJv>vx{?EH4H1M#!$DC=(VQGurxEa_OEL#>_fE`3rj&o~1G6Y#3oK&AS9bV{YxSQw zgTny{25ASEKTnpm6bg-_ZEWUtb?fzM8ue-T{(khrE#HXJ>YuAs0UNQ$`l67?ESzX) z^$E4pySwz)CeRV3OF@+;A+ty9vFpTuHY&GC4eY)sS*m`t7bT%NwW1Pz8x=Rod%3>lrs5hP_Tj$GqMILJ8B=m6Bw5keEK+sgw}3MfsBj znvGWcV)Tv#k(e@(VKS`Rmt*WZOX1j7nSH4_d_+#AayK68WHixm&ru{RzZ<*nVrP{= zY`_xxOt z55RUNYp$l;mq>Y;CK>hhr3p(*>doOjtfAH*R=Qg8`8l#N9DO zOfdiO=Z~$R#eAJ)_qk6+Wv?UQ^LZuRD#Cg3fN~t`2Be%4S6#UJY9)!(Qjki>3gYrMdiB zc8C{`<01bL56h#7iGG#l5Q^CW6h88Rm}^CG&6<$+{>BFf4_IsBpXg$q>oXINf%wlW z_ieS&J+0j~m)JXm=0v|Pwo(LK@+slu6h0SXZ;cde8XMY)h9q zzwi8gaH1E-@BP!~gwDk~h*giG0DiqY_F_|%(DSAMVwr`tY5Og8pbu?YX10#~l)bWK zafP-dZFbp89v4D3E<~e0xF#H}dE+Rx)SVE*DdE_J-zAk4rR4;BHS_nYNcurc^g5ki z>5uzu9pz^FiHv8r>0h~Z;LVV*ihyzf)E~9+Xu6uz z@1+yk`8(7bed&&9VytLKM;f0FJ-mFG)g1KkrF!r6AfuOlHRxGWZ3IG0c6Rz4lDYFw zysWlF%5ysAXP|a+&t6a$wwG49su^^B>e2-b*L4I+D(-f(;uot@1f00%Itr5-HEfvB zJ~EV29L|R&OA{NBA{sb`m+nc)?hBy}@?ff64fX?Z?|wjicl|T%N6mf{tCNKqZ%sD` z)hMGrUT)t#in8U}Q(_9>cM`@tZv=^FMLr0=iwCs&*v@Nmco&4r2~342ClnDXZJY1p znNPmwkH3lf`3H_v0UwT@>T7yMk?!0{Ym7WoWxwGK#W(R}5Bs*?5%~W8UdYVsU|N6) zG0%CDy>QTl5_`aJsWIOK5n}2EDtFZ_zFeb(u4%K&z;R9H$0TNUm#R8YztjY}-NXp! 
z>)(foZDQHy3-A%(yS<91c|B1+QFS9rErH$dIt?Da;yJHKptL}BSi&`6Mc=N_CgvAJ z-)P7Sbw`H>P*5_pTT4&QN@+|DV27k&M_7U=(Lva>3)O%UqhR7KRvy+pC)f97Lj{u_ zVjS_P(>kctR`Yau89>M8C1+IaXRlcW)ZG(Uv`P4POL?ZJl(S(f@zZDxrU$|fMbIZp@Y6j#(C`4&C)R1cC4G}-S>>a7yEw*UhdyAo8pAK^@UHH#y%Dqt z&+3+MN&>kVqq?@iIhVz76QeVxp#Va?;j;_Q!l)i1p1@_iVJC6%X07ZC0(UB?z|~r6 zIgsNY4(TOQZbMR4mDvj`)%asOLlldO7jR_dVsXK6E7)#Eyvze&dmqILsk+&^XWa2m zVyiE#bwN+%tOLzTg%|V&vx~IZ2Jdc99 z*AuChpReyGVj8$AO}^wK;cLfEW_!gau!)77A`0;7USVmN-5n5!F`n1%1e zgA|aZuhcx$DM>nwEQWU97w9T8umaO?$r+!;>MYl_#?Bj{WFz&rV}HwhpI9_y0$|1@ z=;W{3Frhx5XzgpImkn&RmQRYxk-> znGlu%(L!1#2!X6IUFrjspV|Y|%cFu!diY(L?TZQ_cm{$Ou#Rk~Z*ri=DQ@N7Kd_$% zT#y=yRq+PcezvCizp~dcF~?RDlhuMNsN&52h2F{ZPH6Yi6hGSRY^#ypU958$72NU0 z39l*3Uj^_+23GCYaM?k3SWBJGQ$E0bD9411E%i4ZqW(Fu5su|QF(W;jHqv?D)Xx7X zD%t2bdMoL!{4tx)fWLN23zzkWyu1shr7S-Se`;3zDq0*Bvwftym*g zjj~p^XfP0<-5KEteodh@W%Ft`4#O3zGUnlwPNJ_~np=^`{trFK!?m&!YR+xvU-7*Q zpA6mWqKWNaudCloD353MJ3L2p01`-0Uw+%wm?P`>m~`0EZgJZo+Z|H|awo?@ugiDF zHXr)0te~4Ha6@0|h>;EctrS52lZlxB&wrx)|9va~`|E@+$p70@AiY#sp7w24|B4gf zxz+u=*BuWEOZiWUeJBh6sf-ou-!5_AzqJH7H1oft-vr^mYT@@m{{Q$C2)1?8HvquW zh$2#7xn_l2_=J`WPx|iE#$;%S6LmmD@8m=%HZ(B308Y*6Z|Dj~N;}dlOxwAQN`GQk zMkk59}tXAMSgK3HS zX!%Sto0DW*L+7am`k#$b8_!SV`87WFgT3W-C!p{qf{u0*Fzb6r+pLq9udg z9KY#Jsih-u6TSp~HCe_QnSN^KqcvPg`I(fEKe5O2i%d`fa@G+f6=PX``}~wC zZh`5%Rs}Dx;yV7S;$w{xj`TEJ&CN<~2FR#OnlO*}9PiGme+W z`73Zk%dK(mSy_M0Rr#IwDVP)XrpNaSAnqTYPphVi( zYtPpON4ly+1qK{-vGXX$jW5VqE6OoA_8VPr0H#Jj*6x8;^O9U0<>E$7YHo`hYCw{7 zRS@(2@_havghxet7Gn==`Nsn-!UPbi(rLzNEy03n=&1403!*tfD+tmyj&(Ab!* z*Wj5b^9j$d70@EY86b0_a?EJVI#92DQEDmS09Co#%c@_%bPZ0nVvNGd1oGKb`N*97 zsWWG2!rXEb!)|Mp6wy=e?+L-z2(cOq&%=2mMGALQUJQw&I3!2wLwzH{jum1nS?G7z5dyrk6e9p0;&XYp(wonT2=-kY!g)TM^I)p%sC^o`5v>~kGG z5x+WTq+6I5eX^KP&ZAq#i8JPIZPAjlT4%65MG; zsn+b}DUQ@qhmJ&{BGW)(aj`jxMIqg!wsbNPwnUf3e0yx)dAtQi zemLyJ;)=IkPb=ZBublP;&O1Z|(FlxMuO?CK0MSX^IU5dhNo_aj7435h(F$kR4<-g; zVtzd2HYFZQhtXrCtlxCIkQU#*ebbneq~eYDbt!$ETnxI0n-YC{s8-Q*xhC3Q85o58 
z{gq;+dQa-SZ*z_G?JA`4hWTCH-}G{}UH$Z0y``?9`~B+z|Bzb^maM$^bY)FHQ@)g$frJA8{VgV}p*JE^ zRkKw-jft8c?ubKBQ++O$!?sLomJi4*QH2#Xw)mj7HgdnhWwlAF`s(Uh^Dw-*o|iGD z(t5JFgvB>K(b5ojGO1#Y1_|~18(40ALIJ|<%#C|l(yEUSh}BiD>U)SJuk)b)Qph|VhRbipqJYqX#X{Bp5Ya8{p8{Wvw z{;&ONY44T6;=t9SaU$Goh&#M8tGB|Bn?uHCVN$28g&!&v&QDFP{E_cMEm1+gE5>vy z8V`S|daa$5@afDVPA-ZV^+{5BHtnlCzv`>DL`)@mALQJ=zUo_jFlkFAd8+_h9;Q6C zmkbuo_X$@7o`Y)KTP6W(*=w3wHf+*7O-_F7|2oRG0batWE{oGP0lVhv;dYJbhIR>q z7lq=g%gEx$GD{!m4MCWc+DKV`RH8sbY@0D45^{STMsj)JbrlQMt|^J!8f@KvN3DY@ zYCdj^?>TvX7IUNu#7hi?b*dNPefJMi+?D||gzn1@tRjv3Inv&WG3h}*hwXTAM^Icl zal%}Eim?0Dw&c4%vlD1KB7P`R6zEfDw4Ve&p~xFHQru%Eb6zklJ`t_$y_ z2j)*mQEaOcnk%TX`d=jcEs^sgW2L2ng_DnF8XPcu*aGJO;0h&!k6e?b+d8l|)^C7P ztD-CMUu>|K(Jb_eRcn2lZBFU)xI%=Ck~Lk5;y+m?ST?l!f#O&N-;XGh)hJls@_Z*_dOfqM zlJ8sc_5q{lPfSA{R6@hNv8$t048erW{Iaw$XERz;OdN>Nb7ohg+w=C9IR7!UT8+LK zoQw;@ysA`>X=Z)YKfkm4?i5@^d0(xPzZwY-MkA?92mC%^Ym)ZimYulS4zZ-N1cF@3 zOWF~;HsvYbWTRnL;GUub-ecZ)z-<4{3<6$sj3nZAduM|r*_gUR>DTci@-ePYee(ur zLny4c!%15H0r6vl6#Zg^FyLohx8Lo!d`){3Q5fva5H_io<>tI|+ehzFRFNGUf7OQf z%kNt;D0Yx9kYN2%-1v~&MDu5aZA$DK%X2c&rF~zP)Ib5WJ2uq#k=q{6N?1SrFug1YONV7}u}Q7PkozRLE>Ng5U$npR z;(9+SRm-OmEjXKkw_auyVumZVyzxBo3M98c(CXgk?M zd`^eD;Hep+A6ww0RP|T0;#y23nGyYHcTmhx(IKilhed0tTp4IaE15^eFDJ1J#Oz$! 
zrkPCi_%hmT`_bVrPu!U045g|+n_Mfl51!+g5TdBOrLaZk zMUSCc_RdDO{|;i}2kDUx^r0+&BH{$Rxdwz&VMS}w=*wu0kH)Q~pUNa;orq41-*v{G zYQH;5=ACJvFXqc6yfEMIXLvpx!j^WHW$zuaCrbYy)2)f^@=fjx2JXP`Jd zx9ID#3zvPgHXMDqw_Ls_8GN88&n*f)_g8L@K5RC#uMqgtogtF}#5r2HZ}fbaoxlzO zI^)=RQN+ioONHPL_vNE2#8R@{00yh~WGM^zn3Zf+WA{2Dvc@S6zk2gv9Mj5Tq-6h# z3@%#Mif|0B)Y>SII?*CaSMwg~iN&}Red*}_?C|&t)IqJK4mJz>&FmoaPFE2VnK7i| zKoBb_P1uUvu<#>z^wC;@2xSXkspzeQii3F3ddYMzi|~&jQvVm&Yb>ztsUE3CKI2A!%?b2dvF>%t{7v*RtLJ%G=*pc(o3LJu-RcR*5> zV(O<2X&OjemWyimef0$rM??r!57oG%Ts>tVS4si3J$A`yQ}q=Ik|t;8xPgQOVxwqA z$B!Eb#7^vxH=t8m{OE?ZT4`=ou=vkb!}jSN@*V}}7_8*}0fJYW#`NqXF`PIzPKU(_ zqm$owlD9rG9ZV14ax~&ZgK!aDt?b~eYz~^ejOCiKfTDzQEOv$bA>pp}jQQQu%b54^ zKHd%pwB6(<^KWGf2dFI+zA9eXUSUgoLwYjqh>2yQq)C?B^F=+W+^*y)7pU0_xvmZs zTozxG6DZ62zNUq2Ezp)#hB76YW*VGmtmj1;r9ici9hk~bm2|FZ1j%>g_HFsDOxzEl zK&^9@gCTjv)snhz3sHvNlq-NqR+I5lnJI{w%@9wK6&I=)zob6r(&YG%qBh}; zOhw5*L_V+A(>2-O?@suj-6}AfQ>E*J1x7$eW8q%I{JgQyz*hY--sApSEEoMAif7)Z z$W3IOwC;{50xN~WFHcdOC}ZWv%&{P?a&{Bo5VliM z09RxTdvd+qZI4>2O1&Br%T7?5xX_=fxY0!+`Nop3@w-W3XJd--w2p}lBw8UKs+MDd zA`mG^r%9g&Nt{yZ z8QNa;Har`Mc%M!AmHw+C+^tK4%A+z~K3?Y?rFe!@o3gS?jbk=8=S>omp&4HzVNAYh zr-Xf-$C)(zkI}0Juu)YN>~sBqcPp9#SiK*t`Iij1hf4C_OT9fIH}cYd?Iwqh{c#5ONLe*>cZW_W z^2r%U7B_y(+1XNwd4nH+VCQzwpOxxnWNSej^VxX3uV+3?7+_j9{r$U_<`Unz1qhXY zl*~qV*aAa>^8j2N{w;zPHxYeTegC8K%n}w}kCWf7GB?-ZRS=)PUQi54#=J|aakrtF z4)}W6fSR2-a8q%p8E-RspAu7pHK0+#vuU;RA(w{}c8GDzczp!F+`3ao z?E2r>&(ECv2k2h9NFcZ{B7x4&0^zoPT1C~IrEGC)C$2x{Me)862;!rMS}11tgBrcP zH9(qb-8oW=-gtH=0{veAHwPKqJ6tPa=Kby2roFFq*P7J6U*ckhVo%BMZA&UvtnqKv zdg>bvS%`=Dry#fKIi=U!8u2*jLPyS|j6YF?%eVqJC>|HxqWsr*@ZBwB4$AYir65Kz z#kmVSfs0<<8GJQDM!Daa*dNPTJ;wr@73dJ=UzYuQhbVqa0_8MQU8FneqNB-g|Gb~x zZ4?%gYLIf1G+6O=frBYw&OggA@=a7F2%Zkl+rV?f4-HqZjN??{_}z_ zEe;If?^4~R6P(^!hG9a$xQ#Sd=2S z3ljkx-+c&xyL1Gd*F&6&V3YtstE_dUrQjYOnz(@gLj7Mep!QwLIi^e~U9i1fP+Da* z_O;FEX`nY(ftEnBgz89pW$%{Im=N`)Xy!ohHimYNg4<$8XFl!Ejd>e~p-!Z4papSIKLd zb>HNDI63ysR@FY6JSf(;XI#H2WP7B;9;3C*fTu?EX!9Y!vii@%ejklgtbvk5| 
zqAf%g4TiG$ul_M;k~_J;Wfm8=XBH8+YJTp;p9?q^@^LsIdnu zPOc8S#Pj#pQ2}!u&y)&}zSydkYBKCiT(Y^Z{U&~C#AqosVg5oa%gwA=JE01p>8Jyp zD1AS<16zJE6Gp~GLu=NDwQotWxEw0Jt;2*?N)CMpg-E9NtrhKfar@;;-?+HIm)-h> zY{Usě?#n@=DFFdZqk$Ylj$z062%ZM%GExB^1;P2EoGbT+6BLWB(&2vE7)f>N| zmGVWZ(Il1kOPugXs_l{%DXZFtxe@nO4&VFNG2zaHUf-xtn$HVx5t-fC|}%}If^O{|NNcvRa0c_KlG7!;lG>z2{8G; zNhe59nQGOBHQqn?IW^&UJX5f2%gp+Zi2D8)h5nD=(K90e&R{Cz93_bYR*JQ^7j&x) zaSqbp%~Ia>6UXNxwPQT2bJr8R1z)jXhbea(;&;-!#x8Y?S1~-b7Xx+*HdL}iuc5>c z?+u$S^TVap<+G0?R+-QKZsVc=W~TVyJStYN73ep-y`rFX*1_uBVly0e0a#=Cjqim} zt=fvToOcUpXAHa|{ajGAr~`Z?@ZOw>x}PSXIsYhWVCZA5+wEU@iB2xr2v~0~L2Q;2 z=0YlI;`-nuwW5dEj$33gS!LT!QIV7B;OJ^<%CTgbGmsXxpn4V{Y(lW}?Lv26e7x#* zGwt*_mM=UGTjDV*WmB!x3WzC!@#_Zp>lCw$w?pPAt#}tSxzQ0crPQ^K=*hF;*+ysD zW7tUXi-5272RI@u)yZ{HGU0L59?i_WMC65Q*t(cx@lzNVlQH`N)9ou$Ovhlg$(+BQd$t|e$Ntl>ui2P8=y-a zm!&?DyF@&hfK^PhJaWruxxd7vI_OIEr+Z!ku|jh!v>g_m>aTQP_5rqq0#=-MrO%#N zHWxG*A}xdRTrsg2y}|3ialjgN0B&R_XVzKhLAYGCW@+&;`b4h(YRZOKSEst=^y^@`hlAO~oE8~>n@dnSeAl5jMT(DPU4!F6$KNU>^)R*Z{L75(}1u9B~L|v^Jti?wm=3}A-u*CNs9863_<4>9cvOWzv%Uo z?9JlCrH{d3@Rp{sMtY8h0E9XYxtX=6gVKypR<`1Ilb9rqsA$Z_>$LX*RuV`8nG9_H zqaCob)Pqf?r4l`WUA&8nQJl6~p!pS<=k}>(3g(PBvN^TeIZoyY-DVfa2n}oDHvtR&BQPi^ynum-6CO}>Okfv za^recKbVfPjXc6rbf1jhUA{bxVHa^N=gRTZWX;CLF>&?QtTV(W{7P$s*26GJ{Y zqGq74O&LFznTDQ%W%V`+wJl}g#0rCA?ZhZ-)rY6OjF~JkS061+r=iJQ)(OM# zww`v7iQd_wVd$J=9%qaMtq%@bZU3wXvJnR|c`ZBK3xZ5}fQP9_Q7QSAKGr@T#YTy0 zagPK62y+iM^O89>DZoLf9&Rc{0tw}LcQ@o)GubX(qELQ=6dX}gghox;tJJz2%A*_Q z9<;%H<4+={2KXfea%4 z(N{RRivrlH%Cavn=M$%S)7|))*W+{C{v;*zn5)}In)xaXU98;Rc8krSa~&vT1k?-u zzCEU?!UzsB^HZMp^>y%yqd2~!qs@D7qkF$F(b`x<2SZ_r-un^2fCZ|HY+yS`ipG75 z;mskjKzE6EsW?mRIW)#_w$2h|t1A{s;De7m8ph*XP_+kyS@b1a@lG= z>&*k1@EWM;0$C@dd_Y0^I3(Y5l%ZLhS;Ax}8T@XhxrcvYUIw%0GW? 
zsKgHQC{=dj^8CXX_$^;fH2h|2SUe}301Z@zC(BD^;YWLi?F;KoiQAa_4Gksytz2Ad z0MivAOnOpDNy+Jg@(AB6JVZ!zvRagNd^EFsfJ8PrQNh@Lq;hDKyfa3APK~ge?XkGf z;LQ*a1o$3qh`HDwNzY*lvCMc`7>ZoTn7(FR_cdm?{O}HcccTA*l#}u7E4nb(gC==^ zu?f>?LgF$2t$Fu!Isj3{_#&|+v*U(3Z!qmec-eCD@=mvRox>*5Iv6uP!}R7q5)wi4 zl6f5nZ!@9qr@n!GUe@wyoEh~kjdR@2vlf-e37l|Djna#g8p%PU(qdq2W`lAv=4mN+ zPa!N%-hvi8+8_&L@K=;>;pt*xm>GXme&nyt{Tetjj^SaF)3tT-R193_P_!oC3Rp6$ zOHN|%uCrNF_uu_IHa=2S4Vs{(rA23QG~DtbMvC$A5QI5~)T+!qOZP6x-T6T~W%`$Y zvQj_@8cc}XmbPxQWheg_=*Vb*Ecfbfnkjm_fUb z_K)5wBvHmkY~Xg$WbF5^$KtDrEopqE%HQ2?#Y)^et9Wd+u6|-|T*r1 zqM05Z9v~@R-g=;m)4R4#7I;e6;tkttI=~ACp)*stdg_LnPz2#G9J!x&KjHqC>^zlK zMV;JZVbO{s@eb3=eor%zlVHDki-c9; zN>`_;;p1o>4dtslK0sxY71uVpVDWKZ-bUpG3JTs$>>KIBWNw&C+rePa+G-12+nG$^ z>)b9RTgvWclb&sd*5p(L=RUA5vY&yP~mPW=*n zXjPdfP=}OzKCT6&)^awMxH$0Qrgoa{wf^G*m7r<7p|Sx$^4&FMZp>Kk{?{yc7ZRL1 zYf+~U$FvADD0eYt{djnDh-DlLYCD%e?FyqyI6BuyPcM#BZFqFPmFMruL%b~e%zX7;_pY(j`8LLD9Nt$Vrd=K2R9sp%61=P2$#h-VcI`XL>ga|~D>>#-_MPvsZ924r zRw&YS0o$)Ktqbdd zJ2Z{<=X^F4T;mV2KTpPJf|51}yy(kQQ_Al%bv5Y-9 zKk&?(_^1rdqPToUaXc+qRXY5BbNBncwv&+bz3r@VixGv}frkgIcDeP`OQMq|Z?9_o zh?O{9y)4tnK0gEF@>@pu(>;hBS0ZyvLVtuwa0X~fyP4#F=_iN~a@=Qi*k5g%Xs_ac zS{sSt`ajRSd0V0;LYB8Jd_OVT>k-*`tBPFk*MEgy zaK!2brxn#5;J)JiT;=x_bk8<1adp`Fj7*Q*`Uq!vbZ`M_pO5lrmHYunQLy*+Q9Nw; zqpq>_?X=M0#?M5$MuOQ`5G|G2Z+e3pzvrlFUan@Ih`geyg*t2rp|;F)QIuKKehB6D45-+5b+k2^;&N$k%}eM2L`*Njh|;x>s0s4!Rv3GcQ0>G zhb02I-e2AunekgfLY8RcV`Pd278#l;$D2aiQY}Cw@tyZ(X9q|%&@P;oYRJD=TQ^R! 
z?;|IEDQ&<~|HJcFRkL6d@F*J51#RB5K0Y2R%HZ=6g?{CZWfqE!L-c{lh@XSHYTMBb zLpbP1$G#K}6(g88*gb@vt<`&jLuWfEPmVN?b%S51jS>?$D$C!+%Nd)BwRp4_VmWut zbP|F+WMsG@7kV~)kA0lR#4_SwR*!){t0z@ffHQpUruh10$z_jDv)|)TZCvi($Q{t5 zT-BG9M`ceu-u*ky?_E4rJS5#r#3104Hy{ZprVjN*lPTmRTt9nL3%9WluRly-#YqtO zWE3Ro)N7~%>lYCY0qk@`_I+}Qav>2BaTPe>p<LsxRMK;<$h2O{v%vE<;*EqZeF)k z4rCZGb*cmW*)3GA4UA816HIBapRR!W1|q=AzK`+-4?C#iky&xkChV*j=`z1<*M_1Tas{hW$9vTOm zO!-DN`X+Hz^|uohu7o!@;#HNd?FmZ112BDK2Mtj(u+H!7nU2$amkd84)dAycDeNf= z?fdiO7WXENffwt%_U27zrjTLY6y$5U8uZj#Ksz#W{vzOYt7|G5>w+~}a~KqNlj)tj z-W|BH2v5t3mQvpd-w*~Md4EeKunV48>&X?@(H6^7yyayX-_oIRUqy3gJ0eWWniz@=0Qrf%$1T?rM88nM*~?>P=Cbt483ukE zh(Z*<;0sPppHvw!Qw`@O$GkTHm)>D$3 zgNRbEynES5MQlY^W?zsZa_R$On~6L24(F&ED__hRZ3;{kR^8jSN_-ZSwX@tmpCN+! zkdE5HB?6l~-&$0V6cX*j7Cq`+7F|H+B`@o{vbYqGWaVT%e`76LX}{Lu zw1S)i7$m$NNmRK~!a2Y8?HlUNrv-b7=uo)$<^+#_-SW$WyqD>e6HJ#!Pe??{Sh<-z zIGb58YL40JGDh-v+9iHLpN43NVyhR-*TK9!kr(oWw%ri%nfR%cTj1~`^P+Zme2uUx}CZm=7wXF9qO7a>7 zpFjQOYowBy(}wj4PZX2nOpH5(h9WtJ`0i*h5>ZRJpHZIp(W8E{hpF!hr6vlrC8U)C zp-rP(%D`OW{;N>F!6S>Q!F|EOt-}9;!T%Xb_?A{^w!@eK0}k3fiT^*3C;$IsZx)+= z8JLZ6WI)wL;zzWXQ;!_T1{4u0$igcF^)9+i}H`S%v4EFV2N_$+Lwn} z;Fe-UNS{AvAS5r)%9AY6h2NilyfG^NCHU7lQN02HzVnqnmZVR<+kNDPvAin*gKPDr zI>k$ohtUnE(ESA}mZ72Kk26{K!nBGVKTl28g4h{8X4Hefh-7V7ik_jivb=m1t>5NiniP+cyrkratP(|eZ1*NcpPXO#R- zyYh{O%%`?59I56~$ZTlL$BywOq58i#AEtJs&(y(+caxWI_MaRj(!}lV!pF|oe`!`F zMIl@KXt5d*_dIl=uxkXvwFIllxN}UI_vh%SNe8>Oe)#AE*=;B z?>P%0ie)VZBy619Shx&VFb4z{;;`tTaSVRMV1U>LYW*h;jUxA1@)W zCiS=~;0*ix&MhTjET#%IR2k7jRj8OvJ!nGF_Bu`cE)(UfR65q8zht+u2N9I|{5@;U zv;}a2F)oJN#V^Ot=i5qv)nO{+XPyd=YQ30d8h!x+H1rhc3&JdFQ-Z}}>Y7@G*GI6h zmD-oNsi&6Pc!PA?fbvhH97D)?bWba?+3Nf-DGA(*J#eKl}sWHy`jwqv)kY=@9`bd0!`;xxXhW6 zsp$>D)m}_Uvpi)bK;WCkUmt3#k2ce_krox6nq}odLY;Cig)%4N#rjFz@R@Tjqt3>` zorf!n!+NhUt@GXO{uL^qOO&&q=5Z;|Q1?lg=WqeD6rfomja`1|U|gcPU0un; z_BXp`Wk%=_H(t{%@|55qY-SWfKRJ9ces=aOKjyHwlc=*gVsGp&>WH{R40Bas@$Cl=ra(s{3crd2(r2Z(n+%_#VT}V{fHBUJk?i zSTEDoWw3S+hm_`v`uqNh&}~Eam(k0Arctpb#6!Kwq{8_4Z0#SQG9PBH+T!XVHouZd 
zl?M0t*UEa<;M;Sg2qpc#@{HJMw#M*!Es>c2)kQ#o1iU+^#6nq)slFEuj$f};jNv_(=#a@@vWw!8;inHxVP zg;^8S<8GVp_ecYCFt*&b)O@_KTd#t!wBOv&uMK%Q!Q2OsSrs6^K2w%0uMA_C0V*Si z!xIr4cLx%qaq&UseJc+PR7(r+5cNK4I$LRouwo?FyFvbb&(?DDh*mo5G;Nl1rHF?u zB@Dpb-p)w*heq&X3*m&fXz zm97c;kKmXmvun#2PN>zSXLL?f@nVN4+_65{mWly4VQ*^Rr$pgqg|IK5zm3eJJc82i zA|Yp~`*>F6&%@l+8fQko&RQEIovFBJq@Uc``Y=T}4Kj<77{sla-DA<206>LPq>Q1xogvHD@2h?On+EJJe1!EUhKhJ+oK5>rfK?*A`i7v^Y@T@1&X_ zOm*e{AD$<=zj>SXr~cibX58icsw|P+=h0tCz3Af&X`=ZBojZ02`4-5R-*G0m>XD6vL50-e@^iP9g#Sx zmepv390d-q4y{au>2y%ou2jDt&BkdcGuq;lJSswAj|oS04uc#Z{zuKDLMzytk#AX&1SSyn%#((8&1ImSCx*11?PMvj-nbMqmJBK7(YPfHK!}spotvt$M)l2ad_ue_-_=15!edjwEKf_3d z90Y;bkXoWY0@<|eBt@?Ef}?Vd3Iw;4*aLUmN#q&*+pT=)Z(`;2;j0~WBmL)TldmZa@u(MT)ej>%p|%gY}=0ac>W*b-a zrZQ%O>fP(8=s;FGdcVmdm;eM$1=Yue1m0=CA-gfN!t_pHON z3|nS~6OG3om*mT2^&EX(k|chZQqK|+U9>b|^nd>RSnN3ah=T;9WV!Ed-0Itd7)!Qp=rVPyNnEdNXP2Nu2oU>H^q^UCoc+OQ`ee!I%X^fF6E z&hCx+B#I3C;Ru&~JX2!xjm8p6(Er_~U&;IAcoD(tH))MkXbPePi-Mc8DNQQwc>5SV z=10#UoX)1ntSrB_ANJ(8O=AWt{`2KyQ117Gu24P`F~(8VtTlcyYJl4yCvRcXxLP?i%do`_4Z5 z?lZ3XnF) z#hD#&o{oMy(@z;Q=zpX8nRZE51LR2lsZ)7Q)twhUDhClEcq}@k=S$U{UufcC6+S;?}QKxiu2 zR{;_$ZO-}ogKiPKEUFd!Ssto2U?h1ci1+QCZu{*Mi&wFny8d628kk%F{ykXq0Gi5P z0#FDJR?YFv|8<=AqqqaiXM^!FH-nrX5=P^gFO=)cnD11Taf~m5Ksh((Yr;Pgiu3Bq z#h$bLV_2NLZn9%+baHSeA6w3z4FOItUBCNWPjVV+R}2%+=Ip{926*^5SX#7LUM_YNPOkrm_#)jC zre`X2;D3~sBR_!T%>XaCAUtVfaFRE`=6|22)A$QkeLNp0HGvO6>vP7~#D~&8ntvhs zw9JF?g0yw?kGToIPyKcP_5rNI%XkWvdE3^5H@Wgc*!e;WbpXwTp)@a9ba5HQ(8(B`M}cL&3zAl(L!VliA2Gyn)BSojxAMW0?#t7H%2OVxr zR3bWw{!VgVzl>7mz%JlhVxQ`@tYA9EAjsbGIou}B;uZw$iqxmeF~Ljf>yAt$@{K5z zu$^q5Bf-v}Zqz1Y$eE4J$c?p?*u`m?-E!oB(?6&|SmpuR{35eNiT695`bAmDYC-zl zUyc3p;Qik_<7%fuRVKcs$jrhU3r{xqln(Z$^BFOw{u_1|E=>`Y+AC)-RoaNn9E6KTy;5HLNn?L4YUryj`3*C^s zUVr{)WRgRkH+9jwP%t*pGpD}IUNqqXhk)*>kJRhDG9vH+$sHvQDb!!pdFRC1H8_50 zbFbE>@trA!JgOiqz6PC@KUbLPPS{*AoikLDO?uh^mGYW;AfTc7(85JPCh%{>Ks@KL zz_UAS>%(!z0%AFHFt|Rw{Z0KQJX+pr04t%MPJll9%@!BE$six>3Y&~0fV!FLqHn6( 
zmO?pI=F6OoXCpwL&jp$4!;06$OVLa{{`pzF>}o=9hB-Yz{XzjWO!+KGU%EjwTuVZq zRa2Qaliy2J$1t`$e*o`X^5{%nkXT7!ntBu9-({vZ(ye#H@}XqWg%NB2s7y;Cv!lLbok58#^I zRuEx23guVsN?L3tKw&hkTdr`Kp5_kXWzx`C`;+-Qk&u(%4$jf;zIaVkMn{jOAUzK-TgZ!@Cd={iJBBwI!z4`q5DxQj zw%6eCnb)4NsISPdoj(NuRe1R2XxEp4Iv=gW67>n4e>q)a=n9yXx0exQjXx2xH<%{u zW1N|iXX!WbbFiMradPm~&qFx;tv>6xMy8ad6c_$`;w5m9mD3F0#1|$z}z^|*HliXmacj%FWNvqM$UFC1PwpF7`>-$Q{XwR z0=9v=_ghYPFlIWRkI-yJjG$YouI_AUl|#R^T2T^QYf4)Ipi^^cZM?Ij$n0wf+P48D z$ioj7v~QmRKf;iPp#8Tt1M;h@&&^9jKaqaN`HYB&*sP}w_w5rY1@0%(kY9$7;Xfe! z^ZNh33Dr9;-vuP*Z<&<+y8F4K{|n`hA_d1!+roI#bEV&@1POy_FEqrJoV12=zGHa+ftMxZT7k_Lq$Gkt&vm*(kQp+S8WKDT#iJNnZ6~WFH zxCh%#vVLr#eWc}+y?hBZ;AP|MjfsN8sM#g!tiuk~Xqb)>aUj&u>6otzZ(aL4T+4Hu z6Sy*18;pkGMBo@k z%WvSDXl!Uuk4feDgz0tHz5d^@yN)}C4efxc7=Hmp=1tM#H9JkFAPmAVuMj*YRU^U1 zIJ?29+ObJ88+Fy^4?W_a@my4f86R|WdD-Ax#PsbMic3v-d21-p&vz#-oXpB5q|bv| zOp|!J^9&1#WlG)`d8}Y3OppQ=e0542YIui2yfm@4TVYN##P?FZsOaVl=U_YOVW3gq zu5ln5DVYBO(W=bG>EYjFiu*pwGqp&L4(V_atqN~^J)w1}4-di|t%%afvg>{-z*Rng zbKz-&^5NDGWCo`LDmDKG2C<`Yk;e47@DDwcO`+q9Ggp0W(iOu?=#yFff=;6BUj&LA z)~pR#OF;Sp{38C=6Gp(3_K5BCNfqS&g~9xqji5M$f+}>*|L{*j+ZyJlAQH_!`V$p= z8z0Dak9Qc(3fOgKHqZqIQibElAFHC*uw|CqmtvhAFsy<{mr@*%eWUdU8u$5YbyZoY zEfQLq>+W_Lh7G%WbQ!ehm$=As#UC0u2`#ZKswtXuC5c$W$D%omkD0#->-2^Fr99=& z=pONqsI~BJUUU+yV>r^(*OQfqz@%h|$6-rhZ^_5bTcy=Z9f^#yJzZ^&sjIVzu(1^N zpfF(mQtRbLyih@>RIk62JjYYLI9;6@Wz5C}LVuS?&}cb4^_{J?m8UM~lXA4hud&T> z>F~AQZz-G{?@9m$+M)K>3KWak4Fs5u<(06OrWsQuHUdz2k6252>P(r;b*$>O!z&_x z3nz8|25C~5xBp8i63pwUt`6A3f&y4 zs_;|P!huW}5Xa>*EsU2p5p_dben(HbqQUFz@Z#w*8bwT5v#HsNThsBBEJfH3Zhav9 z;>+woVHW+@2TcBt{l6NES<#?qV0OK>n-W>@Tt_xWdw2A*yp6TW>CHExA-GKzeAYT3 z0+pBcLpNn$$#5%tl7`7C% zYG+~)BRzJy`Rg#5yOlL8>B%LHT{u)p2<)HylSQ^8zy^h|!S1mfPSl*r;WZ%3>?&ptE^lFxZasJFZ*;`TQw00Ey!G#6g|H2)k5b{&O1SR70r}y0xDwgEKNDC(m3CR>h)CK~!K2xh#AP;1~EE0V__(FuPLLleod7Ve{pIq-ptLJ%9^# z7DN{kn&*<3KlNxJdPOdih8zj_o-m}l^?oxJ_PNR~EqL_`%-_K^{u{d^3D#6sWl+F| z(TLq#KHMM#lxlw-cNT+WX!GB2kMB(5)202gb9tw?yT6KeybsmA^OJH}3c{wlr4^Gf_2L 
z=jV%7fpT2xDd9GsgbYPE_X1@|(Pt1dgBI-+@Sz(2bhBwc3;#YtTcn$(>(GKE^cR0j zfc$t%reLe9$t^RA^CN?!g}{{(a<033=kZe9nc;9TfQL1n=OMj1Q)~Z^5r0K6#TReq8`~>)6#Ie!rn*{tnu*W-{WgVf}l~DL^3HkFE)}>P~%Us6d zM}Cr3x6dic3h?Rr8ZvpYWF5DR&T-Z>4+6beb!R>qHYs+Mo`aFq11!{>b)&@>(25#S z#V{SJsz4JufRY+ic145T(>f=JGN&hPWw@qyww3)A?Ax$T5L8v_ zMu))re9hBlNj%Ek^Bsz!&ly1HRYU)TbXIcpL>K)8#9)*w*ZorARtcCpfnKM5(vEUp zh#WT`EQ9Xw4n9GQB_XsU8M^pc(V*)F?TzR4#J6kkOdblHBx5&WPj?{7yUgY=1|Kbb zZmQizolsX;M8`n#=F?!*&IS#r1>1uOv*RZ(Tu58?8T1U z1QGo7sy5vLn_bi$Z&2;{c?X_%(yto#5NRVqPmVuX=2%5$=vkusMU~2d!G>$Ill^)g zO?)K~5hCl-yxI!$(o>o6d8DoF4^!087uyi`254ya6f+o4mlju{-EHtzW)p=5h3)SK z>#?0_!2R-RU7@oDK?TR7sieB=uL;0lEVuKAJa4RZcNUmw0%->PEO5n0v?f-L5*6Qn zjiGd5Rf3V>)IxLK{APo9tyZyYsZ+*a3xR533)GRknFgec3zFPj>AJ-w`%1HwUzUC5 zKJV51JXs>>>3Ghg6R8S2a47zDq70xE5kD*Vz}H_Degi2-K-ySpK}2*vW@#qsQX(*6ukEfHDvYksKg4XW7QuyC5r@P})SLGHzCYwYx$!rX? z2KQKmJr+2T&o1u2y#9@JhgoL`gOr5DqN&LrsO~i+*#uODWk3iOX<=ubvW`x0NAdCl zK6uWS`u$gSDIIn#p#_B%{;?*7x4O5chXRyuH&JA*U;b*enK)K4U`0LUe z{5h#6K_rBmk16}W1isIM(wr=55qReoP&Egi87y2swWg-!Hn@L79$jcD1g$Fm2K{Pb zj7bx*nLv^ve^=Fp$GnI`xcMcdc)sV_f;{k8ijYIQYs!cwkW?B7g%|;(N9M6x6c~#R z%Fb}6cmJ(Dk-k`^37qam3-kJ+ffD9U4BAdEtfudIE8zKJ=O2+2;Bv~JP=lY6eKO3y zP4PHA&Y*Ca%q$Jyjz!UEwO*Updy*TmsRLNR8x-i{5b;z)160ATqH%T6c(s9``Q69Ny{HZD4R|iphVP(TmvgOqI!MWdECu&0ero$S+1NE;3 z$qasjh+@jxs2}wtNDl(1KN56-2ZNIuEKLtJ5hH6`zCPlZL|E^l)(pgwNx;6&i6YuE zHrYr%DD8cO>wos^t|{S79NocwuBBxyw41c0L=zdjRugY=jQni^sS*kvdq@+}!=oN` z6JRVyGw^W@3j2Ga`B~^T-o2KXcHrQH*-{76S&()6$6#W~L9N!fXszKwAm|AEOGkc8o>@>HU-efRKwK3T`M@=g{ z#H3w2_>O=-Lx%KpI@Qw#FdHrv#~@RLMD8j}3~ZE%r)Cc6LV)LK}6Cl;1+{MZDROv)$`8 z2><^2Pk;*Td~I==wSOmFh{Et*uo3LuK2aeA2+XN!*4`^45Tl8|{Pd)vq()z~O%P4p zTkZPJ^)3{WmZd8s;Ei#D!*%c>ruM@g^ZPD)Nc+4fgK>+b`CCKoQ3O zKi~-ZC3E#G1?Wl@g7&^s_%EUg3WU(9|0YoMKTs3P+UdualQJs)CsHorMIBoudkv+S ze55xkiien-t~p6#gpLm6k~zAij%zQj^P2BKAF3yRU5Q({cQd#0&rR2>$;(MRSOc~U z=8L-$?`;Ov2SYiw8;k>j8oo(;GNh4~#Bgd!5aV5L*YV7{F1O|gmbP4S zBu?W9ej2XB1*9NnA5VHDc7V_ZW$4QmT(KH})QoYHCfbs-&IP(C(l;J|{>lHPFQ0Z* 
z^ds!9%@yu4p{-BjQN8oh48Lv=+y7SJ{BHl}-SOwHftvH%pCIt=r{C|B-_6I<7w-QQ zl91hjCY3!~ke}VB1)2PhDpxt-VCJ!;LhAB0j}P7OL1*(k_=q8pm>P5Y&>sYJKZoyw zS7!`>ZO+)2hPUn|%%JXlX(7KWFZB+Cz=IE#l(;;xwN3{&rv6od#uPgi!AFx`kg|ml zV(@o3qf7NJRPl@K-QD@vWv00(NuOelYv`SBBh9p?+r~J&gyt*m6NG@utv1<+IC zh_FhaJ zDhqK)>t{{W^A=e$DPF`<>mL1X-Q!2GO-tl0c0njCG$C&k z2~%IQt#v5paa@}PgUWR7FO9#=I?u=Zo+-9Cs@+`u!DAS&`7YVb<0Z zwW^VF3kmwZ6AjjUTq>!~h;$%26wV$Dg!vqCdg`;r*t)FsFhL#G43CVzlhb};J39Uj z7zQ-%0UlD)ug*z>;acF4iMQo9)XqC+j0bPdh2NQ$1&uGiKIEYfpCbbyB~S?jh6F9g z;2t90&%;J9cSpD{+Hj(8bPZl=3|wj|feS}opjSet;n$-e^m9>2f8i9>uKwC*Zbkm@ zs1IQX53P`)_hL6)dnAb;J>GU<&(VY7TXy|k%bt~c!{7DJ{p@DY9`Zzk*T~J!U2`A9 z7oq>%P%7u5)#IFG?M?aQ8AHC=V?zUt@2EA3e@$#TGT`}U$2*(Q{HWIdkv-+ zOJaLoTTj(qohQRubX-3rwD~^hDzxAeUTi3@#1^jA(w__0L_O^7F2SBZ&@*}2@twT& z{supr%watEzob4G5e+S(q(s%XC9Xf9;O`N02rPFL_3_tF>MCsh8gy@+bRBv19DW7O zUqN=vg`2|U&DGlBM+~G^J^JpgR+)3m8tS$SB3%Pdpyj>d_Tv}uQD+t)O zem;YI{z#05G;u)H7QV*6(P8S)@*Me`@cf!y^)VO-+?6Nhf*hG4&xaHhp+^6k44>;? zka_olTHh~GNmc<*gOG_9K#p%r8suYdb_g+R9eLG0@h&DaAKy4G={}G!HLRU$^nZDH zBzYd30n8E{@#}t}}b9LPF;|Rh-SRM4tro2Yy(pD@9+mPU1 z!~`M}<1mzSToFb#m~E6;RkX&`VPTLb!}dbP^_U6efPvzEkefC_=oUG*E9wGtvttr( zcf2vBprPo8*Zd4UfP7cit3b`dsvY=wkoVe*VBpgft=FB$X+uSyP+N3_zE{Bm$?zBm z#L5i#VsdWjRd_x7>6;HU6Ff>4vcddUnuM|%BE!jYr;x@n1xZlfbb%$!cPjbY<+V6p zPr_+L%UqJ3?>gXk=(C%GtY)NN<$)-|Q8NJ{~8`Q!|)^rIdfp@$>bm6GDo+t7xuUX0uV_OKsh;dGaaH(5ew0naf3@rQ%v7-tWa ze5p*jAM$GI{7)hd8LTNVOW+@?^iU=Y=DH0bwaDV;yZ`y!E1jK&Mgp+h-vBhfHzR~1 zPUuvWQhgyIEs!hUT6&KVIq6YL9E64JVj|A(-&2Dk3au)juvT2U3~qqzUlb zR(>^fkwHESyt!YnE=IZUY)y=`k-*5w+~?d{`@)A!0+0+(QNx2akUs#~9d{4MEqz`N+ zIC_6#4fThCizeUh3{z+$Gk8tZ6!{g*BqH2BUC{ZVLSh(HlymLf(rGpgoqS;jGU5L&o4!~$T2KEktU_$MyH<5_ z)6fBrIj{;751OEs=V{csEK5kY!%Hwhl&odz`xSzU$2S*|-KkZdvhS3nW^{gh2)-m< zzVwqqsdAz8NeDLFtT!cRz-V`;BVD^vS~$xNn!9y#jaAi(xoz#)UEiFWMsQ90g0$8Q z{c@WrTm>Rgsj$J&D6^|w)!*?>;l6T<91Dl!Z>HB#?YC(f;hNMSn=WUgpQb$~bu@=b z?X4N7i5r391sotRu2-ds$rCOEJ(bkBU$6GG;IleepFi0u(flG1A%rh=poou1@zCX2 z&g-ls3NQV!2R}ATf8;H3Spd)AhwH6kG!KuSw)EYZN!o*_|DR1JXz?q5u@^Vyw)evB 
z04rHQZB-m&_6Taq<>|_+Dn@rms(U%3w zFHK}Q+_pZRQFP~(%+i(d!Q+B4t&h>&>L(s{BTPFK_!SOL_dT?gr73d|Z(; z%@v9q-mn^B^1I77Gs~}46oB@JVwe$jv)Q0L3i4zNkCfkK(_^D!#@o!kNNs+Z! zeR`O}n2fS-<6_5vvo-v@c#Yui!{2sK0t#sexeaw99 zb77cg)Ra4zn969=3O=$m7pRX|eQ^+`|7jl08CZq>MhFB5qBMmjGVFY9yXwm|V{ z)#1isK+sU2%c1x!BKwuBu3pF9xm_m|LRJ(#pDi!|{vkcgZl9zL*M`m`1|Tq&ts`i? zE%rC`KsXg!F+9h7+xmaeyu3J;<6K^t`97`>4ynWQo}}t~)?n;|ZaiOOLi@mJI)3Z_ zYF<4kR2#X9VG`w+4Z)E$blr;#*$y(a^H3%f59`SPYF%AcZN2Ask>5Dy4Kx(xTu0%5 z>C5+95h0-?5v?#Vzqh8|wh+)xf0J6OjT#$#VGf+hCJdEHulw`s*v~|>@y5)21G6-C zE#B8|YQlM%E+#=ZwV;6I1=PEmow|~mBb|3G!qD4RCME(Qvj2;})ObJsxX(u;j3;4A4_yxqrWD9kffloH??K*FLaalL z?Y0bvR1hluD^4y}V(L$t%)}BaZ|F(j?|Fw~@>5Ne`8DlT$lTn@e`S;u9xZ3;78W>K zWSa$y>~WD8s9X_OM@9JPn>l_XbFW<1_s0=*QuXmeGh`P2EmLKWhS*1C>`R)5Ch9Xy zZOZ^v7|s*~gfj<&7Q~$WW{g22#?xHyCz)rw0-&C+5K<+ zWMX|1NSdDYJhnhoJu^Sb5K}eqcRW(PFz7!POm_6)sL!#Nr0n8}$5SB_VKxKp6tmag z&bDb?@AwgxMyJ22CN{ayZ|L@)&+-jOE?i#ko~4J+CzmgG7|~N^jHDH1rY#PnRzdhO z5giWVYt1sRwr9w?4%c;^hPXr%iSc~TBF#KDSt5wFZg^I=U~fF8QoBC>bdqRXVddA? z@^bvvo7RhmUurUK*+4ZJ=+v-+;+1+b@&MM1bQ=TnbO`@COH`BH+`s#x-5VJe8UHgs zKL$qLBicl6VQj)+dkRMbcDbyA$2ET&hG=k;uC!ATh+z&Ku!+fv@68B5QL);TIj%`j ze(bPF9^Q`6zl@WX_tlHBXTf;RBy=CY2e$Xe_bZJJE&fc)o+L>HldzYLW%Q8a6&*H( zV?1G+bcNgA*7N2RiLB&N>`M7P@&g^rt zJ0r*DG@BonDJ%Wq?@jc>x^_26PJme)J$k*ax1rqG|X z%=H#G%>ye3`{adjIo)rc^TH%C(GL@z754fy+U$8ls5;k%4)<)!l>{4hp*Fo%VMaY4 zS_U>dI-XXxA+~dZpba8{@WYbSiwYmfpp>{-xaXc~5f58{;<) z(O2R5!HW|*v)y+{WvAqkxhyt)-#kV<_g&MmEvlJQ>?ChU+9(!QgK#{sIW8VG+T=LN z$WyjddEoZE7zG&25|9+_vo-{94m;zd=3XB*fef4j%j9y^?NW%~^2g=eZi z9;Z9Ilk$!`o{-W|5uT3tDkYN^D?=j4W<*tf$5k`jm%Tb|s_!!^`_^>waNaN)`4^#3 znVJ6n;|S6UU|hvcMX9nyuO!9E2W|n|q+EFu4L_dc8lUR6ol)t@)@M|C)8o@8*&A*N zjG?pXu~k4oKJV&s!~3UwgH%N;77X;{uDQVj_V>D>Vs@2ktGR`yfxqR-(lFW2=a#K! 
z4TGcjsk&QQk?lz$z!dzAzadTB2K>5ANuSKk_f1B;VmyR-<)2b1Rq5#Rg=GwP3bn@k z*9N0YxLSgWp~E_9)|U*Z8UNv4Z07%HQ~xrle?2CV|IQ{9lAD_wH$O5&ifYwVR8;Ib zn_+{%FVb!ZdkgvR^s}IG?cG1vTe-m&#_7u{-2hQdp(tr^ceAN_!uru349Ox)dX#ho z^J8Bxxzwvi`*&1->fA{FDgFpA&|@{cTeV42*gPgZE-Z?G(k(!iGdv6W#iQ9077+zR7;!KyZ)tI7>u=kNxAu$0>K9PPV>c_jd z_vp)mnIWPN3IkcWSA>HDc;$z%ipaqA-CnW{4JI4yI@X0hXhL304_o}AT~0q9_Qb>e zuRdLIEsccS52N!M<4-8Ai#F=l3yf5|m;|jrffIQqYBPV&sI?$tc|R4gDwL6IqqOir z)~8`CjPS>{3fEIfXRn(Fam`9DLN`PJTt+X2^(fTP0(6zfjp67xMwI>`)Xy5e%xblkNAw6MjF*m3ydwWJ3Sdjm`(yf+!THO8C#+g> zt&!6ABWQW;{}C#nVeIVeII1{$6{+_vSeMO{?YZ$a#JmAp#n)Hl^(3HCUHn8mA!9ZY(1zfgdFY`~)d+m70iA zwYJ_-4JMfyX&emcW&cDp!y>JIU@3{>b8c$Vc=q@D7y>9&iY7+1+S8dv`uR2o_Fn_^8bW z@2{S@U1%T8Nlq-(wwH3mwOwa>@^hC5BgF}5cwme$XMPAY1-CP6KgsJb(Hqe9EYnB# zxusWTPAS?f(5$9U1T8Y`Rd0Ngebe5t|0Yn2Q*ANaDll>^+b5X%!2zU9SevIaq}hrb zr9x`k^VzgI&9~#eIQjhR>yw@4hSqS(A*v>S@mo+5?##vt=2jtoO3bkA`#GJHf_%@Z z+RAEnXXDh!kX~JgUHBQz164d%bF|%)Id51%8FT_ZuKx*#@9%v=JW^=?w?)fSy# zCZ+bm5{`ZouRG^xTYnIu;kBmqFMr7rjwCg+X9=(WT)X*0**-0Hf4(20r787bc#)d$ zCK6~vw`K>ql~q0t4RnsL;u5r_@La(AbbEh+?0I(!fzhTa%Cha|s4r{>MF~E1sdpEB ziNcGz?_SWE8X%*NatzKBN`0vq^5$(TpX#(|17^|(RaU&ro1gyV5hwX2>=)exG^5$# z14ymPiY!V9E=w=Z9$G?NxPRH)8P>nmEx2+I;=mHVkwy=nJp2OEBDib`t05k0Zj2UG zqR7TEJ03g-8<|<;q8hn3{jqKn{DVDNAx|6L z2oKouZZ}c<3G83b5kx`8KXhgx0#;JsRr}?tR2mYW*~Pxm6$)Q9UF4f09Qnbo8$Xn& zo#@-nZV1h5HNGdR4eahK+^n`i`Un*n(Pw5AjST1a7`?+*v#YD_2dsyprbg2*Zm0!zFgWNoi=Gig@zm_~#|H6VnwyNio?g}@3~ zl@dKiXQHEglVBtRy~-;8UR_|QOV*hV*g|)b%eX)eBEeUPMg*4`GuX9!O-)`)y{xJ+Z-Ho^Gm}NjUZil`UR4J4fC)Jzy*dtpX|?k+*NM%a@~)_9H*{`N^w+> z+NH%^0_a!xQa07{ZyQHT& zpptHD!5EYfWV~a3A0$ZU*gPj))^6WiFYhKXNiOCJ*Ldb-cU1fkxdZ-FfU-v_Rx>l> z(KU>YC0%=;mZO0g`;L71u9q<{Je->Z7am#QGX~AVHD8)308OK>po`xYjpXPA&pI zM5Ar0zwF|tk7usLUXB#g)8u%sKiH0R9*pd6I!a808-pn`@3;HD@2pwiTU@#>3NFQ% zX=%f)A8iVb%RO_#yHmS9rp{YWLj|e6mmInfNHcF`NZ2uMS!nAjpmU8yMA979g!Pd!o@2&^pQgj%#s?p?jY?a7g~;^ImUMDKqlW?srnjRWwFAdUPBgRKfF z01ICBb5@rfa6M;f8BLVZ*I460TSI;@^hn*;*=D^G4%0X5bw@tL{H93{lLuV}P=vO~ 
zJ}7aAnX|H7gi`%*i+@XFeM;u+j%qfT2?Sjt(3??m;=)Uy3;UzjlaP$m!L|f^bF2k^ zcD`Pw?ze$dH+Z`ts?CANik|Qpi z7FoC55v@JB6v&)ohqiXHfG<^tQ%wzkXuYk5J6}3`WM&I|UphOKPHwt*7)C&2U&H#) zFxz1=Avb~H|KPq(^S?EiIMy9e-SxEYC4)qprfHwof*|BW6G_aW(D(~RSQ*X|(}H15 zFs=b*QL$gZ$po5g(+?IX#c+y0haM3O)}7sE`Ns}?xILeMqY4Mv&(XcquAlH_+G~z} za&JjzhZPKLR7OdY7BY^)yC$rx;Gc4iw|XD}rcOwBx=AIvVhaXhFOJml@A2XD3|lK~ z4$3`XpCymRH1$FRhh%3KWK-hqQr(lc_yxVTawAvZ9rg3?oe40}FeV?j321~)yPSU` z{_bsiBR=iqdm;?VMx5naFpnq%l>e%qh^i zvqok^{nUL^v-SmQ4NrXT>t>rJb*$IHa$Mwgr+l?^dV&j$-1(YIW8Al$3uzd6Q7%;W z0>hHxwMZfoPLuV>I|2m}PSm5=g|QeHVtuLOR6Q$>0<%oXuc8t(4bz*5LEJYq)xP`Lp-^O2SDxoixU0|g z%7rXbl*Vaqao?{-!TNa-D19%-9WfN}5X|d$))fB_!fYleFp5e{S-%g{LWAO7+6mP) z28Nx96r##IZ#ttcx-*qhC~;jWQ!#C}QDzcL>I+nFkPNGGgIh4Q0ZMVgqFIO&3XQdF z!1B81p8Z146KQ9KBbs39_yk9pkPn9?zmvZxy$lkWwyPbuxmam#YW08>vd{=8*Y;?H zsU3}on#Mnc0DTKPZ#sZ7e3PVXWk7%=JM}v{4!)J$s z{&}Ga)QkVslOdu2|Dq&zJs3YF3u(b&>}ylsrq3{U3B&v9{%Gp@`n}!ztgb;%>r;7A z-ibV+#Tn1ZbJ9F%;?sB(GfT@;LL(D>%jX53aoef?>JmM03upH`*94g{YmP-@!!Ef^ zb{;?@qGV!csytNoUQ4L2I{cn+btj23c>I-dGHbzkip`b91Url7vb)0E@h$DI5qsjv z=E{6*C56|vkgfMC;xE!1?60E})_D@~3F-Z0x!Lsr717a>iNg(LvVOZfHj-|BU~QA( ztcgZu{9KCG*~kWoa6hB($hNXgY-BN6QujH zQY&e&Q+0PGXPv3N?XA3dk535f9u5kSFm?^jG{*)$4X62AA9sf6oXTPBM^ONHvu( zG>IuHZqY^ycKSKmKz1{&%}Spt*4!40v9ZZdFH1Hl1asBgmMSA*6$!1FI7W-Ar0fpA zZ%I*d771w>+&6)=VMM$n&W|D3qnn}~?9t7%yHXk1-*t!NGK5O`>i}v81#OapZ*p3@ zmpP);6BS>IW3@(gmd!W3pTCBWR%%*{AfS-jOCw6LEB_?GJ@j^+l+l1o-l(ejv(SCp z;H>;NgKb3qiD&C~3|oy=wDixg4N5Dh|C4I;Z?=$26jM|Mj-94La)YnG*uS1sx!Bm7 z#e{3)Xv3XMi_OyL=m%7Ua88+IW2*fIK-#8%aC?wR7DK&Z0qOv$6Ge;+>AnW*_(H7m zr`&{L_+G@JQT`v0HFsHAt>IY_hQQazNAxRMX?ZfHu|dS|dd{eg1q>c!R(PLC`+xxp z+Hn5`!~D0ZLH~9Z;Q#!--1gpFdhC0(Tl4%&au9wac9A%GUZWVBPzIO1hcQ8iLMP;e znajU6T{$f6w|T}QNqj^`yt#e#lZhM!<~Ui^r6j%8=Y$xgMRaZnXQCmufpN2DZ{BJl zf%PlaO<{Aa5IkJ$A@|M6!Cpx&&?3lp0+jUt@)gRxnclq``VaqUZ~Q;G&%e6W*MB$= z#3q0MsQ;@b+O-Pxmc^MaX*#NG$)Rq+rkDr<=hU4$d2!qGv@maIlB8@YJ=v%+9>G6~Dr29wwZmFCiSpnLxm6R!tb>4&k4W`=^eVcQAj`tiL;Ey!z 
zpi?ZS9Qk{AE)EYz$p&EV@S&G@^IG2h&!P)GqB4H{!2Mk{O(FX3Mss#8)JDRfmB@1j z^W{_CJz_^Mf6IKVlW=ub`yDn0CPjNzqs}I3q7y>Gm+}C!=g-Tpr`}wX5e{goYO(-h zCp}B^5)jc+Z0i(u(eQQ+7odB()-Gf4mU6WY9=%Xo5V?eLPk=pk&B5)^w9D5+AiHdP zsi}XDna2F#4c1maG3+Rk-=OyoPqvhoItO>@`gocgVry^g-OY!}+zWG$1M6$^0eGc+ zD{+wRo_TR%{U$6a;sF2nEz-P`>S~=W^2T3cARnGpT6#aKT|$aCpJ03)s4e_jM?eGp>^b{k6aZsTHrabZ73)({CO_3sl%k`(kG#m zll*&XKz)MUR? zsgFXFbG#J1-*Y;Ps4nZyP6T>=Bt6NzDZO&u57R(B`><{~!4;A|=NEX}5uTQfAO$(p zMJD;v3=rN1H)JgO?!?X=H3hIBdd^S%U_pA|=h|-J&f77W=*#_X_#Bh>QUle8JL$>8 zXL12_K}>=A2mNt`d9Ti_+`WiHuQ+PJXvB$(fnqrSp%yo5_bfeGkIQ7OS57iB%Ki86 z-#xuME+v5#O@8>t0{-5t-I(a;+by7BZUF->8X=s{_X3N?nrLJLpVN^lcfkur<_>tD zgEKxSajo*9+(W6uBl_J18@a`X=B=meWD18jL2x?=Mkn~hZ#x8(m@bv0kKW1l7DwMy zNqd_;DTXOXKw$qMY~Za=bTA&=O;490=)JzNoD-6Rp7VSG-27*7DbM{;s*|iClCu_S zGRX8j{(_KYddGy^c>x^~)i;FCz^#eq9gpCSRSuy|`f9NtklJWyz;iS4jKx$6AKp(Q zp7j0I#y?+|p)!)y*qvz>s&%UlD$cN84awYw)!i9ndI^vHJ?>l?GOz1$V0M2#F?h8b z-9$*+9@xlwi9lPBo;0;-zy8%OHhKLEm6;*CFhKPG(Ds%=aeZIDK7;_lgS#ZSyL)hV zhv4q+?j9V1ySux)yVJP4LnF7pzqxa3rmp;@?!4~mKHcY3@4f2T&sv{~>2u5+^SrF4 zYQMfcfhXlddg;zx*594BOt!Le!p__j6VwC9R#aUYqi;_jtS7TGh&*&AsVko`h?cyd z@8iMBy`|MNb14H5U1LGuN2~J*h2R=w-dn^TU@u{y-h{5v`%N|U9sjJzi{`bElnxf~ z)TQ%x9Chxapz_4rs_LcU+nWn2EU~qwe7qFO_Ftww)L`;b85sLYRD{LKAY|m)ycN~+Z@I*C?ge8dkTd&)0ShoTwV7+2Xe+YpAk+E$4UFR+r2gRzb zt-J5CK!Bj9ymSGrhO`C8HqE|kdUZ!?rYF4d4YZa9&m5jMwg0;4C?9kXmIMLTl_@SU z1k_M|j-#C}Bfkg-17F7Aq1@6RdtE#FhDUxvK?r!H`{L^CW!L_m+eL^e~T9;|_2s|yx0pcIE2Vj?hvDaUjEV9cA6Bov}p zlH$ej{p#=sbL!0oJQ=^uW4#(g5qGD5!;s26)d~EV2yu9V34{Baks{%BLR#j;{YUT@ zn`j)hz_rEPUrlQMC`poQfilfXuLW_eu29&cMPN^VtKE+i{r5jT!V~Gh=ZaTrRKX>6 zU-=EJ+O)$-jdNCk*L0jKw=W^7;#y7NGPcz>eD~=L-?t{UWSw}!wjB@(rgJ(=!i}O> zl&*^qfR{hd)5xS#FnCiX#}?1WjrT^7{7|A-0q<~FdHIYXXoma-d#UM5r7uu2KU|E~ zRIKd<%OP?cQ>s2}=j2g~)^khBzN{;b5aa@kFDKqI4*_2NC76l+2} zL5>|ni_SvE?2iEfH6d??M#4J9y^yM>hjuSt9fXTJyhGpJp6iu#8m|qo^e+`|zn{7^ zf;OUl2=p*Kd=u*xl`%#~+`U0%!jWg(TA$#u;TDTA^OL;SS#~g?O<)gnG4jYE9Qr;} zPF-V8TTM7a3C)V>Pp8B9)k3jtGprWEZY})IBW6*St1J_|gj;F(Ey4!j2#Gf1kLpBO 
zGp$)O36r~`0no+c!-yqA7D4{l!!sweDd485$}eUwln8fSyPb07DBwHuXe`T}osH?M zS}@u>dW$vK=+o~H^w7=qG)pGz*_ca{#VAIe$81Jnk-_gNECCaPkp5Sl^Mm^xntu6G zOd9lsEV=-BmBjBo-WbbiVCEgiz$+v~Z9zoXn|8vKqyYDM4R>*|PWI>!uemSsv~e3OxfjC`uz8{x=&5h zk+2@0)D(`t&WDW_Y;29ebm}kec((4EuwM87I@#B8DQyDKGzSK_bE36^T45oFp3I}2 zn!YUC)M4)L^$6TLDus1#RFv$MJ)jc+x#oPi!<|Fv^g62fN0I&>AJ-|~KjMkLv8C^l zJpl@5g`nb(XslW{g8qiRL*@_`6b$I0D5khR5;%clXi|2Vx~cSyECw zj^90RknQ#?-@d)xY#L2#4i{dIABx61RHbgnsNT!o6hw!=n-2|rk~m%Km=>my2^JDR z7}ueP$oQ>3As?&e(M6`8mlf60o#U;BU zd6L~!=kwx_T)jTl#hX6Tq9w@xcRFLR-CSL)%$8v6mJO+qWME<@Zp(*rT*J*I8 z=1}ISahgTzRS5R!X^SN7k`$$LVlx+FKDQZnQn^3H+-~2Jw6YZg21bg z&zoP{$jEIz?rC#7TrrEkLF+UzIfNrQJ`kATcCIFbm5?^tG8>EhbS8oJ;kkBz*c-#{ z4BvvgYzleq67YI0Zfp0FlIusWb#Fq2^9tE>qpe4T>-H}*$NaH3B|=>xV`tXIq=R#y z5w8+~GDiz*sD1x%oM(acmw>NZ{#LT9>Eqa7;JBGR=z? zw?p#yQU?!_wnz;1XmmDR5HI=tI;TebK!woFqY{=gSTKW74yuDfNB&n@;lj^9;FX#Q z&fck`{V+)EFU%Kvk2vLpN*NF8^DJ)`c&(sTSo$ZzOzR?@nyKcBGujVatcQF^X-@~d zazwM#5IPgbEh7V4zuUcMmh{=_eR(fQQhQl~^dquYcQfjeuAIe2xr)!Jm1CcKX0SGV zPjo-g8{DnK++2FdS)K*?u<(W?tyLECqycVjB_;D6zecD+Ih;tqdT%^5ngv*)lv>;G zGcZJv!4>kjsEo-9pRuoWaMS<_s|4f#roSbd$9OcHeutp<@N`xezyI6_s)bmY=CIF# z?v~>%u1QD|MUXWhXD%bdfXEqTXvBPFd@b$19uazz* z6C&pDu00n2yG0&ogG+lXre5X)zB4-Im;fx8EaBwaNSKJZ3;AX!M0c}wq~GdgV)xgS z4>|$F9t{lf6j-n~LJ5r44TjB{9H17boTPODQmlrQJ;;i!9Wdorq#L#JR#=@0Vb{>z zGFU6$rQYclqv7n>ZM#5=n8GLieic3kKZs(#b%WOC&`*^H+lbD+bKvlNa}q?(&54XM z-)0-IgA10DnmQdnV541*1r`wxux7ViY*L}7$GwYCGCLbBxqb7cNM@op4_m^=nRi?a z3MUlMr7hx!#a{hQEJe6(sZ&BU=S16!?m$qc5g)Tw=BhrDwM7=fX~+&PNB8=%)ZB$8 z8|oufs=7{I8Pe%$Oz9FWigrs0Cz+mlM&TGZ-bH(4tOCAji=p6LOGw|9cuVq{lUX4BO){=e)}Iw<<&LI=sIKoqHz2#wM{UtL|7b}ii~eH6z1~bHELI3G zWEUUSmGYr5QBft^D>^W+oI9+n1#W&hw_!k#{=wH1t!_U8-1*Bm<|MRWwx0f~_w$D8 zSoJDYE!sRAvt}LgUfe{$DG>6gVqbc+2vkW7oIg~w3Vs0>b4=rF2Wv-}j&d&7g8w>0 zgw#cuGb2ZE3`n%whMytDaYV|QhUWuHu`Vutd?wP~`z)AC1NPr9qphDWh=pzif`R zX>h9Ju+W@|-KKTAanmWI%xAad9h!S`r@s+%h5UeXy9>u9y~5k?ZiA03isv|DC1ZcF zV*nF^O%O|!pVH8H`Gq*hzd%11iy8lJ0glL?O5htYaCVBOcI+`LA)&zdV!JQDqW6n? 
zvo-x3o4WQCv}+_twEn~}Q{S5U;VFpN3dGgUS%+r0WrS$C*q=_9>5U5k7aeZ&#is8v z78L8{aq|0Q4?VlLC5RoKe{Fxqw0DO+8>&wZsA?~HDThd@meGioCAD1rqKGR;ebE$` zo~FZJ2adn2Yt13+X&TGoT&fG;p^Jq?Ah?XVn8#xFW1zr`zO4lG=LR zw#{2|>hLqKhqe1npjf=~yc9EDsTaj&*pgU)xKq&8^>$kkvv%Z&#aIZ|ewiM;c$^u$ z=%Ec#{i0aijUM2>Z)~VK`0xg7P$eyNIAA>4zTYSgA7IH}9CBurO*EoVmkD9gxooT1T^5Luc7%m>n~e7ptT)8|_aig>1oJ(l#$2}Y_z2hq> zPuz{APx#7Fb};;9g|j5yw}=1>2INmY{?~V|qy~Qzaj&1jj0tn%@SowoADuB(4pjLkdAVYUmtHSB zzh@3W6&LW;rB32bR*X{@j9BzLfT0D350FDK9InEnKse(;*!2SKb;L~*lVyQ8^UHm* zv$6yO9wXCG#TV*w%b(Q-3p48^2m;0GWJgy-T(;^45B;qSI;u39FNQ989tzM=VBE@-9;>;LcK`W!`q#`h zocyd4|NfI9`;9-@ZoectP{nTzeLnF0gnyqh%Y&zBvM4!@{T~}`&h)?1M*B6D&Ht(N zulZfLa0m$2F0CCP0Q+yFyZ)~#aA3cDPQSiYWC8{oLQQLnZ@Q7tzDSy3g=qDMpW}=5 z5eH%=u0S*n;-7LnRq1U}<5HBXh^t>?TZ#s;wu@M=Vr;qFk{2E%WxVOi_q8EA`=a#E zlQWbQJK&QqOKvnKcHS}FEhX-fM3fR*8ULC#S>jh2N_K6oas2#ICYLFq2JaLZVR#)@ zjGev_NIfME=x=~7W}Wo)y~G+n#AgmS*WLvoGQqjn3xksl5v$S}`4GC{S53w3BrR@) zqwk%vh9IoW>?r7o(fX>wt1)$ddiI4Wc&X~38F30Ia)aZ&M z-iJ9^A$;eJq;OP6>V^EOMqiCGw2OOG13yuVIQ%rd=bF(Fdx82NHc7M9vwn4vTTzFz z>>iwc754KoPC*(gMHq?yb1Kp&FOl}jMt^!)tb|V^a!YW~P7Ljdhb4?Wn?P1N#bmBj zrS2d}gXX;fhgV($E<*I63ZdgJJo}8nfF_^7qWz4hH%Emc=ppLon*;d#>~tH)i#KQC z@EGO08!fd5g6V0%r-AYnSn!h68a#d!ZV_ry48HC(8p-MiYS2>X{0xsc@z2XFcssMM zs5v*1d&Rjm8DRZZMitj-7&e=}dg4x=G;A^ZKe%Lx7w8B&`67=ijA-%HoI`17E2N$7dRvY!-p=wy%6j5)WiNwlhCIHlw9_>kuMLa1yFYqz=)>`@s z6qp$WJt7(psa#JiOaSqOk zZr;Y2*d}}fTPooso?&+V>&=ksbyt7Ayx5^+;lf?nZ)%|%d|*5h$4Fj5=vk(N0mSozL(8o!dGS> z&`+BT{w5#e%}-sc&p_9rhN#+lHJ_o-;fqB;=Q@2yRqt}5vhO-7@G(HDmd%`OiZ7g4 zcjHf=_tf4hyS+%!&{T{>;S5Vjo-zhqzDe`|c$fOPMnDBfA71-dPs3jReFe>BY&&UC zIw;rXMBe^;bpIf>eD(1amLOVJ{8v9Tuu5dYqZsdEiR7?KeWXNcJ(JS#WM7&MZ>uLS zTEZ$5>j3DDSv?K-br4I%xR-JbocP*Dmw8yKnB2vXju0xP@%oAp|G^OPI-?}Ds#Tww zq$PUcJHY5-faU4vy3;YlT=l-Mg5iBC+-=A)4$u&vvwj$F=(il5P#1g|aMU)(VmMRJqpTE;4 zW`|x6AF+M=3I_UGm`pc5kg}IX{l$%yKAN=ioZ9!uN$li|%uhl~Mg#iCGjAAcnCmOf zUs?>Y96<@!V5}}*s`*p=PF?C84X@eLZFI)-Di;LAq+M0k_SHE%O+PC2Tol|3JUEkGQ)SQ_ug6$?gE3u+jGikDVy3X9iVXqx1&Eou 
z2VQI>bu`;xHU=?Ue8j&BfX$G`lxi?z0~gex(rtDXF&2`~jcjQtK*>W!nt=jfc3NMQ zvI#}qz8yo4Q&_rPwUw{jNpNoP`*$GSrl#p_7g_S@M|*pY&&Y>iznr6k#@}|D(ij5E zw_WO7eYatU9{#WgKIYu(El^j>Yov`fC2(up^v;t+Rad8#A_uw|@9F8{c4N5lr?gO0 zzW2fW*x`9q&&YP3MomLjRVxTHcn2g7B@K zeY8&N!b@YPEM~ zVb0>3`3z#Bq7Ol0C{&!2*!+PcpqJLXt&y!fm z8(ah0W`9f*)JX#RbV%8j zl8TDp6$Sy)w=9lsBtftZ6=l^dNaeiC`+2=B)ddPDUi5!xr=|USHR2Ebpy0*jkMgl` z`_|s?xMHzATDpR*v=OQ6UkIhjD+*43+KG|$%SnO%y9>Z;V^c#!5+Nw})%_0!cGz3+ z7ghWtK*kKo16P$Mg8a8nk>(|?WAuWw68wi2{wZLahR&$#GU~?X<$yEtF^Z^q4q4%P z8}b%7ha<|->KTVBw}SE&LS?4;55o;p&X*^QrCCXL?rQi-sV~zppteof>FlVGyL}CT zUc2<}zodS@nza!8rbd-$*_%!p53D?m*({OHN}i$&&Uz=;aS)xcVruZHh zIkR%X#_{&e#$4!MM4rIA7ZBFgzPq`Nm=)2kiyF^7WyRb@Hct3hN0e~^S$9-NO-7vD!p zCLc|a_5#il+9MFAr%l$w>&+TvEu51^y86>V$&!Y9>rJ&P;hAH%DLNBA-k5hGYPYjO z_QKAsFc-?4#?VxpxERAhFH%@rqQ;t_Ll4;2(eIq50Emu4F*(?2Xt+*`<)=O674=;*a-rab-EHUKS zVK4t8qo?BwGLsvN?9K&U@UQFRtD0r8D!NmK==kqZaLXE*6c11%lS-cIEbeUUW$1aL z-FbJ4Qwix^8B~Q3$Yw`!cOlzJSn(+>tlyDsV@mA5!ym|oeEpZ$`@wOs;{dp5WpXze`sizi7H*8U_Ur&&8 z9LOqtTrB@Fs>qZ*Vbt$>vEqLo>f#2GCO2s^?h30QM|*^%5uFak{kv+5>d;huh2c9` z&>-X8L9KAe^VldhKQv6Z{wX3ZTROs7*TsG(PsX*9=PH9MOTI2;A?@(tN$7I?%er*3 zPu1OZ1y9pey#cq?L651H245BQBH@JLy)E_d`hVDxV%e;l1kiwp6e99-6VS*t;OF3n8qC#bG0*f&MVU=++;`r<-3w^uOg0MlQXy$ft!W)^g zE&-MYkPutAn&axvI$=4e)%?YfN}o3jmn(_2l|F7pq}b!Eyu+|_iy5XzXD}qAF)>p` z!s7;s1`l5>mI3k?+JcP&iH!Ett!cizY|`z}RdeI0^E2lH_NT;g;uXD+W8fTj%<-A~ z-X(^F8*~XDXvuO9yK~jezISER$5#$Wg)N^W0quH8r8fibF&9>yL0dvzR(CWKCe0Fly2G!_!6!$Vl zkntJUGod1dg=@xznJwkM{n{0wY4CT;6jNs*3tS%N>`RV9lMS*K;u6Z^^Vx*VqpK;^ z47flj95}8?^|bpW?gID5fQ&yMUk6KJ#7BO!Z9!6?KMQIjbY%uQePxdHozFc}udRPO z><J>ow*5Dud-J2PcqC>VAC0*l?>%X<1(!$LR=>B(v7R`e?N!+KBrz z0U8yPs6~a~(WhmpQZ3fybL3JqXNX@o1NL<%^Za(csv^nCP>%L{oUkH2AU4RGPV9r8z$hLzB&hxoa?F%#wb1Fv{^r1i&_CclCBusys|$UTR9FOd9s< z;};$=43T}(`~r7Un62;x4SA%M0#Mq1-)5m=pE8*(Zv37+Ffhvq?jkSs@RagbVC`nF z1Xp53Z|H*z<=%m9LPt^~kUafk6XTF}U4)hjMvho1cNS{@W>k5A_f)Q&pWijdaF?$wq%`7=DxXEjkZcmJ@INj#S{<{N02ke1<4Wr6 zS&e8;ofyk7 
zp{D%4L66_Yc)qxdeF?Nsqs|&W#$GR3@~*FOxtr?K^!WwB8d5vH%rI{Y4YN&Wda{?l zDqqR;h6JttEPqy9O=>(ouW{r2o*-OZS#Vo`2!aBw`FxvW?@FV%hgJzJcA&e5%A}#c zNW>B~@|&08%`JZHD#Oescw{)`dMdAg8jvxSSnG7l!iKj*H?6sr4`)_fR?3%~3!#rR zxsTLM2%1ieqn;A~HUX7U67&>7QjzAKo*0z2FJmPyG2>mI>7xLbyCXa(Y+%RD>Cp05 z6TffQ`)F<(7*tkSpO#n;U3{6|$dZS9DuSspT-NzyXNH#l8yM&s!$??ZhMacAmP&qn z3*V|Q{0Lj^OMDeSumRAu2q-tWNFdg*$1*7L9yx@XpR=<-x+9x1S!c9O3vd7Y-Phg| zyherSs{?8+c=#x-&|vMoN~EQQL9q78_YGOv$MuuFsJ*4uWkiQ$rHOBO=GY~`ohsR< z03p)K9UfKL7YTkT5Xtb_y+u@*(q8?I&3Ej~2On!_y(MwC~F`_#THZWSAT(Q)^F`W+_wD^N4_Z_6K7hkUoA&CGM!>Vk6;3jH{eD6H`k zdqatq{MNhKPC}jFl8psLr6Ja$>fp@0Oq$}inrwtAWu+}Wto%6{A=?BNuv|vzk@r6( z(YE#a%Y0PU#>%eWi0kA~`a@*&`t}rL(h=GR81FKXB%a;iv?3%+c~m7{o_Z#se(n+e z)Zb3v;Up>YE@t6$U1Cpa52?@Vc%j=F5nVJRji{w=Mt=_WanXX8cp_)Cx~D+-Sxw{1pH*6P zYv(?H_4{7442j(h$0-ulWQehaHaz%BVJh|dgsP-OP>h?@>x0Dng45=e1qN=V4gL&)x-}W<|(B+OXlAg%!SiGv^pp5HA($>f+&x zA$#x?P{f^t;wJm`O5&|jtuYCfq&Aqz&W>*OB8-R=6VFD_30D>-?}Rk_)zigx+2}w@ z1f7!z#x_DqIio*}GXv@~Btg&$4`QvReDn`78awfX5M6ly*NL&}RwAlM>ROz5 zOD8c0n|56EH``*Ixg~4X@XWX87?6%EJ$WC+D;5+A1OD6Q|TE7P5>#wuKo=7zrBK;I^^uUqT8gVzl zYXzKwj+kC9-MEzcYi%P(b>$~6-{Dp|$PkpITt8;D4?B!Sx~KNFQykeGnCC}VuL8b9 z?q!;Igq(7eU8py$)x6b)LD*WE*gNbMH-s3Qb0sWk%*r#0J``ScFYnZ7tb8N~$G<0x zu5$#gK!FQZQ|YZeTSdfp-c350>MFY;#B{5#_$>52%0+5)*dvr~;^Ldyb0yqXnAym; zr!%IbIfde%9YLz|112vg7WXHkkn(C@ z4I+P-jMKs6aHl-bYO#+S0Omf_n;EO=?7sw6IvFdh z)65;VplhiGkb4i7XWe`LKP~)C=_WI!Q~x+8GZ-;u-!>=q|ZDOUjyk`bQQe zb2GW>{EJ$4=f%lCiaL8oAorS#x}4ILgXlU_Z(|TptR@_El%;>iL#>N<9v&8Ueebao z>!W7-&a&YCac>Yn{_`L0Gr`ua%Szsh&g)Zb+DaE2x9d^;uKlIkFw2?P)BUFh;HtIz zpWC3;b`)q5VX*%HEOmNqseO=pu(54i`gYxSa80B`2dXJAe17<%XC?J}xc~h5ljq5_ z;tuaE8;Tpap3PH@c(>L4B>grp&wzXC@)9%eS!23Yg{rm@i2j1g3&ZO@4P|L(s$EaI zspVqwLD_7&QNfFqtc;%s_zu+hl$oFWByk1nP@>eFGsPohI6pL@){uw^0&GF{}7^ zZs8>-1#)`xgzHo>%PgO2GeTys{1_0octlaeWrL;^RjktmIjb2|Mf>s~{K>{vwHgCQ zY|c3@VcD`tdowXI^^=%4Fp)dywK>Zcmf~48R4aZ|GhXnV7U9v=^C$CRfdMSI^`{L@ z%9Rdg@RcWEZ1N;hgJH&Pkk{-#7^HBgiS%a9U{0#s&?#b-pg~6e+2g!Wqq*ELq=@8S 
z8QYCJ+}4EIV3U00BpMLE?Q!E@$HHb{Ab`va1V+42_W9rJ(6IT|g+u`ZEQ{(sU%uh! zx#jSCzc(D=Kp_@p!?raBLG)()Sr@&`^Eyu*P2FtJ*teL+Oq6FXrFO01^aL9sqBF8A z7F^fu>vhc8U{3_!Os7-3l={Jm$D#6TG#lc4V3|i)847Mlq*-=+wXwyF#C{H*EH+#E z6v9Vwc2xRGmtuRV#GbW82cOEw6P-e#w19WTi5sN7uNuMVtC`Zt)X>Vj<|ng`%vi#|6zSmwEo?!?FUXI?VC< zkJ#5STrQ_8--bj~=~+V!60oUalye}8DC$4YVHgc#RJYv_#9~^uyF@KpFZSuQLl&(~ zZr>;eLI?_KRFvG%m6<6|z(LK*>mrlb_`l$tSQns`fIp~NZH_&3yT_l@q!I}r#K*0Q z0zVr>@dd*CUxh$!bef=5PqfpiEO4wL*HZqZWZp?2*ReReC|fQ-kei8j2FWlCLChJF z4MBR@V^WT@r1cwCo58n*0;hIlL+F}pm)!+0e%35p39&}z?Yv8Aojd9_)(n2L8SRve ze|I(GSt%JyrrNXl5sH-{(Au^Lj(76)Ix4BRdu{>)hmaSu*t3lRew>crN|!1`BmVIG zkniQcB0gX@*!=cA=|3a-ZN$WSjhOQAH)TH7(<$D{z6_ho_sb0M#g8RQl%@vKp>Z|E z!quMS@G=0#jkZUFsUT68#cSb#j1s+rYAa}uO_XLx@vM_EK~ z!@^^0YEqz8#a4^o08z-81#~ha`E*Ft5c6JZ;~f zLRz$=MIF%pv~#8GuiDmmN&lG~Rg-vyUsDRJ7P`sv@oud8@9riY8vZFa{X^ukeT7RN zUxuV4hh2x?3!KZInH~A@uxkRZC4NfV&R<-Ku~q{Ic8yu?!34HLDBqGsDtxbH4fCRi zVkZ!x{JqE{$9xgGX~fT*89)VjcRWagy?Pz~JG2VaGHpv&uoqOB z_+d_cmO^j((pGuCULPLSj((s2GNI9dOWX5?ST=+ke>w_T5pH4N=jS!wgwV8 z<&M1m!Ilx-vA+WrdtLiK-F{qi*y(h4NT1l@4_nY@?xw{h__rp+yuVBEJeGrIQ%o3b z91n*9A4p(7c`qD7^@jh!meFVRUtG8az1<4fTi(Xyds1jUn9&j5zic_o=6;0RPNz?g z**Z;}bRFj6Cc03h*mq~jejr0SzFOSY;qset|L+w}2z-g*j~y0Ko_5$s8-wF`85%+h z_yz~wlwjkN{It7T{6+x$tpmC44Xw~!Q~}Luuc;?rzS9v|o#>EZn~48+NTFEax!-_^ z-QnnM7x@;tXJltJp}*m4aleg+eURn*PwaVh9bF#TkXq>=F@2{jGn^ruiqcdAaU)*! 
zFw*%M>K&ZXizj~SR&iYv243#U0haV3gi>8y zS9ozcySzK4D5$8ga{1Hqt;esK&cd)m#Afng4 z%kLr3+3VA;#h?cdd}@nRPp%`Qsm3>%dWW5>U+|ZrMpZd#(8m8e^F1~n>FcaYjkQ&R7F8*U-z8Rx} z4CDBb-8zf1a%v6!I((`5S?!`}x}K^5X2Z=>3m6!Xo)xwDn?yJ@z3ytm3pH8CCjduh(`B)>xOQr3MVB>lRiVwkV1Kf3BecD|WsHP#| zPulI~-{*QyAKtrZ==J$z)z0c`yQu&*q|;e^oc0`8rYPqtjcPXeEey>|wp;I;n6P85 z=Ip(ePXgnlU73sM9?sH-4{CFI`!{R7aSTmthjTrbFTC4-MQ?^|7J44(k7^PA11n=N z8LU;or^baeSwXV0eJgs@?BS~hCU;Q6>+9_}v!F%xwEjTim-9AT4Ap#B4AnDxgavh` zrTw@?dI38}tjBl6aum06-;~|r!>}kPu{|i5JYn46U zV@}0YUg1Q*kvdhhWAtzt`s>r-e$$`#a>o63p8(2=?$Mo{*;WquPFs)3uhlW{^(*(< zfsz52=@$(HkeN}bSFE!Sd4aIy_$i1X(Dfr)3HGfn*1F6AKcp&;|9P2(MXNnFGf`}b zm@0$XWUYU1#b-@RHT~TWls6CwXk#Ib$W-YMtvUZbFtyRUGyx^J?*=|USHJ3`4J|T= zwTWw347Z@NTJygBzQ*;@%C7OQZC9W3yArp{=lU*ZkISOd0g*^VkTo^Tw}$ZDCsT4w zfVm8?QPpq15V5%G6z;MS^6-3sKX-GVlo>jdJHDzPWqPmWWc*`8aVG#fe^w-C5q4~GY94rVeIgM zn0KdI>*L9Y4$!O##bOvFN#JsTRTBN{OE4L7#B#c^jS#oF35pHwv)UI2R8<|7Z(+!qXVSxDgfxj?Zv1tj46dgm%4; z`|6^gp(8%w6HZc{J?~P^!0q)b)Pr)>-o9pddvcxYZs#bYbC zml!UW{K5~V^~VwZHVXFe1UD>(37&R?*e9N;kJ}AJFIUs}hN~ThWQ_mL5Nma4GTXyw z(Q(FR4%Bs}VE@Gsz0f_`y2g`vjg@__gwJw7hvNN0Lc>fq&Gtd8QxM}k1XCNLmF;QX zRE!gEM$Cp+Eiq0yaIUlm*&Y2MI)!+zmn5dn@;1>VWwu}Xhog+;tN%fX^==Th-OZtO zV;0&+=r&!%5iMe(jQSg&wpVY4yByOzAEg3WVW{)!n)@Xc+Q#NoLydH}Vz*_<9p=tf z-YJcFI5>;AvXeuc(D_9{+|U~f+eLl}%me6T(G20i?3c(W*yrs*52MZE63Jt;>UFjy z8Bxv{hUgcnO<1F|IXO{{6i>r+H9ePgG2p|Oz!x}^L+__FSHRJG*VWyJI-zlz&(Yu< zOt*JNZ>;F-FnLWJ1*M9cTX}YIR#ji-AiCNBG;QX`k|@^{*2?PXB@^+0p2iJ|L*EFu#Mb>{lEx|Lv2aUF)hxdYGA+%CbGm`ql`A)2?s9d&W^fWt|k7h`X&N?Mz;+8rTmrNwo zF)luTsvL!cU7Y9JmlMd!n5v>}He6z)iw%y|q^%@0?{AtdbbZv1ubO7vIHKZ0#cA6S z*p;~+7f{-agyJ9$pb5~(uz)O*pVSIJkNzI>qU6v_-d&8+r%8iZ>a;AImeNBSCswq| ztlK0}X3oHQ9&?!f$_={ngf$jnyD5h{HOGIj`uecA*Pu*eRYC|tcezUe7_}%yD@5QC2$~?ll$JDhjOU`lZ~HypiZs=H(*EV zHKXil=F=K_>{b=g`Sy)b9T8Tl)Z#4PW-s7OiEM3z=&A$zroA`Eca&syB(-Uda>R=J%j({R4>Fr9BWS^gLu zt^!B~rt2%qYq4wx1Hf{k4*Z{g>t|I@o}DqvD0e&z_%kO}>O0xb43Hi^*MdEE!K$sf zB*tTv=tmEazdEL?Ot!=JLL73M5}wCgEI$)Y9hggX6<1)&fNyaltYIi}uT 
zFV;0-E(AnQqoP~dLZ)K)S<2kzv0@zR&9S|)TpV@=%--!|CQ350vR{ZkOZ>aot}d`@ zqE5)=8Y;UOw0l7Z~m4YE?qO)|nMHl{l zt_-{|^e^iuH$E_4TY!;1$t8VZJ8geAQ`+8`O;m*eH`-{|TB+9vc>bQtd|21LG`A9! z#T)$lJb!0JqV01~NJ8)uZE1vP$6n8yRL1-l7?EvRE+{i`UpOik^@TgsyEe259Ku`) zf&OE`UF-2+P&XewTAgAsGlOR<=(9aijF{S@zmXxAnUUgF{;8ew2iTt_;L4{W>C`XW zi%p*emf4p%NmcRI=c(wQn2hMg*ANh}naW)YT;yND;!!_sPfP5;mifub3Jax-d`c*R}B5&&L z3hBkbD7w$jx}q&~<=M&NCZ#^voBBO)QGkkgW=&Fs@;hPPS!b<#^8xgGL)LM=acI15 zB2g|Xez;(6LPWM3Q-S^N`k`Q-DDF!t_YdeoE!<=8>V)m0Hyo_J_!vkIHqRpjfT`V^ z5jO$$`7DJ6AARBs4F2h`z>gMJANBpSl@v@7Sd~z@IUp|K6i?U4K~wK5Ndz0@T)vMN zwgGT1f4%e>(at;QxY%)-yt}5xf7jL*;xN@joWuRRmCGW_J*I3D`3yhK>8Lui$3pu=eYjKdi_T>wQ zl$ZZCDvewjgoHu$?0RT^4@)$lhfcYE-m6i9%Irj^CI9Hm9kgsHEe(MP9b@&Q5E>+p z`Pmc8TA}b@d+-2ANHn0ND{xgWp-~eCspUM_!Ig~s;ki;%lcWP4ZZ zOt#5;9QX@fHQ|bblGbnK{wrfb#)XD#g|b|s_*9n2jkh>tPs89o$u&ZCSXDGE6V&0$ zx3Y`g`kISY91(oEk2I@}I?SyV0VE-NyNfb?AU>820Gme9n#;ldk)SK_^nE(KYH&Xh zVa2Rk?XJL_$rcefL6plb^!(W5Xl9zFKan&PWp};kW598}c-foK6k*#Zt-jQ@`sOD>^(&M%3EAxI;GJJ*e||R7>rIed zh8u04GH|5)0S`~ZV4(>|J90*~K2}iZ*NZzfE%%(3Q)D`upUi#dj{+UmP;5Q{PwPaWbQ zpX%VYerVEUYO|HmNxq2yaEJ?SO9?ci51N5Kb>EePy^NM6v4po?@C&v4_QoK5O#EkQ zBZU>$@fj_SV`N=%_m(}7rCn^tBqrqOFhznf3D(4DF1-7KB;ESCDL<;=nj0}CMIaq_ z_&#Bwo6xX?eZo--f`i$(16Do%$0Mr~a% zqPKz$KZ+~PJJTuQEO7+L#1^)3gF6cSxA<*xjwW5Px^nU}BbDm5qOV?{vesMu<1l~u z1@*^W1WWVrp$<|d-thmft#E`qJn<-N$GpOJdw$_t%Wlb}XV$gK>wd?E=GU0HM{D^8 z+VN*dm6+=34VgGy|G-c;7WdS$S)ctjuiCs}>dHiKnF?Z{D~*zcoMTg~BB-smUg+M4 z`iz)NOh&YpMm8EAkS)Mh`5MdrQn})J$%(G^sFW>hsy!+ed z^Ev0nfV(g>gJN8 z&dQ)Dla%udk4lM;q)^5ve2p=E{NYra{Lv|exR#2{NpTqFrXdNsp#eG+SeX2iyC6_Z z6v*4Cp+63gbwoB>aZKzHy#U}7g6Z2?)4ULD9MsFA1MKRH$$K0gqf^CT2y(W#pH^;J zU`2BF-CubDbCZa&LbX&#r0BC+6B7$W1P(W!aLHCT11Q}X^(lCppyd*tZi|4%L4r~k zpETr!CCJ-ig>`9J`VLtqtcv-tN{^Ru%Ct9#qIueBxI_~?oxJn1%GY^jW>?X4(I9Ev zYR%my7=zMneX5E|ePIiYo?Q+c2vJ`@*))_d?i#$0(Qqa#?!7{|8C+5f{P{Vq1=HS^ z$k#r+JNfHi3&8(FEm*}c#m&Syv;1X!<*#0co_Ux1N~_w8&)N_ihiuC&FXXee`>zh; zD3)2u=Q!F134ax5{-mTbm{8`PTLLB0PiE%eURHRbg7^{-Vh2I$nA)t(a~15T??0f6 
zaqGkT#V?g;-2{8Jaf5V`;d#QE6J_<{Ta|W67nmDpmmYu^DaS`&KW+wi z;47=)1t}ycEXki|4X26K)h4*>@8_jq67J-rP_wod0@p5)396V# zYtsQ>SB&u1p+<$7k_ot_rHw|1D-o%z4~B&s+WEM}aCqkw(F6PwC3|T>Y+QzlIE=^a@1Ud4>k{#vB7i-&f$?c#9j#gU~`-!Ih64rzZ3aD~>!$Tx8 zYOuOjXCj9uk=ULdvCLg%j`3hs=oc7zL1<}D@6xAcru1ReO#Edw;~EB^Fe^k3Ft)(? zVw6fn;Hd}|F{{(V6+WP@V`Z#f=$+VxT zXqL$w*49NllhcBtT~3oi7XRVU)WTva!j!&<8l~Z)D#-EFd^TP-#Ed)w3Kg77FEY1KkL(Cc8J{~T9)Scbr#%JNrPV@BWDZhZ9O$1e$Vjq zy9}p%VAGJ*m{`R~m}5FfmYv^nyUJT0=X6yfm&?*YCkui zE+HX`)`-GB#qI5*>`6}N72%rI8i^U|2V;d1);D97cRQ_oA}@aK zbT`PH(RMS~$~sfHi67P16Wq2|&%mB#wgYw{t}Cl@7a7eZ#9Y$-nQbGvud84E zipCB^^DW=GQ2NBbUsb&VtB*HB7jX?m@jyfAG<)zCD5$O=jc5ft2#`tx?lZuHkbNNO zps>f-r~b=4B literal 0 HcmV?d00001 diff --git a/.github/release-drafter.yml b/.github/release-drafter.yml new file mode 100644 index 0000000..7edd7ef --- /dev/null +++ b/.github/release-drafter.yml @@ -0,0 +1,19 @@ +name-template: 'v$NEXT_PATCH_VERSION' +tag-template: 'v$NEXT_PATCH_VERSION' +categories: + - title: 'New Features' + label: 'feature' + - title: 'Breaking Change' + label: 'breaking change' + - title: 'Bug Fixes' + label: 'fix' + - title: 'Documentation' + label: 'documentation' + - title: 'Dependencies' + label: 'dependencies' + - title: 'Enhancement' + label: 'enhancement' +change-template: '- $TITLE @$AUTHOR (#$NUMBER)' +template: | + ## Changes + $CHANGES diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml new file mode 100644 index 0000000..1fd42a8 --- /dev/null +++ b/.github/workflows/python-package.yml @@ -0,0 +1,39 @@ +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions + +name: Python package + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [3.7, 3.8, 3.9] + + steps: + - uses: 
actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install flake8 pytest + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + - name: Lint with flake8 + run: | + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: Test with pytest + run: | + pytest diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml new file mode 100644 index 0000000..b1f2f0b --- /dev/null +++ b/.github/workflows/python-publish.yml @@ -0,0 +1,31 @@ +# This workflows will upload a Python Package using Twine when a release is created +# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries + +name: Upload Python Package + +on: + release: + types: [created] + +jobs: + deploy: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: '3.x' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install build + - name: Build package + run: python -m build + - name: Publish package + uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 + with: + user: __token__ + password: ${{ secrets.PYPI_API_TOKEN }} From 3dd8d5f7197eea209ee46b5478cfb0beec28e70e Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Wed, 10 Jan 2024 17:03:25 +0100 Subject: [PATCH 05/52] remove ruff and pre-commit from dev reqs and add ryp2 --- settings.ini | 2 
+- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/settings.ini b/settings.ini index ba87868..8a0f57a 100644 --- a/settings.ini +++ b/settings.ini @@ -38,7 +38,7 @@ user = jope35 ### Optional ### requirements = antropy>=0.1.4 arch>=4.14 pandas>=1.0.5 scikit-learn>=0.23.1 statsmodels>=0.12.2 supersmoother>=0.4 tqdm -dev_requirements = nbdev ruff pre-commit +dev_requirements = nbdev rpy2 # console_scripts = black_formatting = False From 5e03313a45da66f2f5fc5ec03d976db0d59ac2f8 Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Wed, 10 Jan 2024 17:04:43 +0100 Subject: [PATCH 06/52] restore setup.py --- setup.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 setup.py diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..a847a2b --- /dev/null +++ b/setup.py @@ -0,0 +1,28 @@ +import setuptools + +with open("README.md", "r") as fh: + long_description = fh.read() + +setuptools.setup( + name="tsfeatures", + version="0.4.5", + description="Calculates various features from time series data.", + long_description=long_description, + long_description_content_type="text/markdown", + url="https://github.com/Nixtla/tsfeatures", + packages=setuptools.find_packages(), + classifiers=[ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + ], + python_requires='>=3.7', + install_requires=[ + "antropy>=0.1.4", + "arch>=4.11", + "pandas>=1.0.5", + "scikit-learn>=0.23.1", + "statsmodels>=0.13.2", + "supersmoother>=0.4" + ] +) From 21018827fe881674dc8459bb2781d731a1898cd2 Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Wed, 10 Jan 2024 17:08:00 +0100 Subject: [PATCH 07/52] restore .gitignore --- .gitignore | 158 ++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 133 insertions(+), 25 deletions(-) diff --git a/.gitignore b/.gitignore index 23c7bc4..d722dc0 100644 
--- a/.gitignore +++ b/.gitignore @@ -1,29 +1,137 @@ - +# Byte-compiled / optimized / DLL files __pycache__/ -_docs/ -_proc/ -.DS_Store -.gitattributes -.gitconfig -.idea -.idea +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook .ipynb_checkpoints -.luarc.json -.ruff_cache/ + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env .venv -.vscode +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ +*.png +Untitle*.ipynb + +.idea/ + +#files *.csv -*.egg-info -*.gif -*.icloud -*.parquet -build -data -dist -docs/_site -Gemfile.lock -Gemfile* -mlruns/ -nbs/.last_checked -nbs/data -tmp \ No newline at end of file +*.ipynb From 528b54160cb631e2cefb930880cc6e71eeaa4969 Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Wed, 10 Jan 2024 17:08:35 +0100 Subject: [PATCH 08/52] create nbdev specific .gitconfig and .gitattributes --- .gitattributes | 1 + .gitconfig | 11 +++++++++++ 2 files changed, 12 insertions(+) create mode 100644 .gitattributes create mode 100644 .gitconfig diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..753b249 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +*.ipynb merge=nbdev-merge diff --git a/.gitconfig b/.gitconfig new file mode 100644 index 0000000..9054574 --- /dev/null +++ b/.gitconfig @@ -0,0 +1,11 @@ +# Generated by nbdev_install_hooks +# +# If you need to disable this instrumentation do: +# git config --local --unset include.path +# +# To restore: +# git config --local include.path ../.gitconfig +# +[merge "nbdev-merge"] + name = resolve conflicts with nbdev_fix + driver = nbdev_merge %O %A %B %P From 846508ec075acfe743fd5c3f9a441a41956e2739 Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Wed, 17 Jan 2024 11:40:20 +0100 Subject: [PATCH 09/52] remove *.ipynb from gitignore --- .gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitignore b/.gitignore index d722dc0..8a2fc23 100644 --- a/.gitignore 
+++ b/.gitignore @@ -134,4 +134,3 @@ Untitle*.ipynb #files *.csv -*.ipynb From bf803831ac222ff8d9d7586ac353432ab9c12912 Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Wed, 17 Jan 2024 11:40:43 +0100 Subject: [PATCH 10/52] align settings and setup --- settings.ini | 24 +++++++++++++----------- setup.py | 6 +++--- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/settings.ini b/settings.ini index 8a0f57a..2609ccb 100644 --- a/settings.ini +++ b/settings.ini @@ -3,15 +3,18 @@ # See https://github.com/fastai/nbdev/blob/master/settings.ini for examples. ### Python library ### +host=github repo = tsfeatures lib_name = %(repo)s -version = 1.0.0 -min_python = 3.9 +description = Calculates various features from time series data. Python implementation of the R package tsfeatures. +keywords = time-series feature engineering forecasting +version = 0.4.5 +min_python = 3.7 license = apache2 ### nbdev ### doc_path = _docs -lib_path = tsfeatures +lib_path = %(lib_name)s nbs_path = nbs recursive = True tst_flags = notest @@ -21,27 +24,26 @@ put_version_in_init = True branch = main custom_sidebar = False doc_host = https://%(user)s.github.io -doc_baseurl = /%(repo)s +doc_baseurl = /%(repo)s/ git_url = https://github.com/%(user)s/%(repo)s title = %(lib_name)s ### PyPI ### audience = Developers -author = Joost de Theije -author_email = info@example.com -copyright = 2023 onwards, %(author)s -description = porting tsfeature to nbdev -keywords = nbdev jupyter notebook python +author = Nixtla +author_email = business@nixtla.io +copyright = Nixtla Inc. 
language = English status = 3 user = jope35 ### Optional ### -requirements = antropy>=0.1.4 arch>=4.14 pandas>=1.0.5 scikit-learn>=0.23.1 statsmodels>=0.12.2 supersmoother>=0.4 tqdm +requirements = antropy>=0.1.4 arch>=4.11 pandas>=1.0.5 scikit-learn>=0.23.1 statsmodels>=0.13.2 supersmoother>=0.4 numba>=0.55.0 numpy>=1.21.6 tqdm dev_requirements = nbdev rpy2 # console_scripts = -black_formatting = False +black_formatting = True jupyter_hooks = True clean_ids = True clear_all = False +readme_nb = index.ipynb diff --git a/setup.py b/setup.py index a847a2b..e7f27fb 100644 --- a/setup.py +++ b/setup.py @@ -16,13 +16,13 @@ "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", ], - python_requires='>=3.7', + python_requires=">=3.7", install_requires=[ "antropy>=0.1.4", "arch>=4.11", "pandas>=1.0.5", "scikit-learn>=0.23.1", "statsmodels>=0.13.2", - "supersmoother>=0.4" - ] + "supersmoother>=0.4", + ], ) From ed0266f5a0bc94c9141095dfae9b896f67ae306a Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Wed, 17 Jan 2024 11:57:36 +0100 Subject: [PATCH 11/52] refactor: Restructure notebooks and utils Renamed and reorganized notebooks for clarity. Moved some utils to hide cells. Updated version and doc links. Added script to compare features with R. Made other minor code tweaks. 
--- nbs/{01_features.ipynb => features.ipynb} | 0 nbs/{04_m4_data.ipynb => m4_data.ipynb} | 0 nbs/nbdev.yml | 4 +- ...est_features.ipynb => test_features.ipynb} | 0 ...tures_core.ipynb => tsfeatures_core.ipynb} | 0 nbs/{00_utils.ipynb => utils.ipynb} | 74 ++++------------ scripts/compare_with_r.py | 84 +++++++++++++++++++ tsfeatures/__init__.py | 2 +- tsfeatures/_modidx.py | 2 +- tsfeatures/features.py | 56 ++++++------- tsfeatures/m4_data.py | 12 +-- tsfeatures/tsfeatures.py | 12 +-- tsfeatures/utils.py | 27 +++--- 13 files changed, 155 insertions(+), 118 deletions(-) rename nbs/{01_features.ipynb => features.ipynb} (100%) rename nbs/{04_m4_data.ipynb => m4_data.ipynb} (100%) rename nbs/{03_test_features.ipynb => test_features.ipynb} (100%) rename nbs/{02_tsfeatures_core.ipynb => tsfeatures_core.ipynb} (100%) rename nbs/{00_utils.ipynb => utils.ipynb} (93%) create mode 100644 scripts/compare_with_r.py diff --git a/nbs/01_features.ipynb b/nbs/features.ipynb similarity index 100% rename from nbs/01_features.ipynb rename to nbs/features.ipynb diff --git a/nbs/04_m4_data.ipynb b/nbs/m4_data.ipynb similarity index 100% rename from nbs/04_m4_data.ipynb rename to nbs/m4_data.ipynb diff --git a/nbs/nbdev.yml b/nbs/nbdev.yml index eb58aae..1d97e56 100644 --- a/nbs/nbdev.yml +++ b/nbs/nbdev.yml @@ -3,7 +3,7 @@ project: website: title: "tsfeatures" - site-url: "https://jope35.github.io/tsfeatures" - description: "porting tsfeature to nbdev" + site-url: "https://jope35.github.io/tsfeatures/" + description: "Calculates various features from time series data. Python implementation of the R package tsfeatures." 
repo-branch: main repo-url: "https://github.com/jope35/tsfeatures" diff --git a/nbs/03_test_features.ipynb b/nbs/test_features.ipynb similarity index 100% rename from nbs/03_test_features.ipynb rename to nbs/test_features.ipynb diff --git a/nbs/02_tsfeatures_core.ipynb b/nbs/tsfeatures_core.ipynb similarity index 100% rename from nbs/02_tsfeatures_core.ipynb rename to nbs/tsfeatures_core.ipynb diff --git a/nbs/00_utils.ipynb b/nbs/utils.ipynb similarity index 93% rename from nbs/00_utils.ipynb rename to nbs/utils.ipynb index 57ae6e8..36c10f2 100644 --- a/nbs/00_utils.ipynb +++ b/nbs/utils.ipynb @@ -33,6 +33,7 @@ } ], "source": [ + "#| hide\n", "%load_ext autoreload\n", "%autoreload 2" ] @@ -45,11 +46,18 @@ "source": [ "# |export\n", "import numpy as np\n", - "import statsmodels.api as sm\n", - "from fastcore.test import *\n", - "\n", + "import statsmodels.api as sm" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", "\n", - "# from scipy.signal import periodogram, welch" + "from fastcore.test import *" ] }, { @@ -69,6 +77,8 @@ } ], "source": [ + "# |export\n", + "\n", "np.seterr(divide=\"ignore\", invalid=\"ignore\")" ] }, @@ -374,13 +384,6 @@ " return value" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "code", "execution_count": null, @@ -567,55 +570,6 @@ "]" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - 
"cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "code", "execution_count": null, diff --git a/scripts/compare_with_r.py b/scripts/compare_with_r.py new file mode 100644 index 0000000..2a1311e --- /dev/null +++ b/scripts/compare_with_r.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python +# coding: utf-8 + +import argparse +import sys +import time + +from tsfeatures import tsfeatures +from .tsfeatures_r import tsfeatures_r +from .m4_data import prepare_m4_data +from .utils import FREQS + + +def compare_features_m4(dataset_name, directory, num_obs=1000000): + _, y_train_df, _, _ = prepare_m4_data( + dataset_name=dataset_name, directory=directory, num_obs=num_obs + ) + + freq = FREQS[dataset_name[0]] + + print("Calculating python features...") + init = time.time() + py_feats = tsfeatures(y_train_df, freq=freq).set_index("unique_id") + print("Total time: ", time.time() - init) + + print("Calculating r features...") + init = time.time() + r_feats = tsfeatures_r(y_train_df, freq=freq, parallel=True).set_index("unique_id") + print("Total time: ", time.time() - init) + + diff = py_feats.sub(r_feats, 1).abs().sum(0).sort_values() + + return diff + + +def main(args): + if args.num_obs: + num_obs = args.num_obs + else: + num_obs = 100000 + + if args.dataset_name: + datasets = [args.dataset_name] + else: + datasets = ["Daily", "Hourly", "Yearly", "Quarterly", "Weekly", "Monthly"] + + for dataset_name in datasets: + diff = compare_features_m4(dataset_name, args.results_directory, num_obs) + diff.name = "diff" + diff = diff.rename_axis("feature") + diff = diff.reset_index() + diff["diff"] = diff["diff"].map("{:.2f}".format) + save_dir = args.results_directory + "/" + dataset_name + "_comparison_" + save_dir += str(num_obs) + ".csv" + diff.to_csv(save_dir, index=False) + + print("Comparison saved at: ", save_dir) 
+ + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Get features for M4 data") + + parser.add_argument( + "--results_directory", + required=True, + type=str, + help="directory where M4 data will be downloaded", + ) + parser.add_argument( + "--num_obs", + required=False, + type=int, + help="number of M4 time series to be tested (uses all data by default)", + ) + parser.add_argument( + "--dataset_name", + required=False, + type=str, + help="type of dataset to get features", + ) + + args = parser.parse_args() + + main(args) diff --git a/tsfeatures/__init__.py b/tsfeatures/__init__.py index 5becc17..98a433b 100644 --- a/tsfeatures/__init__.py +++ b/tsfeatures/__init__.py @@ -1 +1 @@ -__version__ = "1.0.0" +__version__ = "0.4.5" diff --git a/tsfeatures/_modidx.py b/tsfeatures/_modidx.py index b0b9d4c..c4fc05b 100644 --- a/tsfeatures/_modidx.py +++ b/tsfeatures/_modidx.py @@ -1,7 +1,7 @@ # Autogenerated by nbdev d = { 'settings': { 'branch': 'main', - 'doc_baseurl': '/tsfeatures', + 'doc_baseurl': '/tsfeatures/', 'doc_host': 'https://jope35.github.io', 'git_url': 'https://github.com/jope35/tsfeatures', 'lib_path': 'tsfeatures'}, diff --git a/tsfeatures/features.py b/tsfeatures/features.py index ea7ee33..454abff 100644 --- a/tsfeatures/features.py +++ b/tsfeatures/features.py @@ -1,25 +1,25 @@ -# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/01_features.ipynb. +# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/features.ipynb. 
# %% auto 0 __all__ = ['acf_features', 'arch_stat', 'count_entropy', 'crossing_points', 'entropy', 'flat_spots', 'frequency', 'guerrero', 'heterogeneity', 'holt_parameters', 'hurst', 'hw_parameters', 'intervals', 'lumpiness', 'nonlinearity', 'pacf_features', 'series_length', 'sparsity', 'stability', 'stl_features', 'unitroot_kpss', 'unitroot_pp'] -# %% ../nbs/01_features.ipynb 3 +# %% ../nbs/features.ipynb 3 import warnings -# %% ../nbs/01_features.ipynb 4 +# %% ../nbs/features.ipynb 4 warnings.warn = lambda *a, **kw: False -# %% ../nbs/01_features.ipynb 5 +# %% ../nbs/features.ipynb 5 import os -# %% ../nbs/01_features.ipynb 6 +# %% ../nbs/features.ipynb 6 os.environ["MKL_NUM_THREADS"] = "1" os.environ["NUMEXPR_NUM_THREADS"] = "1" os.environ["OMP_NUM_THREADS"] = "1" -# %% ../nbs/01_features.ipynb 7 +# %% ../nbs/features.ipynb 7 from itertools import groupby from math import e # maybe change with numpy e from typing import Dict @@ -46,7 +46,7 @@ ur_pp, ) -# %% ../nbs/01_features.ipynb 8 +# %% ../nbs/features.ipynb 8 def acf_features(x: np.array, freq: int = 1) -> Dict[str, float]: """Calculates autocorrelation function features. @@ -112,7 +112,7 @@ def acf_features(x: np.array, freq: int = 1) -> Dict[str, float]: return output -# %% ../nbs/01_features.ipynb 11 +# %% ../nbs/features.ipynb 11 def arch_stat( x: np.array, freq: int = 1, lags: int = 12, demean: bool = True ) -> Dict[str, float]: @@ -147,7 +147,7 @@ def arch_stat( return {"arch_lm": r_squared} -# %% ../nbs/01_features.ipynb 14 +# %% ../nbs/features.ipynb 14 def count_entropy(x: np.array, freq: int = 1) -> Dict[str, float]: """Count entropy. @@ -168,7 +168,7 @@ def count_entropy(x: np.array, freq: int = 1) -> Dict[str, float]: return {"count_entropy": entropy} -# %% ../nbs/01_features.ipynb 15 +# %% ../nbs/features.ipynb 15 def crossing_points(x: np.array, freq: int = 1) -> Dict[str, float]: """Crossing points. 
@@ -193,7 +193,7 @@ def crossing_points(x: np.array, freq: int = 1) -> Dict[str, float]: return {"crossing_points": cross.sum()} -# %% ../nbs/01_features.ipynb 16 +# %% ../nbs/features.ipynb 16 def entropy(x: np.array, freq: int = 1, base: float = e) -> Dict[str, float]: """Calculates sample entropy. @@ -217,7 +217,7 @@ def entropy(x: np.array, freq: int = 1, base: float = e) -> Dict[str, float]: return {"entropy": entropy} -# %% ../nbs/01_features.ipynb 17 +# %% ../nbs/features.ipynb 17 def flat_spots(x: np.array, freq: int = 1) -> Dict[str, float]: """Flat spots. @@ -240,7 +240,7 @@ def flat_spots(x: np.array, freq: int = 1) -> Dict[str, float]: rlex = np.array([sum(1 for i in g) for k, g in groupby(cutx)]).max() -# %% ../nbs/01_features.ipynb 18 +# %% ../nbs/features.ipynb 18 def frequency(x: np.array, freq: int = 1) -> Dict[str, float]: """Frequency. @@ -259,7 +259,7 @@ def frequency(x: np.array, freq: int = 1) -> Dict[str, float]: return {"frequency": freq} -# %% ../nbs/01_features.ipynb 19 +# %% ../nbs/features.ipynb 19 def guerrero( x: np.array, freq: int = 1, lower: int = -1, upper: int = 2 ) -> Dict[str, float]: @@ -294,7 +294,7 @@ def guerrero( return {"guerrero": min_} -# %% ../nbs/01_features.ipynb 20 +# %% ../nbs/features.ipynb 20 def heterogeneity(x: np.array, freq: int = 1) -> Dict[str, float]: """Heterogeneity. @@ -357,7 +357,7 @@ def heterogeneity(x: np.array, freq: int = 1) -> Dict[str, float]: return output -# %% ../nbs/01_features.ipynb 21 +# %% ../nbs/features.ipynb 21 def holt_parameters(x: np.array, freq: int = 1) -> Dict[str, float]: """Fitted parameters of a Holt model. @@ -385,7 +385,7 @@ def holt_parameters(x: np.array, freq: int = 1) -> Dict[str, float]: return params -# %% ../nbs/01_features.ipynb 22 +# %% ../nbs/features.ipynb 22 def hurst(x: np.array, freq: int = 1) -> Dict[str, float]: """Hurst index. 
@@ -408,7 +408,7 @@ def hurst(x: np.array, freq: int = 1) -> Dict[str, float]: return {"hurst": hurst_index} -# %% ../nbs/01_features.ipynb 23 +# %% ../nbs/features.ipynb 23 def hw_parameters(x: np.array, freq: int = 1) -> Dict[str, float]: """Fitted parameters of a Holt-Winters model. @@ -440,7 +440,7 @@ def hw_parameters(x: np.array, freq: int = 1) -> Dict[str, float]: return params -# %% ../nbs/01_features.ipynb 24 +# %% ../nbs/features.ipynb 24 def intervals(x: np.array, freq: int = 1) -> Dict[str, float]: """Intervals with demand. @@ -464,7 +464,7 @@ def intervals(x: np.array, freq: int = 1) -> Dict[str, float]: return {"intervals_mean": np.mean(y), "intervals_sd": np.std(y, ddof=1)} -# %% ../nbs/01_features.ipynb 25 +# %% ../nbs/features.ipynb 25 def lumpiness(x: np.array, freq: int = 1) -> Dict[str, float]: """lumpiness. @@ -498,7 +498,7 @@ def lumpiness(x: np.array, freq: int = 1) -> Dict[str, float]: return {"lumpiness": lumpiness} -# %% ../nbs/01_features.ipynb 26 +# %% ../nbs/features.ipynb 26 def nonlinearity(x: np.array, freq: int = 1) -> Dict[str, float]: """Nonlinearity. @@ -523,7 +523,7 @@ def nonlinearity(x: np.array, freq: int = 1) -> Dict[str, float]: return {"nonlinearity": test} -# %% ../nbs/01_features.ipynb 27 +# %% ../nbs/features.ipynb 27 def pacf_features(x: np.array, freq: int = 1) -> Dict[str, float]: """Calculates partial autocorrelation function features. @@ -594,7 +594,7 @@ def pacf_features(x: np.array, freq: int = 1) -> Dict[str, float]: return output -# %% ../nbs/01_features.ipynb 28 +# %% ../nbs/features.ipynb 28 def series_length(x: np.array, freq: int = 1) -> Dict[str, float]: """Series length. @@ -613,7 +613,7 @@ def series_length(x: np.array, freq: int = 1) -> Dict[str, float]: return {"series_length": len(x)} -# %% ../nbs/01_features.ipynb 29 +# %% ../nbs/features.ipynb 29 def sparsity(x: np.array, freq: int = 1) -> Dict[str, float]: """Sparsity. 
@@ -632,7 +632,7 @@ def sparsity(x: np.array, freq: int = 1) -> Dict[str, float]: return {"sparsity": np.mean(x == 0)} -# %% ../nbs/01_features.ipynb 30 +# %% ../nbs/features.ipynb 30 def stability(x: np.array, freq: int = 1) -> Dict[str, float]: """Stability. @@ -666,7 +666,7 @@ def stability(x: np.array, freq: int = 1) -> Dict[str, float]: return {"stability": stability} -# %% ../nbs/01_features.ipynb 31 +# %% ../nbs/features.ipynb 31 def stl_features(x: np.array, freq: int = 1) -> Dict[str, float]: """Calculates seasonal trend using loess decomposition. @@ -807,7 +807,7 @@ def stl_features(x: np.array, freq: int = 1) -> Dict[str, float]: return output -# %% ../nbs/01_features.ipynb 32 +# %% ../nbs/features.ipynb 32 def unitroot_kpss(x: np.array, freq: int = 1) -> Dict[str, float]: """Unit root kpss. @@ -833,7 +833,7 @@ def unitroot_kpss(x: np.array, freq: int = 1) -> Dict[str, float]: return {"unitroot_kpss": test_kpss} -# %% ../nbs/01_features.ipynb 33 +# %% ../nbs/features.ipynb 33 def unitroot_pp(x: np.array, freq: int = 1) -> Dict[str, float]: """Unit root pp. diff --git a/tsfeatures/m4_data.py b/tsfeatures/m4_data.py index 746f202..742464c 100644 --- a/tsfeatures/m4_data.py +++ b/tsfeatures/m4_data.py @@ -1,9 +1,9 @@ -# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/04_m4_data.ipynb. +# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/m4_data.ipynb. # %% auto 0 __all__ = ['seas_dict', 'SOURCE_URL', 'maybe_download', 'm4_parser', 'prepare_m4_data'] -# %% ../nbs/04_m4_data.ipynb 3 +# %% ../nbs/m4_data.ipynb 3 import os import urllib @@ -18,12 +18,12 @@ "Yearly": {"seasonality": 1, "input_size": 4, "output_size": 6, "freq": "D"}, } -# %% ../nbs/04_m4_data.ipynb 4 +# %% ../nbs/m4_data.ipynb 4 SOURCE_URL = ( "https://raw.githubusercontent.com/Mcompetitions/M4-methods/master/Dataset/" ) -# %% ../nbs/04_m4_data.ipynb 5 +# %% ../nbs/m4_data.ipynb 5 def maybe_download(filename, directory): """Download the data from M4's website, unless it's already here. 
@@ -51,7 +51,7 @@ def maybe_download(filename, directory): return filepath -# %% ../nbs/04_m4_data.ipynb 6 +# %% ../nbs/m4_data.ipynb 6 def m4_parser(dataset_name, directory, num_obs=1000000): """Transform M4 data into a panel. @@ -135,7 +135,7 @@ def m4_parser(dataset_name, directory, num_obs=1000000): return X_train_df, y_train_df, X_test_df, y_test_df -# %% ../nbs/04_m4_data.ipynb 7 +# %% ../nbs/m4_data.ipynb 7 def prepare_m4_data(dataset_name, directory, num_obs): """Pipeline that obtains M4 times series, tranforms it and gets naive2 predictions. diff --git a/tsfeatures/tsfeatures.py b/tsfeatures/tsfeatures.py index 6a1e0fb..60e3eca 100644 --- a/tsfeatures/tsfeatures.py +++ b/tsfeatures/tsfeatures.py @@ -1,20 +1,20 @@ -# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/02_tsfeatures_core.ipynb. +# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/tsfeatures_core.ipynb. # %% auto 0 __all__ = ['tsfeatures'] -# %% ../nbs/02_tsfeatures_core.ipynb 3 +# %% ../nbs/tsfeatures_core.ipynb 3 import os import warnings -# %% ../nbs/02_tsfeatures_core.ipynb 4 +# %% ../nbs/tsfeatures_core.ipynb 4 warnings.warn = lambda *a, **kw: False os.environ["MKL_NUM_THREADS"] = "1" os.environ["NUMEXPR_NUM_THREADS"] = "1" os.environ["OMP_NUM_THREADS"] = "1" -# %% ../nbs/02_tsfeatures_core.ipynb 5 +# %% ../nbs/tsfeatures_core.ipynb 5 from collections import ChainMap from functools import partial from multiprocessing import Pool @@ -25,7 +25,7 @@ from .features import * from .utils import * -# %% ../nbs/02_tsfeatures_core.ipynb 6 +# %% ../nbs/tsfeatures_core.ipynb 6 def _get_feats( index, ts, @@ -86,7 +86,7 @@ def _get_feats( return pd.DataFrame(dict(c_map), index=[index]) -# %% ../nbs/02_tsfeatures_core.ipynb 7 +# %% ../nbs/tsfeatures_core.ipynb 7 def tsfeatures( ts: pd.DataFrame, freq: Optional[int] = None, diff --git a/tsfeatures/utils.py b/tsfeatures/utils.py index 7bb7e37..d45c250 100644 --- a/tsfeatures/utils.py +++ b/tsfeatures/utils.py @@ -1,21 +1,20 @@ -# AUTOGENERATED! DO NOT EDIT! 
File to edit: ../nbs/00_utils.ipynb. +# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/utils.ipynb. # %% auto 0 __all__ = ['FREQS', 'WWWusage', 'USAccDeaths', 'scalets', 'poly', 'embed', 'terasvirta_test', 'hurst_exponent', 'ur_pp', 'lambda_coef_var'] -# %% ../nbs/00_utils.ipynb 3 +# %% ../nbs/utils.ipynb 3 import numpy as np import statsmodels.api as sm -from fastcore.test import * +# %% ../nbs/utils.ipynb 5 +np.seterr(divide="ignore", invalid="ignore") -# from scipy.signal import periodogram, welch - -# %% ../nbs/00_utils.ipynb 5 +# %% ../nbs/utils.ipynb 6 FREQS = {"H": 24, "D": 1, "M": 12, "Q": 4, "W": 1, "Y": 1} -# %% ../nbs/00_utils.ipynb 6 +# %% ../nbs/utils.ipynb 7 def scalets(x: np.array) -> np.array: """Mean-std scale a time series. @@ -33,7 +32,7 @@ def scalets(x: np.array) -> np.array: """ return (x - x.mean()) / x.std(ddof=1) -# %% ../nbs/00_utils.ipynb 7 +# %% ../nbs/utils.ipynb 8 def poly(x: np.array, p: int) -> np.array: """Returns or evaluates orthogonal polynomials of degree 1 to degree over the specified set of points x: @@ -54,7 +53,7 @@ def poly(x: np.array, p: int) -> np.array: return np.linalg.qr(X)[0][:, 1:] -# %% ../nbs/00_utils.ipynb 8 +# %% ../nbs/utils.ipynb 9 def embed(x: np.array, p: int) -> np.array: """Embeds the time series x into a low-dimensional Euclidean space. @@ -72,7 +71,7 @@ def embed(x: np.array, p: int) -> np.array: x = np.transpose(np.vstack([np.roll(x, k) for k in range(p)])) return x[p - 1 :] -# %% ../nbs/00_utils.ipynb 9 +# %% ../nbs/utils.ipynb 10 def terasvirta_test(x: np.array, lag: int = 1, scale: bool = True) -> float: """Generically computes Teraesvirta's neural network test for neglected nonlinearity either for the time series x or the regression y~x. @@ -137,7 +136,7 @@ def terasvirta_test(x: np.array, lag: int = 1, scale: bool = True) -> float: return stat -# %% ../nbs/00_utils.ipynb 10 +# %% ../nbs/utils.ipynb 11 def hurst_exponent(x: np.array) -> float: """Computes hurst exponent. 
@@ -169,7 +168,7 @@ def hurst_exponent(x: np.array) -> float: return hurst_exponent -# %% ../nbs/00_utils.ipynb 11 +# %% ../nbs/utils.ipynb 12 def ur_pp(x: np.array) -> float: """Performs the Phillips and Perron unit root test. @@ -221,7 +220,7 @@ def ur_pp(x: np.array) -> float: return test_stat -# %% ../nbs/00_utils.ipynb 12 +# %% ../nbs/utils.ipynb 13 def lambda_coef_var(lambda_par: float, x: np.array, period: int = 2): """Calculates coefficient of variation for subseries of x. @@ -257,7 +256,7 @@ def lambda_coef_var(lambda_par: float, x: np.array, period: int = 2): return value -# %% ../nbs/00_utils.ipynb 14 +# %% ../nbs/utils.ipynb 14 WWWusage = [ 88, 84, From ddabac9b93155de61ec57b4a5083760e631afdcc Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Wed, 17 Jan 2024 12:00:26 +0100 Subject: [PATCH 12/52] update settings with nixtla user --- nbs/nbdev.yml | 4 ++-- settings.ini | 2 +- tsfeatures/_modidx.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/nbs/nbdev.yml b/nbs/nbdev.yml index 1d97e56..9085f8f 100644 --- a/nbs/nbdev.yml +++ b/nbs/nbdev.yml @@ -3,7 +3,7 @@ project: website: title: "tsfeatures" - site-url: "https://jope35.github.io/tsfeatures/" + site-url: "https://Nixtla.github.io/tsfeatures/" description: "Calculates various features from time series data. Python implementation of the R package tsfeatures." repo-branch: main - repo-url: "https://github.com/jope35/tsfeatures" + repo-url: "https://github.com/Nixtla/tsfeatures" diff --git a/settings.ini b/settings.ini index 2609ccb..d1515bf 100644 --- a/settings.ini +++ b/settings.ini @@ -35,7 +35,7 @@ author_email = business@nixtla.io copyright = Nixtla Inc. 
language = English status = 3 -user = jope35 +user = Nixtla ### Optional ### requirements = antropy>=0.1.4 arch>=4.11 pandas>=1.0.5 scikit-learn>=0.23.1 statsmodels>=0.13.2 supersmoother>=0.4 numba>=0.55.0 numpy>=1.21.6 tqdm diff --git a/tsfeatures/_modidx.py b/tsfeatures/_modidx.py index c4fc05b..4313959 100644 --- a/tsfeatures/_modidx.py +++ b/tsfeatures/_modidx.py @@ -2,8 +2,8 @@ d = { 'settings': { 'branch': 'main', 'doc_baseurl': '/tsfeatures/', - 'doc_host': 'https://jope35.github.io', - 'git_url': 'https://github.com/jope35/tsfeatures', + 'doc_host': 'https://Nixtla.github.io', + 'git_url': 'https://github.com/Nixtla/tsfeatures', 'lib_path': 'tsfeatures'}, 'syms': { 'tsfeatures.features': { 'tsfeatures.features.acf_features': ('features.html#acf_features', 'tsfeatures/features.py'), 'tsfeatures.features.arch_stat': ('features.html#arch_stat', 'tsfeatures/features.py'), From 5d420e01724ca22d1375c04a89e1b6a7dfe8c63e Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Sun, 4 Feb 2024 19:41:02 +0100 Subject: [PATCH 13/52] merged core and featrues into one notebook --- .gitignore | 3 + README.md | 1 + nbs/test_features.ipynb | 1 - nbs/{features.ipynb => tsfeatures.ipynb} | 181 ++++- nbs/tsfeatures_core.ipynb | 257 ------- tsfeatures/_modidx.py | 52 +- tsfeatures/features.py | 857 ----------------------- tsfeatures/tsfeatures.py | 856 +++++++++++++++++++++- 8 files changed, 1036 insertions(+), 1172 deletions(-) rename nbs/{features.ipynb => tsfeatures.ipynb} (87%) delete mode 100644 nbs/tsfeatures_core.ipynb delete mode 100644 tsfeatures/features.py diff --git a/.gitignore b/.gitignore index 8a2fc23..183357a 100644 --- a/.gitignore +++ b/.gitignore @@ -134,3 +134,6 @@ Untitle*.ipynb #files *.csv + + +_proc \ No newline at end of file diff --git a/README.md b/README.md index eadb806..92c9572 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@ # tsfeatures + # tsfeatures diff --git a/nbs/test_features.ipynb 
b/nbs/test_features.ipynb index 604063a..9cd8979 100644 --- a/nbs/test_features.ipynb +++ b/nbs/test_features.ipynb @@ -25,7 +25,6 @@ "metadata": {}, "outputs": [], "source": [ - "from tsfeatures.features import *\n", "from tsfeatures.m4_data import *\n", "from tsfeatures.tsfeatures import *\n", "from tsfeatures.utils import *" diff --git a/nbs/features.ipynb b/nbs/tsfeatures.ipynb similarity index 87% rename from nbs/features.ipynb rename to nbs/tsfeatures.ipynb index d33d05f..5789681 100644 --- a/nbs/features.ipynb +++ b/nbs/tsfeatures.ipynb @@ -15,7 +15,7 @@ "metadata": {}, "outputs": [], "source": [ - "# |default_exp features\n" + "# |default_exp tsfeatures\n" ] }, { @@ -35,7 +35,14 @@ "outputs": [], "source": [ "# |export\n", - "import warnings" + "import os\n", + "import warnings\n", + "from collections import ChainMap\n", + "from functools import partial\n", + "from multiprocessing import Pool\n", + "from typing import Callable, Dict, List, Optional\n", + "\n", + "import pandas as pd" ] }, { @@ -48,16 +55,6 @@ "warnings.warn = lambda *a, **kw: False" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# |export\n", - "import os" - ] - }, { "cell_type": "code", "execution_count": null, @@ -94,14 +91,18 @@ "from statsmodels.tsa.stattools import acf, kpss, pacf\n", "from supersmoother import SuperSmoother\n", "\n", - "from tsfeatures.utils import (\n", - " embed,\n", - " hurst_exponent,\n", - " lambda_coef_var,\n", - " poly,\n", - " terasvirta_test,\n", - " ur_pp,\n", - ")" + "from tsfeatures.utils import *" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# |export\n", + "\n", + "FREQS = {\"H\": 24, \"D\": 1, \"M\": 12, \"Q\": 4, \"W\": 1, \"Y\": 1}" ] }, { @@ -1193,6 +1194,146 @@ " return {\"unitroot_pp\": test_pp}" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# 
|export\n", + "def _get_feats(\n", + " index,\n", + " ts,\n", + " freq,\n", + " scale=True,\n", + " features=[\n", + " acf_features,\n", + " arch_stat,\n", + " crossing_points,\n", + " entropy,\n", + " flat_spots,\n", + " heterogeneity,\n", + " holt_parameters,\n", + " lumpiness,\n", + " nonlinearity,\n", + " pacf_features,\n", + " stl_features,\n", + " stability,\n", + " hw_parameters,\n", + " unitroot_kpss,\n", + " unitroot_pp,\n", + " series_length,\n", + " hurst,\n", + " ],\n", + " dict_freqs=FREQS,\n", + "):\n", + " print(\"dict_freq\")\n", + " if freq is None:\n", + " inf_freq = pd.infer_freq(ts[\"ds\"])\n", + " if inf_freq is None:\n", + " raise Exception(\n", + " \"Failed to infer frequency from the `ds` column, \"\n", + " \"please provide the frequency using the `freq` argument.\"\n", + " )\n", + "\n", + " freq = dict_freqs.get(inf_freq)\n", + " if freq is None:\n", + " raise Exception(\n", + " \"Error trying to convert infered frequency from the `ds` column \"\n", + " \"to integer. Please provide a dictionary with that frequency \"\n", + " \"as key and the integer frequency as value. 
\"\n", + " f\"Infered frequency: {inf_freq}\"\n", + " )\n", + "\n", + " if isinstance(ts, pd.DataFrame):\n", + " assert \"y\" in ts.columns\n", + " ts = ts[\"y\"].values\n", + "\n", + " if isinstance(ts, pd.Series):\n", + " ts = ts.values\n", + "\n", + " if scale:\n", + " ts = scalets(ts)\n", + "\n", + " c_map = ChainMap(\n", + " *[dict_feat for dict_feat in [func(ts, freq) for func in features]]\n", + " )\n", + "\n", + " return pd.DataFrame(dict(c_map), index=[index])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# |export\n", + "def tsfeatures(\n", + " ts: pd.DataFrame,\n", + " freq: Optional[int] = None,\n", + " features: List[Callable] = [\n", + " acf_features,\n", + " arch_stat,\n", + " crossing_points,\n", + " entropy,\n", + " flat_spots,\n", + " heterogeneity,\n", + " holt_parameters,\n", + " lumpiness,\n", + " nonlinearity,\n", + " pacf_features,\n", + " stl_features,\n", + " stability,\n", + " hw_parameters,\n", + " unitroot_kpss,\n", + " unitroot_pp,\n", + " series_length,\n", + " hurst,\n", + " ],\n", + " dict_freqs: Dict[str, int] = FREQS,\n", + " scale: bool = True,\n", + " threads: Optional[int] = None,\n", + ") -> pd.DataFrame:\n", + " \"\"\"Calculates features for time series.\n", + "\n", + " Parameters\n", + " ----------\n", + " ts: pandas df\n", + " Pandas DataFrame with columns ['unique_id', 'ds', 'y'].\n", + " Long panel of time series.\n", + " freq: int\n", + " Frequency of the time series. If None the frequency of\n", + " each time series is infered and assigns the seasonal periods according to\n", + " dict_freqs.\n", + " features: iterable\n", + " Iterable of features functions.\n", + " scale: bool\n", + " Whether (mean-std)scale data.\n", + " dict_freqs: dict\n", + " Dictionary that maps string frequency of int. Ex: {'D': 7, 'W': 1}\n", + " threads: int\n", + " Number of threads to use. 
Use None (default) for parallel processing.\n", + "\n", + " Returns\n", + " -------\n", + " pandas df\n", + " Pandas DataFrame where each column is a feature and each row\n", + " a time series.\n", + " \"\"\"\n", + " partial_get_feats = partial(\n", + " _get_feats, freq=freq, scale=scale, features=features, dict_freqs=dict_freqs\n", + " )\n", + "\n", + " with Pool(threads) as pool:\n", + " ts_features = pool.starmap(partial_get_feats, ts.groupby(\"unique_id\"))\n", + "\n", + " ts_features = pd.concat(ts_features).rename_axis(\"unique_id\")\n", + " ts_features = ts_features.reset_index()\n", + "\n", + " return ts_features" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/nbs/tsfeatures_core.ipynb b/nbs/tsfeatures_core.ipynb deleted file mode 100644 index c6883ae..0000000 --- a/nbs/tsfeatures_core.ipynb +++ /dev/null @@ -1,257 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# core\n", - "\n", - "> Fill in a module description here" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# |default_exp tsfeatures" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The autoreload extension is already loaded. 
To reload it, use:\n", - " %reload_ext autoreload\n" - ] - } - ], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# |export\n", - "import os\n", - "import warnings" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# |export\n", - "\n", - "warnings.warn = lambda *a, **kw: False\n", - "\n", - "os.environ[\"MKL_NUM_THREADS\"] = \"1\"\n", - "os.environ[\"NUMEXPR_NUM_THREADS\"] = \"1\"\n", - "os.environ[\"OMP_NUM_THREADS\"] = \"1\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# |export\n", - "\n", - "from collections import ChainMap\n", - "from functools import partial\n", - "from multiprocessing import Pool\n", - "from typing import Callable, Dict, List, Optional\n", - "\n", - "import pandas as pd\n", - "\n", - "from tsfeatures.features import *\n", - "from tsfeatures.utils import *" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# |export\n", - "def _get_feats(\n", - " index,\n", - " ts,\n", - " freq,\n", - " scale=True,\n", - " features=[\n", - " acf_features,\n", - " arch_stat,\n", - " crossing_points,\n", - " entropy,\n", - " flat_spots,\n", - " heterogeneity,\n", - " holt_parameters,\n", - " lumpiness,\n", - " nonlinearity,\n", - " pacf_features,\n", - " stl_features,\n", - " stability,\n", - " hw_parameters,\n", - " unitroot_kpss,\n", - " unitroot_pp,\n", - " series_length,\n", - " hurst,\n", - " ],\n", - " dict_freqs=FREQS,\n", - "):\n", - " print(\"dict_freq\")\n", - " if freq is None:\n", - " inf_freq = pd.infer_freq(ts[\"ds\"])\n", - " if inf_freq is None:\n", - " raise Exception(\n", - " \"Failed to infer frequency from the `ds` column, \"\n", - " \"please provide the frequency using the `freq` argument.\"\n", - " )\n", - "\n", - " 
freq = dict_freqs.get(inf_freq)\n", - " if freq is None:\n", - " raise Exception(\n", - " \"Error trying to convert infered frequency from the `ds` column \"\n", - " \"to integer. Please provide a dictionary with that frequency \"\n", - " \"as key and the integer frequency as value. \"\n", - " f\"Infered frequency: {inf_freq}\"\n", - " )\n", - "\n", - " if isinstance(ts, pd.DataFrame):\n", - " assert \"y\" in ts.columns\n", - " ts = ts[\"y\"].values\n", - "\n", - " if isinstance(ts, pd.Series):\n", - " ts = ts.values\n", - "\n", - " if scale:\n", - " ts = scalets(ts)\n", - "\n", - " c_map = ChainMap(\n", - " *[dict_feat for dict_feat in [func(ts, freq) for func in features]]\n", - " )\n", - "\n", - " return pd.DataFrame(dict(c_map), index=[index])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# |export\n", - "def tsfeatures(\n", - " ts: pd.DataFrame,\n", - " freq: Optional[int] = None,\n", - " features: List[Callable] = [\n", - " acf_features,\n", - " arch_stat,\n", - " crossing_points,\n", - " entropy,\n", - " flat_spots,\n", - " heterogeneity,\n", - " holt_parameters,\n", - " lumpiness,\n", - " nonlinearity,\n", - " pacf_features,\n", - " stl_features,\n", - " stability,\n", - " hw_parameters,\n", - " unitroot_kpss,\n", - " unitroot_pp,\n", - " series_length,\n", - " hurst,\n", - " ],\n", - " dict_freqs: Dict[str, int] = FREQS,\n", - " scale: bool = True,\n", - " threads: Optional[int] = None,\n", - ") -> pd.DataFrame:\n", - " \"\"\"Calculates features for time series.\n", - "\n", - " Parameters\n", - " ----------\n", - " ts: pandas df\n", - " Pandas DataFrame with columns ['unique_id', 'ds', 'y'].\n", - " Long panel of time series.\n", - " freq: int\n", - " Frequency of the time series. 
If None the frequency of\n", - " each time series is infered and assigns the seasonal periods according to\n", - " dict_freqs.\n", - " features: iterable\n", - " Iterable of features functions.\n", - " scale: bool\n", - " Whether (mean-std)scale data.\n", - " dict_freqs: dict\n", - " Dictionary that maps string frequency of int. Ex: {'D': 7, 'W': 1}\n", - " threads: int\n", - " Number of threads to use. Use None (default) for parallel processing.\n", - "\n", - " Returns\n", - " -------\n", - " pandas df\n", - " Pandas DataFrame where each column is a feature and each row\n", - " a time series.\n", - " \"\"\"\n", - " partial_get_feats = partial(\n", - " _get_feats, freq=freq, scale=scale, features=features, dict_freqs=dict_freqs\n", - " )\n", - "\n", - " with Pool(threads) as pool:\n", - " ts_features = pool.starmap(partial_get_feats, ts.groupby(\"unique_id\"))\n", - "\n", - " ts_features = pd.concat(ts_features).rename_axis(\"unique_id\")\n", - " ts_features = ts_features.reset_index()\n", - "\n", - " return ts_features" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# |hide\n", - "from nbdev.showdoc import *" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# |hide\n", - "import nbdev\n", - "\n", - "nbdev.nbdev_export()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "python3", - "language": "python", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/tsfeatures/_modidx.py b/tsfeatures/_modidx.py index 4313959..c58ed29 100644 --- a/tsfeatures/_modidx.py +++ b/tsfeatures/_modidx.py @@ -5,33 +5,35 @@ 'doc_host': 'https://Nixtla.github.io', 'git_url': 'https://github.com/Nixtla/tsfeatures', 'lib_path': 'tsfeatures'}, - 'syms': { 'tsfeatures.features': { 'tsfeatures.features.acf_features': ('features.html#acf_features', 'tsfeatures/features.py'), - 'tsfeatures.features.arch_stat': 
('features.html#arch_stat', 'tsfeatures/features.py'), - 'tsfeatures.features.count_entropy': ('features.html#count_entropy', 'tsfeatures/features.py'), - 'tsfeatures.features.crossing_points': ('features.html#crossing_points', 'tsfeatures/features.py'), - 'tsfeatures.features.entropy': ('features.html#entropy', 'tsfeatures/features.py'), - 'tsfeatures.features.flat_spots': ('features.html#flat_spots', 'tsfeatures/features.py'), - 'tsfeatures.features.frequency': ('features.html#frequency', 'tsfeatures/features.py'), - 'tsfeatures.features.guerrero': ('features.html#guerrero', 'tsfeatures/features.py'), - 'tsfeatures.features.heterogeneity': ('features.html#heterogeneity', 'tsfeatures/features.py'), - 'tsfeatures.features.holt_parameters': ('features.html#holt_parameters', 'tsfeatures/features.py'), - 'tsfeatures.features.hurst': ('features.html#hurst', 'tsfeatures/features.py'), - 'tsfeatures.features.hw_parameters': ('features.html#hw_parameters', 'tsfeatures/features.py'), - 'tsfeatures.features.intervals': ('features.html#intervals', 'tsfeatures/features.py'), - 'tsfeatures.features.lumpiness': ('features.html#lumpiness', 'tsfeatures/features.py'), - 'tsfeatures.features.nonlinearity': ('features.html#nonlinearity', 'tsfeatures/features.py'), - 'tsfeatures.features.pacf_features': ('features.html#pacf_features', 'tsfeatures/features.py'), - 'tsfeatures.features.series_length': ('features.html#series_length', 'tsfeatures/features.py'), - 'tsfeatures.features.sparsity': ('features.html#sparsity', 'tsfeatures/features.py'), - 'tsfeatures.features.stability': ('features.html#stability', 'tsfeatures/features.py'), - 'tsfeatures.features.stl_features': ('features.html#stl_features', 'tsfeatures/features.py'), - 'tsfeatures.features.unitroot_kpss': ('features.html#unitroot_kpss', 'tsfeatures/features.py'), - 'tsfeatures.features.unitroot_pp': ('features.html#unitroot_pp', 'tsfeatures/features.py')}, - 'tsfeatures.m4_data': { 'tsfeatures.m4_data.m4_parser': 
('m4_data.html#m4_parser', 'tsfeatures/m4_data.py'), + 'syms': { 'tsfeatures.m4_data': { 'tsfeatures.m4_data.m4_parser': ('m4_data.html#m4_parser', 'tsfeatures/m4_data.py'), 'tsfeatures.m4_data.maybe_download': ('m4_data.html#maybe_download', 'tsfeatures/m4_data.py'), 'tsfeatures.m4_data.prepare_m4_data': ('m4_data.html#prepare_m4_data', 'tsfeatures/m4_data.py')}, - 'tsfeatures.tsfeatures': { 'tsfeatures.tsfeatures._get_feats': ('tsfeatures_core.html#_get_feats', 'tsfeatures/tsfeatures.py'), - 'tsfeatures.tsfeatures.tsfeatures': ('tsfeatures_core.html#tsfeatures', 'tsfeatures/tsfeatures.py')}, + 'tsfeatures.tsfeatures': { 'tsfeatures.tsfeatures._get_feats': ('tsfeatures.html#_get_feats', 'tsfeatures/tsfeatures.py'), + 'tsfeatures.tsfeatures.acf_features': ('tsfeatures.html#acf_features', 'tsfeatures/tsfeatures.py'), + 'tsfeatures.tsfeatures.arch_stat': ('tsfeatures.html#arch_stat', 'tsfeatures/tsfeatures.py'), + 'tsfeatures.tsfeatures.count_entropy': ('tsfeatures.html#count_entropy', 'tsfeatures/tsfeatures.py'), + 'tsfeatures.tsfeatures.crossing_points': ( 'tsfeatures.html#crossing_points', + 'tsfeatures/tsfeatures.py'), + 'tsfeatures.tsfeatures.entropy': ('tsfeatures.html#entropy', 'tsfeatures/tsfeatures.py'), + 'tsfeatures.tsfeatures.flat_spots': ('tsfeatures.html#flat_spots', 'tsfeatures/tsfeatures.py'), + 'tsfeatures.tsfeatures.frequency': ('tsfeatures.html#frequency', 'tsfeatures/tsfeatures.py'), + 'tsfeatures.tsfeatures.guerrero': ('tsfeatures.html#guerrero', 'tsfeatures/tsfeatures.py'), + 'tsfeatures.tsfeatures.heterogeneity': ('tsfeatures.html#heterogeneity', 'tsfeatures/tsfeatures.py'), + 'tsfeatures.tsfeatures.holt_parameters': ( 'tsfeatures.html#holt_parameters', + 'tsfeatures/tsfeatures.py'), + 'tsfeatures.tsfeatures.hurst': ('tsfeatures.html#hurst', 'tsfeatures/tsfeatures.py'), + 'tsfeatures.tsfeatures.hw_parameters': ('tsfeatures.html#hw_parameters', 'tsfeatures/tsfeatures.py'), + 'tsfeatures.tsfeatures.intervals': ('tsfeatures.html#intervals', 
'tsfeatures/tsfeatures.py'), + 'tsfeatures.tsfeatures.lumpiness': ('tsfeatures.html#lumpiness', 'tsfeatures/tsfeatures.py'), + 'tsfeatures.tsfeatures.nonlinearity': ('tsfeatures.html#nonlinearity', 'tsfeatures/tsfeatures.py'), + 'tsfeatures.tsfeatures.pacf_features': ('tsfeatures.html#pacf_features', 'tsfeatures/tsfeatures.py'), + 'tsfeatures.tsfeatures.series_length': ('tsfeatures.html#series_length', 'tsfeatures/tsfeatures.py'), + 'tsfeatures.tsfeatures.sparsity': ('tsfeatures.html#sparsity', 'tsfeatures/tsfeatures.py'), + 'tsfeatures.tsfeatures.stability': ('tsfeatures.html#stability', 'tsfeatures/tsfeatures.py'), + 'tsfeatures.tsfeatures.stl_features': ('tsfeatures.html#stl_features', 'tsfeatures/tsfeatures.py'), + 'tsfeatures.tsfeatures.tsfeatures': ('tsfeatures.html#tsfeatures', 'tsfeatures/tsfeatures.py'), + 'tsfeatures.tsfeatures.unitroot_kpss': ('tsfeatures.html#unitroot_kpss', 'tsfeatures/tsfeatures.py'), + 'tsfeatures.tsfeatures.unitroot_pp': ('tsfeatures.html#unitroot_pp', 'tsfeatures/tsfeatures.py')}, 'tsfeatures.utils': { 'tsfeatures.utils.embed': ('utils.html#embed', 'tsfeatures/utils.py'), 'tsfeatures.utils.hurst_exponent': ('utils.html#hurst_exponent', 'tsfeatures/utils.py'), 'tsfeatures.utils.lambda_coef_var': ('utils.html#lambda_coef_var', 'tsfeatures/utils.py'), diff --git a/tsfeatures/features.py b/tsfeatures/features.py deleted file mode 100644 index 454abff..0000000 --- a/tsfeatures/features.py +++ /dev/null @@ -1,857 +0,0 @@ -# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/features.ipynb. 
- -# %% auto 0 -__all__ = ['acf_features', 'arch_stat', 'count_entropy', 'crossing_points', 'entropy', 'flat_spots', 'frequency', 'guerrero', - 'heterogeneity', 'holt_parameters', 'hurst', 'hw_parameters', 'intervals', 'lumpiness', 'nonlinearity', - 'pacf_features', 'series_length', 'sparsity', 'stability', 'stl_features', 'unitroot_kpss', 'unitroot_pp'] - -# %% ../nbs/features.ipynb 3 -import warnings - -# %% ../nbs/features.ipynb 4 -warnings.warn = lambda *a, **kw: False - -# %% ../nbs/features.ipynb 5 -import os - -# %% ../nbs/features.ipynb 6 -os.environ["MKL_NUM_THREADS"] = "1" -os.environ["NUMEXPR_NUM_THREADS"] = "1" -os.environ["OMP_NUM_THREADS"] = "1" - -# %% ../nbs/features.ipynb 7 -from itertools import groupby -from math import e # maybe change with numpy e -from typing import Dict - -import numpy as np -import pandas as pd -from antropy import spectral_entropy -from arch import arch_model -from scipy.optimize import minimize_scalar -from sklearn.linear_model import LinearRegression -from statsmodels.api import OLS, add_constant -from statsmodels.tsa.ar_model import AR -from statsmodels.tsa.holtwinters import ExponentialSmoothing -from statsmodels.tsa.seasonal import STL -from statsmodels.tsa.stattools import acf, kpss, pacf -from supersmoother import SuperSmoother - -from tsfeatures.utils import ( - embed, - hurst_exponent, - lambda_coef_var, - poly, - terasvirta_test, - ur_pp, -) - -# %% ../nbs/features.ipynb 8 -def acf_features(x: np.array, freq: int = 1) -> Dict[str, float]: - """Calculates autocorrelation function features. - - Parameters - ---------- - x: numpy array - The time series. - freq: int - Frequency of the time series - - Returns - ------- - dict - 'x_acf1': First autocorrelation coefficient. - 'x_acf10': Sum of squares of first 10 autocorrelation coefficients. - 'diff1_acf1': First autocorrelation ciefficient of differenced series. - 'diff1_acf10': Sum of squared of first 10 autocorrelation coefficients - of differenced series. 
- 'diff2_acf1': First autocorrelation coefficient of twice-differenced series. - 'diff2_acf10': Sum of squared of first 10 autocorrelation coefficients of - twice-differenced series. - - Only for seasonal data (freq > 1). - 'seas_acf1': Autocorrelation coefficient at the first seasonal lag. - """ - m = freq - size_x = len(x) - - acfx = acf(x, nlags=max(m, 10), fft=False) - if size_x > 10: - acfdiff1x = acf(np.diff(x, n=1), nlags=10, fft=False) - else: - acfdiff1x = [np.nan] * 2 - - if size_x > 11: - acfdiff2x = acf(np.diff(x, n=2), nlags=10, fft=False) - else: - acfdiff2x = [np.nan] * 2 - # first autocorrelation coefficient - acf_1 = acfx[1] - # sum of squares of first 10 autocorrelation coefficients - sum_of_sq_acf10 = np.sum((acfx[1:11]) ** 2) if size_x > 10 else np.nan - # first autocorrelation ciefficient of differenced series - diff1_acf1 = acfdiff1x[1] - # sum of squared of first 10 autocorrelation coefficients of differenced series - diff1_acf10 = np.sum((acfdiff1x[1:11]) ** 2) if size_x > 10 else np.nan - # first autocorrelation coefficient of twice-differenced series - diff2_acf1 = acfdiff2x[1] - # Sum of squared of first 10 autocorrelation coefficients of twice-differenced series - diff2_acf10 = np.sum((acfdiff2x[1:11]) ** 2) if size_x > 11 else np.nan - - output = { - "x_acf1": acf_1, - "x_acf10": sum_of_sq_acf10, - "diff1_acf1": diff1_acf1, - "diff1_acf10": diff1_acf10, - "diff2_acf1": diff2_acf1, - "diff2_acf10": diff2_acf10, - } - - if m > 1: - output["seas_acf1"] = acfx[m] if len(acfx) > m else np.nan - - return output - -# %% ../nbs/features.ipynb 11 -def arch_stat( - x: np.array, freq: int = 1, lags: int = 12, demean: bool = True -) -> Dict[str, float]: - """Arch model features. - - Parameters - ---------- - x: numpy array - The time series. - freq: int - Frequency of the time series - - Returns - ------- - dict - 'arch_lm': R^2 value of an autoregressive model of order lags applied to x**2. 
- """ - if len(x) <= lags + 1: - return {"arch_lm": np.nan} - if demean: - x -= np.mean(x) - - size_x = len(x) - mat = embed(x**2, lags + 1) - X = mat[:, 1:] - y = np.vstack(mat[:, 0]) - - try: - r_squared = LinearRegression().fit(X, y).score(X, y) - except: - r_squared = np.nan - - return {"arch_lm": r_squared} - -# %% ../nbs/features.ipynb 14 -def count_entropy(x: np.array, freq: int = 1) -> Dict[str, float]: - """Count entropy. - - Parameters - ---------- - x: numpy array - The time series. - freq: int - Frequency of the time series - - Returns - ------- - dict - 'count_entropy': Entropy using only positive data. - """ - entropy = x[x > 0] * np.log(x[x > 0]) - entropy = -entropy.sum() - - return {"count_entropy": entropy} - -# %% ../nbs/features.ipynb 15 -def crossing_points(x: np.array, freq: int = 1) -> Dict[str, float]: - """Crossing points. - - Parameters - ---------- - x: numpy array - The time series. - freq: int - Frequency of the time series - - Returns - ------- - dict - 'crossing_points': Number of times that x crosses the median. - """ - midline = np.median(x) - ab = x <= midline - lenx = len(x) - p1 = ab[: (lenx - 1)] - p2 = ab[1:] - cross = (p1 & (~p2)) | (p2 & (~p1)) - - return {"crossing_points": cross.sum()} - -# %% ../nbs/features.ipynb 16 -def entropy(x: np.array, freq: int = 1, base: float = e) -> Dict[str, float]: - """Calculates sample entropy. - - Parameters - ---------- - x: numpy array - The time series. - freq: int - Frequency of the time series - - Returns - ------- - dict - 'entropy': Wrapper of the function spectral_entropy. - """ - try: - with np.errstate(divide="ignore"): - entropy = spectral_entropy(x, 1, normalize=True) - except: - entropy = np.nan - - return {"entropy": entropy} - -# %% ../nbs/features.ipynb 17 -def flat_spots(x: np.array, freq: int = 1) -> Dict[str, float]: - """Flat spots. - - Parameters - ---------- - x: numpy array - The time series. 
- freq: int - Frequency of the time series - - Returns - ------- - dict - 'flat_spots': Number of flat spots in x. - """ - try: - cutx = pd.cut(x, bins=10, include_lowest=True, labels=False) + 1 - except: - return {"flat_spots": np.nan} - - rlex = np.array([sum(1 for i in g) for k, g in groupby(cutx)]).max() - -# %% ../nbs/features.ipynb 18 -def frequency(x: np.array, freq: int = 1) -> Dict[str, float]: - """Frequency. - - Parameters - ---------- - x: numpy array - The time series. - freq: int - Frequency of the time series - - Returns - ------- - dict - 'frequency': Wrapper of freq. - """ - - return {"frequency": freq} - -# %% ../nbs/features.ipynb 19 -def guerrero( - x: np.array, freq: int = 1, lower: int = -1, upper: int = 2 -) -> Dict[str, float]: - """Applies Guerrero's (1993) method to select the lambda which minimises the - coefficient of variation for subseries of x. - - Parameters - ---------- - x: numpy array - The time series. - freq: int - Frequency of the time series. - lower: float - The lower bound for lambda. - upper: float - The upper bound for lambda. - - Returns - ------- - dict - 'guerrero': Minimum coefficient of variation for subseries of x. - - References - ---------- - [1] Guerrero, V.M. (1993) Time-series analysis supported by power transformations. - Journal of Forecasting, 12, 37–48. - """ - func_to_min = lambda lambda_par: lambda_coef_var(lambda_par, x=x, period=freq) - - min_ = minimize_scalar(func_to_min, bounds=[lower, upper]) - min_ = min_["fun"] - - return {"guerrero": min_} - -# %% ../nbs/features.ipynb 20 -def heterogeneity(x: np.array, freq: int = 1) -> Dict[str, float]: - """Heterogeneity. - - Parameters - ---------- - x: numpy array - The time series. 
- freq: int - Frequency of the time series - - Returns - ------- - dict - 'arch_acf': Sum of squares of the first 12 autocorrelations of the - residuals of the AR model applied to x - 'garch_acf': Sum of squares of the first 12 autocorrelations of the - residuals of the GARCH model applied to x - 'arch_r2': Function arch_stat applied to the residuals of the - AR model applied to x. - 'garch_r2': Function arch_stat applied to the residuals of the GARCH - model applied to x. - """ - m = freq - - size_x = len(x) - order_ar = min(size_x - 1, np.floor(10 * np.log10(size_x))) - order_ar = int(order_ar) - - try: - x_whitened = AR(x).fit(maxlag=order_ar, ic="aic", trend="c").resid - except: - try: - x_whitened = AR(x).fit(maxlag=order_ar, ic="aic", trend="nc").resid - except: - output = { - "arch_acf": np.nan, - "garch_acf": np.nan, - "arch_r2": np.nan, - "garch_r2": np.nan, - } - - return output - # arch and box test - x_archtest = arch_stat(x_whitened, m)["arch_lm"] - LBstat = (acf(x_whitened**2, nlags=12, fft=False)[1:] ** 2).sum() - # Fit garch model - garch_fit = arch_model(x_whitened, vol="GARCH", rescale=False).fit(disp="off") - # compare arch test before and after fitting garch - garch_fit_std = garch_fit.resid - x_garch_archtest = arch_stat(garch_fit_std, m)["arch_lm"] - # compare Box test of squared residuals before and after fittig.garch - LBstat2 = (acf(garch_fit_std**2, nlags=12, fft=False)[1:] ** 2).sum() - - output = { - "arch_acf": LBstat, - "garch_acf": LBstat2, - "arch_r2": x_archtest, - "garch_r2": x_garch_archtest, - } - - return output - -# %% ../nbs/features.ipynb 21 -def holt_parameters(x: np.array, freq: int = 1) -> Dict[str, float]: - """Fitted parameters of a Holt model. - - Parameters - ---------- - x: numpy array - The time series. - freq: int - Frequency of the time series - - Returns - ------- - dict - 'alpha': Level paramater of the Holt model. - 'beta': Trend parameter of the Hold model. 
- """ - try: - fit = ExponentialSmoothing(x, trend="add", seasonal=None).fit() - params = { - "alpha": fit.params["smoothing_level"], - "beta": fit.params["smoothing_trend"], - } - except: - params = {"alpha": np.nan, "beta": np.nan} - - return params - -# %% ../nbs/features.ipynb 22 -def hurst(x: np.array, freq: int = 1) -> Dict[str, float]: - """Hurst index. - - Parameters - ---------- - x: numpy array - The time series. - freq: int - Frequency of the time series - - Returns - ------- - dict - 'hurst': Hurst exponent. - """ - try: - hurst_index = hurst_exponent(x) - except: - hurst_index = np.nan - - return {"hurst": hurst_index} - -# %% ../nbs/features.ipynb 23 -def hw_parameters(x: np.array, freq: int = 1) -> Dict[str, float]: - """Fitted parameters of a Holt-Winters model. - - Parameters - ---------- - x: numpy array - The time series. - freq: int - Frequency of the time series - - Returns - ------- - dict - 'hw_alpha': Level parameter of the HW model. - 'hw_beta': Trend parameter of the HW model. - 'hw_gamma': Seasonal parameter of the HW model. - """ - try: - fit = ExponentialSmoothing( - x, seasonal_periods=freq, trend="add", seasonal="add" - ).fit() - params = { - "hw_alpha": fit.params["smoothing_level"], - "hw_beta": fit.params["smoothing_trend"], - "hw_gamma": fit.params["smoothing_seasonal"], - } - except: - params = {"hw_alpha": np.nan, "hw_beta": np.nan, "hw_gamma": np.nan} - - return params - -# %% ../nbs/features.ipynb 24 -def intervals(x: np.array, freq: int = 1) -> Dict[str, float]: - """Intervals with demand. - - Parameters - ---------- - x: numpy array - The time series. - freq: int - Frequency of the time series - - Returns - ------- - dict - 'intervals_mean': Mean of intervals with positive values. - 'intervals_sd': SD of intervals with positive values. 
- """ - x[x > 0] = 1 - - y = [sum(val) for keys, val in groupby(x, key=lambda k: k != 0) if keys != 0] - y = np.array(y) - - return {"intervals_mean": np.mean(y), "intervals_sd": np.std(y, ddof=1)} - -# %% ../nbs/features.ipynb 25 -def lumpiness(x: np.array, freq: int = 1) -> Dict[str, float]: - """lumpiness. - - Parameters - ---------- - x: numpy array - The time series. - freq: int - Frequency of the time series - - Returns - ------- - dict - 'lumpiness': Variance of the variances of tiled windows. - """ - if freq == 1: - width = 10 - else: - width = freq - - nr = len(x) - lo = np.arange(0, nr, width) - up = lo + width - nsegs = nr / width - varx = [np.nanvar(x[lo[idx] : up[idx]], ddof=1) for idx in np.arange(int(nsegs))] - - if len(x) < 2 * width: - lumpiness = 0 - else: - lumpiness = np.nanvar(varx, ddof=1) - - return {"lumpiness": lumpiness} - -# %% ../nbs/features.ipynb 26 -def nonlinearity(x: np.array, freq: int = 1) -> Dict[str, float]: - """Nonlinearity. - - Parameters - ---------- - x: numpy array - The time series. - freq: int - Frequency of the time series - - Returns - ------- - dict - 'nonlinearity': 10 t**2/len(x) where t is the statistic used in - Terasvirta's test. - """ - try: - test = terasvirta_test(x) - test = 10 * test / len(x) - except: - test = np.nan - - return {"nonlinearity": test} - -# %% ../nbs/features.ipynb 27 -def pacf_features(x: np.array, freq: int = 1) -> Dict[str, float]: - """Calculates partial autocorrelation function features. - - Parameters - ---------- - x: numpy array - The time series. - freq: int - Frequency of the time series - - Returns - ------- - dict - 'x_pacf5': Sum of squares of the first 5 partial autocorrelation - coefficients. - 'diff1x_pacf5': Sum of squares of the first 5 partial autocorrelation - coefficients of differenced series. - 'diff2x_pacf5': Sum of squares of the first 5 partial autocorrelation - coefficients of twice-differenced series. - - Only for seasonal data (freq > 1). 
- 'seas_pacf': Partial autocorrelation - coefficient at the first seasonal lag. - """ - m = freq - - nlags_ = max(m, 5) - - if len(x) > 1: - try: - pacfx = pacf(x, nlags=nlags_, method="ldb") - except: - pacfx = np.nan - else: - pacfx = np.nan - # Sum of first 6 PACs squared - if len(x) > 5 and not np.all(np.isnan(pacfx)): - pacf_5 = np.sum(pacfx[1:6] ** 2) - else: - pacf_5 = np.nan - # Sum of first 5 PACs of difference series squared - if len(x) > 6: - try: - diff1_pacf = pacf(np.diff(x, n=1), nlags=5, method="ldb")[1:6] - diff1_pacf_5 = np.sum(diff1_pacf**2) - except: - diff1_pacf_5 = np.nan - else: - diff1_pacf_5 = np.nan - # Sum of first 5 PACs of twice differenced series squared - if len(x) > 7: - try: - diff2_pacf = pacf(np.diff(x, n=2), nlags=5, method="ldb")[1:6] - diff2_pacf_5 = np.sum(diff2_pacf**2) - except: - diff2_pacf_5 = np.nan - else: - diff2_pacf_5 = np.nan - - output = { - "x_pacf5": pacf_5, - "diff1x_pacf5": diff1_pacf_5, - "diff2x_pacf5": diff2_pacf_5, - } - - if m > 1: - output["seas_pacf"] = pacfx[m] if len(pacfx) > m else np.nan - - return output - -# %% ../nbs/features.ipynb 28 -def series_length(x: np.array, freq: int = 1) -> Dict[str, float]: - """Series length. - - Parameters - ---------- - x: numpy array - The time series. - freq: int - Frequency of the time series - - Returns - ------- - dict - 'series_length': Wrapper of len(x). - """ - - return {"series_length": len(x)} - -# %% ../nbs/features.ipynb 29 -def sparsity(x: np.array, freq: int = 1) -> Dict[str, float]: - """Sparsity. - - Parameters - ---------- - x: numpy array - The time series. - freq: int - Frequency of the time series - - Returns - ------- - dict - 'sparsity': Average obs with zero values. - """ - - return {"sparsity": np.mean(x == 0)} - -# %% ../nbs/features.ipynb 30 -def stability(x: np.array, freq: int = 1) -> Dict[str, float]: - """Stability. - - Parameters - ---------- - x: numpy array - The time series. 
- freq: int - Frequency of the time series - - Returns - ------- - dict - 'stability': Variance of the means of tiled windows. - """ - if freq == 1: - width = 10 - else: - width = freq - - nr = len(x) - lo = np.arange(0, nr, width) - up = lo + width - nsegs = nr / width - meanx = [np.nanmean(x[lo[idx] : up[idx]]) for idx in np.arange(int(nsegs))] - - if len(x) < 2 * width: - stability = 0 - else: - stability = np.nanvar(meanx, ddof=1) - - return {"stability": stability} - -# %% ../nbs/features.ipynb 31 -def stl_features(x: np.array, freq: int = 1) -> Dict[str, float]: - """Calculates seasonal trend using loess decomposition. - - Parameters - ---------- - x: numpy array - The time series. - freq: int - Frequency of the time series - - Returns - ------- - dict - 'nperiods': Number of seasonal periods in x. - 'seasonal_period': Frequency of the time series. - 'trend': Strength of trend. - 'spike': Measures "spikiness" of x. - 'linearity': Linearity of x based on the coefficients of an - orthogonal quadratic regression. - 'curvature': Curvature of x based on the coefficients of an - orthogonal quadratic regression. - 'e_acf1': acfremainder['x_acf1'] - 'e_acf10': acfremainder['x_acf10'] - - Only for sesonal data (freq > 0). - 'seasonal_strength': Strength of seasonality. - 'peak': Strength of peaks. - 'trough': Strength of trough. 
- """ - m = freq - nperiods = int(m > 1) - # STL fits - if m > 1: - try: - stlfit = STL(x, m, 13).fit() - except: - output = { - "nperiods": nperiods, - "seasonal_period": m, - "trend": np.nan, - "spike": np.nan, - "linearity": np.nan, - "curvature": np.nan, - "e_acf1": np.nan, - "e_acf10": np.nan, - "seasonal_strength": np.nan, - "peak": np.nan, - "trough": np.nan, - } - - return output - - trend0 = stlfit.trend - remainder = stlfit.resid - seasonal = stlfit.seasonal - else: - deseas = x - t = np.arange(len(x)) + 1 - try: - trend0 = SuperSmoother().fit(t, deseas).predict(t) - except: - output = { - "nperiods": nperiods, - "seasonal_period": m, - "trend": np.nan, - "spike": np.nan, - "linearity": np.nan, - "curvature": np.nan, - "e_acf1": np.nan, - "e_acf10": np.nan, - } - - return output - - remainder = deseas - trend0 - seasonal = np.zeros(len(x)) - # De-trended and de-seasonalized data - detrend = x - trend0 - deseason = x - seasonal - fits = x - remainder - # Summay stats - n = len(x) - varx = np.nanvar(x, ddof=1) - vare = np.nanvar(remainder, ddof=1) - vardetrend = np.nanvar(detrend, ddof=1) - vardeseason = np.nanvar(deseason, ddof=1) - # Measure of trend strength - if varx < np.finfo(float).eps: - trend = 0 - elif vardeseason / varx < 1e-10: - trend = 0 - else: - trend = max(0, min(1, 1 - vare / vardeseason)) - # Measure of seasonal strength - if m > 1: - if varx < np.finfo(float).eps: - season = 0 - elif np.nanvar(remainder + seasonal, ddof=1) < np.finfo(float).eps: - season = 0 - else: - season = max(0, min(1, 1 - vare / np.nanvar(remainder + seasonal, ddof=1))) - - peak = (np.argmax(seasonal) + 1) % m - peak = m if peak == 0 else peak - - trough = (np.argmin(seasonal) + 1) % m - trough = m if trough == 0 else trough - # Compute measure of spikiness - d = (remainder - np.nanmean(remainder)) ** 2 - varloo = (vare * (n - 1) - d) / (n - 2) - spike = np.nanvar(varloo, ddof=1) - # Compute measures of linearity and curvature - time = np.arange(n) + 1 - poly_m = 
poly(time, 2) - time_x = add_constant(poly_m) - coefs = OLS(trend0, time_x).fit().params - - linearity = coefs[1] - curvature = -coefs[2] - # ACF features - acfremainder = acf_features(remainder, m) - # Assemble features - output = { - "nperiods": nperiods, - "seasonal_period": m, - "trend": trend, - "spike": spike, - "linearity": linearity, - "curvature": curvature, - "e_acf1": acfremainder["x_acf1"], - "e_acf10": acfremainder["x_acf10"], - } - - if m > 1: - output["seasonal_strength"] = season - output["peak"] = peak - output["trough"] = trough - - return output - -# %% ../nbs/features.ipynb 32 -def unitroot_kpss(x: np.array, freq: int = 1) -> Dict[str, float]: - """Unit root kpss. - - Parameters - ---------- - x: numpy array - The time series. - freq: int - Frequency of the time series - - Returns - ------- - dict - 'unitroot_kpss': Statistic for the Kwiatowski et al unit root test. - """ - n = len(x) - nlags = int(4 * (n / 100) ** (1 / 4)) - - try: - test_kpss, _, _, _ = kpss(x, nlags=nlags) - except: - test_kpss = np.nan - - return {"unitroot_kpss": test_kpss} - -# %% ../nbs/features.ipynb 33 -def unitroot_pp(x: np.array, freq: int = 1) -> Dict[str, float]: - """Unit root pp. - - Parameters - ---------- - x: numpy array - The time series. - freq: int - Frequency of the time series - - Returns - ------- - dict - 'unitroot_pp': Statistic for the Phillips-Perron unit root test. - """ - try: - test_pp = ur_pp(x) - except: - test_pp = np.nan - - return {"unitroot_pp": test_pp} diff --git a/tsfeatures/tsfeatures.py b/tsfeatures/tsfeatures.py index 60e3eca..8c1d3cb 100644 --- a/tsfeatures/tsfeatures.py +++ b/tsfeatures/tsfeatures.py @@ -1,31 +1,863 @@ -# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/tsfeatures_core.ipynb. +# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/tsfeatures.ipynb. 
# %% auto 0
__all__ = ['FREQS', 'acf_features', 'arch_stat', 'count_entropy', 'crossing_points', 'entropy', 'flat_spots', 'frequency',
           'guerrero', 'heterogeneity', 'holt_parameters', 'hurst', 'hw_parameters', 'intervals', 'lumpiness',
           'nonlinearity', 'pacf_features', 'series_length', 'sparsity', 'stability', 'stl_features', 'unitroot_kpss',
           'unitroot_pp', 'tsfeatures']

# %% ../nbs/tsfeatures.ipynb 3
import os
import warnings
from collections import ChainMap
from functools import partial
from multiprocessing import Pool
from typing import Callable, Dict, List, Optional

import pandas as pd

# %% ../nbs/tsfeatures.ipynb 4
# NOTE(review): monkey-patching warnings.warn silences *all* warnings
# process-wide, including in user code that imports this module; a scoped
# warnings.filterwarnings() call would be less invasive.
warnings.warn = lambda *a, **kw: False

# %% ../nbs/tsfeatures.ipynb 5
# Pin the BLAS/OpenMP thread pools to a single thread so the multiprocessing
# Pool used by `tsfeatures` does not oversubscribe CPU cores.
os.environ["MKL_NUM_THREADS"] = "1"
os.environ["NUMEXPR_NUM_THREADS"] = "1"
os.environ["OMP_NUM_THREADS"] = "1"

# %% ../nbs/tsfeatures.ipynb 6
from itertools import groupby
from math import e  # maybe change with numpy e
from typing import Dict

import numpy as np
import pandas as pd
from antropy import spectral_entropy
from arch import arch_model
from scipy.optimize import minimize_scalar
from sklearn.linear_model import LinearRegression
from statsmodels.api import OLS, add_constant
from statsmodels.tsa.ar_model import AR
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from statsmodels.tsa.seasonal import STL
from statsmodels.tsa.stattools import acf, kpss, pacf
from supersmoother import SuperSmoother

from .utils import *

# %% ../nbs/tsfeatures.ipynb 7
# Map of pandas-style frequency aliases to seasonal period lengths.
FREQS = {"H": 24, "D": 1, "M": 12, "Q": 4, "W": 1, "Y": 1}

# %% ../nbs/tsfeatures.ipynb 8
def acf_features(x: np.array, freq: int = 1) -> Dict[str, float]:
    """Calculates autocorrelation function features.

    Parameters
    ----------
    x: numpy array
        The time series.
    freq: int
        Frequency of the time series

    Returns
    -------
    dict
        'x_acf1': First autocorrelation coefficient.
        'x_acf10': Sum of squares of first 10 autocorrelation coefficients.
        'diff1_acf1': First autocorrelation coefficient of differenced series.
        'diff1_acf10': Sum of squares of first 10 autocorrelation coefficients
            of differenced series.
        'diff2_acf1': First autocorrelation coefficient of twice-differenced series.
        'diff2_acf10': Sum of squares of first 10 autocorrelation coefficients of
            twice-differenced series.

        Only for seasonal data (freq > 1).
        'seas_acf1': Autocorrelation coefficient at the first seasonal lag.
    """
    m = freq
    size_x = len(x)

    acfx = acf(x, nlags=max(m, 10), fft=False)
    if size_x > 10:
        acfdiff1x = acf(np.diff(x, n=1), nlags=10, fft=False)
    else:
        acfdiff1x = [np.nan] * 2

    if size_x > 11:
        acfdiff2x = acf(np.diff(x, n=2), nlags=10, fft=False)
    else:
        acfdiff2x = [np.nan] * 2

    # First autocorrelation coefficient.
    # BUG FIX: for a one-element series acf() returns a length-1 array, so
    # indexing lag 1 raised IndexError; degrade to NaN instead.
    try:
        acf_1 = acfx[1]
    except IndexError:
        acf_1 = np.nan

    # sum of squares of first 10 autocorrelation coefficients
    sum_of_sq_acf10 = np.sum((acfx[1:11]) ** 2) if size_x > 10 else np.nan
    # first autocorrelation coefficient of differenced series
    diff1_acf1 = acfdiff1x[1]
    # sum of squares of first 10 autocorrelation coefficients of differenced series
    diff1_acf10 = np.sum((acfdiff1x[1:11]) ** 2) if size_x > 10 else np.nan
    # first autocorrelation coefficient of twice-differenced series
    diff2_acf1 = acfdiff2x[1]
    # sum of squares of first 10 autocorrelation coefficients of twice-differenced series
    diff2_acf10 = np.sum((acfdiff2x[1:11]) ** 2) if size_x > 11 else np.nan

    output = {
        "x_acf1": acf_1,
        "x_acf10": sum_of_sq_acf10,
        "diff1_acf1": diff1_acf1,
        "diff1_acf10": diff1_acf10,
        "diff2_acf1": diff2_acf1,
        "diff2_acf10": diff2_acf10,
    }

    if m > 1:
        output["seas_acf1"] = acfx[m] if len(acfx) > m else np.nan

    return output

# %% ../nbs/tsfeatures.ipynb 11
def arch_stat(
    x: np.array, freq: int = 1, lags: int = 12, demean: bool = True
) -> Dict[str, float]:
    """Arch model features.

    Parameters
    ----------
    x: numpy array
        The time series.
    freq: int
        Frequency of the time series
    lags: int
        Order of the autoregression fitted to x**2.
    demean: bool
        Whether to subtract the series mean before fitting.

    Returns
    -------
    dict
        'arch_lm': R^2 value of an autoregressive model of order lags applied to x**2.
    """
    if len(x) <= lags + 1:
        return {"arch_lm": np.nan}
    if demean:
        # BUG FIX: `x -= np.mean(x)` mutated the caller's array in place
        # (and fails on integer arrays); rebind to a fresh array instead.
        x = x - np.mean(x)

    mat = embed(x**2, lags + 1)
    X = mat[:, 1:]
    y = np.vstack(mat[:, 0])

    try:
        r_squared = LinearRegression().fit(X, y).score(X, y)
    except Exception:
        r_squared = np.nan

    return {"arch_lm": r_squared}

# %% ../nbs/tsfeatures.ipynb 14
def count_entropy(x: np.array, freq: int = 1) -> Dict[str, float]:
    """Count entropy.

    Parameters
    ----------
    x: numpy array
        The time series.
    freq: int
        Frequency of the time series

    Returns
    -------
    dict
        'count_entropy': Entropy using only positive data.
    """
    # Sum of -p*log(p) over the strictly positive observations only.
    positive = x[x > 0]
    entropy = -(positive * np.log(positive)).sum()

    return {"count_entropy": entropy}

# %% ../nbs/tsfeatures.ipynb 15
def crossing_points(x: np.array, freq: int = 1) -> Dict[str, float]:
    """Crossing points.

    Parameters
    ----------
    x: numpy array
        The time series.
    freq: int
        Frequency of the time series

    Returns
    -------
    dict
        'crossing_points': Number of times that x crosses the median.
    """
    midline = np.median(x)
    ab = x <= midline
    lenx = len(x)
    # A crossing happens whenever consecutive observations fall on
    # opposite sides of the median.
    p1 = ab[: (lenx - 1)]
    p2 = ab[1:]
    cross = (p1 & (~p2)) | (p2 & (~p1))

    return {"crossing_points": cross.sum()}

# %% ../nbs/tsfeatures.ipynb 16
def entropy(x: np.array, freq: int = 1, base: float = e) -> Dict[str, float]:
    """Calculates sample entropy.

    Parameters
    ----------
    x: numpy array
        The time series.
    freq: int
        Frequency of the time series
    base: float
        Unused; kept for backward compatibility of the signature.

    Returns
    -------
    dict
        'entropy': Wrapper of the function spectral_entropy.
    """
    try:
        with np.errstate(divide="ignore"):
            spect_ent = spectral_entropy(x, 1, normalize=True)
    except Exception:
        spect_ent = np.nan

    return {"entropy": spect_ent}

# %% ../nbs/tsfeatures.ipynb 17
def flat_spots(x: np.array, freq: int = 1) -> Dict[str, float]:
    """Flat spots.

    Parameters
    ----------
    x: numpy array
        The time series.
    freq: int
        Frequency of the time series

    Returns
    -------
    dict
        'flat_spots': Number of flat spots in x.
    """
    try:
        cutx = pd.cut(x, bins=10, include_lowest=True, labels=False) + 1
    except Exception:
        return {"flat_spots": np.nan}

    # Longest run of observations falling in the same decile bin.
    rlex = np.array([sum(1 for i in g) for k, g in groupby(cutx)]).max()
    # BUG FIX: the original fell off the end here and implicitly returned None.
    return {"flat_spots": rlex}

# %% ../nbs/tsfeatures.ipynb 18
def frequency(x: np.array, freq: int = 1) -> Dict[str, float]:
    """Frequency.

    Parameters
    ----------
    x: numpy array
        The time series.
    freq: int
        Frequency of the time series

    Returns
    -------
    dict
        'frequency': Wrapper of freq.
    """

    return {"frequency": freq}

# %% ../nbs/tsfeatures.ipynb 19
def guerrero(
    x: np.array, freq: int = 1, lower: int = -1, upper: int = 2
) -> Dict[str, float]:
    """Applies Guerrero's (1993) method to select the lambda which minimises the
    coefficient of variation for subseries of x.

    Parameters
    ----------
    x: numpy array
        The time series.
    freq: int
        Frequency of the time series.
    lower: float
        The lower bound for lambda.
    upper: float
        The upper bound for lambda.

    Returns
    -------
    dict
        'guerrero': Minimum coefficient of variation for subseries of x.

    References
    ----------
    [1] Guerrero, V.M. (1993) Time-series analysis supported by power transformations.
        Journal of Forecasting, 12, 37-48.
    """
    def _coef_var(lambda_par):
        # Objective: coefficient of variation of the Box-Cox-transformed
        # subseries for a candidate lambda (helper from .utils).
        return lambda_coef_var(lambda_par, x=x, period=freq)

    min_ = minimize_scalar(_coef_var, bounds=[lower, upper])
    min_ = min_["fun"]

    return {"guerrero": min_}

# %% ../nbs/tsfeatures.ipynb 20
def heterogeneity(x: np.array, freq: int = 1) -> Dict[str, float]:
    """Heterogeneity.

    Parameters
    ----------
    x: numpy array
        The time series.
    freq: int
        Frequency of the time series

    Returns
    -------
    dict
        'arch_acf': Sum of squares of the first 12 autocorrelations of the
            residuals of the AR model applied to x
        'garch_acf': Sum of squares of the first 12 autocorrelations of the
            residuals of the GARCH model applied to x
        'arch_r2': Function arch_stat applied to the residuals of the
            AR model applied to x.
        'garch_r2': Function arch_stat applied to the residuals of the GARCH
            model applied to x.
    """
    m = freq

    size_x = len(x)
    order_ar = min(size_x - 1, np.floor(10 * np.log10(size_x)))
    order_ar = int(order_ar)

    # Pre-whiten the series with an AR fit; retry without a constant term,
    # and give up with NaNs if neither fit converges.
    try:
        x_whitened = AR(x).fit(maxlag=order_ar, ic="aic", trend="c").resid
    except Exception:
        try:
            x_whitened = AR(x).fit(maxlag=order_ar, ic="aic", trend="nc").resid
        except Exception:
            output = {
                "arch_acf": np.nan,
                "garch_acf": np.nan,
                "arch_r2": np.nan,
                "garch_r2": np.nan,
            }

            return output
    # arch and box test
    x_archtest = arch_stat(x_whitened, m)["arch_lm"]
    LBstat = (acf(x_whitened**2, nlags=12, fft=False)[1:] ** 2).sum()
    # Fit garch model
    garch_fit = arch_model(x_whitened, vol="GARCH", rescale=False).fit(disp="off")
    # compare arch test before and after fitting garch
    garch_fit_std = garch_fit.resid
    x_garch_archtest = arch_stat(garch_fit_std, m)["arch_lm"]
    # compare Box test of squared residuals before and after fitting garch
    LBstat2 = (acf(garch_fit_std**2, nlags=12, fft=False)[1:] ** 2).sum()

    output = {
        "arch_acf": LBstat,
        "garch_acf": LBstat2,
        "arch_r2": x_archtest,
        "garch_r2": x_garch_archtest,
    }

    return output

# %% ../nbs/tsfeatures.ipynb 21
def holt_parameters(x: np.array, freq: int = 1) -> Dict[str, float]:
    """Fitted parameters of a Holt model.

    Parameters
    ----------
    x: numpy array
        The time series.
    freq: int
        Frequency of the time series

    Returns
    -------
    dict
        'alpha': Level parameter of the Holt model.
        'beta': Trend parameter of the Holt model.
    """
    try:
        fit = ExponentialSmoothing(x, trend="add", seasonal=None).fit()
        params = {
            "alpha": fit.params["smoothing_level"],
            "beta": fit.params["smoothing_trend"],
        }
    except Exception:
        params = {"alpha": np.nan, "beta": np.nan}

    return params

# %% ../nbs/tsfeatures.ipynb 22
def hurst(x: np.array, freq: int = 1) -> Dict[str, float]:
    """Hurst index.

    Parameters
    ----------
    x: numpy array
        The time series.
    freq: int
        Frequency of the time series

    Returns
    -------
    dict
        'hurst': Hurst exponent.
    """
    try:
        hurst_index = hurst_exponent(x)
    except Exception:
        hurst_index = np.nan

    return {"hurst": hurst_index}

# %% ../nbs/tsfeatures.ipynb 23
def hw_parameters(x: np.array, freq: int = 1) -> Dict[str, float]:
    """Fitted parameters of a Holt-Winters model.

    Parameters
    ----------
    x: numpy array
        The time series.
    freq: int
        Frequency of the time series

    Returns
    -------
    dict
        'hw_alpha': Level parameter of the HW model.
        'hw_beta': Trend parameter of the HW model.
        'hw_gamma': Seasonal parameter of the HW model.
    """
    try:
        fit = ExponentialSmoothing(
            x, seasonal_periods=freq, trend="add", seasonal="add"
        ).fit()
        params = {
            "hw_alpha": fit.params["smoothing_level"],
            "hw_beta": fit.params["smoothing_trend"],
            "hw_gamma": fit.params["smoothing_seasonal"],
        }
    except Exception:
        params = {"hw_alpha": np.nan, "hw_beta": np.nan, "hw_gamma": np.nan}

    return params

# %% ../nbs/tsfeatures.ipynb 24
def intervals(x: np.array, freq: int = 1) -> Dict[str, float]:
    """Intervals with demand.

    Parameters
    ----------
    x: numpy array
        The time series.
    freq: int
        Frequency of the time series

    Returns
    -------
    dict
        'intervals_mean': Mean of intervals with positive values.
        'intervals_sd': SD of intervals with positive values.
    """
    # BUG FIX: `x[x > 0] = 1` destructively overwrote the caller's array;
    # work on a copy so the input series is left untouched.
    x = np.asarray(x).copy()
    x[x > 0] = 1

    # Lengths of the consecutive runs of non-zero (demand) observations.
    y = [sum(val) for keys, val in groupby(x, key=lambda k: k != 0) if keys != 0]
    y = np.array(y)

    return {"intervals_mean": np.mean(y), "intervals_sd": np.std(y, ddof=1)}

# %% ../nbs/tsfeatures.ipynb 25
def lumpiness(x: np.array, freq: int = 1) -> Dict[str, float]:
    """lumpiness.

    Parameters
    ----------
    x: numpy array
        The time series.
    freq: int
        Frequency of the time series

    Returns
    -------
    dict
        'lumpiness': Variance of the variances of tiled windows.
    """
    # Non-seasonal series are tiled into windows of 10 observations.
    if freq == 1:
        width = 10
    else:
        width = freq

    nr = len(x)
    lo = np.arange(0, nr, width)
    up = lo + width
    nsegs = nr / width
    varx = [np.nanvar(x[lo[idx] : up[idx]], ddof=1) for idx in np.arange(int(nsegs))]

    # Need at least two full windows for a meaningful variance-of-variances.
    if len(x) < 2 * width:
        lumpiness = 0
    else:
        lumpiness = np.nanvar(varx, ddof=1)

    return {"lumpiness": lumpiness}

# %% ../nbs/tsfeatures.ipynb 26
def nonlinearity(x: np.array, freq: int = 1) -> Dict[str, float]:
    """Nonlinearity.

    Parameters
    ----------
    x: numpy array
        The time series.
    freq: int
        Frequency of the time series

    Returns
    -------
    dict
        'nonlinearity': 10 t**2/len(x) where t is the statistic used in
            Terasvirta's test.
    """
    try:
        test = terasvirta_test(x)
        test = 10 * test / len(x)
    except Exception:
        test = np.nan

    return {"nonlinearity": test}

# %% ../nbs/tsfeatures.ipynb 27
def pacf_features(x: np.array, freq: int = 1) -> Dict[str, float]:
    """Calculates partial autocorrelation function features.

    Parameters
    ----------
    x: numpy array
        The time series.
    freq: int
        Frequency of the time series

    Returns
    -------
    dict
        'x_pacf5': Sum of squares of the first 5 partial autocorrelation
            coefficients.
        'diff1x_pacf5': Sum of squares of the first 5 partial autocorrelation
            coefficients of differenced series.
        'diff2x_pacf5': Sum of squares of the first 5 partial autocorrelation
            coefficients of twice-differenced series.

        Only for seasonal data (freq > 1).
        'seas_pacf': Partial autocorrelation
            coefficient at the first seasonal lag.
    """
    m = freq

    nlags_ = max(m, 5)

    if len(x) > 1:
        try:
            pacfx = pacf(x, nlags=nlags_, method="ldb")
        except Exception:
            pacfx = np.nan
    else:
        pacfx = np.nan
    # Sum of first 6 PACs squared
    if len(x) > 5 and not np.all(np.isnan(pacfx)):
        pacf_5 = np.sum(pacfx[1:6] ** 2)
    else:
        pacf_5 = np.nan
    # Sum of first 5 PACs of difference series squared
    if len(x) > 6:
        try:
            diff1_pacf = pacf(np.diff(x, n=1), nlags=5, method="ldb")[1:6]
            diff1_pacf_5 = np.sum(diff1_pacf**2)
        except Exception:
            diff1_pacf_5 = np.nan
    else:
        diff1_pacf_5 = np.nan
    # Sum of first 5 PACs of twice differenced series squared
    if len(x) > 7:
        try:
            diff2_pacf = pacf(np.diff(x, n=2), nlags=5, method="ldb")[1:6]
            diff2_pacf_5 = np.sum(diff2_pacf**2)
        except Exception:
            diff2_pacf_5 = np.nan
    else:
        diff2_pacf_5 = np.nan

    output = {
        "x_pacf5": pacf_5,
        "diff1x_pacf5": diff1_pacf_5,
        "diff2x_pacf5": diff2_pacf_5,
    }

    if m > 1:
        # BUG FIX: when the pacf call above failed, pacfx is the scalar
        # np.nan and `len(pacfx)` raised TypeError; fall back to NaN.
        if isinstance(pacfx, np.ndarray) and len(pacfx) > m:
            output["seas_pacf"] = pacfx[m]
        else:
            output["seas_pacf"] = np.nan

    return output

# %% ../nbs/tsfeatures.ipynb 28
def series_length(x: np.array, freq: int = 1) -> Dict[str, float]:
    """Series length.

    Parameters
    ----------
    x: numpy array
        The time series.
    freq: int
        Frequency of the time series

    Returns
    -------
    dict
        'series_length': Wrapper of len(x).
    """

    return {"series_length": len(x)}

# %% ../nbs/tsfeatures.ipynb 29
def sparsity(x: np.array, freq: int = 1) -> Dict[str, float]:
    """Sparsity.

    Parameters
    ----------
    x: numpy array
        The time series.
    freq: int
        Frequency of the time series

    Returns
    -------
    dict
        'sparsity': Average obs with zero values.
    """

    return {"sparsity": np.mean(x == 0)}

# %% ../nbs/tsfeatures.ipynb 30
def stability(x: np.array, freq: int = 1) -> Dict[str, float]:
    """Stability.

    Parameters
    ----------
    x: numpy array
        The time series.
    freq: int
        Frequency of the time series

    Returns
    -------
    dict
        'stability': Variance of the means of tiled windows.
    """
    # Non-seasonal series are tiled into windows of 10 observations.
    if freq == 1:
        width = 10
    else:
        width = freq

    nr = len(x)
    lo = np.arange(0, nr, width)
    up = lo + width
    nsegs = nr / width
    meanx = [np.nanmean(x[lo[idx] : up[idx]]) for idx in np.arange(int(nsegs))]

    # Need at least two full windows for a meaningful variance-of-means.
    if len(x) < 2 * width:
        stability = 0
    else:
        stability = np.nanvar(meanx, ddof=1)

    return {"stability": stability}

# %% ../nbs/tsfeatures.ipynb 31
def stl_features(x: np.array, freq: int = 1) -> Dict[str, float]:
    """Calculates seasonal trend using loess decomposition.

    Parameters
    ----------
    x: numpy array
        The time series.
    freq: int
        Frequency of the time series

    Returns
    -------
    dict
        'nperiods': Number of seasonal periods in x.
        'seasonal_period': Frequency of the time series.
        'trend': Strength of trend.
        'spike': Measures "spikiness" of x.
        'linearity': Linearity of x based on the coefficients of an
            orthogonal quadratic regression.
        'curvature': Curvature of x based on the coefficients of an
            orthogonal quadratic regression.
        'e_acf1': acfremainder['x_acf1']
        'e_acf10': acfremainder['x_acf10']

        Only for seasonal data (freq > 0).
        'seasonal_strength': Strength of seasonality.
        'peak': Strength of peaks.
        'trough': Strength of trough.
    """
    m = freq
    nperiods = int(m > 1)
    # STL fits
    if m > 1:
        try:
            stlfit = STL(x, m, 13).fit()
        except Exception:
            output = {
                "nperiods": nperiods,
                "seasonal_period": m,
                "trend": np.nan,
                "spike": np.nan,
                "linearity": np.nan,
                "curvature": np.nan,
                "e_acf1": np.nan,
                "e_acf10": np.nan,
                "seasonal_strength": np.nan,
                "peak": np.nan,
                "trough": np.nan,
            }

            return output

        trend0 = stlfit.trend
        remainder = stlfit.resid
        seasonal = stlfit.seasonal
    else:
        # Non-seasonal: smooth a trend with Friedman's SuperSmoother.
        deseas = x
        t = np.arange(len(x)) + 1
        try:
            trend0 = SuperSmoother().fit(t, deseas).predict(t)
        except Exception:
            output = {
                "nperiods": nperiods,
                "seasonal_period": m,
                "trend": np.nan,
                "spike": np.nan,
                "linearity": np.nan,
                "curvature": np.nan,
                "e_acf1": np.nan,
                "e_acf10": np.nan,
            }

            return output

        remainder = deseas - trend0
        seasonal = np.zeros(len(x))
    # De-trended and de-seasonalized data
    detrend = x - trend0
    deseason = x - seasonal
    # Summary stats
    n = len(x)
    varx = np.nanvar(x, ddof=1)
    vare = np.nanvar(remainder, ddof=1)
    vardeseason = np.nanvar(deseason, ddof=1)
    # Measure of trend strength
    if varx < np.finfo(float).eps:
        trend = 0
    elif vardeseason / varx < 1e-10:
        trend = 0
    else:
        trend = max(0, min(1, 1 - vare / vardeseason))
    # Measure of seasonal strength
    if m > 1:
        if varx < np.finfo(float).eps:
            season = 0
        elif np.nanvar(remainder + seasonal, ddof=1) < np.finfo(float).eps:
            season = 0
        else:
            season = max(0, min(1, 1 - vare / np.nanvar(remainder + seasonal, ddof=1)))

        peak = (np.argmax(seasonal) + 1) % m
        peak = m if peak == 0 else peak

        trough = (np.argmin(seasonal) + 1) % m
        trough = m if trough == 0 else trough
    # Compute measure of spikiness (leave-one-out variances of the remainder)
    d = (remainder - np.nanmean(remainder)) ** 2
    varloo = (vare * (n - 1) - d) / (n - 2)
    spike = np.nanvar(varloo, ddof=1)
    # Compute measures of linearity and curvature via an orthogonal
    # quadratic regression of the trend on time (poly comes from .utils).
    time = np.arange(n) + 1
    poly_m = poly(time, 2)
    time_x = add_constant(poly_m)
    coefs = OLS(trend0, time_x).fit().params

    linearity = coefs[1]
    curvature = -coefs[2]
    # ACF features
    acfremainder = acf_features(remainder, m)
    # Assemble features
    output = {
        "nperiods": nperiods,
        "seasonal_period": m,
        "trend": trend,
        "spike": spike,
        "linearity": linearity,
        "curvature": curvature,
        "e_acf1": acfremainder["x_acf1"],
        "e_acf10": acfremainder["x_acf10"],
    }

    if m > 1:
        output["seasonal_strength"] = season
        output["peak"] = peak
        output["trough"] = trough

    return output

# %% ../nbs/tsfeatures.ipynb 32
def unitroot_kpss(x: np.array, freq: int = 1) -> Dict[str, float]:
    """Unit root kpss.

    Parameters
    ----------
    x: numpy array
        The time series.
    freq: int
        Frequency of the time series

    Returns
    -------
    dict
        'unitroot_kpss': Statistic for the Kwiatkowski et al unit root test.
    """
    # Lag truncation rule of thumb used by the R implementation.
    n = len(x)
    nlags = int(4 * (n / 100) ** (1 / 4))

    try:
        test_kpss, _, _, _ = kpss(x, nlags=nlags)
    except Exception:
        test_kpss = np.nan

    return {"unitroot_kpss": test_kpss}

# %% ../nbs/tsfeatures.ipynb 33
def unitroot_pp(x: np.array, freq: int = 1) -> Dict[str, float]:
    """Unit root pp.

    Parameters
    ----------
    x: numpy array
        The time series.
    freq: int
        Frequency of the time series

    Returns
    -------
    dict
        'unitroot_pp': Statistic for the Phillips-Perron unit root test.
    """
    try:
        test_pp = ur_pp(x)
    except Exception:
        test_pp = np.nan

    return {"unitroot_pp": test_pp}
+ """ + try: + test_pp = ur_pp(x) + except: + test_pp = np.nan + + return {"unitroot_pp": test_pp} + +# %% ../nbs/tsfeatures.ipynb 34 def _get_feats( index, ts, @@ -86,7 +918,7 @@ def _get_feats( return pd.DataFrame(dict(c_map), index=[index]) -# %% ../nbs/tsfeatures_core.ipynb 7 +# %% ../nbs/tsfeatures.ipynb 35 def tsfeatures( ts: pd.DataFrame, freq: Optional[int] = None, From 5fe958907aa411a2e423884b8fce4e6a4eeaeef0 Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Fri, 9 Feb 2024 21:17:54 +0100 Subject: [PATCH 14/52] descriptive stats on cluster results --- nbs/utils.ipynb | 2 -- 1 file changed, 2 deletions(-) diff --git a/nbs/utils.ipynb b/nbs/utils.ipynb index 36c10f2..7024390 100644 --- a/nbs/utils.ipynb +++ b/nbs/utils.ipynb @@ -56,7 +56,6 @@ "outputs": [], "source": [ "#| hide\n", - "\n", "from fastcore.test import *" ] }, @@ -78,7 +77,6 @@ ], "source": [ "# |export\n", - "\n", "np.seterr(divide=\"ignore\", invalid=\"ignore\")" ] }, From cb60c1bac0ba62b12c2bcd6814e25244b6da3ca6 Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Fri, 9 Feb 2024 21:27:21 +0100 Subject: [PATCH 15/52] add github action for testing --- .github/workflows/ci.yml | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 .github/workflows/ci.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..584173d --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,39 @@ +name: CI + +on: + push: + branches: [main] + pull_request: + branches: [main] + workflow_dispatch: + +defaults: + run: + shell: bash -l {0} + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + windows-tests: + runs-on: windows-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.8", "3.9", "3.10", "3.11"] + steps: + - name: Clone repo + uses: actions/checkout@v3 + + - name: Set up environment + 
uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + cache: "pip" + + - name: Install the library + run: pip install ".[dev]" + + - name: Run tests + run: nbdev_test --do_print --timing --flags 'matplotlib polars pyarrow scipy' From c0eb8675fe7a02c8c28cc6d3519680dab6717717 Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Fri, 9 Feb 2024 21:42:01 +0100 Subject: [PATCH 16/52] modified setup --- nbs/tsfeatures.ipynb | 159 ++++++++++++++++++++++++++++++++++++++----- setup.py | 110 ++++++++++++++++++++++++------ 2 files changed, 230 insertions(+), 39 deletions(-) diff --git a/nbs/tsfeatures.ipynb b/nbs/tsfeatures.ipynb index 5789681..dc7a3b9 100644 --- a/nbs/tsfeatures.ipynb +++ b/nbs/tsfeatures.ipynb @@ -94,17 +94,6 @@ "from tsfeatures.utils import *" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# |export\n", - "\n", - "FREQS = {\"H\": 24, \"D\": 1, \"M\": 12, \"Q\": 4, \"W\": 1, \"Y\": 1}" - ] - }, { "cell_type": "code", "execution_count": null, @@ -153,7 +142,12 @@ " else:\n", " acfdiff2x = [np.nan] * 2\n", " # first autocorrelation coefficient\n", - " acf_1 = acfx[1]\n", + "\n", + " try:\n", + " acf_1 = acfx[1]\n", + " except:\n", + " acf_1 = np.nan\n", + "\n", " # sum of squares of first 10 autocorrelation coefficients\n", " sum_of_sq_acf10 = np.sum((acfx[1:11]) ** 2) if size_x > 10 else np.nan\n", " # first autocorrelation ciefficient of differenced series\n", @@ -255,7 +249,7 @@ " if len(x) <= lags + 1:\n", " return {\"arch_lm\": np.nan}\n", " if demean:\n", - " x -= np.mean(x)\n", + " x = x - np.mean(x)\n", "\n", " size_x = len(x)\n", " mat = embed(x**2, lags + 1)\n", @@ -431,7 +425,9 @@ " except:\n", " return {\"flat_spots\": np.nan}\n", "\n", - " rlex = np.array([sum(1 for i in g) for k, g in groupby(cutx)]).max()" + " rlex = np.array([sum(1 for i in g) for k, g in groupby(cutx)]).max()\n", + " return {\"flat_spots\": 
rlex}\n", + "\n" ] }, { @@ -1103,8 +1099,15 @@ " time_x = add_constant(poly_m)\n", " coefs = OLS(trend0, time_x).fit().params\n", "\n", - " linearity = coefs[1]\n", - " curvature = -coefs[2]\n", + "\n", + " try:\n", + " linearity = coefs[1]\n", + " except:\n", + " linearity = np.nan\n", + " try:\n", + " curvature = -coefs[2]\n", + " except:\n", + " curvature = np.nan\n", " # ACF features\n", " acfremainder = acf_features(remainder, m)\n", " # Assemble features\n", @@ -1194,6 +1197,56 @@ " return {\"unitroot_pp\": test_pp}" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def statistics(x: np.array, freq: int = 1) -> Dict[str, float]:\n", + " \"\"\"Computes basic statistics of x.\n", + "\n", + " Parameters\n", + " ----------\n", + " x: numpy array\n", + " The time series.\n", + " freq: int\n", + " Frequency of the time series\n", + "\n", + " Returns\n", + " -------\n", + " dict\n", + " 'total_sum': Total sum of the series.\n", + " 'mean': Mean value.\n", + " 'variance': variance of the time series.\n", + " 'median': Median value.\n", + " 'p2point5': 2.5 Percentile.\n", + " 'p5': 5 percentile.\n", + " 'p25': 25 percentile.\n", + " 'p75': 75 percentile.\n", + " 'p95': 95 percentile.\n", + " 'p97point5': 97.5 percentile.\n", + " 'max': Max value.\n", + " 'min': Min value.\n", + " \"\"\"\n", + " res = dict(\n", + " total_sum=np.sum(x),\n", + " mean=np.mean(x),\n", + " variance=np.var(x, ddof=1),\n", + " median=np.median(x),\n", + " p2point5=np.quantile(x, q=0.025),\n", + " p5=np.quantile(x, q=0.05),\n", + " p25=np.quantile(x, q=0.25),\n", + " p75=np.quantile(x, q=0.75),\n", + " p95=np.quantile(x, q=0.95),\n", + " p97point5=np.quantile(x, q=0.975),\n", + " max=np.max(x),\n", + " min=np.min(x),\n", + " )\n", + "\n", + " return res" + ] + }, { "cell_type": "code", "execution_count": null, @@ -1227,7 +1280,6 @@ " ],\n", " dict_freqs=FREQS,\n", "):\n", - " print(\"dict_freq\")\n", " if freq is None:\n", " inf_freq 
= pd.infer_freq(ts[\"ds\"])\n", " if inf_freq is None:\n", @@ -1334,6 +1386,79 @@ " return ts_features" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def _get_feats_wide(index,\n", + " ts,\n", + " scale = True,\n", + " features = [acf_features, arch_stat, crossing_points,\n", + " entropy, flat_spots, heterogeneity, holt_parameters,\n", + " lumpiness, nonlinearity, pacf_features, stl_features,\n", + " stability, hw_parameters, unitroot_kpss, unitroot_pp,\n", + " series_length, hurst]):\n", + " seasonality = ts['seasonality'].item()\n", + " y = ts['y'].item()\n", + " y = np.array(y)\n", + "\n", + " if scale:\n", + " y = scalets(y)\n", + "\n", + " c_map = ChainMap(*[dict_feat for dict_feat in [func(y, seasonality) for func in features]])\n", + "\n", + " return pd.DataFrame(dict(c_map), index = [index])\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def tsfeatures_wide(ts: pd.DataFrame,\n", + " features: List[Callable] = [acf_features, arch_stat, crossing_points,\n", + " entropy, flat_spots, heterogeneity,\n", + " holt_parameters, lumpiness, nonlinearity,\n", + " pacf_features, stl_features, stability,\n", + " hw_parameters, unitroot_kpss, unitroot_pp,\n", + " series_length, hurst],\n", + " scale: bool = True,\n", + " threads: Optional[int] = None) -> pd.DataFrame:\n", + " \"\"\"Calculates features for time series.\n", + "\n", + " Parameters\n", + " ----------\n", + " ts: pandas df\n", + " Pandas DataFrame with columns ['unique_id', 'seasonality', 'y'].\n", + " Wide panel of time series.\n", + " features: iterable\n", + " Iterable of features functions.\n", + " scale: bool\n", + " Whether (mean-std)scale data.\n", + " threads: int\n", + " Number of threads to use. 
Use None (default) for parallel processing.\n", + "\n", + " Returns\n", + " -------\n", + " pandas df\n", + " Pandas DataFrame where each column is a feature and each row\n", + " a time series.\n", + " \"\"\"\n", + " partial_get_feats = partial(_get_feats_wide, scale=scale,\n", + " features=features)\n", + "\n", + " with Pool(threads) as pool:\n", + " ts_features = pool.starmap(partial_get_feats, ts.groupby('unique_id'))\n", + "\n", + " ts_features = pd.concat(ts_features).rename_axis('unique_id')\n", + " ts_features = ts_features.reset_index()\n", + "\n", + " return ts_features" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/setup.py b/setup.py index e7f27fb..1737381 100644 --- a/setup.py +++ b/setup.py @@ -1,28 +1,94 @@ +import shlex +from configparser import ConfigParser + import setuptools +from pkg_resources import parse_version + +assert parse_version(setuptools.__version__) >= parse_version("36.2") + +# note: all settings are in settings.ini; edit there, not here +config = ConfigParser(delimiters=["="]) +config.read("settings.ini", encoding="utf-8") +cfg = config["DEFAULT"] + +cfg_keys = "version description keywords author author_email".split() +expected = ( + cfg_keys + + "lib_name user branch license status min_python audience language".split() +) +for o in expected: + assert o in cfg, "missing expected setting: {}".format(o) +setup_cfg = {o: cfg[o] for o in cfg_keys} -with open("README.md", "r") as fh: - long_description = fh.read() +licenses = { + "apache2": ( + "Apache Software License 2.0", + "OSI Approved :: Apache Software License", + ), + "mit": ("MIT License", "OSI Approved :: MIT License"), + "gpl2": ( + "GNU General Public License v2", + "OSI Approved :: GNU General Public License v2 (GPLv2)", + ), + "gpl3": ( + "GNU General Public License v3", + "OSI Approved :: GNU General Public License v3 (GPLv3)", + ), + "agpl3": ( + "GNU Affero General Public License v3", + "OSI Approved :: GNU Affero General Public License 
(AGPLv3)", + ), + "bsd3": ("BSD License", "OSI Approved :: BSD License"), +} +statuses = [ + "0 - Pre-Planning", + "1 - Planning", + "2 - Pre-Alpha", + "3 - Alpha", + "4 - Beta", + "5 - Production/Stable", + "6 - Mature", + "7 - Inactive", +] +py_versions = "3.7 3.8 3.9 3.10 3.11".split() + +requirements = shlex.split(cfg.get("requirements", "")) +if cfg.get("pip_requirements"): + requirements += shlex.split(cfg.get("pip_requirements", "")) +min_python = cfg["min_python"] +lic = licenses.get(cfg["license"].lower(), (cfg["license"], None)) +dev_requirements = (cfg.get("dev_requirements") or "").split() +project_urls = {} +if cfg.get("doc_host"): + project_urls["Documentation"] = cfg["doc_host"] + cfg.get("doc_baseurl", "") setuptools.setup( - name="tsfeatures", - version="0.4.5", - description="Calculates various features from time series data.", - long_description=long_description, - long_description_content_type="text/markdown", - url="https://github.com/Nixtla/tsfeatures", - packages=setuptools.find_packages(), + name=cfg["lib_name"], + license=lic[0], classifiers=[ - "Programming Language :: Python :: 3", - "License :: OSI Approved :: MIT License", - "Operating System :: OS Independent", - ], - python_requires=">=3.7", - install_requires=[ - "antropy>=0.1.4", - "arch>=4.11", - "pandas>=1.0.5", - "scikit-learn>=0.23.1", - "statsmodels>=0.13.2", - "supersmoother>=0.4", - ], + "Development Status :: " + statuses[int(cfg["status"])], + "Intended Audience :: " + cfg["audience"].title(), + "Natural Language :: " + cfg["language"].title(), + ] + + [ + "Programming Language :: Python :: " + o + for o in py_versions[py_versions.index(min_python) :] + ] + + (["License :: " + lic[1]] if lic[1] else []), + url=cfg["git_url"], + packages=setuptools.find_packages(), + include_package_data=True, + install_requires=requirements, + extras_require={"dev": dev_requirements}, + dependency_links=cfg.get("dep_links", "").split(), + python_requires=">=" + cfg["min_python"], + 
long_description=open("README.md", encoding="utf8").read(), + long_description_content_type="text/markdown", + zip_safe=False, + entry_points={ + "console_scripts": cfg.get("console_scripts", "").split(), + "nbdev": [f'{cfg.get("lib_path")}={cfg.get("lib_path")}._modidx:d'], + }, + project_urls=project_urls, + **setup_cfg, ) From 0135e1baadd78238aaf735ef86fe2f6c6fc08318 Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Fri, 9 Feb 2024 21:48:48 +0100 Subject: [PATCH 17/52] include a pip reqs file --- requirements.txt | 1 + 1 file changed, 1 insertion(+) create mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..643aa32 --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +nbdev \ No newline at end of file From 2dd6a6559a517961271c5b2514618c33b8797e75 Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Fri, 9 Feb 2024 21:54:56 +0100 Subject: [PATCH 18/52] exclude rpy2 --- settings.ini | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/settings.ini b/settings.ini index d1515bf..9b1c9d6 100644 --- a/settings.ini +++ b/settings.ini @@ -39,7 +39,8 @@ user = Nixtla ### Optional ### requirements = antropy>=0.1.4 arch>=4.11 pandas>=1.0.5 scikit-learn>=0.23.1 statsmodels>=0.13.2 supersmoother>=0.4 numba>=0.55.0 numpy>=1.21.6 tqdm -dev_requirements = nbdev rpy2 +dev_requirements = nbdev +; dev_requirements = nbdev rpy2 # console_scripts = black_formatting = True From e9c1b94f885fef265c7604f93bd2fb7e2cc18134 Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Fri, 9 Feb 2024 22:16:29 +0100 Subject: [PATCH 19/52] run nbdev_export --- tsfeatures/_modidx.py | 4 ++- tsfeatures/tsfeatures.py | 74 ++++++++++++++++++++++------------------ 2 files changed, 44 insertions(+), 34 deletions(-) diff --git a/tsfeatures/_modidx.py b/tsfeatures/_modidx.py index c58ed29..f657af3 100644 --- a/tsfeatures/_modidx.py 
+++ b/tsfeatures/_modidx.py @@ -5,7 +5,8 @@ 'doc_host': 'https://Nixtla.github.io', 'git_url': 'https://github.com/Nixtla/tsfeatures', 'lib_path': 'tsfeatures'}, - 'syms': { 'tsfeatures.m4_data': { 'tsfeatures.m4_data.m4_parser': ('m4_data.html#m4_parser', 'tsfeatures/m4_data.py'), + 'syms': { 'tsfeatures.compare_with_r': {}, + 'tsfeatures.m4_data': { 'tsfeatures.m4_data.m4_parser': ('m4_data.html#m4_parser', 'tsfeatures/m4_data.py'), 'tsfeatures.m4_data.maybe_download': ('m4_data.html#maybe_download', 'tsfeatures/m4_data.py'), 'tsfeatures.m4_data.prepare_m4_data': ('m4_data.html#prepare_m4_data', 'tsfeatures/m4_data.py')}, 'tsfeatures.tsfeatures': { 'tsfeatures.tsfeatures._get_feats': ('tsfeatures.html#_get_feats', 'tsfeatures/tsfeatures.py'), @@ -34,6 +35,7 @@ 'tsfeatures.tsfeatures.tsfeatures': ('tsfeatures.html#tsfeatures', 'tsfeatures/tsfeatures.py'), 'tsfeatures.tsfeatures.unitroot_kpss': ('tsfeatures.html#unitroot_kpss', 'tsfeatures/tsfeatures.py'), 'tsfeatures.tsfeatures.unitroot_pp': ('tsfeatures.html#unitroot_pp', 'tsfeatures/tsfeatures.py')}, + 'tsfeatures.tsfeatures_r': {}, 'tsfeatures.utils': { 'tsfeatures.utils.embed': ('utils.html#embed', 'tsfeatures/utils.py'), 'tsfeatures.utils.hurst_exponent': ('utils.html#hurst_exponent', 'tsfeatures/utils.py'), 'tsfeatures.utils.lambda_coef_var': ('utils.html#lambda_coef_var', 'tsfeatures/utils.py'), diff --git a/tsfeatures/tsfeatures.py b/tsfeatures/tsfeatures.py index 8c1d3cb..e905849 100644 --- a/tsfeatures/tsfeatures.py +++ b/tsfeatures/tsfeatures.py @@ -1,10 +1,10 @@ # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/tsfeatures.ipynb. 
# %% auto 0 -__all__ = ['FREQS', 'acf_features', 'arch_stat', 'count_entropy', 'crossing_points', 'entropy', 'flat_spots', 'frequency', - 'guerrero', 'heterogeneity', 'holt_parameters', 'hurst', 'hw_parameters', 'intervals', 'lumpiness', - 'nonlinearity', 'pacf_features', 'series_length', 'sparsity', 'stability', 'stl_features', 'unitroot_kpss', - 'unitroot_pp', 'tsfeatures'] +__all__ = ['acf_features', 'arch_stat', 'count_entropy', 'crossing_points', 'entropy', 'flat_spots', 'frequency', 'guerrero', + 'heterogeneity', 'holt_parameters', 'hurst', 'hw_parameters', 'intervals', 'lumpiness', 'nonlinearity', + 'pacf_features', 'series_length', 'sparsity', 'stability', 'stl_features', 'unitroot_kpss', 'unitroot_pp', + 'tsfeatures'] # %% ../nbs/tsfeatures.ipynb 3 import os @@ -45,9 +45,6 @@ from .utils import * # %% ../nbs/tsfeatures.ipynb 7 -FREQS = {"H": 24, "D": 1, "M": 12, "Q": 4, "W": 1, "Y": 1} - -# %% ../nbs/tsfeatures.ipynb 8 def acf_features(x: np.array, freq: int = 1) -> Dict[str, float]: """Calculates autocorrelation function features. 
@@ -87,7 +84,12 @@ def acf_features(x: np.array, freq: int = 1) -> Dict[str, float]: else: acfdiff2x = [np.nan] * 2 # first autocorrelation coefficient - acf_1 = acfx[1] + + try: + acf_1 = acfx[1] + except: + acf_1 = np.nan + # sum of squares of first 10 autocorrelation coefficients sum_of_sq_acf10 = np.sum((acfx[1:11]) ** 2) if size_x > 10 else np.nan # first autocorrelation ciefficient of differenced series @@ -113,7 +115,7 @@ def acf_features(x: np.array, freq: int = 1) -> Dict[str, float]: return output -# %% ../nbs/tsfeatures.ipynb 11 +# %% ../nbs/tsfeatures.ipynb 10 def arch_stat( x: np.array, freq: int = 1, lags: int = 12, demean: bool = True ) -> Dict[str, float]: @@ -134,7 +136,7 @@ def arch_stat( if len(x) <= lags + 1: return {"arch_lm": np.nan} if demean: - x -= np.mean(x) + x = x - np.mean(x) size_x = len(x) mat = embed(x**2, lags + 1) @@ -148,7 +150,7 @@ def arch_stat( return {"arch_lm": r_squared} -# %% ../nbs/tsfeatures.ipynb 14 +# %% ../nbs/tsfeatures.ipynb 13 def count_entropy(x: np.array, freq: int = 1) -> Dict[str, float]: """Count entropy. @@ -169,7 +171,7 @@ def count_entropy(x: np.array, freq: int = 1) -> Dict[str, float]: return {"count_entropy": entropy} -# %% ../nbs/tsfeatures.ipynb 15 +# %% ../nbs/tsfeatures.ipynb 14 def crossing_points(x: np.array, freq: int = 1) -> Dict[str, float]: """Crossing points. @@ -194,7 +196,7 @@ def crossing_points(x: np.array, freq: int = 1) -> Dict[str, float]: return {"crossing_points": cross.sum()} -# %% ../nbs/tsfeatures.ipynb 16 +# %% ../nbs/tsfeatures.ipynb 15 def entropy(x: np.array, freq: int = 1, base: float = e) -> Dict[str, float]: """Calculates sample entropy. @@ -218,7 +220,7 @@ def entropy(x: np.array, freq: int = 1, base: float = e) -> Dict[str, float]: return {"entropy": entropy} -# %% ../nbs/tsfeatures.ipynb 17 +# %% ../nbs/tsfeatures.ipynb 16 def flat_spots(x: np.array, freq: int = 1) -> Dict[str, float]: """Flat spots. 
@@ -240,8 +242,9 @@ def flat_spots(x: np.array, freq: int = 1) -> Dict[str, float]: return {"flat_spots": np.nan} rlex = np.array([sum(1 for i in g) for k, g in groupby(cutx)]).max() + return {"flat_spots": rlex} -# %% ../nbs/tsfeatures.ipynb 18 +# %% ../nbs/tsfeatures.ipynb 17 def frequency(x: np.array, freq: int = 1) -> Dict[str, float]: """Frequency. @@ -260,7 +263,7 @@ def frequency(x: np.array, freq: int = 1) -> Dict[str, float]: return {"frequency": freq} -# %% ../nbs/tsfeatures.ipynb 19 +# %% ../nbs/tsfeatures.ipynb 18 def guerrero( x: np.array, freq: int = 1, lower: int = -1, upper: int = 2 ) -> Dict[str, float]: @@ -295,7 +298,7 @@ def guerrero( return {"guerrero": min_} -# %% ../nbs/tsfeatures.ipynb 20 +# %% ../nbs/tsfeatures.ipynb 19 def heterogeneity(x: np.array, freq: int = 1) -> Dict[str, float]: """Heterogeneity. @@ -358,7 +361,7 @@ def heterogeneity(x: np.array, freq: int = 1) -> Dict[str, float]: return output -# %% ../nbs/tsfeatures.ipynb 21 +# %% ../nbs/tsfeatures.ipynb 20 def holt_parameters(x: np.array, freq: int = 1) -> Dict[str, float]: """Fitted parameters of a Holt model. @@ -386,7 +389,7 @@ def holt_parameters(x: np.array, freq: int = 1) -> Dict[str, float]: return params -# %% ../nbs/tsfeatures.ipynb 22 +# %% ../nbs/tsfeatures.ipynb 21 def hurst(x: np.array, freq: int = 1) -> Dict[str, float]: """Hurst index. @@ -409,7 +412,7 @@ def hurst(x: np.array, freq: int = 1) -> Dict[str, float]: return {"hurst": hurst_index} -# %% ../nbs/tsfeatures.ipynb 23 +# %% ../nbs/tsfeatures.ipynb 22 def hw_parameters(x: np.array, freq: int = 1) -> Dict[str, float]: """Fitted parameters of a Holt-Winters model. @@ -441,7 +444,7 @@ def hw_parameters(x: np.array, freq: int = 1) -> Dict[str, float]: return params -# %% ../nbs/tsfeatures.ipynb 24 +# %% ../nbs/tsfeatures.ipynb 23 def intervals(x: np.array, freq: int = 1) -> Dict[str, float]: """Intervals with demand. 
@@ -465,7 +468,7 @@ def intervals(x: np.array, freq: int = 1) -> Dict[str, float]: return {"intervals_mean": np.mean(y), "intervals_sd": np.std(y, ddof=1)} -# %% ../nbs/tsfeatures.ipynb 25 +# %% ../nbs/tsfeatures.ipynb 24 def lumpiness(x: np.array, freq: int = 1) -> Dict[str, float]: """lumpiness. @@ -499,7 +502,7 @@ def lumpiness(x: np.array, freq: int = 1) -> Dict[str, float]: return {"lumpiness": lumpiness} -# %% ../nbs/tsfeatures.ipynb 26 +# %% ../nbs/tsfeatures.ipynb 25 def nonlinearity(x: np.array, freq: int = 1) -> Dict[str, float]: """Nonlinearity. @@ -524,7 +527,7 @@ def nonlinearity(x: np.array, freq: int = 1) -> Dict[str, float]: return {"nonlinearity": test} -# %% ../nbs/tsfeatures.ipynb 27 +# %% ../nbs/tsfeatures.ipynb 26 def pacf_features(x: np.array, freq: int = 1) -> Dict[str, float]: """Calculates partial autocorrelation function features. @@ -595,7 +598,7 @@ def pacf_features(x: np.array, freq: int = 1) -> Dict[str, float]: return output -# %% ../nbs/tsfeatures.ipynb 28 +# %% ../nbs/tsfeatures.ipynb 27 def series_length(x: np.array, freq: int = 1) -> Dict[str, float]: """Series length. @@ -614,7 +617,7 @@ def series_length(x: np.array, freq: int = 1) -> Dict[str, float]: return {"series_length": len(x)} -# %% ../nbs/tsfeatures.ipynb 29 +# %% ../nbs/tsfeatures.ipynb 28 def sparsity(x: np.array, freq: int = 1) -> Dict[str, float]: """Sparsity. @@ -633,7 +636,7 @@ def sparsity(x: np.array, freq: int = 1) -> Dict[str, float]: return {"sparsity": np.mean(x == 0)} -# %% ../nbs/tsfeatures.ipynb 30 +# %% ../nbs/tsfeatures.ipynb 29 def stability(x: np.array, freq: int = 1) -> Dict[str, float]: """Stability. @@ -667,7 +670,7 @@ def stability(x: np.array, freq: int = 1) -> Dict[str, float]: return {"stability": stability} -# %% ../nbs/tsfeatures.ipynb 31 +# %% ../nbs/tsfeatures.ipynb 30 def stl_features(x: np.array, freq: int = 1) -> Dict[str, float]: """Calculates seasonal trend using loess decomposition. 
@@ -785,8 +788,14 @@ def stl_features(x: np.array, freq: int = 1) -> Dict[str, float]: time_x = add_constant(poly_m) coefs = OLS(trend0, time_x).fit().params - linearity = coefs[1] - curvature = -coefs[2] + try: + linearity = coefs[1] + except: + linearity = np.nan + try: + curvature = -coefs[2] + except: + curvature = np.nan # ACF features acfremainder = acf_features(remainder, m) # Assemble features @@ -808,7 +817,7 @@ def stl_features(x: np.array, freq: int = 1) -> Dict[str, float]: return output -# %% ../nbs/tsfeatures.ipynb 32 +# %% ../nbs/tsfeatures.ipynb 31 def unitroot_kpss(x: np.array, freq: int = 1) -> Dict[str, float]: """Unit root kpss. @@ -834,7 +843,7 @@ def unitroot_kpss(x: np.array, freq: int = 1) -> Dict[str, float]: return {"unitroot_kpss": test_kpss} -# %% ../nbs/tsfeatures.ipynb 33 +# %% ../nbs/tsfeatures.ipynb 32 def unitroot_pp(x: np.array, freq: int = 1) -> Dict[str, float]: """Unit root pp. @@ -884,7 +893,6 @@ def _get_feats( ], dict_freqs=FREQS, ): - print("dict_freq") if freq is None: inf_freq = pd.infer_freq(ts["ds"]) if inf_freq is None: From 29c2a98a5160da6526b007a5bd483208fc6d8b7f Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Mon, 19 Feb 2024 14:37:22 +0100 Subject: [PATCH 20/52] incorporate pr feedback --- .github/workflows/ci.yml | 6 +-- README.md | 105 ++------------------------------------- nbs/index.ipynb | 15 ++++++ settings.ini | 2 - 4 files changed, 21 insertions(+), 107 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 584173d..5cd1a78 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -16,8 +16,8 @@ concurrency: cancel-in-progress: true jobs: - windows-tests: - runs-on: windows-latest + run-tests: + runs-on: [ubuntu-latest, macos-latest, windows-latest] strategy: fail-fast: false matrix: @@ -36,4 +36,4 @@ jobs: run: pip install ".[dev]" - name: Run tests - run: nbdev_test --do_print --timing --flags 'matplotlib polars pyarrow 
scipy' + run: nbdev_test --do_print --timing diff --git a/README.md b/README.md index 92c9572..bbc0f62 100644 --- a/README.md +++ b/README.md @@ -58,7 +58,7 @@ tsfeatures(panel, dict_freqs={'D': 7, 'W': 52}) ## List of available features | Features | | | -|:----------------|:----------------|:--------------| +| :-------------- | :-------------- | :------------ | | acf_features | heterogeneity | series_length | | arch_stat | holt_parameters | sparsity | | count_entropy | hurst | stability | @@ -113,7 +113,7 @@ of functions. ### Non-seasonal data (100 Daily M4 time series) | feature | diff | feature | diff | feature | diff | feature | diff | -|:----------------|-----:|:----------------|-----:|:--------------|-----:|:----------|------:| +| :-------------- | ---: | :-------------- | ---: | :------------ | ---: | :-------- | ----: | | e_acf10 | 0 | e_acf1 | 0 | diff2_acf1 | 0 | alpha | 3.2 | | seasonal_period | 0 | spike | 0 | diff1_acf10 | 0 | arch_acf | 3.3 | | nperiods | 0 | curvature | 0 | x_acf1 | 0 | beta | 4.04 | @@ -135,7 +135,7 @@ python -m tsfeatures.compare_with_r --results_directory /some/path ### Sesonal data (100 Hourly M4 time series) | feature | diff | feature | diff | feature | diff | feature | diff | -|:----------------|-----:|:------------------|-----:|:----------|-----:|:--------|------:| +| :-------------- | ---: | :---------------- | ---: | :-------- | ---: | :------ | ----: | | series_length | 0 | seas_acf1 | 0 | trend | 2.28 | hurst | 26.02 | | flat_spots | 0 | x_acf1 | 0 | arch_r2 | 2.29 | hw_beta | 32.39 | | nperiods | 0 | unitroot_kpss | 0 | alpha | 2.52 | trough | 35 | @@ -158,102 +158,3 @@ fury.io](https://badge.fury.io/py/tsfeatures.svg)](https://pypi.python.org/pypi/ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://github.com/FedericoGarza/tsfeatures/blob/master/LICENSE) -# tsfeatures - -Calculates various features from time series data. 
Python implementation -of the R package -*[tsfeatures](https://github.com/robjhyndman/tsfeatures)*. - -# Installation - -You can install the *released* version of `tsfeatures` from the [Python -package index](pypi.org) with: - -``` python -pip install tsfeatures -``` - -# Usage - -The `tsfeatures` main function calculates by default the features used -by Montero-Manso, Talagala, Hyndman and Athanasopoulos in [their -implementation of the FFORMA -model](https://htmlpreview.github.io/?https://github.com/robjhyndman/M4metalearning/blob/master/docs/M4_methodology.html#features). - -``` python -from tsfeatures import tsfeatures -``` - -This function receives a panel pandas df with columns `unique_id`, `ds`, -`y` and optionally the frequency of the data. - - - -``` python -tsfeatures(panel, freq=7) -``` - -By default (`freq=None`) the function will try to infer the frequency of -each time series (using `infer_freq` from `pandas` on the `ds` column) -and assign a seasonal period according to the built-in dictionary -`FREQS`: - -``` python -FREQS = {'H': 24, 'D': 1, - 'M': 12, 'Q': 4, - 'W':1, 'Y': 1} -``` - -You can use your own dictionary using the `dict_freqs` argument: - -``` python -tsfeatures(panel, dict_freqs={'D': 7, 'W': 52}) -``` - -## List of available features - -| Features | | | -|:----------------|:----------------|:--------------| -| acf_features | heterogeneity | series_length | -| arch_stat | holt_parameters | sparsity | -| count_entropy | hurst | stability | -| crossing_points | hw_parameters | stl_features | -| entropy | intervals | unitroot_kpss | -| flat_spots | lumpiness | unitroot_pp | -| frequency | nonlinearity | | -| guerrero | pacf_features | | - -See the docs for a description of the features. 
To use a particular -feature included in the package you need to import it: - -``` python -from tsfeatures import acf_features - -tsfeatures(panel, freq=7, features=[acf_features]) -``` - -You can also define your own function and use it together with the -included features: - -``` python -def number_zeros(x, freq): - - number = (x == 0).sum() - return {'number_zeros': number} - -tsfeatures(panel, freq=7, features=[acf_features, number_zeros]) -``` - -`tsfeatures` can handle functions that receives a numpy array `x` and a -frequency `freq` (this parameter is needed even if you don’t use it) and -returns a dictionary with the feature name as a key and its value. - -# Authors - -- **Federico Garza** - [FedericoGarza](https://github.com/FedericoGarza) -- **Kin Gutierrez** - [kdgutier](https://github.com/kdgutier) -- **Cristian Challu** - - [cristianchallu](https://github.com/cristianchallu) -- **Jose Moralez** - [jose-moralez](https://github.com/jose-moralez) -- **Ricardo Olivares** - [rolivaresar](https://github.com/rolivaresar) -- **Max Mergenthaler** - [mergenthaler](https://github.com/mergenthaler) diff --git a/nbs/index.ipynb b/nbs/index.ipynb index 3491c78..67cb007 100644 --- a/nbs/index.ipynb +++ b/nbs/index.ipynb @@ -135,6 +135,17 @@ "| diff2x_pacf5 | 0 |arch_acf|2.18| entropy | 9.45 |\n" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![CI](https://github.com/Nixtla/mlforecast/actions/workflows/ci.yaml/badge.svg)](https://github.com/Nixtla/mlforecast/actions/workflows/ci.yaml)\n", + "[![Python](https://img.shields.io/pypi/pyversions/mlforecast)](https://pypi.org/project/mlforecast/)\n", + "[![PyPi](https://img.shields.io/pypi/v/mlforecast?color=blue)](https://pypi.org/project/mlforecast/)\n", + "[![conda-forge](https://img.shields.io/conda/vn/conda-forge/mlforecast?color=blue)](https://anaconda.org/conda-forge/mlforecast)\n", + 
"[![License](https://img.shields.io/github/license/Nixtla/mlforecast)](https://github.com/Nixtla/mlforecast/blob/main/LICENSE)\n" + ] + }, { "cell_type": "code", "execution_count": null, @@ -388,6 +399,10 @@ "display_name": "python3", "language": "python", "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.12.1" } }, "nbformat": 4, diff --git a/settings.ini b/settings.ini index 9b1c9d6..16db827 100644 --- a/settings.ini +++ b/settings.ini @@ -40,8 +40,6 @@ user = Nixtla ### Optional ### requirements = antropy>=0.1.4 arch>=4.11 pandas>=1.0.5 scikit-learn>=0.23.1 statsmodels>=0.13.2 supersmoother>=0.4 numba>=0.55.0 numpy>=1.21.6 tqdm dev_requirements = nbdev -; dev_requirements = nbdev rpy2 -# console_scripts = black_formatting = True jupyter_hooks = True From 022858c7f4322e5c035507e27fe3fe236a5bfacb Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Wed, 21 Feb 2024 19:23:54 +0100 Subject: [PATCH 21/52] adding shields --- README.md | 111 +++++++++++++++++++++++++++++++++++++++++++++-- requirements.txt | 1 - 2 files changed, 108 insertions(+), 4 deletions(-) delete mode 100644 requirements.txt diff --git a/README.md b/README.md index bbc0f62..acd4df4 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,12 @@ +[![CI](https://github.com/Nixtla/mlforecast/actions/workflows/ci.yaml/badge.svg)](https://github.com/Nixtla/mlforecast/actions/workflows/ci.yaml) +[![Python](https://img.shields.io/pypi/pyversions/mlforecast.png)](https://pypi.org/project/mlforecast/) +[![PyPi](https://img.shields.io/pypi/v/mlforecast?color=blue.png)](https://pypi.org/project/mlforecast/) +[![conda-forge](https://img.shields.io/conda/vn/conda-forge/mlforecast?color=blue.png)](https://anaconda.org/conda-forge/mlforecast) +[![License](https://img.shields.io/github/license/Nixtla/mlforecast.png)](https://github.com/Nixtla/mlforecast/blob/main/LICENSE) + # tsfeatures Calculates various features from time series data. 
Python implementation @@ -58,7 +64,7 @@ tsfeatures(panel, dict_freqs={'D': 7, 'W': 52}) ## List of available features | Features | | | -| :-------------- | :-------------- | :------------ | +|:----------------|:----------------|:--------------| | acf_features | heterogeneity | series_length | | arch_stat | holt_parameters | sparsity | | count_entropy | hurst | stability | @@ -113,7 +119,7 @@ of functions. ### Non-seasonal data (100 Daily M4 time series) | feature | diff | feature | diff | feature | diff | feature | diff | -| :-------------- | ---: | :-------------- | ---: | :------------ | ---: | :-------- | ----: | +|:----------------|-----:|:----------------|-----:|:--------------|-----:|:----------|------:| | e_acf10 | 0 | e_acf1 | 0 | diff2_acf1 | 0 | alpha | 3.2 | | seasonal_period | 0 | spike | 0 | diff1_acf10 | 0 | arch_acf | 3.3 | | nperiods | 0 | curvature | 0 | x_acf1 | 0 | beta | 4.04 | @@ -135,7 +141,7 @@ python -m tsfeatures.compare_with_r --results_directory /some/path ### Sesonal data (100 Hourly M4 time series) | feature | diff | feature | diff | feature | diff | feature | diff | -| :-------------- | ---: | :---------------- | ---: | :-------- | ---: | :------ | ----: | +|:----------------|-----:|:------------------|-----:|:----------|-----:|:--------|------:| | series_length | 0 | seas_acf1 | 0 | trend | 2.28 | hurst | 26.02 | | flat_spots | 0 | x_acf1 | 0 | arch_r2 | 2.29 | hw_beta | 32.39 | | nperiods | 0 | unitroot_kpss | 0 | alpha | 2.52 | trough | 35 | @@ -158,3 +164,102 @@ fury.io](https://badge.fury.io/py/tsfeatures.svg)](https://pypi.python.org/pypi/ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://github.com/FedericoGarza/tsfeatures/blob/master/LICENSE) +# tsfeatures + +Calculates various features from time series data. Python implementation +of the R package +*[tsfeatures](https://github.com/robjhyndman/tsfeatures)*. 
+ +# Installation + +You can install the *released* version of `tsfeatures` from the [Python +package index](pypi.org) with: + +``` python +pip install tsfeatures +``` + +# Usage + +The `tsfeatures` main function calculates by default the features used +by Montero-Manso, Talagala, Hyndman and Athanasopoulos in [their +implementation of the FFORMA +model](https://htmlpreview.github.io/?https://github.com/robjhyndman/M4metalearning/blob/master/docs/M4_methodology.html#features). + +``` python +from tsfeatures import tsfeatures +``` + +This function receives a panel pandas df with columns `unique_id`, `ds`, +`y` and optionally the frequency of the data. + + + +``` python +tsfeatures(panel, freq=7) +``` + +By default (`freq=None`) the function will try to infer the frequency of +each time series (using `infer_freq` from `pandas` on the `ds` column) +and assign a seasonal period according to the built-in dictionary +`FREQS`: + +``` python +FREQS = {'H': 24, 'D': 1, + 'M': 12, 'Q': 4, + 'W':1, 'Y': 1} +``` + +You can use your own dictionary using the `dict_freqs` argument: + +``` python +tsfeatures(panel, dict_freqs={'D': 7, 'W': 52}) +``` + +## List of available features + +| Features | | | +|:----------------|:----------------|:--------------| +| acf_features | heterogeneity | series_length | +| arch_stat | holt_parameters | sparsity | +| count_entropy | hurst | stability | +| crossing_points | hw_parameters | stl_features | +| entropy | intervals | unitroot_kpss | +| flat_spots | lumpiness | unitroot_pp | +| frequency | nonlinearity | | +| guerrero | pacf_features | | + +See the docs for a description of the features. 
To use a particular +feature included in the package you need to import it: + +``` python +from tsfeatures import acf_features + +tsfeatures(panel, freq=7, features=[acf_features]) +``` + +You can also define your own function and use it together with the +included features: + +``` python +def number_zeros(x, freq): + + number = (x == 0).sum() + return {'number_zeros': number} + +tsfeatures(panel, freq=7, features=[acf_features, number_zeros]) +``` + +`tsfeatures` can handle functions that receives a numpy array `x` and a +frequency `freq` (this parameter is needed even if you don’t use it) and +returns a dictionary with the feature name as a key and its value. + +# Authors + +- **Federico Garza** - [FedericoGarza](https://github.com/FedericoGarza) +- **Kin Gutierrez** - [kdgutier](https://github.com/kdgutier) +- **Cristian Challu** - + [cristianchallu](https://github.com/cristianchallu) +- **Jose Moralez** - [jose-moralez](https://github.com/jose-moralez) +- **Ricardo Olivares** - [rolivaresar](https://github.com/rolivaresar) +- **Max Mergenthaler** - [mergenthaler](https://github.com/mergenthaler) diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 643aa32..0000000 --- a/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -nbdev \ No newline at end of file From be7efc98e21a86aa12b784b3a6d5ac0059b09fac Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Wed, 21 Feb 2024 19:25:36 +0100 Subject: [PATCH 22/52] only run on ubuntu --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5cd1a78..a6dc4ca 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,7 +17,7 @@ concurrency: jobs: run-tests: - runs-on: [ubuntu-latest, macos-latest, windows-latest] + runs-on: [ubuntu-latest] strategy: fail-fast: false matrix: From 74bf82f482969915a6d373b4f9fb9d395a8a3d30 Mon Sep 17 00:00:00 2001 From: jope35 
<15650945+jope35@users.noreply.github.com> Date: Wed, 21 Feb 2024 19:29:42 +0100 Subject: [PATCH 23/52] explicit ci steps to install nbdev, move test features --- .github/workflows/ci.yml | 3 + nbs/test_features.ipynb | 158 --------------------------------------- nbs/tsfeatures.ipynb | 28 +++++++ 3 files changed, 31 insertions(+), 158 deletions(-) delete mode 100644 nbs/test_features.ipynb diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a6dc4ca..f1bd7e4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -32,6 +32,9 @@ jobs: python-version: ${{ matrix.python-version }} cache: "pip" + - name: Install nbdev + run: pip install nbdev + - name: Install the library run: pip install ".[dev]" diff --git a/nbs/test_features.ipynb b/nbs/test_features.ipynb deleted file mode 100644 index 9cd8979..0000000 --- a/nbs/test_features.ipynb +++ /dev/null @@ -1,158 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The autoreload extension is already loaded. 
To reload it, use:\n", - " %reload_ext autoreload\n" - ] - } - ], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from tsfeatures.m4_data import *\n", - "from tsfeatures.tsfeatures import *\n", - "from tsfeatures.utils import *" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def test_pipeline():\n", - " def calculate_features_m4(dataset_name, directory, num_obs=1000000):\n", - " _, y_train_df, _, _ = prepare_m4_data(\n", - " dataset_name=dataset_name, directory=directory, num_obs=num_obs\n", - " )\n", - "\n", - " freq = FREQS[dataset_name[0]]\n", - "\n", - " py_feats = tsfeatures(\n", - " y_train_df, freq=freq, features=[count_entropy]\n", - " ).set_index(\"unique_id\")\n", - "\n", - " calculate_features_m4(\"Hourly\", \"data\", 100)\n", - " calculate_features_m4(\"Daily\", \"data\", 100)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Unexpected exception formatting exception. 
Falling back to standard exception\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Traceback (most recent call last):\n", - " File \"/Users/JdeTheije/mambaforge/envs/nbdev/lib/python3.10/site-packages/IPython/core/interactiveshell.py\", line 3526, in run_code\n", - " exec(code_obj, self.user_global_ns, self.user_ns)\n", - " File \"/var/folders/ny/_85npg2s1j3_clzbjzxsshn80000gp/T/ipykernel_28927/253115070.py\", line 1, in \n", - " test_pipeline()\n", - " File \"/var/folders/ny/_85npg2s1j3_clzbjzxsshn80000gp/T/ipykernel_28927/632637266.py\", line 13, in test_pipeline\n", - " calculate_features_m4(\"Hourly\", \"data\", 100)\n", - " File \"/var/folders/ny/_85npg2s1j3_clzbjzxsshn80000gp/T/ipykernel_28927/632637266.py\", line 9, in calculate_features_m4\n", - " py_feats = tsfeatures(\n", - " File \"/Users/JdeTheije/Developer/github_juicetea/nbdev_port/tsfeatures_nbdev/tsfeatures/tsfeatures.py\", line 147, in tsfeatures\n", - " ts_features = pool.starmap(partial_get_feats, ts.groupby(\"unique_id\"))\n", - " File \"/Users/JdeTheije/mambaforge/envs/nbdev/lib/python3.10/multiprocessing/pool.py\", line 375, in starmap\n", - " return self._map_async(func, iterable, starmapstar, chunksize).get()\n", - " File \"/Users/JdeTheije/mambaforge/envs/nbdev/lib/python3.10/multiprocessing/pool.py\", line 774, in get\n", - " raise self._value\n", - " File \"/Users/JdeTheije/mambaforge/envs/nbdev/lib/python3.10/multiprocessing/pool.py\", line 540, in _handle_tasks\n", - " put(task)\n", - " File \"/Users/JdeTheije/mambaforge/envs/nbdev/lib/python3.10/multiprocessing/connection.py\", line 206, in send\n", - " self._send_bytes(_ForkingPickler.dumps(obj))\n", - " File \"/Users/JdeTheije/mambaforge/envs/nbdev/lib/python3.10/multiprocessing/reduction.py\", line 51, in dumps\n", - " cls(buf, protocol).dump(obj)\n", - "_pickle.PicklingError: Can't pickle : it's not the same object as tsfeatures.features.count_entropy\n", - "\n", - "During handling of the above 
exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/Users/JdeTheije/mambaforge/envs/nbdev/lib/python3.10/site-packages/IPython/core/interactiveshell.py\", line 2120, in showtraceback\n", - " stb = self.InteractiveTB.structured_traceback(\n", - " File \"/Users/JdeTheije/mambaforge/envs/nbdev/lib/python3.10/site-packages/IPython/core/ultratb.py\", line 1435, in structured_traceback\n", - " return FormattedTB.structured_traceback(\n", - " File \"/Users/JdeTheije/mambaforge/envs/nbdev/lib/python3.10/site-packages/IPython/core/ultratb.py\", line 1326, in structured_traceback\n", - " return VerboseTB.structured_traceback(\n", - " File \"/Users/JdeTheije/mambaforge/envs/nbdev/lib/python3.10/site-packages/IPython/core/ultratb.py\", line 1173, in structured_traceback\n", - " formatted_exception = self.format_exception_as_a_whole(etype, evalue, etb, number_of_lines_of_context,\n", - " File \"/Users/JdeTheije/mambaforge/envs/nbdev/lib/python3.10/site-packages/IPython/core/ultratb.py\", line 1088, in format_exception_as_a_whole\n", - " frames.append(self.format_record(record))\n", - " File \"/Users/JdeTheije/mambaforge/envs/nbdev/lib/python3.10/site-packages/IPython/core/ultratb.py\", line 970, in format_record\n", - " frame_info.lines, Colors, self.has_colors, lvals\n", - " File \"/Users/JdeTheije/mambaforge/envs/nbdev/lib/python3.10/site-packages/IPython/core/ultratb.py\", line 792, in lines\n", - " return self._sd.lines\n", - " File \"/Users/JdeTheije/mambaforge/envs/nbdev/lib/python3.10/site-packages/stack_data/utils.py\", line 145, in cached_property_wrapper\n", - " value = obj.__dict__[self.func.__name__] = self.func(obj)\n", - " File \"/Users/JdeTheije/mambaforge/envs/nbdev/lib/python3.10/site-packages/stack_data/core.py\", line 698, in lines\n", - " pieces = self.included_pieces\n", - " File \"/Users/JdeTheije/mambaforge/envs/nbdev/lib/python3.10/site-packages/stack_data/utils.py\", line 145, in 
cached_property_wrapper\n", - " value = obj.__dict__[self.func.__name__] = self.func(obj)\n", - " File \"/Users/JdeTheije/mambaforge/envs/nbdev/lib/python3.10/site-packages/stack_data/core.py\", line 649, in included_pieces\n", - " pos = scope_pieces.index(self.executing_piece)\n", - " File \"/Users/JdeTheije/mambaforge/envs/nbdev/lib/python3.10/site-packages/stack_data/utils.py\", line 145, in cached_property_wrapper\n", - " value = obj.__dict__[self.func.__name__] = self.func(obj)\n", - " File \"/Users/JdeTheije/mambaforge/envs/nbdev/lib/python3.10/site-packages/stack_data/core.py\", line 628, in executing_piece\n", - " return only(\n", - " File \"/Users/JdeTheije/mambaforge/envs/nbdev/lib/python3.10/site-packages/executing/executing.py\", line 164, in only\n", - " raise NotOneValueFound('Expected one value, found 0')\n", - "executing.executing.NotOneValueFound: Expected one value, found 0\n" - ] - } - ], - "source": [ - "test_pipeline()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "python3", - "language": "python", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/nbs/tsfeatures.ipynb b/nbs/tsfeatures.ipynb index dc7a3b9..facca94 100644 --- a/nbs/tsfeatures.ipynb +++ b/nbs/tsfeatures.ipynb @@ -1392,6 +1392,8 @@ "metadata": {}, "outputs": [], "source": [ + "# |export\n", + "\n", "def _get_feats_wide(index,\n", " ts,\n", " scale = True,\n", @@ -1418,6 +1420,8 @@ "metadata": {}, "outputs": [], "source": [ + "# |export\n", + "\n", "def tsfeatures_wide(ts: pd.DataFrame,\n", " features: List[Callable] = [acf_features, arch_stat, crossing_points,\n", " entropy, flat_spots, heterogeneity,\n", @@ -1459,6 +1463,30 @@ " return ts_features" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def test_pipeline():\n", + " def 
calculate_features_m4(dataset_name, directory, num_obs=1000000):\n", + " _, y_train_df, _, _ = prepare_m4_data(\n", + " dataset_name=dataset_name, directory=directory, num_obs=num_obs\n", + " )\n", + "\n", + " freq = FREQS[dataset_name[0]]\n", + "\n", + " py_feats = tsfeatures(\n", + " y_train_df, freq=freq, features=[count_entropy]\n", + " ).set_index(\"unique_id\")\n", + "\n", + " calculate_features_m4(\"Hourly\", \"data\", 100)\n", + " calculate_features_m4(\"Daily\", \"data\", 100)\n", + "\n", + "test_pipeline()" + ] + }, { "cell_type": "code", "execution_count": null, From b7b671f7b892bbec6475342221644bbb26e7335c Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Wed, 21 Feb 2024 19:32:13 +0100 Subject: [PATCH 24/52] ci mod --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f1bd7e4..7f5c54f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,7 +17,7 @@ concurrency: jobs: run-tests: - runs-on: [ubuntu-latest] + runs-on: ubuntu-latest strategy: fail-fast: false matrix: From 424331c88833d03eead17f6c80e98eef54bc6380 Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Wed, 21 Feb 2024 19:33:46 +0100 Subject: [PATCH 25/52] no pip cache --- .github/workflows/ci.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7f5c54f..1b231fc 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -30,7 +30,6 @@ jobs: uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - cache: "pip" - name: Install nbdev run: pip install nbdev From cbdb352f68539b2f6e911d0cf7c2c943ab5a9f34 Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Wed, 21 Feb 2024 19:37:26 +0100 Subject: [PATCH 26/52] add import stmt --- nbs/index.ipynb | 4 -- nbs/tsfeatures.ipynb | 9 ++++ 
tsfeatures/_modidx.py | 4 ++ tsfeatures/tsfeatures.py | 95 +++++++++++++++++++++++++++++++++++++++- 4 files changed, 107 insertions(+), 5 deletions(-) diff --git a/nbs/index.ipynb b/nbs/index.ipynb index 67cb007..5998625 100644 --- a/nbs/index.ipynb +++ b/nbs/index.ipynb @@ -399,10 +399,6 @@ "display_name": "python3", "language": "python", "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.12.1" } }, "nbformat": 4, diff --git a/nbs/tsfeatures.ipynb b/nbs/tsfeatures.ipynb index facca94..105cbf6 100644 --- a/nbs/tsfeatures.ipynb +++ b/nbs/tsfeatures.ipynb @@ -1463,6 +1463,15 @@ " return ts_features" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from tsfeatures.m4_data import *" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/tsfeatures/_modidx.py b/tsfeatures/_modidx.py index f657af3..f6a4d4f 100644 --- a/tsfeatures/_modidx.py +++ b/tsfeatures/_modidx.py @@ -10,6 +10,8 @@ 'tsfeatures.m4_data.maybe_download': ('m4_data.html#maybe_download', 'tsfeatures/m4_data.py'), 'tsfeatures.m4_data.prepare_m4_data': ('m4_data.html#prepare_m4_data', 'tsfeatures/m4_data.py')}, 'tsfeatures.tsfeatures': { 'tsfeatures.tsfeatures._get_feats': ('tsfeatures.html#_get_feats', 'tsfeatures/tsfeatures.py'), + 'tsfeatures.tsfeatures._get_feats_wide': ( 'tsfeatures.html#_get_feats_wide', + 'tsfeatures/tsfeatures.py'), 'tsfeatures.tsfeatures.acf_features': ('tsfeatures.html#acf_features', 'tsfeatures/tsfeatures.py'), 'tsfeatures.tsfeatures.arch_stat': ('tsfeatures.html#arch_stat', 'tsfeatures/tsfeatures.py'), 'tsfeatures.tsfeatures.count_entropy': ('tsfeatures.html#count_entropy', 'tsfeatures/tsfeatures.py'), @@ -33,6 +35,8 @@ 'tsfeatures.tsfeatures.stability': ('tsfeatures.html#stability', 'tsfeatures/tsfeatures.py'), 'tsfeatures.tsfeatures.stl_features': ('tsfeatures.html#stl_features', 'tsfeatures/tsfeatures.py'), 'tsfeatures.tsfeatures.tsfeatures': 
('tsfeatures.html#tsfeatures', 'tsfeatures/tsfeatures.py'), + 'tsfeatures.tsfeatures.tsfeatures_wide': ( 'tsfeatures.html#tsfeatures_wide', + 'tsfeatures/tsfeatures.py'), 'tsfeatures.tsfeatures.unitroot_kpss': ('tsfeatures.html#unitroot_kpss', 'tsfeatures/tsfeatures.py'), 'tsfeatures.tsfeatures.unitroot_pp': ('tsfeatures.html#unitroot_pp', 'tsfeatures/tsfeatures.py')}, 'tsfeatures.tsfeatures_r': {}, diff --git a/tsfeatures/tsfeatures.py b/tsfeatures/tsfeatures.py index e905849..4a15c03 100644 --- a/tsfeatures/tsfeatures.py +++ b/tsfeatures/tsfeatures.py @@ -4,7 +4,7 @@ __all__ = ['acf_features', 'arch_stat', 'count_entropy', 'crossing_points', 'entropy', 'flat_spots', 'frequency', 'guerrero', 'heterogeneity', 'holt_parameters', 'hurst', 'hw_parameters', 'intervals', 'lumpiness', 'nonlinearity', 'pacf_features', 'series_length', 'sparsity', 'stability', 'stl_features', 'unitroot_kpss', 'unitroot_pp', - 'tsfeatures'] + 'tsfeatures', 'tsfeatures_wide'] # %% ../nbs/tsfeatures.ipynb 3 import os @@ -990,3 +990,96 @@ def tsfeatures( ts_features = ts_features.reset_index() return ts_features + +# %% ../nbs/tsfeatures.ipynb 36 +def _get_feats_wide( + index, + ts, + scale=True, + features=[ + acf_features, + arch_stat, + crossing_points, + entropy, + flat_spots, + heterogeneity, + holt_parameters, + lumpiness, + nonlinearity, + pacf_features, + stl_features, + stability, + hw_parameters, + unitroot_kpss, + unitroot_pp, + series_length, + hurst, + ], +): + seasonality = ts["seasonality"].item() + y = ts["y"].item() + y = np.array(y) + + if scale: + y = scalets(y) + + c_map = ChainMap( + *[dict_feat for dict_feat in [func(y, seasonality) for func in features]] + ) + + return pd.DataFrame(dict(c_map), index=[index]) + +# %% ../nbs/tsfeatures.ipynb 37 +def tsfeatures_wide( + ts: pd.DataFrame, + features: List[Callable] = [ + acf_features, + arch_stat, + crossing_points, + entropy, + flat_spots, + heterogeneity, + holt_parameters, + lumpiness, + nonlinearity, + pacf_features, + 
stl_features, + stability, + hw_parameters, + unitroot_kpss, + unitroot_pp, + series_length, + hurst, + ], + scale: bool = True, + threads: Optional[int] = None, +) -> pd.DataFrame: + """Calculates features for time series. + + Parameters + ---------- + ts: pandas df + Pandas DataFrame with columns ['unique_id', 'seasonality', 'y']. + Wide panel of time series. + features: iterable + Iterable of features functions. + scale: bool + Whether (mean-std)scale data. + threads: int + Number of threads to use. Use None (default) for parallel processing. + + Returns + ------- + pandas df + Pandas DataFrame where each column is a feature and each row + a time series. + """ + partial_get_feats = partial(_get_feats_wide, scale=scale, features=features) + + with Pool(threads) as pool: + ts_features = pool.starmap(partial_get_feats, ts.groupby("unique_id")) + + ts_features = pd.concat(ts_features).rename_axis("unique_id") + ts_features = ts_features.reset_index() + + return ts_features From 3a57f9cc2a73976740d21cb793de4cfa492bb105 Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Wed, 21 Feb 2024 20:16:04 +0100 Subject: [PATCH 27/52] add star import --- nbs/index.ipynb | 4 ++++ tsfeatures/__init__.py | 1 + tsfeatures/_modidx.py | 4 +--- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/nbs/index.ipynb b/nbs/index.ipynb index 5998625..67cb007 100644 --- a/nbs/index.ipynb +++ b/nbs/index.ipynb @@ -399,6 +399,10 @@ "display_name": "python3", "language": "python", "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.12.1" } }, "nbformat": 4, diff --git a/tsfeatures/__init__.py b/tsfeatures/__init__.py index 98a433b..9632ea4 100644 --- a/tsfeatures/__init__.py +++ b/tsfeatures/__init__.py @@ -1 +1,2 @@ __version__ = "0.4.5" +from .tsfeatures import * diff --git a/tsfeatures/_modidx.py b/tsfeatures/_modidx.py index f6a4d4f..76fa4e2 100644 --- a/tsfeatures/_modidx.py +++ b/tsfeatures/_modidx.py @@ -5,8 +5,7 @@ 
'doc_host': 'https://Nixtla.github.io', 'git_url': 'https://github.com/Nixtla/tsfeatures', 'lib_path': 'tsfeatures'}, - 'syms': { 'tsfeatures.compare_with_r': {}, - 'tsfeatures.m4_data': { 'tsfeatures.m4_data.m4_parser': ('m4_data.html#m4_parser', 'tsfeatures/m4_data.py'), + 'syms': { 'tsfeatures.m4_data': { 'tsfeatures.m4_data.m4_parser': ('m4_data.html#m4_parser', 'tsfeatures/m4_data.py'), 'tsfeatures.m4_data.maybe_download': ('m4_data.html#maybe_download', 'tsfeatures/m4_data.py'), 'tsfeatures.m4_data.prepare_m4_data': ('m4_data.html#prepare_m4_data', 'tsfeatures/m4_data.py')}, 'tsfeatures.tsfeatures': { 'tsfeatures.tsfeatures._get_feats': ('tsfeatures.html#_get_feats', 'tsfeatures/tsfeatures.py'), @@ -39,7 +38,6 @@ 'tsfeatures/tsfeatures.py'), 'tsfeatures.tsfeatures.unitroot_kpss': ('tsfeatures.html#unitroot_kpss', 'tsfeatures/tsfeatures.py'), 'tsfeatures.tsfeatures.unitroot_pp': ('tsfeatures.html#unitroot_pp', 'tsfeatures/tsfeatures.py')}, - 'tsfeatures.tsfeatures_r': {}, 'tsfeatures.utils': { 'tsfeatures.utils.embed': ('utils.html#embed', 'tsfeatures/utils.py'), 'tsfeatures.utils.hurst_exponent': ('utils.html#hurst_exponent', 'tsfeatures/utils.py'), 'tsfeatures.utils.lambda_coef_var': ('utils.html#lambda_coef_var', 'tsfeatures/utils.py'), From 296854403ac82a704ecbfd4198816cbe165017bd Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Sat, 24 Feb 2024 14:22:15 +0100 Subject: [PATCH 28/52] small cleanup --- nbs/tsfeatures.ipynb | 2 +- tsfeatures/tsfeatures.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/nbs/tsfeatures.ipynb b/nbs/tsfeatures.ipynb index 105cbf6..8b46ac3 100644 --- a/nbs/tsfeatures.ipynb +++ b/nbs/tsfeatures.ipynb @@ -40,7 +40,7 @@ "from collections import ChainMap\n", "from functools import partial\n", "from multiprocessing import Pool\n", - "from typing import Callable, Dict, List, Optional\n", + "from typing import Callable, List, Optional\n", "\n", "import pandas as pd" ] diff --git 
a/tsfeatures/tsfeatures.py b/tsfeatures/tsfeatures.py index 4a15c03..68598ea 100644 --- a/tsfeatures/tsfeatures.py +++ b/tsfeatures/tsfeatures.py @@ -12,7 +12,7 @@ from collections import ChainMap from functools import partial from multiprocessing import Pool -from typing import Callable, Dict, List, Optional +from typing import Callable, List, Optional import pandas as pd From 5245275bcaa58b49082c1226d4a44a0e5bf76359 Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Thu, 29 Feb 2024 21:50:46 +0100 Subject: [PATCH 29/52] cleanup notebooks --- nbs/index.ipynb | 4 - nbs/m4_data.ipynb | 32 +++---- nbs/tsfeatures.ipynb | 179 +++++++++++++++--------------------- nbs/utils.ipynb | 189 ++------------------------------------- tsfeatures/_modidx.py | 1 + tsfeatures/m4_data.py | 9 +- tsfeatures/tsfeatures.py | 96 ++++++++++++++------ tsfeatures/utils.py | 183 ++----------------------------------- 8 files changed, 182 insertions(+), 511 deletions(-) diff --git a/nbs/index.ipynb b/nbs/index.ipynb index 67cb007..5998625 100644 --- a/nbs/index.ipynb +++ b/nbs/index.ipynb @@ -399,10 +399,6 @@ "display_name": "python3", "language": "python", "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.12.1" } }, "nbformat": 4, diff --git a/nbs/m4_data.ipynb b/nbs/m4_data.ipynb index ac2994e..350093d 100644 --- a/nbs/m4_data.ipynb +++ b/nbs/m4_data.ipynb @@ -43,11 +43,20 @@ "metadata": {}, "outputs": [], "source": [ - "# | export\n", + "# |export\n", "import os\n", "import urllib\n", "\n", - "import pandas as pd\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# |export\n", "\n", "seas_dict = {\n", " \"Hourly\": {\"seasonality\": 24, \"input_size\": 24, \"output_size\": 48, \"freq\": \"H\"},\n", @@ -65,7 +74,7 @@ "metadata": {}, "outputs": [], "source": [ - "# | export\n", + "# |export\n", "\n", "SOURCE_URL = (\n", " 
\"https://raw.githubusercontent.com/Mcompetitions/M4-methods/master/Dataset/\"\n", @@ -78,7 +87,7 @@ "metadata": {}, "outputs": [], "source": [ - "# | export\n", + "# |export\n", "\n", "\n", "def maybe_download(filename, directory):\n", @@ -115,7 +124,7 @@ "metadata": {}, "outputs": [], "source": [ - "# | export\n", + "# |export\n", "\n", "\n", "def m4_parser(dataset_name, directory, num_obs=1000000):\n", @@ -208,7 +217,7 @@ "metadata": {}, "outputs": [], "source": [ - "# | export\n", + "# |export\n", "\n", "\n", "def prepare_m4_data(dataset_name, directory, num_obs):\n", @@ -254,7 +263,7 @@ "metadata": {}, "outputs": [], "source": [ - "# | hide\n", + "# |hide\n", "from nbdev.showdoc import *" ] }, @@ -264,18 +273,11 @@ "metadata": {}, "outputs": [], "source": [ - "# | hide\n", + "# |hide\n", "import nbdev\n", "\n", "nbdev.nbdev_export()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/nbs/tsfeatures.ipynb b/nbs/tsfeatures.ipynb index 8b46ac3..c7c3859 100644 --- a/nbs/tsfeatures.ipynb +++ b/nbs/tsfeatures.ipynb @@ -100,9 +100,21 @@ "metadata": {}, "outputs": [], "source": [ - "# |export\n", + "from math import isclose\n", "\n", + "from fastcore.test import *\n", "\n", + "from tsfeatures.m4_data import *\n", + "from tsfeatures.utils import USAccDeaths, WWWusage" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# |export\n", "def acf_features(x: np.array, freq: int = 1) -> Dict[str, float]:\n", " \"\"\"Calculates autocorrelation function features.\n", "\n", @@ -174,17 +186,6 @@ " return output" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from math import isclose\n", - "\n", - "from tsfeatures.utils import USAccDeaths, WWWusage" - ] - }, { "cell_type": "code", "execution_count": null, @@ -227,8 +228,6 @@ "outputs": [], "source": [ "# 
|export\n", - "\n", - "\n", "def arch_stat(\n", " x: np.array, freq: int = 1, lags: int = 12, demean: bool = True\n", ") -> Dict[str, float]:\n", @@ -249,7 +248,7 @@ " if len(x) <= lags + 1:\n", " return {\"arch_lm\": np.nan}\n", " if demean:\n", - " x = x - np.mean(x)\n", + " x = x - np.mean(x)\n", "\n", " size_x = len(x)\n", " mat = embed(x**2, lags + 1)\n", @@ -264,17 +263,6 @@ " return {\"arch_lm\": r_squared}" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from fastcore.test import *\n", - "\n", - "from tsfeatures.utils import USAccDeaths, WWWusage" - ] - }, { "cell_type": "code", "execution_count": null, @@ -306,8 +294,6 @@ "outputs": [], "source": [ "# |export\n", - "\n", - "\n", "def count_entropy(x: np.array, freq: int = 1) -> Dict[str, float]:\n", " \"\"\"Count entropy.\n", "\n", @@ -336,8 +322,6 @@ "outputs": [], "source": [ "# |export\n", - "\n", - "\n", "def crossing_points(x: np.array, freq: int = 1) -> Dict[str, float]:\n", " \"\"\"Crossing points.\n", "\n", @@ -370,8 +354,6 @@ "outputs": [], "source": [ "# |export\n", - "\n", - "\n", "def entropy(x: np.array, freq: int = 1, base: float = e) -> Dict[str, float]:\n", " \"\"\"Calculates sample entropy.\n", "\n", @@ -403,8 +385,6 @@ "outputs": [], "source": [ "# |export\n", - "\n", - "\n", "def flat_spots(x: np.array, freq: int = 1) -> Dict[str, float]:\n", " \"\"\"Flat spots.\n", "\n", @@ -426,8 +406,7 @@ " return {\"flat_spots\": np.nan}\n", "\n", " rlex = np.array([sum(1 for i in g) for k, g in groupby(cutx)]).max()\n", - " return {\"flat_spots\": rlex}\n", - "\n" + " return {\"flat_spots\": rlex}" ] }, { @@ -437,8 +416,6 @@ "outputs": [], "source": [ "# |export\n", - "\n", - "\n", "def frequency(x: np.array, freq: int = 1) -> Dict[str, float]:\n", " \"\"\"Frequency.\n", "\n", @@ -465,8 +442,6 @@ "outputs": [], "source": [ "# |export\n", - "\n", - "\n", "def guerrero(\n", " x: np.array, freq: int = 1, lower: int = -1, upper: int = 2\n", 
") -> Dict[str, float]:\n", @@ -509,8 +484,6 @@ "outputs": [], "source": [ "# |export\n", - "\n", - "\n", "def heterogeneity(x: np.array, freq: int = 1) -> Dict[str, float]:\n", " \"\"\"Heterogeneity.\n", "\n", @@ -581,8 +554,6 @@ "outputs": [], "source": [ "# |export\n", - "\n", - "\n", "def holt_parameters(x: np.array, freq: int = 1) -> Dict[str, float]:\n", " \"\"\"Fitted parameters of a Holt model.\n", "\n", @@ -618,8 +589,6 @@ "outputs": [], "source": [ "# |export\n", - "\n", - "\n", "def hurst(x: np.array, freq: int = 1) -> Dict[str, float]:\n", " \"\"\"Hurst index.\n", "\n", @@ -650,8 +619,6 @@ "outputs": [], "source": [ "# |export\n", - "\n", - "\n", "def hw_parameters(x: np.array, freq: int = 1) -> Dict[str, float]:\n", " \"\"\"Fitted parameters of a Holt-Winters model.\n", "\n", @@ -691,8 +658,6 @@ "outputs": [], "source": [ "# |export\n", - "\n", - "\n", "def intervals(x: np.array, freq: int = 1) -> Dict[str, float]:\n", " \"\"\"Intervals with demand.\n", "\n", @@ -724,8 +689,6 @@ "outputs": [], "source": [ "# |export\n", - "\n", - "\n", "def lumpiness(x: np.array, freq: int = 1) -> Dict[str, float]:\n", " \"\"\"lumpiness.\n", "\n", @@ -767,8 +730,6 @@ "outputs": [], "source": [ "# |export\n", - "\n", - "\n", "def nonlinearity(x: np.array, freq: int = 1) -> Dict[str, float]:\n", " \"\"\"Nonlinearity.\n", "\n", @@ -801,8 +762,6 @@ "outputs": [], "source": [ "# |export\n", - "\n", - "\n", "def pacf_features(x: np.array, freq: int = 1) -> Dict[str, float]:\n", " \"\"\"Calculates partial autocorrelation function features.\n", "\n", @@ -881,8 +840,6 @@ "outputs": [], "source": [ "# |export\n", - "\n", - "\n", "def series_length(x: np.array, freq: int = 1) -> Dict[str, float]:\n", " \"\"\"Series length.\n", "\n", @@ -909,8 +866,6 @@ "outputs": [], "source": [ "# |export\n", - "\n", - "\n", "def sparsity(x: np.array, freq: int = 1) -> Dict[str, float]:\n", " \"\"\"Sparsity.\n", "\n", @@ -937,8 +892,6 @@ "outputs": [], "source": [ "# |export\n", - "\n", - "\n", 
"def stability(x: np.array, freq: int = 1) -> Dict[str, float]:\n", " \"\"\"Stability.\n", "\n", @@ -980,8 +933,6 @@ "outputs": [], "source": [ "# |export\n", - "\n", - "\n", "def stl_features(x: np.array, freq: int = 1) -> Dict[str, float]:\n", " \"\"\"Calculates seasonal trend using loess decomposition.\n", "\n", @@ -1099,7 +1050,6 @@ " time_x = add_constant(poly_m)\n", " coefs = OLS(trend0, time_x).fit().params\n", "\n", - "\n", " try:\n", " linearity = coefs[1]\n", " except:\n", @@ -1137,8 +1087,6 @@ "outputs": [], "source": [ "# |export\n", - "\n", - "\n", "def unitroot_kpss(x: np.array, freq: int = 1) -> Dict[str, float]:\n", " \"\"\"Unit root kpss.\n", "\n", @@ -1172,8 +1120,6 @@ "outputs": [], "source": [ "# |export\n", - "\n", - "\n", "def unitroot_pp(x: np.array, freq: int = 1) -> Dict[str, float]:\n", " \"\"\"Unit root pp.\n", "\n", @@ -1203,6 +1149,7 @@ "metadata": {}, "outputs": [], "source": [ + "# |export\n", "def statistics(x: np.array, freq: int = 1) -> Dict[str, float]:\n", " \"\"\"Computes basic statistics of x.\n", "\n", @@ -1393,25 +1340,42 @@ "outputs": [], "source": [ "# |export\n", - "\n", - "def _get_feats_wide(index,\n", - " ts,\n", - " scale = True,\n", - " features = [acf_features, arch_stat, crossing_points,\n", - " entropy, flat_spots, heterogeneity, holt_parameters,\n", - " lumpiness, nonlinearity, pacf_features, stl_features,\n", - " stability, hw_parameters, unitroot_kpss, unitroot_pp,\n", - " series_length, hurst]):\n", - " seasonality = ts['seasonality'].item()\n", - " y = ts['y'].item()\n", + "def _get_feats_wide(\n", + " index,\n", + " ts,\n", + " scale=True,\n", + " features=[\n", + " acf_features,\n", + " arch_stat,\n", + " crossing_points,\n", + " entropy,\n", + " flat_spots,\n", + " heterogeneity,\n", + " holt_parameters,\n", + " lumpiness,\n", + " nonlinearity,\n", + " pacf_features,\n", + " stl_features,\n", + " stability,\n", + " hw_parameters,\n", + " unitroot_kpss,\n", + " unitroot_pp,\n", + " series_length,\n", + " 
hurst,\n", + " ],\n", + "):\n", + " seasonality = ts[\"seasonality\"].item()\n", + " y = ts[\"y\"].item()\n", " y = np.array(y)\n", "\n", " if scale:\n", " y = scalets(y)\n", "\n", - " c_map = ChainMap(*[dict_feat for dict_feat in [func(y, seasonality) for func in features]])\n", + " c_map = ChainMap(\n", + " *[dict_feat for dict_feat in [func(y, seasonality) for func in features]]\n", + " )\n", "\n", - " return pd.DataFrame(dict(c_map), index = [index])\n" + " return pd.DataFrame(dict(c_map), index=[index])" ] }, { @@ -1421,16 +1385,30 @@ "outputs": [], "source": [ "# |export\n", - "\n", - "def tsfeatures_wide(ts: pd.DataFrame,\n", - " features: List[Callable] = [acf_features, arch_stat, crossing_points,\n", - " entropy, flat_spots, heterogeneity,\n", - " holt_parameters, lumpiness, nonlinearity,\n", - " pacf_features, stl_features, stability,\n", - " hw_parameters, unitroot_kpss, unitroot_pp,\n", - " series_length, hurst],\n", - " scale: bool = True,\n", - " threads: Optional[int] = None) -> pd.DataFrame:\n", + "def tsfeatures_wide(\n", + " ts: pd.DataFrame,\n", + " features: List[Callable] = [\n", + " acf_features,\n", + " arch_stat,\n", + " crossing_points,\n", + " entropy,\n", + " flat_spots,\n", + " heterogeneity,\n", + " holt_parameters,\n", + " lumpiness,\n", + " nonlinearity,\n", + " pacf_features,\n", + " stl_features,\n", + " stability,\n", + " hw_parameters,\n", + " unitroot_kpss,\n", + " unitroot_pp,\n", + " series_length,\n", + " hurst,\n", + " ],\n", + " scale: bool = True,\n", + " threads: Optional[int] = None,\n", + ") -> pd.DataFrame:\n", " \"\"\"Calculates features for time series.\n", "\n", " Parameters\n", @@ -1451,27 +1429,17 @@ " Pandas DataFrame where each column is a feature and each row\n", " a time series.\n", " \"\"\"\n", - " partial_get_feats = partial(_get_feats_wide, scale=scale,\n", - " features=features)\n", + " partial_get_feats = partial(_get_feats_wide, scale=scale, features=features)\n", "\n", " with Pool(threads) as pool:\n", - 
" ts_features = pool.starmap(partial_get_feats, ts.groupby('unique_id'))\n", + " ts_features = pool.starmap(partial_get_feats, ts.groupby(\"unique_id\"))\n", "\n", - " ts_features = pd.concat(ts_features).rename_axis('unique_id')\n", + " ts_features = pd.concat(ts_features).rename_axis(\"unique_id\")\n", " ts_features = ts_features.reset_index()\n", "\n", " return ts_features" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from tsfeatures.m4_data import *" - ] - }, { "cell_type": "code", "execution_count": null, @@ -1493,6 +1461,7 @@ " calculate_features_m4(\"Hourly\", \"data\", 100)\n", " calculate_features_m4(\"Daily\", \"data\", 100)\n", "\n", + "\n", "test_pipeline()" ] }, diff --git a/nbs/utils.ipynb b/nbs/utils.ipynb index 7024390..481920a 100644 --- a/nbs/utils.ipynb +++ b/nbs/utils.ipynb @@ -33,7 +33,7 @@ } ], "source": [ - "#| hide\n", + "# |hide\n", "%load_ext autoreload\n", "%autoreload 2" ] @@ -55,7 +55,7 @@ "metadata": {}, "outputs": [], "source": [ - "#| hide\n", + "# |hide\n", "from fastcore.test import *" ] }, @@ -87,7 +87,6 @@ "outputs": [], "source": [ "# |export\n", - "\n", "FREQS = {\"H\": 24, \"D\": 1, \"M\": 12, \"Q\": 4, \"W\": 1, \"Y\": 1}" ] }, @@ -389,183 +388,13 @@ "outputs": [], "source": [ "# |export\n", - "WWWusage = [\n", - " 88,\n", - " 84,\n", - " 85,\n", - " 85,\n", - " 84,\n", - " 85,\n", - " 83,\n", - " 85,\n", - " 88,\n", - " 89,\n", - " 91,\n", - " 99,\n", - " 104,\n", - " 112,\n", - " 126,\n", - " 138,\n", - " 146,\n", - " 151,\n", - " 150,\n", - " 148,\n", - " 147,\n", - " 149,\n", - " 143,\n", - " 132,\n", - " 131,\n", - " 139,\n", - " 147,\n", - " 150,\n", - " 148,\n", - " 145,\n", - " 140,\n", - " 134,\n", - " 131,\n", - " 131,\n", - " 129,\n", - " 126,\n", - " 126,\n", - " 132,\n", - " 137,\n", - " 140,\n", - " 142,\n", - " 150,\n", - " 159,\n", - " 167,\n", - " 170,\n", - " 171,\n", - " 172,\n", - " 172,\n", - " 174,\n", - " 175,\n", - " 172,\n", - " 
172,\n", - " 174,\n", - " 174,\n", - " 169,\n", - " 165,\n", - " 156,\n", - " 142,\n", - " 131,\n", - " 121,\n", - " 112,\n", - " 104,\n", - " 102,\n", - " 99,\n", - " 99,\n", - " 95,\n", - " 88,\n", - " 84,\n", - " 84,\n", - " 87,\n", - " 89,\n", - " 88,\n", - " 85,\n", - " 86,\n", - " 89,\n", - " 91,\n", - " 91,\n", - " 94,\n", - " 101,\n", - " 110,\n", - " 121,\n", - " 135,\n", - " 145,\n", - " 149,\n", - " 156,\n", - " 165,\n", - " 171,\n", - " 175,\n", - " 177,\n", - " 182,\n", - " 193,\n", - " 204,\n", - " 208,\n", - " 210,\n", - " 215,\n", - " 222,\n", - " 228,\n", - " 226,\n", - " 222,\n", - " 220,\n", - "]\n", - "\n", - "USAccDeaths = [\n", - " 9007,\n", - " 8106,\n", - " 8928,\n", - " 9137,\n", - " 10017,\n", - " 10826,\n", - " 11317,\n", - " 10744,\n", - " 9713,\n", - " 9938,\n", - " 9161,\n", - " 8927,\n", - " 7750,\n", - " 6981,\n", - " 8038,\n", - " 8422,\n", - " 8714,\n", - " 9512,\n", - " 10120,\n", - " 9823,\n", - " 8743,\n", - " 9129,\n", - " 8710,\n", - " 8680,\n", - " 8162,\n", - " 7306,\n", - " 8124,\n", - " 7870,\n", - " 9387,\n", - " 9556,\n", - " 10093,\n", - " 9620,\n", - " 8285,\n", - " 8466,\n", - " 8160,\n", - " 8034,\n", - " 7717,\n", - " 7461,\n", - " 7767,\n", - " 7925,\n", - " 8623,\n", - " 8945,\n", - " 10078,\n", - " 9179,\n", - " 8037,\n", - " 8488,\n", - " 7874,\n", - " 8647,\n", - " 7792,\n", - " 6957,\n", - " 7726,\n", - " 8106,\n", - " 8890,\n", - " 9299,\n", - " 10625,\n", - " 9302,\n", - " 8314,\n", - " 8850,\n", - " 8265,\n", - " 8796,\n", - " 7836,\n", - " 6892,\n", - " 7791,\n", - " 8192,\n", - " 9115,\n", - " 9434,\n", - " 10484,\n", - " 9827,\n", - " 9110,\n", - " 9070,\n", - " 8633,\n", - " 9240,\n", - "]" + "\n", + "# fmt: off\n", + "WWWusage = [88, 84, 85, 85, 84, 85, 83, 85, 88, 89, 91, 99, 104, 112, 126, 138, 146, 151, 150, 148, 147, 149, 143, 132, 131, 139, 147, 150, 148, 145, 140, 134, 131, 131, 129, 126, 126, 132, 137, 140, 142, 150, 159, 167, 170, 171, 172, 172, 174, 175, 172, 172, 174, 174, 169, 165, 156, 142, 
131, 121, 112, 104, 102, 99, 99, 95, 88, 84, 84, 87, 89, 88, 85, 86, 89, 91, 91, 94, 101, 110, 121, 135, 145, 149, 156, 165, 171, 175, 177, 182, 193, 204, 208, 210, 215, 222, 228, 226, 222, 220,]\n", + "\n", + "USAccDeaths = [9007,8106,8928,9137,10017,10826,11317,10744,9713,9938,9161,8927,7750,6981,8038,8422,8714,9512,10120,9823,8743,9129,8710,8680,8162,7306,8124,7870,9387,9556,10093,9620,8285,8466,8160,8034,7717,7461,7767,7925,8623,8945,10078,9179,8037,8488,7874,8647,7792,6957,7726,8106,8890,9299,10625,9302,8314,8850,8265,8796,7836,6892,7791,8192,9115,9434,10484,9827,9110,9070,8633,9240,]\n", + "\n", + "# fmt: on" ] }, { diff --git a/tsfeatures/_modidx.py b/tsfeatures/_modidx.py index 76fa4e2..6b19795 100644 --- a/tsfeatures/_modidx.py +++ b/tsfeatures/_modidx.py @@ -32,6 +32,7 @@ 'tsfeatures.tsfeatures.series_length': ('tsfeatures.html#series_length', 'tsfeatures/tsfeatures.py'), 'tsfeatures.tsfeatures.sparsity': ('tsfeatures.html#sparsity', 'tsfeatures/tsfeatures.py'), 'tsfeatures.tsfeatures.stability': ('tsfeatures.html#stability', 'tsfeatures/tsfeatures.py'), + 'tsfeatures.tsfeatures.statistics': ('tsfeatures.html#statistics', 'tsfeatures/tsfeatures.py'), 'tsfeatures.tsfeatures.stl_features': ('tsfeatures.html#stl_features', 'tsfeatures/tsfeatures.py'), 'tsfeatures.tsfeatures.tsfeatures': ('tsfeatures.html#tsfeatures', 'tsfeatures/tsfeatures.py'), 'tsfeatures.tsfeatures.tsfeatures_wide': ( 'tsfeatures.html#tsfeatures_wide', diff --git a/tsfeatures/m4_data.py b/tsfeatures/m4_data.py index 742464c..5c52123 100644 --- a/tsfeatures/m4_data.py +++ b/tsfeatures/m4_data.py @@ -9,6 +9,7 @@ import pandas as pd +# %% ../nbs/m4_data.ipynb 4 seas_dict = { "Hourly": {"seasonality": 24, "input_size": 24, "output_size": 48, "freq": "H"}, "Daily": {"seasonality": 7, "input_size": 7, "output_size": 14, "freq": "D"}, @@ -18,12 +19,12 @@ "Yearly": {"seasonality": 1, "input_size": 4, "output_size": 6, "freq": "D"}, } -# %% ../nbs/m4_data.ipynb 4 +# %% ../nbs/m4_data.ipynb 5 
SOURCE_URL = ( "https://raw.githubusercontent.com/Mcompetitions/M4-methods/master/Dataset/" ) -# %% ../nbs/m4_data.ipynb 5 +# %% ../nbs/m4_data.ipynb 6 def maybe_download(filename, directory): """Download the data from M4's website, unless it's already here. @@ -51,7 +52,7 @@ def maybe_download(filename, directory): return filepath -# %% ../nbs/m4_data.ipynb 6 +# %% ../nbs/m4_data.ipynb 7 def m4_parser(dataset_name, directory, num_obs=1000000): """Transform M4 data into a panel. @@ -135,7 +136,7 @@ def m4_parser(dataset_name, directory, num_obs=1000000): return X_train_df, y_train_df, X_test_df, y_test_df -# %% ../nbs/m4_data.ipynb 7 +# %% ../nbs/m4_data.ipynb 8 def prepare_m4_data(dataset_name, directory, num_obs): """Pipeline that obtains M4 times series, tranforms it and gets naive2 predictions. diff --git a/tsfeatures/tsfeatures.py b/tsfeatures/tsfeatures.py index 68598ea..60abede 100644 --- a/tsfeatures/tsfeatures.py +++ b/tsfeatures/tsfeatures.py @@ -4,7 +4,7 @@ __all__ = ['acf_features', 'arch_stat', 'count_entropy', 'crossing_points', 'entropy', 'flat_spots', 'frequency', 'guerrero', 'heterogeneity', 'holt_parameters', 'hurst', 'hw_parameters', 'intervals', 'lumpiness', 'nonlinearity', 'pacf_features', 'series_length', 'sparsity', 'stability', 'stl_features', 'unitroot_kpss', 'unitroot_pp', - 'tsfeatures', 'tsfeatures_wide'] + 'statistics', 'tsfeatures', 'tsfeatures_wide'] # %% ../nbs/tsfeatures.ipynb 3 import os @@ -44,7 +44,7 @@ from .utils import * -# %% ../nbs/tsfeatures.ipynb 7 +# %% ../nbs/tsfeatures.ipynb 8 def acf_features(x: np.array, freq: int = 1) -> Dict[str, float]: """Calculates autocorrelation function features. @@ -150,7 +150,7 @@ def arch_stat( return {"arch_lm": r_squared} -# %% ../nbs/tsfeatures.ipynb 13 +# %% ../nbs/tsfeatures.ipynb 12 def count_entropy(x: np.array, freq: int = 1) -> Dict[str, float]: """Count entropy. 
@@ -171,7 +171,7 @@ def count_entropy(x: np.array, freq: int = 1) -> Dict[str, float]: return {"count_entropy": entropy} -# %% ../nbs/tsfeatures.ipynb 14 +# %% ../nbs/tsfeatures.ipynb 13 def crossing_points(x: np.array, freq: int = 1) -> Dict[str, float]: """Crossing points. @@ -196,7 +196,7 @@ def crossing_points(x: np.array, freq: int = 1) -> Dict[str, float]: return {"crossing_points": cross.sum()} -# %% ../nbs/tsfeatures.ipynb 15 +# %% ../nbs/tsfeatures.ipynb 14 def entropy(x: np.array, freq: int = 1, base: float = e) -> Dict[str, float]: """Calculates sample entropy. @@ -220,7 +220,7 @@ def entropy(x: np.array, freq: int = 1, base: float = e) -> Dict[str, float]: return {"entropy": entropy} -# %% ../nbs/tsfeatures.ipynb 16 +# %% ../nbs/tsfeatures.ipynb 15 def flat_spots(x: np.array, freq: int = 1) -> Dict[str, float]: """Flat spots. @@ -244,7 +244,7 @@ def flat_spots(x: np.array, freq: int = 1) -> Dict[str, float]: rlex = np.array([sum(1 for i in g) for k, g in groupby(cutx)]).max() return {"flat_spots": rlex} -# %% ../nbs/tsfeatures.ipynb 17 +# %% ../nbs/tsfeatures.ipynb 16 def frequency(x: np.array, freq: int = 1) -> Dict[str, float]: """Frequency. @@ -263,7 +263,7 @@ def frequency(x: np.array, freq: int = 1) -> Dict[str, float]: return {"frequency": freq} -# %% ../nbs/tsfeatures.ipynb 18 +# %% ../nbs/tsfeatures.ipynb 17 def guerrero( x: np.array, freq: int = 1, lower: int = -1, upper: int = 2 ) -> Dict[str, float]: @@ -298,7 +298,7 @@ def guerrero( return {"guerrero": min_} -# %% ../nbs/tsfeatures.ipynb 19 +# %% ../nbs/tsfeatures.ipynb 18 def heterogeneity(x: np.array, freq: int = 1) -> Dict[str, float]: """Heterogeneity. @@ -361,7 +361,7 @@ def heterogeneity(x: np.array, freq: int = 1) -> Dict[str, float]: return output -# %% ../nbs/tsfeatures.ipynb 20 +# %% ../nbs/tsfeatures.ipynb 19 def holt_parameters(x: np.array, freq: int = 1) -> Dict[str, float]: """Fitted parameters of a Holt model. 
@@ -389,7 +389,7 @@ def holt_parameters(x: np.array, freq: int = 1) -> Dict[str, float]: return params -# %% ../nbs/tsfeatures.ipynb 21 +# %% ../nbs/tsfeatures.ipynb 20 def hurst(x: np.array, freq: int = 1) -> Dict[str, float]: """Hurst index. @@ -412,7 +412,7 @@ def hurst(x: np.array, freq: int = 1) -> Dict[str, float]: return {"hurst": hurst_index} -# %% ../nbs/tsfeatures.ipynb 22 +# %% ../nbs/tsfeatures.ipynb 21 def hw_parameters(x: np.array, freq: int = 1) -> Dict[str, float]: """Fitted parameters of a Holt-Winters model. @@ -444,7 +444,7 @@ def hw_parameters(x: np.array, freq: int = 1) -> Dict[str, float]: return params -# %% ../nbs/tsfeatures.ipynb 23 +# %% ../nbs/tsfeatures.ipynb 22 def intervals(x: np.array, freq: int = 1) -> Dict[str, float]: """Intervals with demand. @@ -468,7 +468,7 @@ def intervals(x: np.array, freq: int = 1) -> Dict[str, float]: return {"intervals_mean": np.mean(y), "intervals_sd": np.std(y, ddof=1)} -# %% ../nbs/tsfeatures.ipynb 24 +# %% ../nbs/tsfeatures.ipynb 23 def lumpiness(x: np.array, freq: int = 1) -> Dict[str, float]: """lumpiness. @@ -502,7 +502,7 @@ def lumpiness(x: np.array, freq: int = 1) -> Dict[str, float]: return {"lumpiness": lumpiness} -# %% ../nbs/tsfeatures.ipynb 25 +# %% ../nbs/tsfeatures.ipynb 24 def nonlinearity(x: np.array, freq: int = 1) -> Dict[str, float]: """Nonlinearity. @@ -527,7 +527,7 @@ def nonlinearity(x: np.array, freq: int = 1) -> Dict[str, float]: return {"nonlinearity": test} -# %% ../nbs/tsfeatures.ipynb 26 +# %% ../nbs/tsfeatures.ipynb 25 def pacf_features(x: np.array, freq: int = 1) -> Dict[str, float]: """Calculates partial autocorrelation function features. @@ -598,7 +598,7 @@ def pacf_features(x: np.array, freq: int = 1) -> Dict[str, float]: return output -# %% ../nbs/tsfeatures.ipynb 27 +# %% ../nbs/tsfeatures.ipynb 26 def series_length(x: np.array, freq: int = 1) -> Dict[str, float]: """Series length. 
@@ -617,7 +617,7 @@ def series_length(x: np.array, freq: int = 1) -> Dict[str, float]: return {"series_length": len(x)} -# %% ../nbs/tsfeatures.ipynb 28 +# %% ../nbs/tsfeatures.ipynb 27 def sparsity(x: np.array, freq: int = 1) -> Dict[str, float]: """Sparsity. @@ -636,7 +636,7 @@ def sparsity(x: np.array, freq: int = 1) -> Dict[str, float]: return {"sparsity": np.mean(x == 0)} -# %% ../nbs/tsfeatures.ipynb 29 +# %% ../nbs/tsfeatures.ipynb 28 def stability(x: np.array, freq: int = 1) -> Dict[str, float]: """Stability. @@ -670,7 +670,7 @@ def stability(x: np.array, freq: int = 1) -> Dict[str, float]: return {"stability": stability} -# %% ../nbs/tsfeatures.ipynb 30 +# %% ../nbs/tsfeatures.ipynb 29 def stl_features(x: np.array, freq: int = 1) -> Dict[str, float]: """Calculates seasonal trend using loess decomposition. @@ -817,7 +817,7 @@ def stl_features(x: np.array, freq: int = 1) -> Dict[str, float]: return output -# %% ../nbs/tsfeatures.ipynb 31 +# %% ../nbs/tsfeatures.ipynb 30 def unitroot_kpss(x: np.array, freq: int = 1) -> Dict[str, float]: """Unit root kpss. @@ -843,7 +843,7 @@ def unitroot_kpss(x: np.array, freq: int = 1) -> Dict[str, float]: return {"unitroot_kpss": test_kpss} -# %% ../nbs/tsfeatures.ipynb 32 +# %% ../nbs/tsfeatures.ipynb 31 def unitroot_pp(x: np.array, freq: int = 1) -> Dict[str, float]: """Unit root pp. @@ -866,7 +866,51 @@ def unitroot_pp(x: np.array, freq: int = 1) -> Dict[str, float]: return {"unitroot_pp": test_pp} -# %% ../nbs/tsfeatures.ipynb 34 +# %% ../nbs/tsfeatures.ipynb 32 +def statistics(x: np.array, freq: int = 1) -> Dict[str, float]: + """Computes basic statistics of x. + + Parameters + ---------- + x: numpy array + The time series. + freq: int + Frequency of the time series + + Returns + ------- + dict + 'total_sum': Total sum of the series. + 'mean': Mean value. + 'variance': variance of the time series. + 'median': Median value. + 'p2point5': 2.5 Percentile. + 'p5': 5 percentile. + 'p25': 25 percentile. 
+ 'p75': 75 percentile. + 'p95': 95 percentile. + 'p97point5': 97.5 percentile. + 'max': Max value. + 'min': Min value. + """ + res = dict( + total_sum=np.sum(x), + mean=np.mean(x), + variance=np.var(x, ddof=1), + median=np.median(x), + p2point5=np.quantile(x, q=0.025), + p5=np.quantile(x, q=0.05), + p25=np.quantile(x, q=0.25), + p75=np.quantile(x, q=0.75), + p95=np.quantile(x, q=0.95), + p97point5=np.quantile(x, q=0.975), + max=np.max(x), + min=np.min(x), + ) + + return res + +# %% ../nbs/tsfeatures.ipynb 33 def _get_feats( index, ts, @@ -926,7 +970,7 @@ def _get_feats( return pd.DataFrame(dict(c_map), index=[index]) -# %% ../nbs/tsfeatures.ipynb 35 +# %% ../nbs/tsfeatures.ipynb 34 def tsfeatures( ts: pd.DataFrame, freq: Optional[int] = None, @@ -991,7 +1035,7 @@ def tsfeatures( return ts_features -# %% ../nbs/tsfeatures.ipynb 36 +# %% ../nbs/tsfeatures.ipynb 35 def _get_feats_wide( index, ts, @@ -1029,7 +1073,7 @@ def _get_feats_wide( return pd.DataFrame(dict(c_map), index=[index]) -# %% ../nbs/tsfeatures.ipynb 37 +# %% ../nbs/tsfeatures.ipynb 36 def tsfeatures_wide( ts: pd.DataFrame, features: List[Callable] = [ diff --git a/tsfeatures/utils.py b/tsfeatures/utils.py index d45c250..0fc2983 100644 --- a/tsfeatures/utils.py +++ b/tsfeatures/utils.py @@ -257,180 +257,9 @@ def lambda_coef_var(lambda_par: float, x: np.array, period: int = 2): return value # %% ../nbs/utils.ipynb 14 -WWWusage = [ - 88, - 84, - 85, - 85, - 84, - 85, - 83, - 85, - 88, - 89, - 91, - 99, - 104, - 112, - 126, - 138, - 146, - 151, - 150, - 148, - 147, - 149, - 143, - 132, - 131, - 139, - 147, - 150, - 148, - 145, - 140, - 134, - 131, - 131, - 129, - 126, - 126, - 132, - 137, - 140, - 142, - 150, - 159, - 167, - 170, - 171, - 172, - 172, - 174, - 175, - 172, - 172, - 174, - 174, - 169, - 165, - 156, - 142, - 131, - 121, - 112, - 104, - 102, - 99, - 99, - 95, - 88, - 84, - 84, - 87, - 89, - 88, - 85, - 86, - 89, - 91, - 91, - 94, - 101, - 110, - 121, - 135, - 145, - 149, - 156, - 165, - 171, - 
175, - 177, - 182, - 193, - 204, - 208, - 210, - 215, - 222, - 228, - 226, - 222, - 220, -] - -USAccDeaths = [ - 9007, - 8106, - 8928, - 9137, - 10017, - 10826, - 11317, - 10744, - 9713, - 9938, - 9161, - 8927, - 7750, - 6981, - 8038, - 8422, - 8714, - 9512, - 10120, - 9823, - 8743, - 9129, - 8710, - 8680, - 8162, - 7306, - 8124, - 7870, - 9387, - 9556, - 10093, - 9620, - 8285, - 8466, - 8160, - 8034, - 7717, - 7461, - 7767, - 7925, - 8623, - 8945, - 10078, - 9179, - 8037, - 8488, - 7874, - 8647, - 7792, - 6957, - 7726, - 8106, - 8890, - 9299, - 10625, - 9302, - 8314, - 8850, - 8265, - 8796, - 7836, - 6892, - 7791, - 8192, - 9115, - 9434, - 10484, - 9827, - 9110, - 9070, - 8633, - 9240, -] +# fmt: off +WWWusage = [88, 84, 85, 85, 84, 85, 83, 85, 88, 89, 91, 99, 104, 112, 126, 138, 146, 151, 150, 148, 147, 149, 143, 132, 131, 139, 147, 150, 148, 145, 140, 134, 131, 131, 129, 126, 126, 132, 137, 140, 142, 150, 159, 167, 170, 171, 172, 172, 174, 175, 172, 172, 174, 174, 169, 165, 156, 142, 131, 121, 112, 104, 102, 99, 99, 95, 88, 84, 84, 87, 89, 88, 85, 86, 89, 91, 91, 94, 101, 110, 121, 135, 145, 149, 156, 165, 171, 175, 177, 182, 193, 204, 208, 210, 215, 222, 228, 226, 222, 220,] + +USAccDeaths = [9007,8106,8928,9137,10017,10826,11317,10744,9713,9938,9161,8927,7750,6981,8038,8422,8714,9512,10120,9823,8743,9129,8710,8680,8162,7306,8124,7870,9387,9556,10093,9620,8285,8466,8160,8034,7717,7461,7767,7925,8623,8945,10078,9179,8037,8488,7874,8647,7792,6957,7726,8106,8890,9299,10625,9302,8314,8850,8265,8796,7836,6892,7791,8192,9115,9434,10484,9827,9110,9070,8633,9240,] + +# fmt: on From 00de7343a40f54a91b25e76720365d3830af01c0 Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Thu, 29 Feb 2024 21:55:51 +0100 Subject: [PATCH 30/52] add os to runs-on --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1b231fc..046bf77 100644 --- 
a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,7 +17,7 @@ concurrency: jobs: run-tests: - runs-on: ubuntu-latest + runs-on: [ubuntu-latest, macos-latest, windows-latest] strategy: fail-fast: false matrix: From 28f400d904c715be48a938ff555645818d0f75f2 Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Thu, 29 Feb 2024 21:58:13 +0100 Subject: [PATCH 31/52] change order of runs-on --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 046bf77..9f5d6d7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,7 +17,7 @@ concurrency: jobs: run-tests: - runs-on: [ubuntu-latest, macos-latest, windows-latest] + runs-on: [macos-latest, windows-latest, ubuntu-latest] strategy: fail-fast: false matrix: From 4cd2e2cd5ff9c3da40994be3e69a53cb2067ff20 Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Thu, 29 Feb 2024 22:05:52 +0100 Subject: [PATCH 32/52] matrix python version and os --- .github/workflows/ci.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9f5d6d7..13c4724 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,11 +17,12 @@ concurrency: jobs: run-tests: - runs-on: [macos-latest, windows-latest, ubuntu-latest] strategy: fail-fast: false matrix: python-version: ["3.8", "3.9", "3.10", "3.11"] + os: [macos-latest, windows-latest, ubuntu-latest] + runs-on: ${{ matrix.os }} steps: - name: Clone repo uses: actions/checkout@v3 From cf7721878421537da8367bfef18c6cc377afa9d1 Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Thu, 29 Feb 2024 22:51:04 +0100 Subject: [PATCH 33/52] optimize ci file --- .github/workflows/ci.yml | 8 +---- .github/workflows/python-package.yml | 45 ++++++++++++++-------------- 2 files changed, 23 insertions(+), 
30 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 13c4724..4cc0a2b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,10 +7,6 @@ on: branches: [main] workflow_dispatch: -defaults: - run: - shell: bash -l {0} - concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true @@ -31,9 +27,7 @@ jobs: uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - - - name: Install nbdev - run: pip install nbdev + cache: pip - name: Install the library run: pip install ".[dev]" diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 1fd42a8..d579549 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -5,35 +5,34 @@ name: Python package on: push: - branches: [ master ] + branches: [master] pull_request: - branches: [ master ] + branches: [master] jobs: build: - runs-on: ubuntu-latest strategy: matrix: - python-version: [3.7, 3.8, 3.9] + python-version: ["3.8", "3.9", "3.10", "3.11"] steps: - - uses: actions/checkout@v2 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install flake8 pytest - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - - name: Lint with flake8 - run: | - # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - - name: Test with pytest - run: | - pytest + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install flake8 pytest + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + - name: Lint with flake8 + run: | + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: Test with pytest + run: | + pytest From 824d7cedc34af392f2e4594ef4df60170377f7e3 Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Thu, 29 Feb 2024 22:52:23 +0100 Subject: [PATCH 34/52] no cache --- .github/workflows/ci.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4cc0a2b..65ed8ee 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -27,7 +27,6 @@ jobs: uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - cache: pip - name: Install the library run: pip install ".[dev]" From 88ae6b7e0d8d462e50752e49845cbc72a0326542 Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Fri, 19 Apr 2024 15:18:08 +0200 Subject: [PATCH 35/52] include tests and R CI --- .github/workflows/ci.yml | 5 + README.md | 265 ----------------------------------- nbs/m4_data.ipynb | 2 +- nbs/tsfeatures.ipynb | 255 ++++++++++++++++++++++++++++++++- nbs/tsfeatures_r.ipynb | 279 +++++++++++++++++++++++++++++++++++++ nbs/utils.ipynb | 2 +- settings.ini | 2 +- tsfeatures/_modidx.py | 1 + tsfeatures/tsfeatures_r.py | 
11 ++ 9 files changed, 553 insertions(+), 269 deletions(-) create mode 100644 nbs/tsfeatures_r.ipynb create mode 100644 tsfeatures/tsfeatures_r.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 65ed8ee..1246177 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -23,6 +23,11 @@ jobs: - name: Clone repo uses: actions/checkout@v3 + - name: setup R + uses: r-lib/actions/setup-r@v2 + with: + r-version: "4.3.3" + - name: Set up environment uses: actions/setup-python@v4 with: diff --git a/README.md b/README.md index acd4df4..e69de29 100644 --- a/README.md +++ b/README.md @@ -1,265 +0,0 @@ -# tsfeatures - - - - -[![CI](https://github.com/Nixtla/mlforecast/actions/workflows/ci.yaml/badge.svg)](https://github.com/Nixtla/mlforecast/actions/workflows/ci.yaml) -[![Python](https://img.shields.io/pypi/pyversions/mlforecast.png)](https://pypi.org/project/mlforecast/) -[![PyPi](https://img.shields.io/pypi/v/mlforecast?color=blue.png)](https://pypi.org/project/mlforecast/) -[![conda-forge](https://img.shields.io/conda/vn/conda-forge/mlforecast?color=blue.png)](https://anaconda.org/conda-forge/mlforecast) -[![License](https://img.shields.io/github/license/Nixtla/mlforecast.png)](https://github.com/Nixtla/mlforecast/blob/main/LICENSE) - -# tsfeatures - -Calculates various features from time series data. Python implementation -of the R package -*[tsfeatures](https://github.com/robjhyndman/tsfeatures)*. - -# Installation - -You can install the *released* version of `tsfeatures` from the [Python -package index](pypi.org) with: - -``` python -pip install tsfeatures -``` - -# Usage - -The `tsfeatures` main function calculates by default the features used -by Montero-Manso, Talagala, Hyndman and Athanasopoulos in [their -implementation of the FFORMA -model](https://htmlpreview.github.io/?https://github.com/robjhyndman/M4metalearning/blob/master/docs/M4_methodology.html#features). 
- -``` python -from tsfeatures import tsfeatures -``` - -This function receives a panel pandas df with columns `unique_id`, `ds`, -`y` and optionally the frequency of the data. - - - -``` python -tsfeatures(panel, freq=7) -``` - -By default (`freq=None`) the function will try to infer the frequency of -each time series (using `infer_freq` from `pandas` on the `ds` column) -and assign a seasonal period according to the built-in dictionary -`FREQS`: - -``` python -FREQS = {'H': 24, 'D': 1, - 'M': 12, 'Q': 4, - 'W':1, 'Y': 1} -``` - -You can use your own dictionary using the `dict_freqs` argument: - -``` python -tsfeatures(panel, dict_freqs={'D': 7, 'W': 52}) -``` - -## List of available features - -| Features | | | -|:----------------|:----------------|:--------------| -| acf_features | heterogeneity | series_length | -| arch_stat | holt_parameters | sparsity | -| count_entropy | hurst | stability | -| crossing_points | hw_parameters | stl_features | -| entropy | intervals | unitroot_kpss | -| flat_spots | lumpiness | unitroot_pp | -| frequency | nonlinearity | | -| guerrero | pacf_features | | - -See the docs for a description of the features. To use a particular -feature included in the package you need to import it: - -``` python -from tsfeatures import acf_features - -tsfeatures(panel, freq=7, features=[acf_features]) -``` - -You can also define your own function and use it together with the -included features: - -``` python -def number_zeros(x, freq): - - number = (x == 0).sum() - return {'number_zeros': number} - -tsfeatures(panel, freq=7, features=[acf_features, number_zeros]) -``` - -`tsfeatures` can handle functions that receives a numpy array `x` and a -frequency `freq` (this parameter is needed even if you don’t use it) and -returns a dictionary with the feature name as a key and its value. 
- -## R implementation - -You can use this package to call `tsfeatures` from R inside python (you -need to have installed R, the packages `forecast` and `tsfeatures`; also -the python package `rpy2`): - -``` python -from tsfeatures.tsfeatures_r import tsfeatures_r - -tsfeatures_r(panel, freq=7, features=["acf_features"]) -``` - -Observe that this function receives a list of strings instead of a list -of functions. - -## Comparison with the R implementation (sum of absolute differences) - -### Non-seasonal data (100 Daily M4 time series) - -| feature | diff | feature | diff | feature | diff | feature | diff | -|:----------------|-----:|:----------------|-----:|:--------------|-----:|:----------|------:| -| e_acf10 | 0 | e_acf1 | 0 | diff2_acf1 | 0 | alpha | 3.2 | -| seasonal_period | 0 | spike | 0 | diff1_acf10 | 0 | arch_acf | 3.3 | -| nperiods | 0 | curvature | 0 | x_acf1 | 0 | beta | 4.04 | -| linearity | 0 | crossing_points | 0 | nonlinearity | 0 | garch_r2 | 4.74 | -| hw_gamma | 0 | lumpiness | 0 | diff2x_pacf5 | 0 | hurst | 5.45 | -| hw_beta | 0 | diff1x_pacf5 | 0 | unitroot_kpss | 0 | garch_acf | 5.53 | -| hw_alpha | 0 | diff1_acf10 | 0 | x_pacf5 | 0 | entropy | 11.65 | -| trend | 0 | arch_lm | 0 | x_acf10 | 0 | | | -| flat_spots | 0 | diff1_acf1 | 0 | unitroot_pp | 0 | | | -| series_length | 0 | stability | 0 | arch_r2 | 1.37 | | | - -To replicate this results use: - -``` console -python -m tsfeatures.compare_with_r --results_directory /some/path - --dataset_name Daily --num_obs 100 -``` - -### Sesonal data (100 Hourly M4 time series) - -| feature | diff | feature | diff | feature | diff | feature | diff | -|:----------------|-----:|:------------------|-----:|:----------|-----:|:--------|------:| -| series_length | 0 | seas_acf1 | 0 | trend | 2.28 | hurst | 26.02 | -| flat_spots | 0 | x_acf1 | 0 | arch_r2 | 2.29 | hw_beta | 32.39 | -| nperiods | 0 | unitroot_kpss | 0 | alpha | 2.52 | trough | 35 | -| crossing_points | 0 | nonlinearity | 0 | beta | 3.67 | 
peak | 69 | -| seasonal_period | 0 | diff1_acf10 | 0 | linearity | 3.97 | | | -| lumpiness | 0 | x_acf10 | 0 | curvature | 4.8 | | | -| stability | 0 | seas_pacf | 0 | e_acf10 | 7.05 | | | -| arch_lm | 0 | unitroot_pp | 0 | garch_r2 | 7.32 | | | -| diff2_acf1 | 0 | spike | 0 | hw_gamma | 7.32 | | | -| diff2_acf10 | 0 | seasonal_strength | 0.79 | hw_alpha | 7.47 | | | -| diff1_acf1 | 0 | e_acf1 | 1.67 | garch_acf | 7.53 | | | -| diff2x_pacf5 | 0 | arch_acf | 2.18 | entropy | 9.45 | | | - -[![Build](https://github.com/FedericoGarza/tsfeatures/workflows/Python%20package/badge.svg)](https://github.com/FedericoGarza/tsfeatures/tree/master) -[![PyPI version -fury.io](https://badge.fury.io/py/tsfeatures.svg)](https://pypi.python.org/pypi/tsfeatures/) -[![Downloads](https://pepy.tech/badge/tsfeatures.png)](https://pepy.tech/project/tsfeatures) -[![Python -3.6+](https://img.shields.io/badge/python-3.7+-blue.svg)](https://www.python.org/downloads/release/python-370+/) -[![License: -MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://github.com/FedericoGarza/tsfeatures/blob/master/LICENSE) - -# tsfeatures - -Calculates various features from time series data. Python implementation -of the R package -*[tsfeatures](https://github.com/robjhyndman/tsfeatures)*. - -# Installation - -You can install the *released* version of `tsfeatures` from the [Python -package index](pypi.org) with: - -``` python -pip install tsfeatures -``` - -# Usage - -The `tsfeatures` main function calculates by default the features used -by Montero-Manso, Talagala, Hyndman and Athanasopoulos in [their -implementation of the FFORMA -model](https://htmlpreview.github.io/?https://github.com/robjhyndman/M4metalearning/blob/master/docs/M4_methodology.html#features). - -``` python -from tsfeatures import tsfeatures -``` - -This function receives a panel pandas df with columns `unique_id`, `ds`, -`y` and optionally the frequency of the data. 
- - - -``` python -tsfeatures(panel, freq=7) -``` - -By default (`freq=None`) the function will try to infer the frequency of -each time series (using `infer_freq` from `pandas` on the `ds` column) -and assign a seasonal period according to the built-in dictionary -`FREQS`: - -``` python -FREQS = {'H': 24, 'D': 1, - 'M': 12, 'Q': 4, - 'W':1, 'Y': 1} -``` - -You can use your own dictionary using the `dict_freqs` argument: - -``` python -tsfeatures(panel, dict_freqs={'D': 7, 'W': 52}) -``` - -## List of available features - -| Features | | | -|:----------------|:----------------|:--------------| -| acf_features | heterogeneity | series_length | -| arch_stat | holt_parameters | sparsity | -| count_entropy | hurst | stability | -| crossing_points | hw_parameters | stl_features | -| entropy | intervals | unitroot_kpss | -| flat_spots | lumpiness | unitroot_pp | -| frequency | nonlinearity | | -| guerrero | pacf_features | | - -See the docs for a description of the features. To use a particular -feature included in the package you need to import it: - -``` python -from tsfeatures import acf_features - -tsfeatures(panel, freq=7, features=[acf_features]) -``` - -You can also define your own function and use it together with the -included features: - -``` python -def number_zeros(x, freq): - - number = (x == 0).sum() - return {'number_zeros': number} - -tsfeatures(panel, freq=7, features=[acf_features, number_zeros]) -``` - -`tsfeatures` can handle functions that receives a numpy array `x` and a -frequency `freq` (this parameter is needed even if you don’t use it) and -returns a dictionary with the feature name as a key and its value. 
- -# Authors - -- **Federico Garza** - [FedericoGarza](https://github.com/FedericoGarza) -- **Kin Gutierrez** - [kdgutier](https://github.com/kdgutier) -- **Cristian Challu** - - [cristianchallu](https://github.com/cristianchallu) -- **Jose Moralez** - [jose-moralez](https://github.com/jose-moralez) -- **Ricardo Olivares** - [rolivaresar](https://github.com/rolivaresar) -- **Max Mergenthaler** - [mergenthaler](https://github.com/mergenthaler) diff --git a/nbs/m4_data.ipynb b/nbs/m4_data.ipynb index 350093d..03089b2 100644 --- a/nbs/m4_data.ipynb +++ b/nbs/m4_data.ipynb @@ -15,7 +15,7 @@ "metadata": {}, "outputs": [], "source": [ - "# |default_exp m4_data\n" + "# |default_exp m4_data" ] }, { diff --git a/nbs/tsfeatures.ipynb b/nbs/tsfeatures.ipynb index c7c3859..d4b3371 100644 --- a/nbs/tsfeatures.ipynb +++ b/nbs/tsfeatures.ipynb @@ -15,7 +15,7 @@ "metadata": {}, "outputs": [], "source": [ - "# |default_exp tsfeatures\n" + "# |default_exp tsfeatures" ] }, { @@ -1446,6 +1446,7 @@ "metadata": {}, "outputs": [], "source": [ + "# test_pipeline\n", "def test_pipeline():\n", " def calculate_features_m4(dataset_name, directory, num_obs=1000000):\n", " _, y_train_df, _, _ = prepare_m4_data(\n", @@ -1465,6 +1466,258 @@ "test_pipeline()" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# test_statistics\n", + "# TODO apply an assert to the test case\n", + "def test_scale():\n", + " z = np.zeros(10)\n", + " z[-1] = 1\n", + " df = pd.DataFrame({\"unique_id\": 1, \"ds\": range(1, 11), \"y\": z})\n", + " features = tsfeatures(df, freq=7, scale=True, features=[statistics])\n", + " print(features)\n", + "\n", + "\n", + "# TODO apply an assert to the test case\n", + "def test_no_scale():\n", + " z = np.zeros(10)\n", + " z[-1] = 1\n", + " df = pd.DataFrame({\"unique_id\": 1, \"ds\": range(1, 11), \"y\": z})\n", + " features = tsfeatures(df, freq=7, scale=False, features=[statistics])\n", + " print(features)" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# test_sparsity\n", + "def test_non_zero_sparsity():\n", + " # if we scale the data, the sparsity should be zero\n", + " z = np.zeros(10)\n", + " z[-1] = 1\n", + " df = pd.DataFrame({\"unique_id\": 1, \"ds\": range(1, 11), \"y\": z})\n", + " features = tsfeatures(df, freq=7, scale=True, features=[sparsity])\n", + " z_sparsity = features[\"sparsity\"].values[0]\n", + " assert z_sparsity == 0.0\n", + "\n", + "\n", + "def test_sparsity():\n", + " z = np.zeros(10)\n", + " z[-1] = 1\n", + " df = pd.DataFrame({\"unique_id\": 1, \"ds\": range(1, 11), \"y\": z})\n", + " features = tsfeatures(df, freq=7, scale=False, features=[sparsity])\n", + " print(features)\n", + " z_sparsity = features[\"sparsity\"].values[0]\n", + " assert z_sparsity == 0.9" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# test_small_ts\n", + "# TODO apply an assert to the test case\n", + "def test_small():\n", + " z = np.zeros(2)\n", + " z[-1] = 1\n", + " z_df = pd.DataFrame({\"unique_id\": 1, \"ds\": range(1, 3), \"y\": z})\n", + " feats = [\n", + " sparsity,\n", + " acf_features,\n", + " arch_stat,\n", + " crossing_points,\n", + " entropy,\n", + " flat_spots,\n", + " holt_parameters,\n", + " lumpiness,\n", + " nonlinearity,\n", + " pacf_features,\n", + " stl_features,\n", + " stability,\n", + " hw_parameters,\n", + " unitroot_kpss,\n", + " unitroot_pp,\n", + " series_length,\n", + " hurst,\n", + " statistics,\n", + " ]\n", + " feats_df = tsfeatures(z_df, freq=12, features=feats, scale=False)\n", + "\n", + "\n", + "# TODO apply an assert to the test case\n", + "def test_small_1():\n", + " z = np.zeros(1)\n", + " z[-1] = 1\n", + " z_df = pd.DataFrame({\"unique_id\": 1, \"ds\": range(1, 2), \"y\": z})\n", + " feats = [\n", + " sparsity,\n", + " acf_features,\n", + " arch_stat,\n", + " crossing_points,\n", + " entropy,\n", + " 
flat_spots,\n", + " holt_parameters,\n", + " lumpiness,\n", + " nonlinearity,\n", + " pacf_features,\n", + " stl_features,\n", + " stability,\n", + " hw_parameters,\n", + " unitroot_kpss,\n", + " unitroot_pp,\n", + " series_length,\n", + " hurst,\n", + " statistics,\n", + " ]\n", + " feats_df = tsfeatures(z_df, freq=12, features=feats, scale=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# test_pacf_features\n", + "# TODO apply an assert to the test case\n", + "def test_pacf_features_seasonal_short():\n", + " z = np.random.normal(size=15)\n", + " pacf_features(z, freq=7)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# test_mutability\n", + "def test_mutability():\n", + " z = np.zeros(100)\n", + " z[-1] = 1\n", + " z_df = pd.DataFrame({\"unique_id\": 1, \"ds\": range(1, 101), \"y\": z})\n", + " feats = [\n", + " sparsity,\n", + " acf_features,\n", + " arch_stat,\n", + " crossing_points,\n", + " entropy,\n", + " flat_spots,\n", + " holt_parameters,\n", + " lumpiness,\n", + " nonlinearity,\n", + " pacf_features,\n", + " stl_features,\n", + " stability,\n", + " hw_parameters,\n", + " unitroot_kpss,\n", + " unitroot_pp,\n", + " series_length,\n", + " hurst,\n", + " ]\n", + " feats_2 = [\n", + " acf_features,\n", + " arch_stat,\n", + " crossing_points,\n", + " entropy,\n", + " flat_spots,\n", + " holt_parameters,\n", + " lumpiness,\n", + " nonlinearity,\n", + " pacf_features,\n", + " stl_features,\n", + " stability,\n", + " hw_parameters,\n", + " unitroot_kpss,\n", + " unitroot_pp,\n", + " series_length,\n", + " hurst,\n", + " sparsity,\n", + " ]\n", + " feats_df = tsfeatures(z_df, freq=7, features=feats, scale=False)\n", + " feats_2_df = tsfeatures(z_df, freq=7, features=feats_2, scale=False)\n", + " pd.testing.assert_frame_equal(feats_df, feats_2_df[feats_df.columns])" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "metadata": {}, + "outputs": [], + "source": [ + "# test_holt_parameters\n", + "def test_holt_parameters_seasonal():\n", + " z = holt_parameters(USAccDeaths, 12)\n", + " assert isclose(len(z), 2)\n", + " assert isclose(z[\"alpha\"], 0.96, abs_tol=0.07)\n", + " assert isclose(z[\"beta\"], 0.00, abs_tol=0.1)\n", + "\n", + "\n", + "def test_holt_parameters_non_seasonal():\n", + " z = holt_parameters(WWWusage, 1)\n", + " assert isclose(len(z), 2)\n", + " assert isclose(z[\"alpha\"], 0.99, abs_tol=0.02)\n", + " assert isclose(z[\"beta\"], 0.99, abs_tol=0.02)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# test_arch_stat\n", + "def test_arch_stat_seasonal():\n", + " z = arch_stat(USAccDeaths, 12)\n", + " assert isclose(len(z), 1)\n", + " assert isclose(z[\"arch_lm\"], 0.54, abs_tol=0.01)\n", + "\n", + "\n", + "def test_arch_stat_non_seasonal():\n", + " z = arch_stat(WWWusage, 12)\n", + " assert isclose(len(z), 1)\n", + " assert isclose(z[\"arch_lm\"], 0.98, abs_tol=0.01)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# test_acf_features\n", + "def test_acf_features_seasonal():\n", + " z = acf_features(USAccDeaths, 12)\n", + " assert isclose(len(z), 7)\n", + " assert isclose(z[\"x_acf1\"], 0.70, abs_tol=0.01)\n", + " assert isclose(z[\"x_acf10\"], 1.20, abs_tol=0.01)\n", + " assert isclose(z[\"diff1_acf1\"], 0.023, abs_tol=0.01)\n", + " assert isclose(z[\"diff1_acf10\"], 0.27, abs_tol=0.01)\n", + " assert isclose(z[\"diff2_acf1\"], -0.48, abs_tol=0.01)\n", + " assert isclose(z[\"diff2_acf10\"], 0.74, abs_tol=0.01)\n", + " assert isclose(z[\"seas_acf1\"], 0.62, abs_tol=0.01)\n", + "\n", + "\n", + "def test_acf_features_non_seasonal():\n", + " z = acf_features(WWWusage, 1)\n", + " assert isclose(len(z), 6)\n", + " assert isclose(z[\"x_acf1\"], 0.96, abs_tol=0.01)\n", + " assert isclose(z[\"x_acf10\"], 4.19, abs_tol=0.01)\n", + " 
assert isclose(z[\"diff1_acf1\"], 0.79, abs_tol=0.01)\n", + " assert isclose(z[\"diff1_acf10\"], 1.40, abs_tol=0.01)\n", + " assert isclose(z[\"diff2_acf1\"], 0.17, abs_tol=0.01)\n", + " assert isclose(z[\"diff2_acf10\"], 0.33, abs_tol=0.01)" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/nbs/tsfeatures_r.ipynb b/nbs/tsfeatures_r.ipynb new file mode 100644 index 0000000..5815558 --- /dev/null +++ b/nbs/tsfeatures_r.ipynb @@ -0,0 +1,279 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# utils\n", + "\n", + "> supporting utils for tsfeatures\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# |default_exp tsfeatures_r" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], + "source": [ + "# |hide\n", + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# |export\n", + "from typing import List\n", + "\n", + "import pandas as pd\n", + "import rpy2.robjects as robjects\n", + "from rpy2.robjects import pandas2ri" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def tsfeatures_r(\n", + " ts: pd.DataFrame,\n", + " freq: int,\n", + " features: List[str] = [\n", + " \"length\",\n", + " \"acf_features\",\n", + " \"arch_stat\",\n", + " \"crossing_points\",\n", + " \"entropy\",\n", + " \"flat_spots\",\n", + " \"heterogeneity\",\n", + " \"holt_parameters\",\n", + " \"hurst\",\n", + " \"hw_parameters\",\n", + " \"lumpiness\",\n", + " \"nonlinearity\",\n", + " \"pacf_features\",\n", + " \"stability\",\n", + " \"stl_features\",\n", + " \"unitroot_kpss\",\n", + " 
\"unitroot_pp\",\n", + " ],\n", + " **kwargs\n", + ") -> pd.DataFrame:\n", + " \"\"\"tsfeatures wrapper using r.\n", + "\n", + " Parameters\n", + " ----------\n", + " ts: pandas df\n", + " Pandas DataFrame with columns ['unique_id', 'ds', 'y'].\n", + " Long panel of time series.\n", + " freq: int\n", + " Frequency of the time series.\n", + " features: List[str]\n", + " String list of features to calculate.\n", + " **kwargs:\n", + " Arguments used by the original tsfeatures function.\n", + "\n", + " References\n", + " ----------\n", + " https://pkg.robjhyndman.com/tsfeatures/reference/tsfeatures.html\n", + " \"\"\"\n", + " rstring = \"\"\"\n", + " function(df, freq, features, ...){\n", + " suppressMessages(library(data.table))\n", + " suppressMessages(library(tsfeatures))\n", + "\n", + " dt <- as.data.table(df)\n", + " setkey(dt, unique_id)\n", + "\n", + " series_list <- split(dt, by = \"unique_id\", keep.by = FALSE)\n", + " series_list <- lapply(series_list,\n", + " function(serie) serie[, ts(y, frequency = freq)])\n", + "\n", + " if(\"hw_parameters\" %in% features){\n", + " features <- setdiff(features, \"hw_parameters\")\n", + "\n", + " if(length(features)>0){\n", + " hw_series_features <- suppressMessages(tsfeatures(series_list, \"hw_parameters\", ...))\n", + " names(hw_series_features) <- paste0(\"hw_\", names(hw_series_features))\n", + "\n", + " series_features <- suppressMessages(tsfeatures(series_list, features, ...))\n", + " series_features <- cbind(series_features, hw_series_features)\n", + " } else {\n", + " series_features <- suppressMessages(tsfeatures(series_list, \"hw_parameters\", ...))\n", + " names(series_features) <- paste0(\"hw_\", names(series_features))\n", + " }\n", + " } else {\n", + " series_features <- suppressMessages(tsfeatures(series_list, features, ...))\n", + " }\n", + "\n", + " setDT(series_features)\n", + "\n", + " series_features[, unique_id := names(series_list)]\n", + "\n", + " }\n", + " \"\"\"\n", + " pandas2ri.activate()\n", + " 
rfunc = robjects.r(rstring)\n", + "\n", + " feats = rfunc(ts, freq, features, **kwargs)\n", + " pandas2ri.deactivate()\n", + "\n", + " renamer = {\"ARCH.LM\": \"arch_lm\", \"length\": \"series_length\"}\n", + " feats = feats.rename(columns=renamer)\n", + "\n", + " return feats\n", + "\n", + "\n", + "def tsfeatures_r_wide(\n", + " ts: pd.DataFrame,\n", + " features: List[str] = [\n", + " \"length\",\n", + " \"acf_features\",\n", + " \"arch_stat\",\n", + " \"crossing_points\",\n", + " \"entropy\",\n", + " \"flat_spots\",\n", + " \"heterogeneity\",\n", + " \"holt_parameters\",\n", + " \"hurst\",\n", + " \"hw_parameters\",\n", + " \"lumpiness\",\n", + " \"nonlinearity\",\n", + " \"pacf_features\",\n", + " \"stability\",\n", + " \"stl_features\",\n", + " \"unitroot_kpss\",\n", + " \"unitroot_pp\",\n", + " ],\n", + " **kwargs\n", + ") -> pd.DataFrame:\n", + " \"\"\"tsfeatures wrapper using r.\n", + "\n", + " Parameters\n", + " ----------\n", + " ts: pandas df\n", + " Pandas DataFrame with columns ['unique_id', 'seasonality', 'y'].\n", + " Wide panel of time series.\n", + " features: List[str]\n", + " String list of features to calculate.\n", + " **kwargs:\n", + " Arguments used by the original tsfeatures function.\n", + "\n", + " References\n", + " ----------\n", + " https://pkg.robjhyndman.com/tsfeatures/reference/tsfeatures.html\n", + " \"\"\"\n", + " rstring = \"\"\"\n", + " function(uids, seasonalities, ys, features, ...){\n", + " suppressMessages(library(data.table))\n", + " suppressMessages(library(tsfeatures))\n", + " suppressMessages(library(purrr))\n", + "\n", + " series_list <- pmap(\n", + " list(uids, seasonalities, ys),\n", + " function(uid, seasonality, y) ts(y, frequency=seasonality)\n", + " )\n", + " names(series_list) <- uids\n", + "\n", + " if(\"hw_parameters\" %in% features){\n", + " features <- setdiff(features, \"hw_parameters\")\n", + "\n", + " if(length(features)>0){\n", + " hw_series_features <- suppressMessages(tsfeatures(series_list, 
\"hw_parameters\", ...))\n", + " names(hw_series_features) <- paste0(\"hw_\", names(hw_series_features))\n", + "\n", + " series_features <- suppressMessages(tsfeatures(series_list, features, ...))\n", + " series_features <- cbind(series_features, hw_series_features)\n", + " } else {\n", + " series_features <- suppressMessages(tsfeatures(series_list, \"hw_parameters\", ...))\n", + " names(series_features) <- paste0(\"hw_\", names(series_features))\n", + " }\n", + " } else {\n", + " series_features <- suppressMessages(tsfeatures(series_list, features, ...))\n", + " }\n", + "\n", + " setDT(series_features)\n", + "\n", + " series_features[, unique_id := names(series_list)]\n", + "\n", + " }\n", + " \"\"\"\n", + " pandas2ri.activate()\n", + " rfunc = robjects.r(rstring)\n", + "\n", + " uids = ts[\"unique_id\"].to_list()\n", + " seasonalities = ts[\"seasonality\"].to_list()\n", + " ys = ts[\"y\"].to_list()\n", + "\n", + " feats = rfunc(uids, seasonalities, ys, features, **kwargs)\n", + " pandas2ri.deactivate()\n", + "\n", + " renamer = {\"ARCH.LM\": \"arch_lm\", \"length\": \"series_length\"}\n", + " feats = feats.rename(columns=renamer)\n", + "\n", + " return feats" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# |hide\n", + "from nbdev.showdoc import *" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# |hide\n", + "import nbdev\n", + "\n", + "nbdev.nbdev_export()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "python3", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/nbs/utils.ipynb b/nbs/utils.ipynb index 481920a..a782375 100644 --- a/nbs/utils.ipynb +++ b/nbs/utils.ipynb @@ -6,7 +6,7 @@ "source": [ "# utils\n", "\n", - "> supporting utils for 
tsfeatures" + "> supporting utils for tsfeatures\n" ] }, { diff --git a/settings.ini b/settings.ini index 16db827..9dd8fc6 100644 --- a/settings.ini +++ b/settings.ini @@ -38,7 +38,7 @@ status = 3 user = Nixtla ### Optional ### -requirements = antropy>=0.1.4 arch>=4.11 pandas>=1.0.5 scikit-learn>=0.23.1 statsmodels>=0.13.2 supersmoother>=0.4 numba>=0.55.0 numpy>=1.21.6 tqdm +requirements = antropy>=0.1.4 arch>=4.11 pandas>=1.0.5 scikit-learn>=0.23.1 statsmodels>=0.13.2 supersmoother>=0.4 numba>=0.55.0 numpy>=1.21.6 rpy2>=3.5.16 tqdm dev_requirements = nbdev black_formatting = True diff --git a/tsfeatures/_modidx.py b/tsfeatures/_modidx.py index 6b19795..b87cd70 100644 --- a/tsfeatures/_modidx.py +++ b/tsfeatures/_modidx.py @@ -39,6 +39,7 @@ 'tsfeatures/tsfeatures.py'), 'tsfeatures.tsfeatures.unitroot_kpss': ('tsfeatures.html#unitroot_kpss', 'tsfeatures/tsfeatures.py'), 'tsfeatures.tsfeatures.unitroot_pp': ('tsfeatures.html#unitroot_pp', 'tsfeatures/tsfeatures.py')}, + 'tsfeatures.tsfeatures_r': {}, 'tsfeatures.utils': { 'tsfeatures.utils.embed': ('utils.html#embed', 'tsfeatures/utils.py'), 'tsfeatures.utils.hurst_exponent': ('utils.html#hurst_exponent', 'tsfeatures/utils.py'), 'tsfeatures.utils.lambda_coef_var': ('utils.html#lambda_coef_var', 'tsfeatures/utils.py'), diff --git a/tsfeatures/tsfeatures_r.py b/tsfeatures/tsfeatures_r.py new file mode 100644 index 0000000..ba9e423 --- /dev/null +++ b/tsfeatures/tsfeatures_r.py @@ -0,0 +1,11 @@ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/tsfeatures_r.ipynb. 
+ +# %% auto 0 +__all__ = [] + +# %% ../nbs/tsfeatures_r.ipynb 3 +from typing import List + +import pandas as pd +import rpy2.robjects as robjects +from rpy2.robjects import pandas2ri From 87042978b1943854187a96691f1ee4dc136eb0d4 Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Fri, 19 Apr 2024 15:19:22 +0200 Subject: [PATCH 36/52] update readme --- README.md | 150 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 150 insertions(+) diff --git a/README.md b/README.md index e69de29..5faa87a 100644 --- a/README.md +++ b/README.md @@ -0,0 +1,150 @@ +[![Build](https://github.com/FedericoGarza/tsfeatures/workflows/Python%20package/badge.svg)](https://github.com/FedericoGarza/tsfeatures/tree/master) +[![PyPI version fury.io](https://badge.fury.io/py/tsfeatures.svg)](https://pypi.python.org/pypi/tsfeatures/) +[![Downloads](https://pepy.tech/badge/tsfeatures)](https://pepy.tech/project/tsfeatures) +[![Python 3.6+](https://img.shields.io/badge/python-3.7+-blue.svg)](https://www.python.org/downloads/release/python-370+/) +[![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://github.com/FedericoGarza/tsfeatures/blob/master/LICENSE) + +# tsfeatures + +Calculates various features from time series data. Python implementation of the R package _[tsfeatures](https://github.com/robjhyndman/tsfeatures)_. + +# Installation + +You can install the *released* version of `tsfeatures` from the [Python package index](pypi.org) with: + +``` python +pip install tsfeatures +``` + +# Usage + +The `tsfeatures` main function calculates by default the features used by Montero-Manso, Talagala, Hyndman and Athanasopoulos in [their implementation of the FFORMA model](https://htmlpreview.github.io/?https://github.com/robjhyndman/M4metalearning/blob/master/docs/M4_methodology.html#features). 
+ +```python +from tsfeatures import tsfeatures +``` + +This function receives a panel pandas df with columns `unique_id`, `ds`, `y` and optionally the frequency of the data. + + + +```python +tsfeatures(panel, freq=7) +``` + +By default (`freq=None`) the function will try to infer the frequency of each time series (using `infer_freq` from `pandas` on the `ds` column) and assign a seasonal period according to the built-in dictionary `FREQS`: + +```python +FREQS = {'H': 24, 'D': 1, + 'M': 12, 'Q': 4, + 'W':1, 'Y': 1} +``` + +You can use your own dictionary using the `dict_freqs` argument: + +```python +tsfeatures(panel, dict_freqs={'D': 7, 'W': 52}) +``` + +## List of available features + +| Features | | | +| :-------------- | :-------------- | :------------ | +| acf_features | heterogeneity | series_length | +| arch_stat | holt_parameters | sparsity | +| count_entropy | hurst | stability | +| crossing_points | hw_parameters | stl_features | +| entropy | intervals | unitroot_kpss | +| flat_spots | lumpiness | unitroot_pp | +| frequency | nonlinearity | | +| guerrero | pacf_features | | + +See the docs for a description of the features. To use a particular feature included in the package you need to import it: + +```python +from tsfeatures import acf_features + +tsfeatures(panel, freq=7, features=[acf_features]) +``` + +You can also define your own function and use it together with the included features: + +```python +def number_zeros(x, freq): + + number = (x == 0).sum() + return {'number_zeros': number} + +tsfeatures(panel, freq=7, features=[acf_features, number_zeros]) +``` + +`tsfeatures` can handle functions that receives a numpy array `x` and a frequency `freq` (this parameter is needed even if you don't use it) and returns a dictionary with the feature name as a key and its value. 
+ +## R implementation + +You can use this package to call `tsfeatures` from R inside python (you need to have installed R, the packages `forecast` and `tsfeatures`; also the python package `rpy2`): + +```python +from tsfeatures.tsfeatures_r import tsfeatures_r + +tsfeatures_r(panel, freq=7, features=["acf_features"]) +``` + +Observe that this function receives a list of strings instead of a list of functions. + +## Comparison with the R implementation (sum of absolute differences) + +### Non-seasonal data (100 Daily M4 time series) + +| feature | diff | feature | diff | feature | diff | feature | diff | +| :-------------- | ---: | :-------------- | ---: | :------------ | ---: | :-------- | ----: | +| e_acf10 | 0 | e_acf1 | 0 | diff2_acf1 | 0 | alpha | 3.2 | +| seasonal_period | 0 | spike | 0 | diff1_acf10 | 0 | arch_acf | 3.3 | +| nperiods | 0 | curvature | 0 | x_acf1 | 0 | beta | 4.04 | +| linearity | 0 | crossing_points | 0 | nonlinearity | 0 | garch_r2 | 4.74 | +| hw_gamma | 0 | lumpiness | 0 | diff2x_pacf5 | 0 | hurst | 5.45 | +| hw_beta | 0 | diff1x_pacf5 | 0 | unitroot_kpss | 0 | garch_acf | 5.53 | +| hw_alpha | 0 | diff1_acf10 | 0 | x_pacf5 | 0 | entropy | 11.65 | +| trend | 0 | arch_lm | 0 | x_acf10 | 0 | +| flat_spots | 0 | diff1_acf1 | 0 | unitroot_pp | 0 | +| series_length | 0 | stability | 0 | arch_r2 | 1.37 | + +To replicate this results use: + +``` console +python -m tsfeatures.compare_with_r --results_directory /some/path + --dataset_name Daily --num_obs 100 +``` + +### Sesonal data (100 Hourly M4 time series) + +| feature | diff | feature | diff | feature | diff | feature | diff | +| :-------------- | ---: | :---------------- | ---: | :-------- | ---: | :------ | ----: | +| series_length | 0 | seas_acf1 | 0 | trend | 2.28 | hurst | 26.02 | +| flat_spots | 0 | x_acf1 | 0 | arch_r2 | 2.29 | hw_beta | 32.39 | +| nperiods | 0 | unitroot_kpss | 0 | alpha | 2.52 | trough | 35 | +| crossing_points | 0 | nonlinearity | 0 | beta | 3.67 | peak | 69 | +| 
seasonal_period | 0 | diff1_acf10 | 0 | linearity | 3.97 | +| lumpiness | 0 | x_acf10 | 0 | curvature | 4.8 | +| stability | 0 | seas_pacf | 0 | e_acf10 | 7.05 | +| arch_lm | 0 | unitroot_pp | 0 | garch_r2 | 7.32 | +| diff2_acf1 | 0 | spike | 0 | hw_gamma | 7.32 | +| diff2_acf10 | 0 | seasonal_strength | 0.79 | hw_alpha | 7.47 | +| diff1_acf1 | 0 | e_acf1 | 1.67 | garch_acf | 7.53 | +| diff2x_pacf5 | 0 | arch_acf | 2.18 | entropy | 9.45 | + +To replicate this results use: + +``` console +python -m tsfeatures.compare_with_r --results_directory /some/path \ + --dataset_name Hourly --num_obs 100 +``` + +# Authors + +* **Federico Garza** - [FedericoGarza](https://github.com/FedericoGarza) +* **Kin Gutierrez** - [kdgutier](https://github.com/kdgutier) +* **Cristian Challu** - [cristianchallu](https://github.com/cristianchallu) +* **Jose Moralez** - [jose-moralez](https://github.com/jose-moralez) +* **Ricardo Olivares** - [rolivaresar](https://github.com/rolivaresar) +* **Max Mergenthaler** - [mergenthaler](https://github.com/mergenthaler) From 22a17677aa84fdce8d0a8ff9bd70780af85fb31d Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Fri, 19 Apr 2024 15:39:22 +0200 Subject: [PATCH 37/52] alter rpy2 version --- settings.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/settings.ini b/settings.ini index 9dd8fc6..7a68da7 100644 --- a/settings.ini +++ b/settings.ini @@ -38,7 +38,7 @@ status = 3 user = Nixtla ### Optional ### -requirements = antropy>=0.1.4 arch>=4.11 pandas>=1.0.5 scikit-learn>=0.23.1 statsmodels>=0.13.2 supersmoother>=0.4 numba>=0.55.0 numpy>=1.21.6 rpy2>=3.5.16 tqdm +requirements = rpy2>=3.5.12 antropy>=0.1.4 arch>=4.11 pandas>=1.0.5 scikit-learn>=0.23.1 statsmodels>=0.13.2 supersmoother>=0.4 numba>=0.55.0 numpy>=1.21.6 rpy2>=3.5.12 tqdm dev_requirements = nbdev black_formatting = True From 02f1a6be4cce92be3a323fd5e7728746966e3027 Mon Sep 17 00:00:00 2001 From: jope35 
<15650945+jope35@users.noreply.github.com> Date: Fri, 19 Apr 2024 15:49:03 +0200 Subject: [PATCH 38/52] debug R install --- .github/workflows/ci.yml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1246177..efff9e6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,7 +17,7 @@ jobs: fail-fast: false matrix: python-version: ["3.8", "3.9", "3.10", "3.11"] - os: [macos-latest, windows-latest, ubuntu-latest] + os: [windows-latest] #[macos-latest, windows-latest, ubuntu-latest] runs-on: ${{ matrix.os }} steps: - name: Clone repo @@ -27,6 +27,13 @@ jobs: uses: r-lib/actions/setup-r@v2 with: r-version: "4.3.3" + update-rtools: true # Update rtools40 compilers and libraries to the latest builds. + + - name: Check R Installation + run: | + Write-Output "R_HOME: $Env:R_HOME" + R --version + shell: pwsh - name: Set up environment uses: actions/setup-python@v4 From 936f0ec9c538f4df2ef2843523ffece1c858311c Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Fri, 19 Apr 2024 16:03:47 +0200 Subject: [PATCH 39/52] more debug --- .github/workflows/ci.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index efff9e6..1aec8d1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -26,13 +26,14 @@ jobs: - name: setup R uses: r-lib/actions/setup-r@v2 with: - r-version: "4.3.3" + # r-version: "4.3.3" + install-r: false # If “false” use the existing installation in the GitHub Action image update-rtools: true # Update rtools40 compilers and libraries to the latest builds. 
- name: Check R Installation run: | Write-Output "R_HOME: $Env:R_HOME" - R --version + & R --version shell: pwsh - name: Set up environment From d6791225bacb9dff914f5968cf09961479f6b4a4 Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Fri, 19 Apr 2024 16:11:11 +0200 Subject: [PATCH 40/52] more debug2 --- .github/workflows/ci.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1aec8d1..25ef8cc 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -31,9 +31,7 @@ jobs: update-rtools: true # Update rtools40 compilers and libraries to the latest builds. - name: Check R Installation - run: | - Write-Output "R_HOME: $Env:R_HOME" - & R --version + run: R --version shell: pwsh - name: Set up environment From 80f5d96fd816706e592583f572ff0a472407aa38 Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Fri, 19 Apr 2024 16:13:25 +0200 Subject: [PATCH 41/52] one string --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 25ef8cc..b6c81a4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -31,7 +31,7 @@ jobs: update-rtools: true # Update rtools40 compilers and libraries to the latest builds. 
- name: Check R Installation - run: R --version + run: "R --version" shell: pwsh - name: Set up environment From bff351c6e8997c5a29808c21f94039d79908368b Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Fri, 19 Apr 2024 16:15:24 +0200 Subject: [PATCH 42/52] asdf --- .github/workflows/ci.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b6c81a4..c7aabfb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -30,10 +30,6 @@ jobs: install-r: false # If “false” use the existing installation in the GitHub Action image update-rtools: true # Update rtools40 compilers and libraries to the latest builds. - - name: Check R Installation - run: "R --version" - shell: pwsh - - name: Set up environment uses: actions/setup-python@v4 with: From 8b9a01f166dbb122c943df0bb1bf2138977d9e87 Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Fri, 19 Apr 2024 16:23:12 +0200 Subject: [PATCH 43/52] alter rpy2 version --- settings.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/settings.ini b/settings.ini index 7a68da7..c548a4b 100644 --- a/settings.ini +++ b/settings.ini @@ -38,7 +38,7 @@ status = 3 user = Nixtla ### Optional ### -requirements = rpy2>=3.5.12 antropy>=0.1.4 arch>=4.11 pandas>=1.0.5 scikit-learn>=0.23.1 statsmodels>=0.13.2 supersmoother>=0.4 numba>=0.55.0 numpy>=1.21.6 rpy2>=3.5.12 tqdm +requirements = rpy2>=3.5.10 antropy>=0.1.4 arch>=4.11 pandas>=1.0.5 scikit-learn>=0.23.1 statsmodels>=0.13.2 supersmoother>=0.4 numba>=0.55.0 numpy>=1.21.6 rpy2>=3.5.12 tqdm dev_requirements = nbdev black_formatting = True From 940977693a0387684cbe7e7a04f77dd5de09c223 Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Fri, 19 Apr 2024 16:42:35 +0200 Subject: [PATCH 44/52] update R version --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c7aabfb..45cd36a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -26,7 +26,7 @@ jobs: - name: setup R uses: r-lib/actions/setup-r@v2 with: - # r-version: "4.3.3" + r-version: "4.3.3" install-r: false # If “false” use the existing installation in the GitHub Action image update-rtools: true # Update rtools40 compilers and libraries to the latest builds. From 7e9269695a41c873675c3da44cfbd3203d829de0 Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Fri, 19 Apr 2024 16:46:11 +0200 Subject: [PATCH 45/52] exclude the R dependency --- .github/workflows/ci.yml | 7 ------- settings.ini | 2 +- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 45cd36a..ab7bf13 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -23,13 +23,6 @@ jobs: - name: Clone repo uses: actions/checkout@v3 - - name: setup R - uses: r-lib/actions/setup-r@v2 - with: - r-version: "4.3.3" - install-r: false # If “false” use the existing installation in the GitHub Action image - update-rtools: true # Update rtools40 compilers and libraries to the latest builds. 
- - name: Set up environment uses: actions/setup-python@v4 with: diff --git a/settings.ini b/settings.ini index c548a4b..16db827 100644 --- a/settings.ini +++ b/settings.ini @@ -38,7 +38,7 @@ status = 3 user = Nixtla ### Optional ### -requirements = rpy2>=3.5.10 antropy>=0.1.4 arch>=4.11 pandas>=1.0.5 scikit-learn>=0.23.1 statsmodels>=0.13.2 supersmoother>=0.4 numba>=0.55.0 numpy>=1.21.6 rpy2>=3.5.12 tqdm +requirements = antropy>=0.1.4 arch>=4.11 pandas>=1.0.5 scikit-learn>=0.23.1 statsmodels>=0.13.2 supersmoother>=0.4 numba>=0.55.0 numpy>=1.21.6 tqdm dev_requirements = nbdev black_formatting = True From 00a9b9ddf7756865e96bad45eff30ec58d7f6d12 Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Tue, 23 Apr 2024 08:36:26 +0200 Subject: [PATCH 46/52] other R settings --- .github/workflows/ci.yml | 7 +++++++ settings.ini | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ab7bf13..539172d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -28,6 +28,13 @@ jobs: with: python-version: ${{ matrix.python-version }} + - name: setup R + uses: r-lib/actions/setup-r@v2 + with: + r-version: "4.3.3" + install-r: true # If “false” use the existing installation in the GitHub Action image + update-rtools: true # Update rtools40 compilers and libraries to the latest builds. 
+ - name: Install the library run: pip install ".[dev]" diff --git a/settings.ini b/settings.ini index 16db827..8caa527 100644 --- a/settings.ini +++ b/settings.ini @@ -38,7 +38,7 @@ status = 3 user = Nixtla ### Optional ### -requirements = antropy>=0.1.4 arch>=4.11 pandas>=1.0.5 scikit-learn>=0.23.1 statsmodels>=0.13.2 supersmoother>=0.4 numba>=0.55.0 numpy>=1.21.6 tqdm +requirements = rpy2==3.5.11 antropy>=0.1.4 arch>=4.11 pandas>=1.0.5 scikit-learn>=0.23.1 statsmodels>=0.13.2 supersmoother>=0.4 numba>=0.55.0 numpy>=1.21.6 tqdm dev_requirements = nbdev black_formatting = True From b4370283d5e5b86df6c5a41bc1a146f1d04a9929 Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Tue, 23 Apr 2024 14:56:45 +0200 Subject: [PATCH 47/52] explicit PATH --- .github/workflows/ci.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 539172d..9927731 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -32,8 +32,10 @@ jobs: uses: r-lib/actions/setup-r@v2 with: r-version: "4.3.3" + windows-path-include-rtools: true # Whether to add Rtools to the PATH. install-r: true # If “false” use the existing installation in the GitHub Action image update-rtools: true # Update rtools40 compilers and libraries to the latest builds. 
+ - run: Rscript -e 'print("hello")' - name: Install the library run: pip install ".[dev]" From 903876dc8c384937cebc16a9316184044a64da0b Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Tue, 23 Apr 2024 15:08:46 +0200 Subject: [PATCH 48/52] echo rhome --- .github/workflows/ci.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9927731..e7a3f53 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -16,7 +16,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.8", "3.9", "3.10", "3.11"] + python-version: ["3.10"] #["3.8", "3.9", "3.10", "3.11"] os: [windows-latest] #[macos-latest, windows-latest, ubuntu-latest] runs-on: ${{ matrix.os }} steps: @@ -35,7 +35,9 @@ jobs: windows-path-include-rtools: true # Whether to add Rtools to the PATH. install-r: true # If “false” use the existing installation in the GitHub Action image update-rtools: true # Update rtools40 compilers and libraries to the latest builds. 
- - run: Rscript -e 'print("hello")' + - name: Print R_HOME + run: | + echo $R_HOME - name: Install the library run: pip install ".[dev]" From 523ccf5affd0cc0b339102d58664672d58d21358 Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Tue, 23 Apr 2024 15:12:09 +0200 Subject: [PATCH 49/52] export R functions --- nbs/tsfeatures_r.ipynb | 5 +- tsfeatures/_modidx.py | 5 +- tsfeatures/tsfeatures_r.py | 181 ++++++++++++++++++++++++++++++++++++- 3 files changed, 187 insertions(+), 4 deletions(-) diff --git a/nbs/tsfeatures_r.ipynb b/nbs/tsfeatures_r.ipynb index 5815558..6a62f6a 100644 --- a/nbs/tsfeatures_r.ipynb +++ b/nbs/tsfeatures_r.ipynb @@ -58,6 +58,7 @@ "metadata": {}, "outputs": [], "source": [ + "# |export\n", "def tsfeatures_r(\n", " ts: pd.DataFrame,\n", " freq: int,\n", @@ -80,7 +81,7 @@ " \"unitroot_kpss\",\n", " \"unitroot_pp\",\n", " ],\n", - " **kwargs\n", + " **kwargs,\n", ") -> pd.DataFrame:\n", " \"\"\"tsfeatures wrapper using r.\n", "\n", @@ -168,7 +169,7 @@ " \"unitroot_kpss\",\n", " \"unitroot_pp\",\n", " ],\n", - " **kwargs\n", + " **kwargs,\n", ") -> pd.DataFrame:\n", " \"\"\"tsfeatures wrapper using r.\n", "\n", diff --git a/tsfeatures/_modidx.py b/tsfeatures/_modidx.py index b87cd70..a0c227a 100644 --- a/tsfeatures/_modidx.py +++ b/tsfeatures/_modidx.py @@ -39,7 +39,10 @@ 'tsfeatures/tsfeatures.py'), 'tsfeatures.tsfeatures.unitroot_kpss': ('tsfeatures.html#unitroot_kpss', 'tsfeatures/tsfeatures.py'), 'tsfeatures.tsfeatures.unitroot_pp': ('tsfeatures.html#unitroot_pp', 'tsfeatures/tsfeatures.py')}, - 'tsfeatures.tsfeatures_r': {}, + 'tsfeatures.tsfeatures_r': { 'tsfeatures.tsfeatures_r.tsfeatures_r': ( 'tsfeatures_r.html#tsfeatures_r', + 'tsfeatures/tsfeatures_r.py'), + 'tsfeatures.tsfeatures_r.tsfeatures_r_wide': ( 'tsfeatures_r.html#tsfeatures_r_wide', + 'tsfeatures/tsfeatures_r.py')}, 'tsfeatures.utils': { 'tsfeatures.utils.embed': ('utils.html#embed', 'tsfeatures/utils.py'), 'tsfeatures.utils.hurst_exponent': 
('utils.html#hurst_exponent', 'tsfeatures/utils.py'), 'tsfeatures.utils.lambda_coef_var': ('utils.html#lambda_coef_var', 'tsfeatures/utils.py'), diff --git a/tsfeatures/tsfeatures_r.py b/tsfeatures/tsfeatures_r.py index ba9e423..2a84962 100644 --- a/tsfeatures/tsfeatures_r.py +++ b/tsfeatures/tsfeatures_r.py @@ -1,7 +1,7 @@ # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/tsfeatures_r.ipynb. # %% auto 0 -__all__ = [] +__all__ = ['tsfeatures_r', 'tsfeatures_r_wide'] # %% ../nbs/tsfeatures_r.ipynb 3 from typing import List @@ -9,3 +9,182 @@ import pandas as pd import rpy2.robjects as robjects from rpy2.robjects import pandas2ri + +# %% ../nbs/tsfeatures_r.ipynb 4 +def tsfeatures_r( + ts: pd.DataFrame, + freq: int, + features: List[str] = [ + "length", + "acf_features", + "arch_stat", + "crossing_points", + "entropy", + "flat_spots", + "heterogeneity", + "holt_parameters", + "hurst", + "hw_parameters", + "lumpiness", + "nonlinearity", + "pacf_features", + "stability", + "stl_features", + "unitroot_kpss", + "unitroot_pp", + ], + **kwargs, +) -> pd.DataFrame: + """tsfeatures wrapper using r. + + Parameters + ---------- + ts: pandas df + Pandas DataFrame with columns ['unique_id', 'ds', 'y']. + Long panel of time series. + freq: int + Frequency of the time series. + features: List[str] + String list of features to calculate. + **kwargs: + Arguments used by the original tsfeatures function. 
+ + References + ---------- + https://pkg.robjhyndman.com/tsfeatures/reference/tsfeatures.html + """ + rstring = """ + function(df, freq, features, ...){ + suppressMessages(library(data.table)) + suppressMessages(library(tsfeatures)) + + dt <- as.data.table(df) + setkey(dt, unique_id) + + series_list <- split(dt, by = "unique_id", keep.by = FALSE) + series_list <- lapply(series_list, + function(serie) serie[, ts(y, frequency = freq)]) + + if("hw_parameters" %in% features){ + features <- setdiff(features, "hw_parameters") + + if(length(features)>0){ + hw_series_features <- suppressMessages(tsfeatures(series_list, "hw_parameters", ...)) + names(hw_series_features) <- paste0("hw_", names(hw_series_features)) + + series_features <- suppressMessages(tsfeatures(series_list, features, ...)) + series_features <- cbind(series_features, hw_series_features) + } else { + series_features <- suppressMessages(tsfeatures(series_list, "hw_parameters", ...)) + names(series_features) <- paste0("hw_", names(series_features)) + } + } else { + series_features <- suppressMessages(tsfeatures(series_list, features, ...)) + } + + setDT(series_features) + + series_features[, unique_id := names(series_list)] + + } + """ + pandas2ri.activate() + rfunc = robjects.r(rstring) + + feats = rfunc(ts, freq, features, **kwargs) + pandas2ri.deactivate() + + renamer = {"ARCH.LM": "arch_lm", "length": "series_length"} + feats = feats.rename(columns=renamer) + + return feats + + +def tsfeatures_r_wide( + ts: pd.DataFrame, + features: List[str] = [ + "length", + "acf_features", + "arch_stat", + "crossing_points", + "entropy", + "flat_spots", + "heterogeneity", + "holt_parameters", + "hurst", + "hw_parameters", + "lumpiness", + "nonlinearity", + "pacf_features", + "stability", + "stl_features", + "unitroot_kpss", + "unitroot_pp", + ], + **kwargs, +) -> pd.DataFrame: + """tsfeatures wrapper using r. + + Parameters + ---------- + ts: pandas df + Pandas DataFrame with columns ['unique_id', 'seasonality', 'y']. 
+ Wide panel of time series. + features: List[str] + String list of features to calculate. + **kwargs: + Arguments used by the original tsfeatures function. + + References + ---------- + https://pkg.robjhyndman.com/tsfeatures/reference/tsfeatures.html + """ + rstring = """ + function(uids, seasonalities, ys, features, ...){ + suppressMessages(library(data.table)) + suppressMessages(library(tsfeatures)) + suppressMessages(library(purrr)) + + series_list <- pmap( + list(uids, seasonalities, ys), + function(uid, seasonality, y) ts(y, frequency=seasonality) + ) + names(series_list) <- uids + + if("hw_parameters" %in% features){ + features <- setdiff(features, "hw_parameters") + + if(length(features)>0){ + hw_series_features <- suppressMessages(tsfeatures(series_list, "hw_parameters", ...)) + names(hw_series_features) <- paste0("hw_", names(hw_series_features)) + + series_features <- suppressMessages(tsfeatures(series_list, features, ...)) + series_features <- cbind(series_features, hw_series_features) + } else { + series_features <- suppressMessages(tsfeatures(series_list, "hw_parameters", ...)) + names(series_features) <- paste0("hw_", names(series_features)) + } + } else { + series_features <- suppressMessages(tsfeatures(series_list, features, ...)) + } + + setDT(series_features) + + series_features[, unique_id := names(series_list)] + + } + """ + pandas2ri.activate() + rfunc = robjects.r(rstring) + + uids = ts["unique_id"].to_list() + seasonalities = ts["seasonality"].to_list() + ys = ts["y"].to_list() + + feats = rfunc(uids, seasonalities, ys, features, **kwargs) + pandas2ri.deactivate() + + renamer = {"ARCH.LM": "arch_lm", "length": "series_length"} + feats = feats.rename(columns=renamer) + + return feats From d3c432409397e2a409dc036e15a7697ac1852092 Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Tue, 23 Apr 2024 15:21:52 +0200 Subject: [PATCH 50/52] include ubuntu --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e7a3f53..e5c541e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,7 +17,7 @@ jobs: fail-fast: false matrix: python-version: ["3.10"] #["3.8", "3.9", "3.10", "3.11"] - os: [windows-latest] #[macos-latest, windows-latest, ubuntu-latest] + os: [ubuntu-latest, windows-latest] #[macos-latest, windows-latest, ubuntu-latest] runs-on: ${{ matrix.os }} steps: - name: Clone repo From 233ea757b9ff07e6d2326938db892bf965db76e3 Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Tue, 23 Apr 2024 15:41:43 +0200 Subject: [PATCH 51/52] include fastcore explicitly as a dev dep --- .github/workflows/ci.yml | 3 --- settings.ini | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e5c541e..d85247c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -35,9 +35,6 @@ jobs: windows-path-include-rtools: true # Whether to add Rtools to the PATH. install-r: true # If “false” use the existing installation in the GitHub Action image update-rtools: true # Update rtools40 compilers and libraries to the latest builds. 
- - name: Print R_HOME - run: | - echo $R_HOME - name: Install the library run: pip install ".[dev]" diff --git a/settings.ini b/settings.ini index 8caa527..7cf372f 100644 --- a/settings.ini +++ b/settings.ini @@ -39,7 +39,7 @@ user = Nixtla ### Optional ### requirements = rpy2==3.5.11 antropy>=0.1.4 arch>=4.11 pandas>=1.0.5 scikit-learn>=0.23.1 statsmodels>=0.13.2 supersmoother>=0.4 numba>=0.55.0 numpy>=1.21.6 tqdm -dev_requirements = nbdev +dev_requirements = nbdev fastcore black_formatting = True jupyter_hooks = True From 180136a66eb71af0d8cd33724abd40b5173e13e0 Mon Sep 17 00:00:00 2001 From: jope35 <15650945+jope35@users.noreply.github.com> Date: Tue, 23 Apr 2024 15:55:49 +0200 Subject: [PATCH 52/52] no parallel --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d85247c..38afe05 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -40,4 +40,4 @@ jobs: run: pip install ".[dev]" - name: Run tests - run: nbdev_test --do_print --timing + run: nbdev_test --do_print --timing --n_workers 1