Skip to content
This repository has been archived by the owner on Jul 4, 2023. It is now read-only.

Commit

Permalink
Merge branch 'master' of github.com:PetrochukM/PyTorch-NLP
Browse files Browse the repository at this point in the history
  • Loading branch information
PetrochukM committed Apr 12, 2018
2 parents 80ccb67 + 7e0840f commit 66fba98
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 2 deletions.
6 changes: 6 additions & 0 deletions tests/_test_data/fast_text/wiki.multi.en.vec
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
200000 300
, -0.0112864 -0.00206967 -0.0515041 0.0208428 -0.069744 -0.0384644 0.0380908 -0.094775 0.0108655 0.152033 0.027994 -0.0564587 0.0470772 -0.0454188 -0.0166755 -0.0713809 -0.0540472 -0.0540862 0.0329954 0.0488296 -0.0328419 0.115758 -0.0638443 -0.00409125 -0.0516065 0.119485 0.0321063 -0.115655 -0.0524103 0.00278089 -0.0396624 0.127952 -0.0255577 0.0989258 0.0246608 -0.0919104 -0.0559959 -0.125457 0.0227993 -0.0246145 0.0305215 0.0751857 -0.0274226 -0.0235973 -0.0485655 -0.0172129 -0.0446481 -0.0395347 -0.000528293 -0.0409909 -0.0634594 0.00729788 -0.0420315 -0.0258402 -0.0518549 -0.0206143 0.0421255 0.11017 -0.078328 0.0918081 0.025868 -0.104621 0.0813534 -0.0703578 -0.077607 0.0030427 -0.0373322 -0.0446096 -0.141203 0.0131917 0.0103646 0.0116625 0.0726037 -0.16107 -0.0476511 -0.0164733 0.0343882 0.0113483 0.0318525 0.0911797 -0.0654958 0.0703042 -0.0130563 -0.0111578 0.0475259 -0.0160324 -0.0135566 -0.0335035 0.083078 -0.0289674 0.00995006 -0.0377994 0.0592405 -0.0377254 0.0519572 0.0248883 0.00372099 -0.0316352 0.0439968 0.0289611 0.00238136 0.0381502 -0.00592551 0.0302804 -0.0354001 0.0869948 -0.111436 0.032347 -0.0191435 -0.013503 0.0299959 0.035478 -0.0450695 -0.0428636 -0.0621294 -0.000911261 0.0910481 0.00249302 -0.000659245 0.0210684 0.0529413 -0.0594792 -0.00446623 0.11662 -0.0289874 -0.00050939 0.0421816 0.00985945 0.105649 0.0803595 0.0181502 0.060132 0.0862592 0.0369167 0.0151131 -0.0630794 0.0150712 0.0472058 -0.0525808 0.0615984 -0.02752 0.0400614 0.100743 0.0568972 0.068034 0.128429 0.0183192 -0.00151268 -0.0436026 -0.00374974 -0.0567754 -0.139171 0.0227102 -0.00399786 0.0761016 -0.0713273 0.0314525 -0.0551969 0.132166 0.0707963 -0.0106073 0.113536 -0.0787762 0.0765108 0.0674494 0.010921 -0.0535016 -0.0240815 0.0374418 -0.022069 -0.0927045 0.00544808 -0.00506321 0.000824105 0.0435569 -0.0248566 -0.0419292 0.0408901 -0.00399298 -0.00376689 0.0165596 -0.0978832 0.0162132 0.030314 0.00785375 0.132234 -0.0962365 -0.0741578 -0.0074757 -0.0311339 -0.048034 
-0.0982242 0.068224 -5.61859e-06 -0.0922125 0.0596936 -0.00342864 0.0420919 -0.147395 -0.0190388 0.0221747 -0.0801354 -0.11472 0.0257136 0.0674591 -0.0975422 -0.00778213 0.0132205 0.0890897 -0.0116386 0.107529 -0.0208072 -0.0369956 -0.0425723 -0.0240358 0.0232987 -0.0287813 -0.073958 -0.103422 -0.0267673 -0.00557962 -0.0584415 -0.0745719 -0.0267108 -0.025926 -0.0236504 0.0140579 -0.0460561 0.132239 0.0266183 0.0290975 0.0300938 0.00451075 -0.0586169 -0.0810952 -0.0141705 0.00140073 -0.0783183 -0.0667917 -0.0925048 0.0111354 0.0899081 -0.00888412 -0.00295043 0.0696758 0.0158502 0.0698268 -0.0150396 -0.0741383 0.0452162 0.032328 0.0089231 -0.117619 0.00264979 -0.0314126 -0.0418776 0.0148325 0.0400248 0.0127119 0.028736 0.00248873 0.0434204 -0.00884905 -0.0379124 0.0016677 0.0676783 0.0224617 -0.0263903 0.00411171 -0.0748398 -0.0717852 0.0317594 -0.011148 -0.0706306 -0.0824154 -0.0936107 0.0516941 0.00081748 -0.0795606 -0.0355979 0.0563954 0.0407118 -0.0293849 -0.031527 0.0745621 -0.0582125 0.0812803 0.0687355 0.0224276 -0.029422 0.0226999 -0.0563077 0.0215253 -0.0112654 0.0125983 -0.0567705 0.00532775 0.0435525 -0.00774608 0.0724234
. -0.0469256 -0.000585261 -0.0750844 0.0272415 -0.101507 0.0131279 -0.0127297 -0.155756 -0.0185198 0.104861 0.0332065 -0.0678715 0.0444594 -0.0406334 -0.0328302 -0.11935 -0.0549535 0.00236837 0.0819509 0.0814484 -0.0462626 0.0971621 -0.0840835 0.0795438 -0.0923183 0.000739231 -0.0392808 -0.105844 0.0144945 0.0719889 -0.0398682 0.0608952 -0.0782643 0.071157 -0.0700253 -0.0260608 -0.00436908 -0.0332842 0.0869763 -0.0518834 -0.0270418 -0.017307 -0.089831 0.0412314 -0.0247335 -0.018948 0.0119113 -0.0532896 0.00509417 -0.0809839 -0.018929 -0.0345291 -0.031133 0.015731 -0.0319362 0.00361524 0.0264408 0.0935134 -0.0454603 0.0682854 0.0275181 -0.161165 0.169958 -0.171609 -0.0662795 -0.0110063 0.0349071 -0.0907389 -0.0805616 0.0066309 0.000815582 0.0120629 -0.0473859 -0.136448 -0.0465962 -0.00154172 0.041671 0.0189514 -0.016 0.0233771 -0.0350726 0.0586148 -0.00609417 0.0155194 0.0103205 -0.0992102 -0.0115469 0.0336777 0.014266 -0.0609417 -0.0548775 0.000586317 0.0546283 -0.0108771 0.060114 -0.0164987 0.0187542 -0.0452998 0.00431038 -0.108623 0.00456081 0.0584163 -0.0507052 0.0425253 -0.0137217 0.0164704 -0.0951308 -0.00346934 0.038005 0.0541976 -0.0177614 0.0145241 -0.0265481 -0.073467 -0.0507685 0.0438217 0.101246 0.00709459 0.00555912 0.0755067 0.0674366 -0.0420439 -0.000141271 0.0626393 0.00100958 0.00548352 0.00790244 -0.00589273 0.00756291 0.0761106 -0.0260844 0.0582136 0.0465709 0.0286951 -0.0394704 0.0275616 0.0343289 0.031448 -0.0632939 0.114915 -0.101529 0.0461402 0.0785937 0.108273 0.112327 -0.0142605 0.015141 0.0195207 -0.0409362 -0.00936697 -0.00393196 -0.0727026 -0.00220338 -0.0368281 0.0691638 -0.0738175 0.0450802 -0.0503124 0.0488978 0.118682 0.0132884 0.037826 -0.0322669 0.0388255 0.0463344 0.0457136 0.0189383 -0.0433361 0.00472761 0.0516596 -0.0905573 -0.00362335 -0.0845396 0.0322626 0.0292373 -0.000114818 -0.0619552 0.000879349 0.0462246 -0.0110477 -0.0150591 -0.087255 -0.00725041 0.061195 -0.00765751 0.112154 -0.0302753 -0.135295 0.0380903 0.0164514 
-0.0560135 -0.035027 0.118948 0.0404353 -0.0915413 0.0155274 0.00102491 0.0310832 -0.1472 -0.0097521 -0.034709 -0.0445861 0.0123302 0.0390831 0.112517 -0.0550675 -0.0623479 0.0886401 0.0746705 -0.0329852 0.0670734 -0.0523057 -0.0154776 -0.0151672 -0.0341663 -0.0221039 -0.018236 -0.0644594 -0.0535303 0.0563977 0.0194214 -0.0269907 0.0538175 -0.0444256 -0.0518327 0.0344214 0.0234109 -0.0783783 0.0452998 0.0416076 0.0279645 0.0950675 -0.0821452 -0.0248488 0.0256748 0.0377322 -0.0212208 -0.032046 -0.0569172 -0.0819129 0.000377854 0.102656 -0.0179459 0.00713724 0.0638428 0.0158256 0.0557474 0.0223201 -0.063391 0.0106567 0.0105194 0.0170355 -0.170004 0.0126731 0.00608868 -0.00377381 0.0154489 0.00940413 -0.0315042 0.0463978 0.0076347 0.0816131 0.0161706 0.0338454 0.0227622 0.055076 0.00183192 -0.0650844 0.0392166 -0.0250735 -0.0141972 0.0186119 0.0279468 -0.0703884 -0.0263737 -0.0987752 -0.0109903 -0.0119759 -0.108002 -0.0447043 0.041986 0.0662373 -0.0534586 -0.0970227 0.063353 -0.0686359 0.040206 0.129738 0.0221136 -0.0462964 0.0402584 -0.0925421 -0.0180574 -0.0577913 0.0411613 -0.0921367 -0.0245916 0.0267626 -0.0513555 0.0166127
the -0.0324474 -0.0462027 -0.00872643 0.0993624 0.0146613 -0.0198258 -0.0810911 -0.0362278 0.0445008 0.0401815 -0.0198814 -0.117326 0.0906365 -0.0304146 -0.0319765 -0.037376 -0.0248697 -0.00993326 0.00173625 0.071948 -0.0833905 0.0381676 -0.114113 -0.0287737 -0.0665892 -0.0365456 -0.000624522 0.00978774 0.0282418 0.0310399 -0.0772521 0.0755238 -0.0527877 0.122521 -0.0138329 -0.0879198 0.00357867 -0.0593036 0.0416396 -0.0588169 0.0266258 -0.00114942 -0.0418562 0.0141145 0.0388292 -0.059681 -0.0202956 0.0444183 0.0252516 -0.0316458 0.0351938 -0.031766 -0.047344 0.0346892 -0.024958 0.0289366 0.0426498 0.0218352 -0.0254174 0.0485806 -0.025182 -0.0903633 0.160737 -0.0379248 0.0230897 -0.0987615 -0.121324 -0.092628 -0.11157 0.0344627 -0.185628 -0.0408986 0.0305586 -0.0653029 -0.0376571 -0.0301196 0.0361289 0.121165 0.0104522 -0.035387 0.0552013 0.0363226 -0.0427432 0.0555291 -0.00308546 -0.0830081 -0.0325407 0.041505 -0.0461163 -0.0614689 -0.0411753 0.00597207 0.168042 -0.134708 0.0270638 -0.0438065 0.0364006 0.0121269 0.00179654 -0.0138085 -0.062492 -0.0160732 -0.000860326 -0.0372806 -0.100937 -0.0582805 0.00380385 0.0108739 -0.0067801 0.0319279 -0.00429016 -0.0411564 -0.0506422 -0.0673888 0.0426046 -0.00305348 0.0787569 0.0924194 0.0559314 0.0449169 0.136362 0.113209 -0.0377922 0.106017 0.0130159 0.0348615 0.0637584 0.102019 0.045864 0.0634405 -0.0869911 0.0446686 -0.012407 0.0166697 -0.060272 0.0296508 -0.0298062 0.0690724 -0.0280054 0.0748583 0.0474294 0.0275028 0.0254666 0.0184317 0.00850046 0.111624 0.0232536 0.0176376 0.0326713 0.0471344 0.066177 -0.0353393 -0.0386831 -0.0335559 -0.0354103 -0.0348263 0.0157385 -0.0293796 0.0709845 0.0299031 -0.0602124 0.0731896 -0.0344279 0.0419451 0.0773117 0.0119258 -0.0549927 0.0376705 0.0808378 -0.0424492 -0.0976788 -0.0385813 -0.0333711 -0.0383653 -0.0519533 0.0641259 0.00494627 0.12256 -0.00106191 -0.0130795 0.0224391 0.0137877 -0.0242573 0.0543968 -0.0163632 0.119417 0.0915652 -0.0755138 0.0565075 0.0235357 -0.000859482 
-0.0818311 0.0952651 0.0873189 -0.0215312 0.0239941 -0.0271477 0.0133839 -0.0870209 0.0596959 -0.00732442 -0.0229839 -0.0220115 0.0562194 -0.00690774 -0.0795863 -0.0118359 0.00591446 0.0220542 0.0509153 0.117505 0.0507564 -0.00442912 -0.0265269 0.0327935 -0.0524549 0.049343 -0.130914 -0.0674385 0.0147963 -0.00244783 -0.0163409 -0.0241316 0.0725539 -0.0165167 0.036789 -0.0913616 0.0197319 0.001773 -0.0148669 0.0653923 0.0911728 -0.0637634 -0.013522 -0.027694 -0.00776345 0.00917837 -0.047716 0.00540889 -0.0153352 -0.0411236 -0.0176694 0.0874033 0.0221386 0.104041 0.100415 0.059532 -0.0609971 0.0649702 -0.0235387 0.0257209 0.120822 0.0129111 -0.00860525 -0.0846023 0.110179 -0.0337957 -0.0553205 0.0165544 -0.0602025 0.0128028 0.0791542 -0.0180503 0.00456763 -0.0547941 -0.0393576 -0.0545954 0.0424899 0.00484421 -0.117246 -0.092469 -0.035676 -0.0123231 0.037078 -0.0142381 0.0156669 0.0441665 0.118557 0.0834352 -0.0292853 0.0313498 -0.0287121 0.0095424 0.00800034 0.0566465 -0.0370065 0.0257363 0.103216 -0.0431097 0.0543968 0.0322537 -0.107636 -0.0187148 0.0406548 -0.0198114 -0.0254944 -0.0504833 0.0826505 -0.0649553 0.0175795
</s> 0.0101791 -0.0148314 0.088268 0.0354097 -0.0375626 -0.0808551 -0.102822 -0.102606 -0.0315088 0.00637001 -0.0481918 -0.0903824 -0.0533505 -0.0534963 -0.0545535 -0.127325 -0.0274925 0.0141169 0.0538102 0.10439 0.0320293 0.0161234 -0.0666754 -0.10076 -0.115916 -0.0671656 0.0028815 -0.137916 0.0405055 0.062572 -0.0297548 0.152041 -0.0536441 -0.0325903 -0.0567794 -0.0408782 -0.0242741 0.0459862 -0.00922498 -0.0306521 -0.00236746 -0.0560989 -0.0493584 -0.0314561 0.0502334 0.0913242 0.0573688 0.00302531 0.0188299 -0.0221171 -0.00175493 -0.134951 0.0092422 -0.135731 -0.0718178 0.0652455 0.0260099 -0.0498668 0.00914134 0.0328374 -0.0866011 0.0096256 0.0453198 -0.0154431 -0.0673276 0.0668354 0.0448641 -0.0149149 -0.0618996 -0.0153694 -0.0619097 -0.0152487 0.0969589 -0.000337286 -0.0778049 -0.0306784 0.00659078 0.0203773 -0.0454231 -0.0181012 -0.0200897 0.00780722 -0.0210072 0.0226214 0.0198017 -0.0253496 0.0704933 0.00840086 -0.019792 0.0260099 0.0429198 0.07231 0.0033597 0.019828 0.010934 0.0215034 0.0580838 0.00162397 -0.0791619 0.022666 -0.0132468 0.0507641 0.0452692 -0.0507722 0.0678461 0.0285274 0.0373419 -0.0497493 0.0236787 -0.0123911 0.0283188 -0.0125268 -0.129205 0.0522871 -0.0916179 -0.0460712 0.0223703 -0.0395941 -0.0775557 0.0247359 0.0425106 -0.00481655 -0.00596149 0.137039 -0.0278327 -0.00359889 -0.0454333 0.00737176 -0.0615269 0.0539317 -0.0687575 -0.0591228 -0.0250843 0.116579 0.0900726 0.0126106 -0.00922276 -0.126353 0.0735982 0.0848694 -0.0781593 -0.0164128 0.0665944 0.0170994 -0.000760995 -0.0228726 0.0325782 -0.0829048 0.0440114 0.0517038 -0.0132391 -0.0351362 -0.016951 0.0509868 0.152393 -0.108265 0.0344233 0.0191643 0.0244119 0.045413 0.000226012 0.033838 0.104994 -0.0353226 0.00515499 0.0441674 0.0927278 -0.0938053 0.0612697 0.051396 0.00418443 -0.0555703 -0.0485584 0.0671514 -0.0278186 0.0880148 -0.0276768 -0.0714229 -0.0512704 -0.188269 0.0237354 -0.0291371 0.0435173 0.0076138 -0.101516 0.0843752 -0.0116963 -0.0492977 0.0479549 -0.000374188 
0.070629 -0.124553 0.00463264 0.0659605 -0.169872 -0.0327038 -0.0352314 0.0432681 -0.0885637 0.0289204 0.0541545 -0.00950752 0.0276059 0.0426544 0.101866 -0.0029212 0.0187742 -0.00459153 0.0809077 -0.0203773 -0.0279016 0.0420043 0.0262914 0.0546143 0.0299938 -0.0452935 -0.00578488 -0.0719758 0.00577658 0.00108293 0.046766 0.0334775 -0.0126155 0.0344415 -0.0767294 -0.0731971 0.0555399 -0.0626996 0.106523 -0.0869616 0.0101431 -0.0197468 -0.00532917 -0.0288434 -0.019809 0.00270712 0.026421 -0.0502537 -0.00724964 -0.0119046 -0.012993 0.0820055 0.0104029 -0.0416681 0.0207439 -0.0153029 -0.0371677 0.0364487 -0.0488946 -0.061695 -0.0603218 -0.0422514 -0.0937547 0.00385672 0.046687 -0.0338846 0.0284464 -0.0836562 0.055627 0.0234012 0.0204766 -0.0334349 -0.00915572 0.06736 -0.12449 -0.0719535 0.011601 -0.0381824 -0.0313447 0.0790808 -0.0262894 -0.0310004 0.0649721 0.0611623 0.000383849 -0.0229495 -0.105889 0.0395981 0.0334775 -0.0369793 -0.0171292 0.0269638 0.0750422 -0.042859 -0.00807133 0.0935906 0.014814 -0.0135558 -0.00436995 0.0733592 -0.0196164 -0.0966571 -0.00557201 0.0525869 -0.00212016 -0.0603866 -0.0478171 0.0415709 0.152274 0.0196778
of 0.0211915 -0.123873 0.00805774 0.0893485 0.0263586 0.037102 -0.0157477 -0.0296886 0.0629916 0.0775162 0.0633043 -0.0591836 0.101381 0.0129214 -0.0955318 -0.00197633 -0.0486191 -0.0179622 0.0365744 0.0332926 -0.106213 0.0232709 -0.0474988 -0.054138 -0.0881023 0.0323773 0.0462874 0.0235948 0.0383273 0.097946 -0.0126274 0.129396 -0.0700346 0.158137 0.0321224 -0.0388888 -0.017848 -0.105415 -0.00251888 -0.0272852 0.0268198 -0.0275206 0.0281745 0.0332961 0.023765 -0.0527398 -0.0473599 -0.0402904 0.0145762 -0.0824315 0.0386886 0.00565175 -0.0223929 0.0121754 0.0551323 -0.0336752 0.0682674 0.0336413 -0.0709638 0.0718496 -0.113326 -0.126452 0.131055 -0.0727702 -0.0245414 -0.0560051 -0.0855491 -0.0656447 -0.058072 -0.0651454 -0.102149 -0.0691054 0.0566304 -0.0118732 -0.05396 -0.0189453 0.0539035 0.151493 0.0214663 -0.00439079 0.0879156 0.0357333 -0.065814 -0.0987276 0.0523838 -0.0266191 -0.0337442 0.04759 -0.0417264 -0.0883108 -0.0099609 0.0692357 0.101281 -0.100334 0.0228762 -0.0420955 0.0249122 0.0614111 0.0332379 0.00793095 0.0278293 0.019478 0.0382665 -0.0507772 -0.10838 -0.0566608 -0.0360473 -0.0260885 0.0104451 -0.0090091 0.016993 0.0189175 -0.0443247 -0.0671471 0.0559791 -0.0105037 0.0197004 0.0554841 0.0573686 0.0834041 0.107412 0.0538688 -0.0879113 0.0597567 -0.0100525 0.0181224 -0.00606469 0.0790012 0.0703863 0.0271758 -0.034729 0.057247 -0.0336031 0.00882022 -0.0180147 0.00891184 -0.0491098 0.0635648 0.0347863 0.0160008 0.059227 -0.00425336 0.0511462 0.0588232 0.0993919 0.0921883 -0.0346413 0.0600564 -0.0330499 0.0124025 0.0496786 -0.067108 -0.0361558 -0.0248228 -0.0431402 -0.0740902 -0.031158 -0.0223608 0.115362 0.00816282 -0.118193 0.0550585 -0.00913807 0.0255453 0.13119 -0.0153083 -0.0416638 -0.01697 0.0884454 -0.07759 -0.113712 -0.0477898 -0.0681458 -0.0125067 -0.103656 0.0677116 0.00378571 0.104442 0.0527094 0.0596091 0.00691575 -0.00581241 -0.0299804 0.0447372 -0.0312943 0.121415 0.0831827 -0.0599652 0.0309999 -0.0136986 -0.0153274 -0.0323964 0.0886451 
0.0311984 -0.0298184 -0.0178971 -0.0378015 -0.0506512 -0.087959 0.0317794 -0.0319135 -0.121368 -0.0755231 0.027792 -0.0218819 -0.0192905 0.0036851 0.0282878 0.0327316 0.0483021 0.108428 0.0724966 0.0175736 -0.0631045 0.00727875 -0.0489491 0.00757055 -0.0832695 -0.0195228 0.0371853 0.0344511 -0.0200664 -0.0879503 0.114086 -0.0395366 0.0337477 -0.0671037 0.0183764 0.0509856 -0.0373603 0.0380421 0.0927702 -0.0590403 -0.0130208 -0.00914154 0.011386 -0.0276773 -0.0363217 0.00719321 0.0189062 -0.0169396 -0.00433117 0.0583021 0.0274463 0.0991097 0.064455 0.014297 -0.0825531 0.0861397 -0.0971297 8.18366e-05 0.0755752 -0.0164581 0.00897133 -0.0364971 0.0799782 0.0123825 -0.0425557 0.0865782 0.0346547 0.0538861 0.041359 -0.0153456 0.0365306 -0.0357372 -0.0329101 0.0717975 0.0502865 0.00565305 -0.0351254 -0.00451584 -0.0341884 -0.0482935 0.0125905 -0.0274902 -0.0147329 0.00893268 0.114859 0.000751367 0.00825096 0.0613633 -0.0343508 0.0666781 0.0316426 0.0378497 -0.0024112 -0.00871601 0.0652279 -0.00666564 0.0727007 0.0736256 -0.146231 -0.0261771 0.0373847 -0.0282244 0.00209861 -0.0438384 0.0603994 -0.0595483 -0.0172405
22 changes: 22 additions & 0 deletions tests/word_to_vector/test_fast_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,30 @@ def test_fasttext_simple(mock_urlretrieve):
assert 'the' in vectors.stoi
assert len(vectors) == 1

# Test implementation of __contains__()
assert 'the' in vectors

# Test with the unknown characters
assert len(vectors['漢字']) == 300

# Clean up
os.remove(os.path.join(directory, 'wiki.simple.vec.pt'))


@mock.patch('urllib.request.urlretrieve')
def test_aligned_fasttext(mock_urlretrieve):
    """Load ``FastText(aligned=True)`` from the test data directory and check it.

    Checks that the vector for ``'the'`` has 300 entries and that the loaded
    vocabulary holds five words. The ``.pt`` file this test removes is deleted
    in a ``finally`` clause so that a failing assertion does not leave it
    behind in the fixture directory.
    """
    directory = 'tests/_test_data/fast_text/'
    cache_path = os.path.join(directory, 'wiki.multi.en.vec.pt')

    # Make sure URL has a 200 status
    mock_urlretrieve.side_effect = urlretrieve_side_effect

    try:
        # Parse the aligned FastText embeddings
        vectors = FastText(aligned=True, cache=directory)

        # Assert the embeddings' dimensionality
        assert len(vectors['the']) == 300
        # Our test file contains only five words to keep the file size small
        assert len(vectors) == 5
    finally:
        # Clean up even when loading or an assertion above fails; the guard
        # keeps a missing file from masking the original error.
        if os.path.isfile(cache_path):
            os.remove(cache_path)
14 changes: 12 additions & 2 deletions torchnlp/word_to_vector/fast_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,15 @@ class FastText(_PretrainedWordVectors):
References:
* https://arxiv.org/abs/1607.04606
* https://fasttext.cc/
* https://arxiv.org/abs/1710.04087
Args:
language (str): language of the vectors
aligned (bool, optional): if True, use multilingual embeddings where words with
the same meaning share (approximately) the same position in the
vector space across languages; if False, use regular FastText
embeddings. All available languages can be found under
https://github.com/facebookresearch/MUSE#multilingual-word-embeddings
cache (str, optional): directory for cached vectors
unk_init (callback, optional): by default, initialize out-of-vocabulary word vectors
to zero vectors; can be any function that takes in a Tensor and
Expand All @@ -66,8 +72,12 @@ class FastText(_PretrainedWordVectors):
[torch.FloatTensor of size 100]
"""
url_base = 'https://s3-us-west-1.amazonaws.com/fasttext-vectors/wiki.{}.vec'
aligned_url_base = 'https://s3.amazonaws.com/arrival/embeddings/wiki.multi.{}.vec'

def __init__(self, language="en", **kwargs):
url = self.url_base.format(language)
def __init__(self, language="en", aligned=False, **kwargs):
    """Build the download URL for ``language`` and delegate to the parent class.

    Args:
        language (str): value substituted into the URL template.
        aligned (bool): if truthy, format ``aligned_url_base``; otherwise
            format ``url_base``.
        **kwargs: forwarded unchanged to the parent ``__init__``.
    """
    url_template = self.aligned_url_base if aligned else self.url_base
    url = url_template.format(language)
    # The final path component of the URL is passed to the parent as the name.
    super(FastText, self).__init__(os.path.basename(url), url=url, **kwargs)
3 changes: 3 additions & 0 deletions torchnlp/word_to_vector/pretrained_word_vectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,9 @@ def __init__(self,
self.name = name
self.cache(name, cache, url=url)

def __contains__(self, token):
    """Return whether ``token`` is a key of ``self.stoi`` (backs ``token in obj``)."""
    known_tokens = self.stoi
    return token in known_tokens

def __getitem__(self, token):
if token in self.stoi:
return self.vectors[self.stoi[token]]
Expand Down

0 comments on commit 66fba98

Please sign in to comment.