diff --git a/ijulia_tutorials/readFamaFrenchRaw.ipynb b/ijulia_tutorials/readFamaFrenchRaw.ipynb index ff20e05..615e068 100644 --- a/ijulia_tutorials/readFamaFrenchRaw.ipynb +++ b/ijulia_tutorials/readFamaFrenchRaw.ipynb @@ -1,520 +1,435 @@ { - "metadata": { - "language": "Julia", - "name": "", - "signature": "sha256:d0c2c7176cdf38e58bdfba4b342892669e3cf028fc4fb88384379e55ffdcb3a9" - }, - "nbformat": 3, - "nbformat_minor": 0, - "worksheets": [ + "cells": [ { - "cells": [ - { - "cell_type": "heading", - "level": 2, - "metadata": {}, - "source": [ - "Accessible data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The function tries to make data from Kenneth R. French's data [library](http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html) accessible. " - ] - }, - { - "cell_type": "heading", - "level": 2, - "metadata": {}, - "source": [ - "What the function expects" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The function expects to find a .zip file which contains a .txt file of data. Thereby the .txt file starts with a description of the data, and lists several data sets afterwards. The individual data sets are separated by empty lines, and each data set has one header line followed by one or two lines of column names. The first column of the data contains dates given without any separator.\n", - "\n", - "The obstacle for full automation was dealing with the column names, since a single variable name sometimes may consist of two parts separated by whitespace. Hence, it is very difficult to tell automatically, whether two separated strings refer to two different column names or just one single column name.\n", - "\n", - "As an example, the following cell shows an extract of data *6 Portfolios formed on size and momentum (2 x 3)* - the comment signs `##` at the beginning of each line are not part of the original file, and shall only avoid execution of the lines by julia. 
" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "##This file was created by CMPT_ME_PRIOR_RETS using the 201405 CRSP database.\n", - "##It contains value- weighted returns for the intersections of 2 ME portfolios\n", - "##and 3 prior return portfolios.\n", - "##\n", - "##The portfolios are constructed monthly. ME is market cap at the end of the\n", - "##previous month. PRIOR_RET is from -12 to - 2.\n", - "##\n", - "##Missing data are indicated by -99.99 or -999.\n", - "##\n", - "##\n", - "## Average Value Weighted Returns -- Monthly\n", - "## Small Big \n", - "## Low 2 High Low 2 High \n", - "##192701 0.01 3.79 0.39 -0.63 0.23 0.00\n", - "##192702 7.13 6.24 5.75 5.59 3.78 4.49\n", - "##192703 -3.26 -2.95 -2.30 -7.66 -0.22 2.29\n", - "##192704 -0.56 -0.96 3.36 -1.90 0.78 1.89\n", - "##192705 2.47 11.39 7.00 4.21 4.87 7.10\n", - "## .\n", - "## .\n", - "## .\n", - "##201401 -2.48 -3.55 -2.59 -5.00 -3.35 -1.51\n", - "##201402 3.90 4.12 5.49 3.90 4.13 6.62\n", - "##201403 0.61 1.50 -1.20 2.02 1.63 -2.72\n", - "##201404 -2.45 -3.03 -5.27 2.70 0.73 -2.23\n", - "##201405 0.64 0.89 -0.50 0.61 2.40 3.43\n", - "##\n", - "##\n", - "## Average Equal Weighted Returns -- Monthly\n", - "## Small Big \n", - "## Low 2 High Low 2 High \n", - "##192701 1.77 3.33 -0.81 0.36 0.62 0.95\n", - "##192702 6.82 6.46 6.08 7.93 4.98 5.10\n", - "##192703 -4.55 -1.02 -3.56 -4.46 -1.19 0.63\n", - "##192704 2.13 -1.05 3.51 -1.74 0.95 1.33\n", - "##192705 2.72 11.36 7.54 5.51 4.60 8.43\n", - "##192706 -2.86 -1.33 -2.61 -4.19 -1.66 -1.59\n", - "##192707 5.32 4.88 6.45 6.01 6.72 6.85" - ], - "language": "python", - "metadata": {}, - "outputs": [], - "prompt_number": 26 - }, - { - "cell_type": "heading", - "level": 2, - "metadata": {}, - "source": [ - "Application" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The function needs to be called with some url given as `ASCIIString`. 
It returns a tuple consisting of three parts:\n", - "- the actual data sets as `Array{Any,1}`\n", - "- the description of each individual data set as `Array{Symbol,1}`\n", - "- the column / variable names as `Array{Union(UTF8String,ASCIIString),1})`" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "dataUrl = \"http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/6_Portfolios_ME_Prior_12_2.zip\"\n", - "(data, dataNames, varnames) = readFamaFrenchRaw(dataUrl)\n", - "\n", - "(typeof(data), typeof(dataNames), typeof(varnames))" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "output_type": "stream", - "stream": "stderr", - "text": [ - "--2014-07-16 17:59:31-- http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/6_Portfolios_ME_Prior_12_2.zip\n" - ] - }, - { - "output_type": "stream", - "stream": "stderr", - "text": [ - "Resolving mba.tuck.dartmouth.edu (mba.tuck.dartmouth.edu)... " - ] - }, - { - "output_type": "stream", - "stream": "stderr", - "text": [ - "129.170.38.49\n", - "Connecting to mba.tuck.dartmouth.edu (mba.tuck.dartmouth.edu)|129.170.38.49|:80... " - ] - }, - { - "output_type": "stream", - "stream": "stderr", - "text": [ - "connected.\n", - "HTTP request sent, awaiting response... " - ] - }, - { - "output_type": "stream", - "stream": "stderr", - "text": [ - "200 OK\n", - "Length: 104862" - ] - }, - { - "output_type": "stream", - "stream": "stderr", - "text": [ - " (102K) [application/x-zip-compressed]\n", - "Saving to: `/tmp/julialDA9T3'\n", - "\n", - " 0K .." - ] - }, - { - "output_type": "stream", - "stream": "stderr", - "text": [ - "......" - ] - }, - { - "output_type": "stream", - "stream": "stderr", - "text": [ - ".. ......" - ] - }, - { - "output_type": "stream", - "stream": "stderr", - "text": [ - ".." - ] - }, - { - "output_type": "stream", - "stream": "stderr", - "text": [ - ".. .........." - ] - }, - { - "output_type": "stream", - "stream": "stderr", - "text": [ - " .......... .......... 
48% 86.9K 1s\n", - " 50K ...." - ] - }, - { - "output_type": "stream", - "stream": "stderr", - "text": [ - "...... .......... .......... ....." - ] - }, - { - "output_type": "stream", - "stream": "stderr", - "text": [ - "....." - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "Archive: /tmp/julialDA9T3\n" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - " inflating: /tmp/6_Portfolios_ME_Prior_12_2.txt \n" - ] - }, - { - "output_type": "stream", - "stream": "stderr", - "text": [ - " .......... 97% 178K 0s\n", - " 100K .. 100% 3.87M=0.9s\n", - "\n", - "2014-07-16 17:59:33 (120 KB/s) - `/tmp/julialDA9T3' saved [104862/104862]\n", - "\n" - ] - }, - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 16, - "text": [ - "(Array{Any,1},Array{Symbol,1},Array{Union(UTF8String,ASCIIString),1})" - ] - } - ], - "prompt_number": 16 - }, - { - "cell_type": "heading", - "level": 3, - "metadata": {}, - "source": [ - "Data format" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Each data set is one entry in an `Array{Any,1}`. Hence, the number of data sets can be determined with `length`." - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "nData = length(data)" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 19, - "text": [ - "8" - ] - } - ], - "prompt_number": 19 - }, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Accessible data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The function tries to make data from Kenneth R. French's data [library](http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html) accessible. 
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## What the function expects" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The function expects to find a .zip file which contains a .txt file of data. Thereby the .txt file starts with a description of the data, and lists several data sets afterwards. The individual data sets are separated by empty lines, and each data set has one header line followed by one or two lines of column names. The first column of the data contains dates given without any separator.\n", + "\n", + "The obstacle for full automation was dealing with the column names, since a single variable name sometimes may consist of two parts separated by whitespace. Hence, it is very difficult to tell automatically, whether two separated strings refer to two different column names or just one single column name.\n", + "\n", + "As an example, the following cell shows an extract of data *6 Portfolios formed on size and momentum (2 x 3)* - the comment signs `##` at the beginning of each line are not part of the original file, and shall only avoid execution of the lines by julia. " + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "##This file was created by CMPT_ME_PRIOR_RETS using the 201405 CRSP database.\n", + "##It contains value- weighted returns for the intersections of 2 ME portfolios\n", + "##and 3 prior return portfolios.\n", + "##\n", + "##The portfolios are constructed monthly. ME is market cap at the end of the\n", + "##previous month. 
PRIOR_RET is from -12 to - 2.\n", + "##\n", + "##Missing data are indicated by -99.99 or -999.\n", + "##\n", + "##\n", + "## Average Value Weighted Returns -- Monthly\n", + "## Small Big \n", + "## Low 2 High Low 2 High \n", + "##192701 0.01 3.79 0.39 -0.63 0.23 0.00\n", + "##192702 7.13 6.24 5.75 5.59 3.78 4.49\n", + "##192703 -3.26 -2.95 -2.30 -7.66 -0.22 2.29\n", + "##192704 -0.56 -0.96 3.36 -1.90 0.78 1.89\n", + "##192705 2.47 11.39 7.00 4.21 4.87 7.10\n", + "## .\n", + "## .\n", + "## .\n", + "##201401 -2.48 -3.55 -2.59 -5.00 -3.35 -1.51\n", + "##201402 3.90 4.12 5.49 3.90 4.13 6.62\n", + "##201403 0.61 1.50 -1.20 2.02 1.63 -2.72\n", + "##201404 -2.45 -3.03 -5.27 2.70 0.73 -2.23\n", + "##201405 0.64 0.89 -0.50 0.61 2.40 3.43\n", + "##\n", + "##\n", + "## Average Equal Weighted Returns -- Monthly\n", + "## Small Big \n", + "## Low 2 High Low 2 High \n", + "##192701 1.77 3.33 -0.81 0.36 0.62 0.95\n", + "##192702 6.82 6.46 6.08 7.93 4.98 5.10\n", + "##192703 -4.55 -1.02 -3.56 -4.46 -1.19 0.63\n", + "##192704 2.13 -1.05 3.51 -1.74 0.95 1.33\n", + "##192705 2.72 11.36 7.54 5.51 4.60 8.43\n", + "##192706 -2.86 -1.33 -2.61 -4.19 -1.66 -1.59\n", + "##192707 5.32 4.88 6.45 6.01 6.72 6.85" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Application" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The function needs to be called with some url given as `ASCIIString`. 
It returns a tuple consisting of three parts:\n", + "- the actual data sets as `Array{Any,1}`\n", + "- the description of each individual data set as `Array{Symbol,1}`\n", + "- the column / variable names as `Array{Union(UTF8String,ASCIIString),1}`" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "using EconDatasets" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Their descriptions can be found in variable `dataNames`." + "name": "stderr", + "output_type": "stream", + "text": [ + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + "100 103k 100 103k 0 0 94912 0 0:00:01 0:00:01 --:--:-- 95004\n" ] }, { - "cell_type": "code", - "collapsed": false, - "input": [ - "dataNames" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 21, - "text": [ - "8-element Array{Symbol,1}:\n", - " symbol(\" Average Value Weighted Returns -- Monthly\\r\\n\")\n", - " symbol(\" Average Equal Weighted Returns -- Monthly\\r\\n\")\n", - " symbol(\" Average Value Weighted Returns -- Annual\\r\\n\") \n", - " symbol(\" Average Equal Weighted Returns -- Annual\\r\\n\") \n", - " symbol(\" Number of Firms in Portfolios\\r\\n\") \n", - " symbol(\" Average Firm Size\\r\\n\") \n", - " symbol(\" Equally-Weighted Average of Prior Returns\\r\\n\")\n", - " symbol(\" Value-Weighted Average of Prior Returns\\r\\n\") " - ] - } - ], - "prompt_number": 21 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Any individual data set is stored as `Timematr`, with default names for the individual columns." 
+ "name": "stdout", + "output_type": "stream", + "text": [ + "Archive: /tmp/juliaiI5UdL\n", + " inflating: /tmp/6_Portfolios_ME_Prior_12_2.txt \n" ] }, { - "cell_type": "code", - "collapsed": false, - "input": [ - "data[1]" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "html": [ - "

Timematr{Date{ISOCalendar}}

Dimensions: (1049, 6)

From: 1927-01-31, To: 2014-05-31

idxx1x2x3x4x5x6
11927-01-310.013.790.39-0.630.230.0
21927-02-287.136.245.755.593.784.49
31927-03-31-3.26-2.95-2.3-7.66-0.222.29
41927-04-30-0.56-0.963.36-1.90.781.89
51927-05-312.4711.397.04.214.877.1
61927-06-30-2.13-0.89-2.3-0.74-2.09-1.86
71927-07-313.024.146.953.287.879.4
81927-08-31-0.020.040.091.292.113.35
91927-09-302.50.352.823.364.886.23
101927-10-31-4.34-1.87-3.52-4.54-3.39-5.5
111927-11-3011.528.566.985.16.618.35
121927-12-310.25.784.47.71.73.45
131928-01-313.253.620.73-2.17-0.47-0.86
141928-02-29-3.19-2.28-4.85-0.76-1.04-1.35
151928-03-316.257.367.836.985.3314.84
161928-04-307.8810.476.1411.244.052.03
171928-05-313.072.373.28-1.51.063.7
181928-06-30-9.31-6.97-6.14-5.19-4.07-4.34
191928-07-31-1.43-0.321.76-0.20.771.39
201928-08-314.524.186.584.46.219.67
211928-09-303.754.887.891.581.874.89
221928-10-310.081.395.480.850.783.69
231928-11-3010.7712.3810.558.6113.4812.42
241928-12-31-2.630.22-1.56-0.48-0.051.67
251929-01-310.042.013.123.924.945.74
261929-02-28-0.230.835.03-0.65-1.071.14
271929-03-31-4.39-5.66-4.18-0.240.92-1.52
281929-04-301.91-0.452.08-1.741.72.75
291929-05-31-10.67-9.54-13.01-9.19-4.48-6.89
301929-06-303.976.6611.365.017.6414.44
" - ], - "metadata": {}, - "output_type": "pyout", - "prompt_number": 20, - "text": [ - "Timematr{Date{ISOCalendar}}(1049x6 DataFrame\n", - "|-------|-------|-------|-------|-------|-------|-------|\n", - "| Row # | x1 | x2 | x3 | x4 | x5 | x6 |\n", - "| 1 | 0.01 | 3.79 | 0.39 | -0.63 | 0.23 | 0.0 |\n", - "| 2 | 7.13 | 6.24 | 5.75 | 5.59 | 3.78 | 4.49 |\n", - "| 3 | -3.26 | -2.95 | -2.3 | -7.66 | -0.22 | 2.29 |\n", - "| 4 | -0.56 | -0.96 | 3.36 | -1.9 | 0.78 | 1.89 |\n", - "| 5 | 2.47 | 11.39 | 7.0 | 4.21 | 4.87 | 7.1 |\n", - "| 6 | -2.13 | -0.89 | -2.3 | -0.74 | -2.09 | -1.86 |\n", - "| 7 | 3.02 | 4.14 | 6.95 | 3.28 | 7.87 | 9.4 |\n", - "| 8 | -0.02 | 0.04 | 0.09 | 1.29 | 2.11 | 3.35 |\n", - "| 9 | 2.5 | 0.35 | 2.82 | 3.36 | 4.88 | 6.23 |\n", - "| 10 | -4.34 | -1.87 | -3.52 | -4.54 | -3.39 | -5.5 |\n", - "| 11 | 11.52 | 8.56 | 6.98 | 5.1 | 6.61 | 8.35 |\n", - "\u22ee\n", - "| 1038 | -0.32 | 0.3 | 0.51 | -2.66 | -0.55 | -2.19 |\n", - "| 1039 | 7.23 | 7.29 | 8.27 | 4.74 | 5.08 | 7.07 |\n", - "| 1040 | -3.2 | -3.05 | -2.42 | -2.25 | -2.79 | -2.92 |\n", - "| 1041 | 5.05 | 5.76 | 7.45 | 1.93 | 3.86 | 5.58 |\n", - "| 1042 | 2.14 | 3.52 | 2.25 | 3.49 | 5.13 | 3.7 |\n", - "| 1043 | 4.84 | 4.97 | 4.47 | 2.35 | 3.18 | 3.46 |\n", - "| 1044 | 2.89 | 1.63 | 2.76 | 2.82 | 2.48 | 3.04 |\n", - "| 1045 | -2.48 | -3.55 | -2.59 | -5.0 | -3.35 | -1.51 |\n", - "| 1046 | 3.9 | 4.12 | 5.49 | 3.9 | 4.13 | 6.62 |\n", - "| 1047 | 0.61 | 1.5 | -1.2 | 2.02 | 1.63 | -2.72 |\n", - "| 1048 | -2.45 | -3.03 | -5.27 | 2.7 | 0.73 | -2.23 |\n", - "| 1049 | 0.64 | 0.89 | -0.5 | 0.61 | 2.4 | 3.43 |,[1927-01-31,1927-02-28,1927-03-31,1927-04-30,1927-05-31,1927-06-30,1927-07-31,1927-08-31,1927-09-30,1927-10-31 \u2026 2013-08-31,2013-09-30,2013-10-31,2013-11-30,2013-12-31,2014-01-31,2014-02-28,2014-03-31,2014-04-30,2014-05-31])" - ] - } - ], - "prompt_number": 20 - }, - { - "cell_type": "heading", - "level": 3, + "data": { + "text/plain": [ + 
"(Array{Any,1},Array{Symbol,1},Array{Union(ASCIIString,UTF8String),1})" + ] + }, + "execution_count": 3, "metadata": {}, - "source": [ - "Data processing" - ] - }, + "output_type": "execute_result" + } + ], + "source": [ + "dataUrl = \"http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/6_Portfolios_ME_Prior_12_2_TXT.zip\"\n", + "(data, dataNames, varnames) = readFamaFrenchRaw(dataUrl)\n", + "\n", + "(typeof(data), typeof(dataNames), typeof(varnames))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Data format" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Each data set is one entry in an `Array{Any,1}`. Hence, the number of data sets can be determined with `length`." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [ { - "cell_type": "markdown", + "data": { + "text/plain": [ + "8" + ] + }, + "execution_count": 4, "metadata": {}, - "source": [ - "For a clean end result, one only needs to rename the individual variable names. The variable names can accessed from variable `varnames`. Note that the function assumes that the column names of all data sets are the same!" - ] - }, + "output_type": "execute_result" + } + ], + "source": [ + "nData = length(data)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Their descriptions can be found in variable `dataNames`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": false + }, + "outputs": [ { - "cell_type": "code", - "collapsed": false, - "input": [ - "varnames" - ], - "language": "python", + "data": { + "text/plain": [ + "8-element Array{Symbol,1}:\n", + " symbol(\" Average Value Weighted Returns -- Monthly\\r\\n\")\n", + " symbol(\" Average Equal Weighted Returns -- Monthly\\r\\n\")\n", + " symbol(\" Average Value Weighted Returns -- Annual\\r\\n\") \n", + " symbol(\" Average Equal Weighted Returns -- Annual\\r\\n\") \n", + " symbol(\" Number of Firms in Portfolios\\r\\n\") \n", + " symbol(\" Average Firm Size\\r\\n\") \n", + " symbol(\" Equally-Weighted Average of Prior Returns\\r\\n\")\n", + " symbol(\" Value-Weighted Average of Prior Returns\\r\\n\") " + ] + }, + "execution_count": 5, "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 22, - "text": [ - "2-element Array{Union(UTF8String,ASCIIString),1}:\n", - " \" Small Big \\r\\n\"\n", - " \" Low 2 High Low 2 High \\r\\n\"" - ] - } - ], - "prompt_number": 22 - }, + "output_type": "execute_result" + } + ], + "source": [ + "dataNames" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Any individual data set is stored as `Timematr`, with default names for the individual columns." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false + }, + "outputs": [ { - "cell_type": "markdown", + "data": { + "text/html": [ + "

Timematr{Date}

Dimensions: (1064, 6)

From: 1927-01-31, To: 2015-08-31

idxx1x2x3x4x5x6
11927-01-31-0.093.620.4-0.40.270.0
21927-02-287.266.116.057.273.84.47
31927-03-31-3.38-2.88-2.06-3.63-0.242.23
41927-04-30-0.51-0.563.32-2.720.751.82
51927-05-312.3911.216.825.524.857.11
61927-06-30-2.06-0.44-2.88-3.72-2.12-1.87
71927-07-313.144.296.934.537.849.38
81927-08-310.0-0.440.11.212.13.35
91927-09-302.530.33.193.024.876.22
101927-10-31-4.5-1.8-3.34-2.11-3.39-5.5
111927-11-3011.628.647.025.126.028.35
121927-12-310.375.744.531.231.73.46
131928-01-312.923.470.66-1.93-0.24-0.87
141928-02-29-3.5-1.92-4.77-0.81-1.04-1.58
151928-03-316.457.817.576.755.315.37
161928-04-308.210.425.9811.144.022.05
171928-05-312.762.374.01-1.211.063.24
181928-06-30-9.28-7.31-6.27-5.17-4.05-4.09
191928-07-31-1.67-0.571.99-0.280.781.56
201928-08-314.952.416.294.36.2610.21
211928-09-304.25.337.611.431.895.25
221928-10-310.30.927.290.770.783.88
231928-11-3010.0611.6710.558.7813.4912.15
241928-12-31-2.670.65-1.4-0.470.071.67
251929-01-31-0.112.192.843.885.25.79
261929-02-28-0.121.223.19-1.61-0.831.18
271929-03-31-4.2-5.67-4.070.780.82-1.44
281929-04-301.74-0.651.9-0.941.742.69
291929-05-31-11.22-8.92-11.78-3.41-4.53-6.93
301929-06-304.296.1310.383.087.7214.43
" + ], + "text/plain": [ + "Timematr{Date}(1064x6 DataFrame\n", + "| Row | x1 | x2 | x3 | x4 | x5 | x6 |\n", + "|------|-------|-------|-------|-------|-------|-------|\n", + "| 1 | -0.09 | 3.62 | 0.4 | -0.4 | 0.27 | 0.0 |\n", + "| 2 | 7.26 | 6.11 | 6.05 | 7.27 | 3.8 | 4.47 |\n", + "| 3 | -3.38 | -2.88 | -2.06 | -3.63 | -0.24 | 2.23 |\n", + "| 4 | -0.51 | -0.56 | 3.32 | -2.72 | 0.75 | 1.82 |\n", + "| 5 | 2.39 | 11.21 | 6.82 | 5.52 | 4.85 | 7.11 |\n", + "| 6 | -2.06 | -0.44 | -2.88 | -3.72 | -2.12 | -1.87 |\n", + "| 7 | 3.14 | 4.29 | 6.93 | 4.53 | 7.84 | 9.38 |\n", + "| 8 | 0.0 | -0.44 | 0.1 | 1.21 | 2.1 | 3.35 |\n", + "| 9 | 2.53 | 0.3 | 3.19 | 3.02 | 4.87 | 6.22 |\n", + "| 10 | -4.5 | -1.8 | -3.34 | -2.11 | -3.39 | -5.5 |\n", + "| 11 | 11.62 | 8.64 | 7.02 | 5.12 | 6.02 | 8.35 |\n", + "⋮\n", + "| 1053 | -6.65 | -4.81 | -5.93 | -1.94 | -1.6 | -1.67 |\n", + "| 1054 | 5.63 | 7.78 | 4.9 | 1.22 | 2.57 | 2.07 |\n", + "| 1055 | -0.63 | -0.26 | 0.44 | 2.43 | 2.19 | 3.39 |\n", + "| 1056 | 1.64 | 3.07 | 2.6 | -1.92 | 0.37 | -0.81 |\n", + "| 1057 | -7.42 | -4.43 | -1.82 | -3.31 | -4.52 | -1.39 |\n", + "| 1058 | 9.18 | 6.28 | 5.56 | 7.79 | 6.18 | 5.64 |\n", + "| 1059 | -0.98 | 1.66 | 2.68 | -3.01 | -1.62 | -0.63 |\n", + "| 1060 | 2.82 | -2.11 | -4.35 | 6.21 | 2.13 | -1.43 |\n", + "| 1061 | -1.08 | 0.88 | 4.51 | -2.96 | 1.11 | 3.3 |\n", + "| 1062 | -0.96 | 1.29 | 2.22 | -3.89 | -1.87 | -0.97 |\n", + "| 1063 | -8.93 | -1.24 | 0.5 | -7.48 | 2.44 | 3.55 |\n", + "| 1064 | -4.66 | -4.36 | -6.8 | -4.23 | -6.27 | -6.54 |,[1927-01-31,1927-02-28,1927-03-31,1927-04-30,1927-05-31,1927-06-30,1927-07-31,1927-08-31,1927-09-30,1927-10-31 … 2014-11-30,2014-12-31,2015-01-31,2015-02-28,2015-03-31,2015-04-30,2015-05-31,2015-06-30,2015-07-31,2015-08-31])" + ] + }, + "execution_count": 6, "metadata": {}, - "source": [ - "As an example, we translate these variable names manually into the following names:" - ] - }, + "output_type": "execute_result" + } + ], + "source": [ + "data[1]" + ] + }, + { + 
"cell_type": "markdown", + "metadata": {}, + "source": [ + "### Data processing" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For a clean end result, one only needs to rename the individual variable names. The variable names can accessed from variable `varnames`. Note that the function assumes that the column names of all data sets are the same!" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": false + }, + "outputs": [ { - "cell_type": "code", - "collapsed": false, - "input": [ - "newVarnames = [:SmallLow, :SmallMed, :SmallHigh, :BigLow, :BigMed, :BigHigh]" - ], - "language": "python", + "data": { + "text/plain": [ + "2-element Array{Union(ASCIIString,UTF8String),1}:\n", + " \" Small Big \\r\\n\"\n", + " \" Low 2 High Low 2 High \\r\\n\"" + ] + }, + "execution_count": 7, "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 23, - "text": [ - "6-element Array{Symbol,1}:\n", - " :SmallLow \n", - " :SmallMed \n", - " :SmallHigh\n", - " :BigLow \n", - " :BigMed \n", - " :BigHigh " - ] - } - ], - "prompt_number": 23 - }, + "output_type": "execute_result" + } + ], + "source": [ + "varnames" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As an example, we translate these variable names manually into the following names:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false + }, + "outputs": [ { - "cell_type": "code", - "collapsed": false, - "input": [ - "for ii=1:length(data)\n", - " rename!(data[ii].vals, names(data[ii].vals), newVarnames)\n", - "end" - ], - "language": "python", + "data": { + "text/plain": [ + "6-element Array{Symbol,1}:\n", + " :SmallLow \n", + " :SmallMed \n", + " :SmallHigh\n", + " :BigLow \n", + " :BigMed \n", + " :BigHigh " + ] + }, + "execution_count": 8, "metadata": {}, - "outputs": [], - "prompt_number": 24 - }, + "output_type": "execute_result" + } + ], + 
"source": [ + "newVarnames = [:SmallLow, :SmallMed, :SmallHigh, :BigLow, :BigMed, :BigHigh]" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "for ii=1:length(data)\n", + " rename!(data[ii].vals, names(data[ii].vals), newVarnames)\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": false + }, + "outputs": [ { - "cell_type": "code", - "collapsed": false, - "input": [ - "data[1]" - ], - "language": "python", + "data": { + "text/html": [ + "

Timematr{Date}

Dimensions: (1064, 6)

From: 1927-01-31, To: 2015-08-31

idxSmallLowSmallMedSmallHighBigLowBigMedBigHigh
11927-01-31-0.093.620.4-0.40.270.0
21927-02-287.266.116.057.273.84.47
31927-03-31-3.38-2.88-2.06-3.63-0.242.23
41927-04-30-0.51-0.563.32-2.720.751.82
51927-05-312.3911.216.825.524.857.11
61927-06-30-2.06-0.44-2.88-3.72-2.12-1.87
71927-07-313.144.296.934.537.849.38
81927-08-310.0-0.440.11.212.13.35
91927-09-302.530.33.193.024.876.22
101927-10-31-4.5-1.8-3.34-2.11-3.39-5.5
111927-11-3011.628.647.025.126.028.35
121927-12-310.375.744.531.231.73.46
131928-01-312.923.470.66-1.93-0.24-0.87
141928-02-29-3.5-1.92-4.77-0.81-1.04-1.58
151928-03-316.457.817.576.755.315.37
161928-04-308.210.425.9811.144.022.05
171928-05-312.762.374.01-1.211.063.24
181928-06-30-9.28-7.31-6.27-5.17-4.05-4.09
191928-07-31-1.67-0.571.99-0.280.781.56
201928-08-314.952.416.294.36.2610.21
211928-09-304.25.337.611.431.895.25
221928-10-310.30.927.290.770.783.88
231928-11-3010.0611.6710.558.7813.4912.15
241928-12-31-2.670.65-1.4-0.470.071.67
251929-01-31-0.112.192.843.885.25.79
261929-02-28-0.121.223.19-1.61-0.831.18
271929-03-31-4.2-5.67-4.070.780.82-1.44
281929-04-301.74-0.651.9-0.941.742.69
291929-05-31-11.22-8.92-11.78-3.41-4.53-6.93
301929-06-304.296.1310.383.087.7214.43
" + ], + "text/plain": [ + "Timematr{Date}(1064x6 DataFrame\n", + "| Row | SmallLow | SmallMed | SmallHigh | BigLow | BigMed | BigHigh |\n", + "|------|----------|----------|-----------|--------|--------|---------|\n", + "| 1 | -0.09 | 3.62 | 0.4 | -0.4 | 0.27 | 0.0 |\n", + "| 2 | 7.26 | 6.11 | 6.05 | 7.27 | 3.8 | 4.47 |\n", + "| 3 | -3.38 | -2.88 | -2.06 | -3.63 | -0.24 | 2.23 |\n", + "| 4 | -0.51 | -0.56 | 3.32 | -2.72 | 0.75 | 1.82 |\n", + "| 5 | 2.39 | 11.21 | 6.82 | 5.52 | 4.85 | 7.11 |\n", + "| 6 | -2.06 | -0.44 | -2.88 | -3.72 | -2.12 | -1.87 |\n", + "| 7 | 3.14 | 4.29 | 6.93 | 4.53 | 7.84 | 9.38 |\n", + "| 8 | 0.0 | -0.44 | 0.1 | 1.21 | 2.1 | 3.35 |\n", + "| 9 | 2.53 | 0.3 | 3.19 | 3.02 | 4.87 | 6.22 |\n", + "| 10 | -4.5 | -1.8 | -3.34 | -2.11 | -3.39 | -5.5 |\n", + "| 11 | 11.62 | 8.64 | 7.02 | 5.12 | 6.02 | 8.35 |\n", + "⋮\n", + "| 1053 | -6.65 | -4.81 | -5.93 | -1.94 | -1.6 | -1.67 |\n", + "| 1054 | 5.63 | 7.78 | 4.9 | 1.22 | 2.57 | 2.07 |\n", + "| 1055 | -0.63 | -0.26 | 0.44 | 2.43 | 2.19 | 3.39 |\n", + "| 1056 | 1.64 | 3.07 | 2.6 | -1.92 | 0.37 | -0.81 |\n", + "| 1057 | -7.42 | -4.43 | -1.82 | -3.31 | -4.52 | -1.39 |\n", + "| 1058 | 9.18 | 6.28 | 5.56 | 7.79 | 6.18 | 5.64 |\n", + "| 1059 | -0.98 | 1.66 | 2.68 | -3.01 | -1.62 | -0.63 |\n", + "| 1060 | 2.82 | -2.11 | -4.35 | 6.21 | 2.13 | -1.43 |\n", + "| 1061 | -1.08 | 0.88 | 4.51 | -2.96 | 1.11 | 3.3 |\n", + "| 1062 | -0.96 | 1.29 | 2.22 | -3.89 | -1.87 | -0.97 |\n", + "| 1063 | -8.93 | -1.24 | 0.5 | -7.48 | 2.44 | 3.55 |\n", + "| 1064 | -4.66 | -4.36 | -6.8 | -4.23 | -6.27 | -6.54 |,[1927-01-31,1927-02-28,1927-03-31,1927-04-30,1927-05-31,1927-06-30,1927-07-31,1927-08-31,1927-09-30,1927-10-31 … 2014-11-30,2014-12-31,2015-01-31,2015-02-28,2015-03-31,2015-04-30,2015-05-31,2015-06-30,2015-07-31,2015-08-31])" + ] + }, + "execution_count": 10, "metadata": {}, - "outputs": [ - { - "html": [ - "

Timematr{Date{ISOCalendar}}

Dimensions: (1049, 6)

From: 1927-01-31, To: 2014-05-31

idxSmallLowSmallMedSmallHighBigLowBigMedBigHigh
11927-01-310.013.790.39-0.630.230.0
21927-02-287.136.245.755.593.784.49
31927-03-31-3.26-2.95-2.3-7.66-0.222.29
41927-04-30-0.56-0.963.36-1.90.781.89
51927-05-312.4711.397.04.214.877.1
61927-06-30-2.13-0.89-2.3-0.74-2.09-1.86
71927-07-313.024.146.953.287.879.4
81927-08-31-0.020.040.091.292.113.35
91927-09-302.50.352.823.364.886.23
101927-10-31-4.34-1.87-3.52-4.54-3.39-5.5
111927-11-3011.528.566.985.16.618.35
121927-12-310.25.784.47.71.73.45
131928-01-313.253.620.73-2.17-0.47-0.86
141928-02-29-3.19-2.28-4.85-0.76-1.04-1.35
151928-03-316.257.367.836.985.3314.84
161928-04-307.8810.476.1411.244.052.03
171928-05-313.072.373.28-1.51.063.7
181928-06-30-9.31-6.97-6.14-5.19-4.07-4.34
191928-07-31-1.43-0.321.76-0.20.771.39
201928-08-314.524.186.584.46.219.67
211928-09-303.754.887.891.581.874.89
221928-10-310.081.395.480.850.783.69
231928-11-3010.7712.3810.558.6113.4812.42
241928-12-31-2.630.22-1.56-0.48-0.051.67
251929-01-310.042.013.123.924.945.74
261929-02-28-0.230.835.03-0.65-1.071.14
271929-03-31-4.39-5.66-4.18-0.240.92-1.52
281929-04-301.91-0.452.08-1.741.72.75
291929-05-31-10.67-9.54-13.01-9.19-4.48-6.89
301929-06-303.976.6611.365.017.6414.44
" - ], - "metadata": {}, - "output_type": "pyout", - "prompt_number": 25, - "text": [ - "Timematr{Date{ISOCalendar}}(1049x6 DataFrame\n", - "|-------|----------|----------|-----------|--------|--------|---------|\n", - "| Row # | SmallLow | SmallMed | SmallHigh | BigLow | BigMed | BigHigh |\n", - "| 1 | 0.01 | 3.79 | 0.39 | -0.63 | 0.23 | 0.0 |\n", - "| 2 | 7.13 | 6.24 | 5.75 | 5.59 | 3.78 | 4.49 |\n", - "| 3 | -3.26 | -2.95 | -2.3 | -7.66 | -0.22 | 2.29 |\n", - "| 4 | -0.56 | -0.96 | 3.36 | -1.9 | 0.78 | 1.89 |\n", - "| 5 | 2.47 | 11.39 | 7.0 | 4.21 | 4.87 | 7.1 |\n", - "| 6 | -2.13 | -0.89 | -2.3 | -0.74 | -2.09 | -1.86 |\n", - "| 7 | 3.02 | 4.14 | 6.95 | 3.28 | 7.87 | 9.4 |\n", - "| 8 | -0.02 | 0.04 | 0.09 | 1.29 | 2.11 | 3.35 |\n", - "| 9 | 2.5 | 0.35 | 2.82 | 3.36 | 4.88 | 6.23 |\n", - "| 10 | -4.34 | -1.87 | -3.52 | -4.54 | -3.39 | -5.5 |\n", - "| 11 | 11.52 | 8.56 | 6.98 | 5.1 | 6.61 | 8.35 |\n", - "\u22ee\n", - "| 1038 | -0.32 | 0.3 | 0.51 | -2.66 | -0.55 | -2.19 |\n", - "| 1039 | 7.23 | 7.29 | 8.27 | 4.74 | 5.08 | 7.07 |\n", - "| 1040 | -3.2 | -3.05 | -2.42 | -2.25 | -2.79 | -2.92 |\n", - "| 1041 | 5.05 | 5.76 | 7.45 | 1.93 | 3.86 | 5.58 |\n", - "| 1042 | 2.14 | 3.52 | 2.25 | 3.49 | 5.13 | 3.7 |\n", - "| 1043 | 4.84 | 4.97 | 4.47 | 2.35 | 3.18 | 3.46 |\n", - "| 1044 | 2.89 | 1.63 | 2.76 | 2.82 | 2.48 | 3.04 |\n", - "| 1045 | -2.48 | -3.55 | -2.59 | -5.0 | -3.35 | -1.51 |\n", - "| 1046 | 3.9 | 4.12 | 5.49 | 3.9 | 4.13 | 6.62 |\n", - "| 1047 | 0.61 | 1.5 | -1.2 | 2.02 | 1.63 | -2.72 |\n", - "| 1048 | -2.45 | -3.03 | -5.27 | 2.7 | 0.73 | -2.23 |\n", - "| 1049 | 0.64 | 0.89 | -0.5 | 0.61 | 2.4 | 3.43 |,[1927-01-31,1927-02-28,1927-03-31,1927-04-30,1927-05-31,1927-06-30,1927-07-31,1927-08-31,1927-09-30,1927-10-31 \u2026 2013-08-31,2013-09-30,2013-10-31,2013-11-30,2013-12-31,2014-01-31,2014-02-28,2014-03-31,2014-04-30,2014-05-31])" - ] - } - ], - "prompt_number": 25 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "data[1]" + ] } - ] -} \ 
No newline at end of file + ], + "metadata": { + "kernelspec": { + "display_name": "Julia 0.3.6", + "language": "julia", + "name": "julia-0.3" + }, + "language_info": { + "name": "julia", + "version": "0.3.6" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/src/getDataset/getFFF.jl b/src/getDataset/getFFF.jl index 04e2012..40d1b62 100644 --- a/src/getDataset/getFFF.jl +++ b/src/getDataset/getFFF.jl @@ -1,6 +1,6 @@ function getFFF() factorUrl = - "http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_daily.zip" + "http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_daily_TXT.zip" (data, dataNames, varnames) = readFamaFrenchRaw(factorUrl) newVarnames = [:MktRf, :SMB, :HML, :RF] diff --git a/src/getDataset/getUMD.jl b/src/getDataset/getUMD.jl index 1d59f14..29305b3 100644 --- a/src/getDataset/getUMD.jl +++ b/src/getDataset/getUMD.jl @@ -1,5 +1,5 @@ function getUMD() - umdUrl = "http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Momentum_Factor_daily.zip" + umdUrl = "http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Momentum_Factor_daily_TXT.zip" (data, dataNames, varnames) = readFamaFrenchRaw(umdUrl) newVarnames = [:UMD] diff --git a/src/readFamaFrenchRaw.jl b/src/readFamaFrenchRaw.jl index cefe7b5..acd29b8 100644 --- a/src/readFamaFrenchRaw.jl +++ b/src/readFamaFrenchRaw.jl @@ -147,9 +147,10 @@ function downloadAndRemove(url::ASCIIString) run(`unzip $filepath -d $dirName`) - # get filename of unzipped file + # get filename of unzipped file; cut off _TXT from filename extInd = basename(url) |> - x -> searchindex(x, ".") + x -> searchindex(x, ".") |> + x -> x - 4 # read in file