# Investor - Flow of Funds - US

### Introduction:

Special thanks to: https://github.com/rgrp for sharing the dataset.

### Step 1. Import the necessary libraries

In [1]:
import pandas as pd
import numpy as np

# visualization
import matplotlib.pyplot as plt

%matplotlib inline

### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/datasets/investor-flow-of-funds-us/master/data/weekly.csv). 

### Step 3. Assign it to a variable called df

In [2]:
# Weekly investor flow-of-funds CSV from the datasets/investor-flow-of-funds-us repository
url = 'https://raw.githubusercontent.com/datasets/investor-flow-of-funds-us/master/data/weekly.csv'
# A comma is already read_csv's default delimiter, so it is not spelled out
df = pd.read_csv(url)
df.head()

Unnamed: 0,Date,Total Equity,Domestic Equity,World Equity,Hybrid,Total Bond,Taxable Bond,Municipal Bond,Total
0,2012-12-05,-7426,-6060,-1367,-74,5317,4210,1107,-2183
1,2012-12-12,-8783,-7520,-1263,123,1818,1598,219,-6842
2,2012-12-19,-5496,-5470,-26,-73,103,3472,-3369,-5466
3,2012-12-26,-4451,-4076,-375,550,2610,3333,-722,-1291
4,2013-01-02,-11156,-9622,-1533,-158,2383,2103,280,-8931


In [3]:
# Last five rows, to see where the series ends
df.tail(n=5)

Unnamed: 0,Date,Total Equity,Domestic Equity,World Equity,Hybrid,Total Bond,Taxable Bond,Municipal Bond,Total
39,2015-03-11,3911,-7,3918,851,1298,999,298,6059
40,2015-03-18,1948,-1758,3706,912,452,258,194,3312
41,2015-03-25,-1167,-4478,3311,538,2404,1701,703,1775
42,2015-04-01,-1527,-3307,1780,720,-1296,-1392,96,-2103
43,2015-04-08,1906,-1321,3227,250,1719,1906,-187,3875


### Step 4.  What is the frequency of the dataset?

In [4]:
# Answer: the data is weekly (most consecutive observations are 7 days apart;
# the gap distribution is computed in the next cell).
# Parse the 'Date' strings into datetimes first so that date arithmetic works.
df = df.assign(Date=pd.to_datetime(df['Date'], format='%Y-%m-%d'))
df.dtypes

Date               datetime64[ns]
Total Equity                int64
Domestic Equity             int64
World Equity                int64
Hybrid                      int64
Total Bond                  int64
Taxable Bond                int64
Municipal Bond              int64
Total                       int64
dtype: object

In [5]:
# Percentage share of each gap length between consecutive observations
df['Date'].diff().value_counts(normalize=True).mul(100)

7 days      86.046512
21 days      4.651163
448 days     2.325581
56 days      2.325581
6 days       2.325581
43 days      2.325581
Name: Date, dtype: float64

### Step 5. Set the column Date as the index.

In [6]:
# Move 'Date' out of the columns and use it as the row index
df = df.set_index('Date')
df.head(n=20)

Unnamed: 0_level_0,Total Equity,Domestic Equity,World Equity,Hybrid,Total Bond,Taxable Bond,Municipal Bond,Total
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2012-12-05,-7426,-6060,-1367,-74,5317,4210,1107,-2183
2012-12-12,-8783,-7520,-1263,123,1818,1598,219,-6842
2012-12-19,-5496,-5470,-26,-73,103,3472,-3369,-5466
2012-12-26,-4451,-4076,-375,550,2610,3333,-722,-1291
2013-01-02,-11156,-9622,-1533,-158,2383,2103,280,-8931
2013-01-09,14817,7995,6821,2888,9766,7311,2455,27471
2014-04-02,3155,938,2217,265,3379,3129,250,6799
2014-04-09,5761,2080,3681,1482,1609,1448,161,8852
2014-04-16,2286,634,1652,1186,633,604,29,4105
2014-04-23,3530,1392,2138,1239,1984,1453,531,6753


### Step 6. What is the type of the index?

In [7]:
# it is a DatetimeIndex (dtype datetime64[ns]), not 'object': 'Date' was already parsed with pd.to_datetime in Step 4
df.index

DatetimeIndex(['2012-12-05', '2012-12-12', '2012-12-19', '2012-12-26',
               '2013-01-02', '2013-01-09', '2014-04-02', '2014-04-09',
               '2014-04-16', '2014-04-23', '2014-04-30', '2014-05-07',
               '2014-05-14', '2014-05-21', '2014-05-28', '2014-06-04',
               '2014-06-11', '2014-06-18', '2014-06-25', '2014-07-02',
               '2014-07-09', '2014-07-30', '2014-08-06', '2014-08-13',
               '2014-08-20', '2014-08-27', '2014-09-03', '2014-09-10',
               '2014-11-05', '2014-11-12', '2014-11-19', '2014-11-25',
               '2015-01-07', '2015-01-14', '2015-01-21', '2015-01-28',
               '2015-02-04', '2015-02-11', '2015-03-04', '2015-03-11',
               '2015-03-18', '2015-03-25', '2015-04-01', '2015-04-08'],
              dtype='datetime64[ns]', name='Date', freq=None)

### Step 7. Set the index to a DatetimeIndex type

In [8]:
# 'Date' was already converted with pd.to_datetime in Step 4, so the index is
# a DatetimeIndex before this line runs; the assignment is kept for the exercise
df.index = pd.to_datetime(df.index)
df.index

DatetimeIndex(['2012-12-05', '2012-12-12', '2012-12-19', '2012-12-26',
               '2013-01-02', '2013-01-09', '2014-04-02', '2014-04-09',
               '2014-04-16', '2014-04-23', '2014-04-30', '2014-05-07',
               '2014-05-14', '2014-05-21', '2014-05-28', '2014-06-04',
               '2014-06-11', '2014-06-18', '2014-06-25', '2014-07-02',
               '2014-07-09', '2014-07-30', '2014-08-06', '2014-08-13',
               '2014-08-20', '2014-08-27', '2014-09-03', '2014-09-10',
               '2014-11-05', '2014-11-12', '2014-11-19', '2014-11-25',
               '2015-01-07', '2015-01-14', '2015-01-21', '2015-01-28',
               '2015-02-04', '2015-02-11', '2015-03-04', '2015-03-11',
               '2015-03-18', '2015-03-25', '2015-04-01', '2015-04-08'],
              dtype='datetime64[ns]', name='Date', freq=None)

### Step 8. Change the frequency to monthly, sum the values and assign the result to `df_month`.

In [9]:
# asfreq does not aggregate: every month-end label just takes the most recent
# earlier row ('pad'), so months without data repeat the last observed values.
# The summed version is built with resample in the next cell.
df.asfreq('M', method='pad').head(n=20)

Unnamed: 0_level_0,Total Equity,Domestic Equity,World Equity,Hybrid,Total Bond,Taxable Bond,Municipal Bond,Total
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2012-12-31,-4451,-4076,-375,550,2610,3333,-722,-1291
2013-01-31,14817,7995,6821,2888,9766,7311,2455,27471
2013-02-28,14817,7995,6821,2888,9766,7311,2455,27471
2013-03-31,14817,7995,6821,2888,9766,7311,2455,27471
2013-04-30,14817,7995,6821,2888,9766,7311,2455,27471
2013-05-31,14817,7995,6821,2888,9766,7311,2455,27471
2013-06-30,14817,7995,6821,2888,9766,7311,2455,27471
2013-07-31,14817,7995,6821,2888,9766,7311,2455,27471
2013-08-31,14817,7995,6821,2888,9766,7311,2455,27471
2013-09-30,14817,7995,6821,2888,9766,7311,2455,27471


In [10]:
# Bucket the weekly rows by calendar month (labelled at month end) and add them up
df_month = df.resample(rule='M').sum()
df_month.head(n=20)

Unnamed: 0_level_0,Total Equity,Domestic Equity,World Equity,Hybrid,Total Bond,Taxable Bond,Municipal Bond,Total
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2012-12-31,-26156,-23126,-3031,526,9848,12613,-2765,-15782
2013-01-31,3661,-1627,5288,2730,12149,9414,2735,18540
2013-02-28,0,0,0,0,0,0,0,0
2013-03-31,0,0,0,0,0,0,0,0
2013-04-30,0,0,0,0,0,0,0,0
2013-05-31,0,0,0,0,0,0,0,0
2013-06-30,0,0,0,0,0,0,0,0
2013-07-31,0,0,0,0,0,0,0,0
2013-08-31,0,0,0,0,0,0,0,0
2013-09-30,0,0,0,0,0,0,0,0


### Step 9. You will notice that the months without any data were filled with zeros. Let's drop these rows.

In [13]:
# Preview only (df_month is not modified here): keep the months whose absolute
# flows do not add up to zero, i.e. months with at least one non-zero value
df_month.loc[df_month.abs().sum(axis=1).ne(0)]

Unnamed: 0_level_0,Total Equity,Domestic Equity,World Equity,Hybrid,Total Bond,Taxable Bond,Municipal Bond,Total
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2012-12-31,-26156,-23126,-3031,526,9848,12613,-2765,-15782
2013-01-31,3661,-1627,5288,2730,12149,9414,2735,18540
2014-04-30,10842,1048,9794,4931,8493,7193,1300,24267
2014-05-31,-2203,-8720,6518,3172,13767,10192,3576,14736
2014-06-30,2319,-6546,8865,4588,9715,7551,2163,16621
2014-07-31,-7051,-11128,4078,2666,7506,7026,481,3122
2014-08-31,1943,-5508,7452,1885,1897,-1013,2910,5723
2014-09-30,-2767,-6596,3829,1599,3984,2479,1504,2816
2014-11-30,-2753,-7239,4485,729,14528,11566,2962,12502
2015-01-31,3471,-1164,4635,1729,7368,2762,4606,12569


In [14]:
# Boolean mask: True wherever a monthly value is exactly zero
df_month.eq(0)

Unnamed: 0_level_0,Total Equity,Domestic Equity,World Equity,Hybrid,Total Bond,Taxable Bond,Municipal Bond,Total
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2012-12-31,False,False,False,False,False,False,False,False
2013-01-31,False,False,False,False,False,False,False,False
2013-02-28,True,True,True,True,True,True,True,True
2013-03-31,True,True,True,True,True,True,True,True
2013-04-30,True,True,True,True,True,True,True,True
2013-05-31,True,True,True,True,True,True,True,True
2013-06-30,True,True,True,True,True,True,True,True
2013-07-31,True,True,True,True,True,True,True,True
2013-08-31,True,True,True,True,True,True,True,True
2013-09-30,True,True,True,True,True,True,True,True


In [16]:
# Keep a month when any of its columns is non-zero, which drops the all-zero filler rows
df_month = df_month.loc[(df_month != 0).any(axis=1)]
df_month.head(n=5)

Unnamed: 0_level_0,Total Equity,Domestic Equity,World Equity,Hybrid,Total Bond,Taxable Bond,Municipal Bond,Total
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2012-12-31,-26156,-23126,-3031,526,9848,12613,-2765,-15782
2013-01-31,3661,-1627,5288,2730,12149,9414,2735,18540
2014-04-30,10842,1048,9794,4931,8493,7193,1300,24267
2014-05-31,-2203,-8720,6518,3172,13767,10192,3576,14736
2014-06-30,2319,-6546,8865,4588,9715,7551,2163,16621


### Step 10. Good, now we have the monthly data. Now change the frequency to year.

In [17]:
# Roll the monthly totals up to calendar years (labelled at year end)
df_year = df_month.resample(rule='Y').sum()
df_year.head(n=5)

Unnamed: 0_level_0,Total Equity,Domestic Equity,World Equity,Hybrid,Total Bond,Taxable Bond,Municipal Bond,Total
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2012-12-31,-26156,-23126,-3031,526,9848,12613,-2765,-15782
2013-12-31,3661,-1627,5288,2730,12149,9414,2735,18540
2014-12-31,330,-44689,45021,19570,59890,44994,14896,79787
2015-12-31,15049,-10459,25508,7280,26028,17986,8041,48357


In [18]:
# Work on a separate frame that carries the calendar year of each month-end label
df_month_aux = df_month.assign(Year=df_month.index.year)
df_month_aux

Unnamed: 0_level_0,Total Equity,Domestic Equity,World Equity,Hybrid,Total Bond,Taxable Bond,Municipal Bond,Total,Year
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2012-12-31,-26156,-23126,-3031,526,9848,12613,-2765,-15782,2012
2013-01-31,3661,-1627,5288,2730,12149,9414,2735,18540,2013
2014-04-30,10842,1048,9794,4931,8493,7193,1300,24267,2014
2014-05-31,-2203,-8720,6518,3172,13767,10192,3576,14736,2014
2014-06-30,2319,-6546,8865,4588,9715,7551,2163,16621,2014
2014-07-31,-7051,-11128,4078,2666,7506,7026,481,3122,2014
2014-08-31,1943,-5508,7452,1885,1897,-1013,2910,5723,2014
2014-09-30,-2767,-6596,3829,1599,3984,2479,1504,2816,2014
2014-11-30,-2753,-7239,4485,729,14528,11566,2962,12502,2014
2015-01-31,3471,-1164,4635,1729,7368,2762,4606,12569,2015


In [20]:
# Same yearly totals as the resample above, obtained by grouping on the 'Year' column
df_month_aux.groupby(by='Year').sum()

Unnamed: 0_level_0,Total Equity,Domestic Equity,World Equity,Hybrid,Total Bond,Taxable Bond,Municipal Bond,Total
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2012,-26156,-23126,-3031,526,9848,12613,-2765,-15782
2013,3661,-1627,5288,2730,12149,9414,2735,18540
2014,330,-44689,45021,19570,59890,44994,14896,79787
2015,15049,-10459,25508,7280,26028,17986,8041,48357


### BONUS: Create your own question and answer it.