# This is an example file which loads input data and plots it for you

## 1. Load some useful library for analysis

In [15]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math

%matplotlib notebook

## 2. Load the data

In [30]:
df = pd.read_csv('data.csv').set_index('Date')

## 3. Some example plotting

In [31]:
df.plot()

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x7fb16d19c0b8>

## 4. Exploration!

Looking at Data

In [32]:
print(df.head())
print(df.describe())
print(df.info())

                  Asset      Signal1     Signal2     Signal3     Signal4  \
Date                                                                       
2000-01-03  4258.481700  4237.827494  393.780879 -337.933192  659.763906   
2000-01-04  4260.246443  4222.822224  378.585650 -364.449284  648.940384   
2000-01-05  4262.254628  4213.824762  389.955615 -391.594831  641.329710   
2000-01-06  4289.405332  4239.016108  406.660516 -333.249594  683.822313   
2000-01-07  4293.746629  4232.818801  403.366787 -320.479903  682.553985   

               Signal5      Signal6     Signal7     Signal8  
Date                                                         
2000-01-03 -752.865453  1034.127659 -950.257818  198.264577  
2000-01-04 -732.505856  1042.256957 -977.358249  211.206055  
2000-01-05 -728.611976  1063.217724 -951.518386  198.595598  
2000-01-06 -729.657222  1079.316062 -932.785850  181.822031  
2000-01-07 -713.180569  1087.226725 -941.674136  197.875560  
             Asset      Signal1  

**Histograms of different columns**

In [33]:
df.hist(bins = 10)

<IPython.core.display.Javascript object>

array([[<matplotlib.axes._subplots.AxesSubplot object at 0x7fb16d163710>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7fb16d0fb940>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7fb16d112b70>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x7fb16d12f080>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7fb16d0c65f8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7fb16d0dfb70>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x7fb16d07c128>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7fb16d0936a0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7fb16d0abbe0>]],
      dtype=object)

In [23]:
df.hist("Asset", bins = 10)

<IPython.core.display.Javascript object>

array([[<matplotlib.axes._subplots.AxesSubplot object at 0x7fb170405160>]],
      dtype=object)

In [34]:
df.hist("Signal1", bins = 10)

<IPython.core.display.Javascript object>

array([[<matplotlib.axes._subplots.AxesSubplot object at 0x7fb16cf98908>]],
      dtype=object)

In [24]:
df.hist("Signal2", bins = 10)

<IPython.core.display.Javascript object>

array([[<matplotlib.axes._subplots.AxesSubplot object at 0x7fb1703b20b8>]],
      dtype=object)

In [35]:
df.hist("Signal3", bins = 10)

<IPython.core.display.Javascript object>

array([[<matplotlib.axes._subplots.AxesSubplot object at 0x7fb16cf75f28>]],
      dtype=object)

In [48]:
df.hist("Signal4", bins = 10)

<IPython.core.display.Javascript object>

array([[<matplotlib.axes._subplots.AxesSubplot object at 0x7fb1673e45c0>]],
      dtype=object)

In [36]:
df.hist("Signal5", bins = 10)

<IPython.core.display.Javascript object>

array([[<matplotlib.axes._subplots.AxesSubplot object at 0x7fb16cf37b70>]],
      dtype=object)

In [26]:
df.hist("Signal6", bins = 10)

<IPython.core.display.Javascript object>

array([[<matplotlib.axes._subplots.AxesSubplot object at 0x7fb17033be10>]],
      dtype=object)

In [37]:
df.hist("Signal7", bins = 10)

<IPython.core.display.Javascript object>

array([[<matplotlib.axes._subplots.AxesSubplot object at 0x7fb16ce836d8>]],
      dtype=object)

In [38]:
df.hist("Signal8", bins = 10)

<IPython.core.display.Javascript object>

array([[<matplotlib.axes._subplots.AxesSubplot object at 0x7fb16ceadfd0>]],
      dtype=object)

**Observations**
1. The asset price seems to be log-normally distributed. This will be confirmed below.
2. Signals are distributed in different ways. Seems like Signal1 and Signal4 are mosst similarly distributed as the asset price.


**Checking distributions of returns**

In [47]:
Asset = df["Asset"].to_numpy()
Asset_Returns = 1.0* (Asset[1:] - Asset[:-1])/Asset[:-1]

hist_lens ,bins, patches  = plt.hist(Asset_Returns, bins = 20)
plt.xlabel("Returns")

def gaussian_func(x, mu, sigma):
    y = 1.0/math.sqrt(2*math.pi*(sigma**2))*np.exp(-0.5* (x - mu)**2 / sigma**2)
    return y
    

The above data gives confidence that the returns are normally distributed.

**Exploring Signals**

Let us see relations between individual signals and the Asset price

**Signal 1**


In [55]:

df[["Asset", "Signal1"]].plot()
plt.show()


s1 = df["Asset"]
s2 = df["Signal1"]

window_size = 100

roll_corr = s1.rolling(window_size).corr(s2)[window_size:]
print(roll_corr.to_numpy())

plt.figure()
plt.plot(roll_corr.to_numpy())
plt.show()

<IPython.core.display.Javascript object>

[ 0.82034856  0.81643942  0.81159478 ... -0.53432257 -0.55433712
 -0.57207627]


<IPython.core.display.Javascript object>

In [54]:

df[["Asset", "Signal1"]].plot()
plt.show()


s1 = df["Asset"]
s2 = df["Signal2"]

window_size = 100

roll_corr = s1.rolling(window_size).corr(s2)[window_size:]
print(roll_corr.to_numpy())

plt.plot(roll_corr.to_numpy())
plt.show()

<IPython.core.display.Javascript object>

[-0.2563949  -0.23038933 -0.19664295 ... -0.76776294 -0.74219801
 -0.7232191 ]
