# EDA - Net Asset Value Forecasting

- Gain insights into the historical NAV data.
- Visualize trends, seasonality, and anomalies in the data.
- Identify key features influencing NAV.

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
path = '../Data/Net Asset Value.csv'
data=pd.read_csv(path)
data.head()

Unnamed: 0,Scheme Name,Net Asset Value,Outstanding Number of Units,Nav Per Unit,Sale Price per Unit,Repurchase Price/Unit,Date Valued
0,Umoja Fund,302291686824.91,344671758.31,877.0422,877.0422,868.2718,30-12-2022
1,Wekeza Maisha Fund,6658727935.83,8978247.54,741.6512,741.6512,726.8182,30-12-2022
2,Watoto Fund,8426930098.23,15378315.7,547.9748,547.9748,542.4951,30-12-2022
3,Jikimu Fund,19122648898.31,120180812.85,159.1157,159.1157,155.9333,30-12-2022
4,Liquid Fund,559272074566.94,1632828600.86,342.5173,342.5173,342.5173,30-12-2022


In [3]:
df=data[['Scheme Name','Net Asset Value','Outstanding Number of Units','Nav Per Unit','Sale Price per Unit','Repurchase Price/Unit','Date Valued']]
df.head()

Unnamed: 0,Scheme Name,Net Asset Value,Outstanding Number of Units,Nav Per Unit,Sale Price per Unit,Repurchase Price/Unit,Date Valued
0,Umoja Fund,302291686824.91,344671758.31,877.0422,877.0422,868.2718,30-12-2022
1,Wekeza Maisha Fund,6658727935.83,8978247.54,741.6512,741.6512,726.8182,30-12-2022
2,Watoto Fund,8426930098.23,15378315.7,547.9748,547.9748,542.4951,30-12-2022
3,Jikimu Fund,19122648898.31,120180812.85,159.1157,159.1157,155.9333,30-12-2022
4,Liquid Fund,559272074566.94,1632828600.86,342.5173,342.5173,342.5173,30-12-2022


In [4]:
df.shape

(9088, 7)

In [5]:
df.dtypes

Scheme Name                     object
Net Asset Value                 object
Outstanding Number of Units     object
Nav Per Unit                   float64
Sale Price per Unit            float64
Repurchase Price/Unit          float64
Date Valued                     object
dtype: object

### data cleaning

In [6]:
import re
def clean_and_extract_number(s):
    cleaned_value = re.sub(r'[^\d.]', '', str(s))
    return cleaned_value
numeric_columns = ['Net Asset Value','Outstanding Number of Units']
for col in numeric_columns:
    df[col] = df[col].apply(clean_and_extract_number)
df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric, errors='coerce')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9088 entries, 0 to 9087
Data columns (total 7 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   Scheme Name                  9088 non-null   object 
 1   Net Asset Value              9088 non-null   float64
 2   Outstanding Number of Units  9088 non-null   float64
 3   Nav Per Unit                 9088 non-null   float64
 4   Sale Price per Unit          9088 non-null   float64
 5   Repurchase Price/Unit        9088 non-null   float64
 6   Date Valued                  9088 non-null   object 
dtypes: float64(5), object(2)
memory usage: 497.1+ KB


In [7]:
df['Date Valued'] = df['Date Valued'].astype(str)
df['Date Valued'] = df['Date Valued'].str.replace('/', '-')  
df['Date Valued'] = pd.to_datetime(df['Date Valued'], format='%d-%m-%Y', errors='coerce')
df['Date Valued'] = pd.to_datetime(df['Date Valued'])
df.dtypes

Scheme Name                            object
Net Asset Value                       float64
Outstanding Number of Units           float64
Nav Per Unit                          float64
Sale Price per Unit                   float64
Repurchase Price/Unit                 float64
Date Valued                    datetime64[ns]
dtype: object

In [8]:
df.head()

Unnamed: 0,Scheme Name,Net Asset Value,Outstanding Number of Units,Nav Per Unit,Sale Price per Unit,Repurchase Price/Unit,Date Valued
0,Umoja Fund,302291700000.0,344671800.0,877.0422,877.0422,868.2718,2022-12-30
1,Wekeza Maisha Fund,6658728000.0,8978248.0,741.6512,741.6512,726.8182,2022-12-30
2,Watoto Fund,8426930000.0,15378320.0,547.9748,547.9748,542.4951,2022-12-30
3,Jikimu Fund,19122650000.0,120180800.0,159.1157,159.1157,155.9333,2022-12-30
4,Liquid Fund,559272100000.0,1632829000.0,342.5173,342.5173,342.5173,2022-12-30


### EDA

In [10]:
df.describe().T

Unnamed: 0,count,mean,min,25%,50%,75%,max,std
Net Asset Value,9088.0,80818440558.51947,146107741.12,3525975795.38,19654065211.860001,177426121949.957489,570121620321.589966,112508941656.71744
Outstanding Number of Units,9088.0,381630616.964447,2668395.93,9766251.3375,145563536.325,359074889.4425,452173239578.73999,6165714658.568305
Nav Per Unit,9088.0,344.105499,101.3698,153.956925,321.21975,501.4478,877.0422,200.638451
Sale Price per Unit,9088.0,344.105413,101.3698,153.956925,321.21975,501.4478,877.0422,200.638543
Repurchase Price/Unit,9088.0,340.082981,101.3698,150.877825,316.07645,496.08325,868.2718,197.907457
Date Valued,9088.0,2019-11-18 02:25:36.971830784,2017-01-02 00:00:00,2018-02-12 00:00:00,2019-12-04 00:00:00,2021-06-21 00:00:00,2022-12-30 00:00:00,


In [11]:
from statsmodels.tsa.stattools import adfuller
result = adfuller(df['Net Asset Value'])
p_value = result[1]

if p_value < 0.05:
    print("Net Asset Value time series is likely stationary.")
else:
    print("Net Asset Value time series is likely non-stationary.")

Net Asset Value time series is likely stationary.


### handling outliers

In [12]:
#log transformation
df['Log_NAV'] = np.log(df['Net Asset Value'])

# Square Root Transformation
df['Sqrt_NAV'] = np.sqrt(df['Net Asset Value'])


# Display the first few rows to check the results
print(df[['Net Asset Value', 'Sqrt_NAV', 'Log_NAV']].head())

   Net Asset Value       Sqrt_NAV    Log_NAV
0     3.022917e+11  549810.591772  26.434658
1     6.658728e+09   81601.029012  22.619194
2     8.426930e+09   91798.312066  22.854698
3     1.912265e+10  138284.666172  23.674139
4     5.592721e+11  747844.953561  27.049902
