# Basic Data Analysis

In [None]:
"""
                    BASIC DATA ANALYSIS
--> Basic Facts about loaded data set:
    --> .info() : prints column names, column dtypes, non-null counts and the total number of rows
        --> df.info(verbose=True) : will list out all the columns by their names and types
        --> df.info(show_counts=True) : will also show the non-null count of each column
    --> .describe() : returns mean, min, max, quartiles, standard deviation
                    --> By default, this method only includes numerical columns.
        --> df.describe(include='all') : which includes categorical columns
                    --> returns number of unique values 
                    --> returns most frequent values and their frequency
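    --> df.nunique() : returns the number of unique values in each column
    --> Series.unique() : returns an array of unique values (Series only, e.g. df['col'].unique(); a DataFrame has no .unique())
    --> Series.value_counts() : returns a Series of values and their frequency, most frequent first
    --> df.count() : returns the number of non-null values (per column for a DataFrame, a single number for a Series)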
    --> df.mean()
    --> df.median()
    --> df.std()
    --> df.quantile()
"""

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Read the fund dataset from a CSV located relative to the working directory,
# then preview its first rows.
csv_path = 'Morningstar - European Mutual Funds.csv'
df = pd.read_csv(csv_path)
df.head()

In [None]:
# Dimensions of the loaded frame as a (rows, columns) tuple.
df.shape

In [None]:
# First five rows selected by position (all columns are kept).
df.iloc[:5]

In [None]:
# Summary of the frame with default settings (index, columns, dtypes, memory usage).
df.info()

In [None]:
# Upper bound on the number of columns for which info() prints a per-column listing.
# NOTE(review): 100 matches pandas' documented default for this option —
# confirm whether a larger limit was intended.
pd.set_option("display.max_info_columns", 100)

In [None]:
# Re-run the summary to observe the effect of the max_info_columns setting.
df.info()

In [None]:
# verbose=True requests the full per-column listing instead of following
# the display.max_info_columns setting.
df.info(verbose=True)

In [None]:
# Full per-column listing that also includes each column's non-null count.
df.info(verbose=True, show_counts=True)

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max).
# With default arguments only the numeric columns are included.
stats = df.describe()
stats

In [None]:
# Lift the column-display limit so wide frames are rendered without truncation.
pd.set_option("display.max_columns", None)

In [None]:
# Display the summary again, now that the column-display limit has been lifted.
stats

In [None]:
# include='all' also summarises non-numeric columns (unique / top / freq rows).
# Note: this rebinds `stats`, replacing the numeric-only summary computed earlier.
stats = df.describe(include='all')
stats

In [None]:
# Summary of a text column; this key only exists in the include='all' summary.
stats['morningstar_category']

In [None]:
# Summary of a numeric column taken from the same describe() result.
stats['fund_return_2018']

In [None]:
# Narrow the frame to the identifier, name, category and 2018-return columns
# used by the cells below, then preview seven rows.
selected_columns = ['ticker', 'fund_name', 'morningstar_category', 'fund_return_2018']
data = df.loc[:, selected_columns]
data.head(7)

In [None]:
# Promote 'ticker' from a regular column to the row index.
# Guarded so the cell is safe to re-run: once 'ticker' has become the index
# it is no longer a column, and a second set_index('ticker') would raise KeyError.
if 'ticker' in data.columns:
    data = data.set_index('ticker')

In [None]:
# Preview the subset; 'ticker' should now appear as the index rather than a column.
data.head()

In [None]:
# Summary of every column in the subset, text columns included.
data.describe(include='all')

In [None]:
# Number of distinct category labels (missing values are not counted by default).
data['morningstar_category'].nunique()

In [None]:
# Distinct-value count for each column of the subset.
data.nunique()

In [None]:
# Distinct category labels, in order of first appearance in the column.
category_column = data['morningstar_category']
categories = category_column.unique()
print(categories)

In [97]:
# Category labels as a plain Python list in ascending order
# (plain string comparison, so upper-case letters sort before lower-case ones).
sorted_categories = list(categories)
sorted_categories.sort()

In [98]:
# The list is already in ascending order when this cell runs, so sorting it
# again in place would be a no-op. Check the ordering invariant instead of
# mutating the list a second time.
assert all(a <= b for a, b in zip(sorted_categories, sorted_categories[1:]))

In [99]:
# Print one label per line so the complete list can be read without array truncation.
for category in sorted_categories:
    print(category)

ASEAN Equity
Africa & Middle East Equity
Africa Equity
Alt - Currency
Alt - Event Driven
Alt - Global Macro
Alt - Long/Short Credit
Alt - Long/Short Equity - Europe
Alt - Long/Short Equity - Global
Alt - Long/Short Equity - Other
Alt - Long/Short Equity - UK
Alt - Long/Short Equity - US
Alt - Market Neutral - Equity
Alt - Multistrategy
Alt - Other
Alt - Relative Value Arbitrage
Alt - Systematic Futures
Alt - Volatility
Asia Allocation
Asia Bond
Asia Bond - Local Currency
Asia High Yield Bond
Asia ex-Japan Equity
Asia ex-Japan Small/Mid-Cap Equity
Asia-Pacific ex-Japan Equity
Asia-Pacific ex-Japan Equity Income
Asia-Pacific inc. Japan Equity
Australia & New Zealand Equity
BRIC Equity
Brazil Equity
CHF Aggressive Allocation
CHF Bond
CHF Bond - Short Term
CHF Cautious Allocation
CHF Moderate Allocation
CHF Money Market
Canada Equity
Capital Protected
China Equity
China Equity - A Shares
Commodities - Broad Agriculture
Commodities - Broad Basket
Convertible Bond - Europe
Convertible Bond -

In [100]:
# Number of funds per category label; value_counts() orders the result
# from most to least frequent.
category_column = data['morningstar_category']
cat_frequency = category_column.value_counts()
cat_frequency

Other Bond                        3418
Other Equity                      3209
Global Large-Cap Blend Equity     2054
Global Emerging Markets Equity    1822
GBP Moderate Allocation           1085
                                  ... 
Global Bond - GBP Biased             1
Global Bond - ILS                    1
Target Date 2011 - 2015              1
NOK Moderate Allocation              1
Guaranteed Funds                     1
Name: morningstar_category, Length: 261, dtype: int64

In [102]:
# Confirm the container type returned by value_counts() (a pandas Series).
type(cat_frequency)

pandas.core.series.Series

In [104]:
# The category labels form the index of the counts Series.
cat_frequency.index

Index(['Other Bond', 'Other Equity', 'Global Large-Cap Blend Equity',
       'Global Emerging Markets Equity', 'GBP Moderate Allocation',
       'Alt - Multistrategy', 'Global Emerging Markets Bond',
       'US Large-Cap Blend Equity', 'Japan Large-Cap Equity',
       'GBP Moderately Adventurous Allocation',
       ...
       'Global Bond - NOK Hedged', 'NOK Cautious Allocation', 'Vietnam Equity',
       'EUR Aggressive Allocation', 'RMB High Yield Bond',
       'Global Bond - GBP Biased', 'Global Bond - ILS',
       'Target Date 2011 - 2015', 'NOK Moderate Allocation',
       'Guaranteed Funds'],
      dtype='object', length=261)

In [108]:
# Print every "count<TAB>label" pair in value_counts() order (most frequent first);
# .items() yields (index label, value) pairs.
for cat, freq in cat_frequency.items():
    print(f'{freq}\t{cat}')

3418	Other Bond
3209	Other Equity
2054	Global Large-Cap Blend Equity
1822	Global Emerging Markets Equity
1085	GBP Moderate Allocation
899	Alt - Multistrategy
874	Global Emerging Markets Bond
828	US Large-Cap Blend Equity
780	Japan Large-Cap Equity
756	GBP Moderately Adventurous Allocation
713	Alt - Long/Short Credit
702	Europe Large-Cap Blend Equity
696	Global Equity Income
632	Global Large-Cap Growth Equity
626	Global Emerging Markets Bond - Local Currency
616	UK Large-Cap Equity
553	Asia ex-Japan Equity
546	GBP Moderately Cautious Allocation
531	Europe ex-UK Large-Cap Equity
527	USD Moderate Allocation
509	US Large-Cap Growth Equity
490	Global Bond
464	Global High Yield Bond
456	UK Equity Income
444	EUR Corporate Bond
442	USD High Yield Bond
423	Global Flexible Bond - GBP Hedged
419	GBP Corporate Bond
415	GBP Adventurous Allocation
415	Other Allocation
413	Global Emerging Markets Bond - EUR Biased
400	Global Flexible Bond - EUR Hedged
390	EUR Moderate Allocation - Global
381	UK Flex-

In [109]:
# Same counts ordered by category label instead of by frequency;
# sort_index() returns a new Series and leaves cat_frequency unchanged.
cat_frequency.sort_index()

ASEAN Equity                      63
Africa & Middle East Equity       53
Africa Equity                     46
Alt - Currency                    66
Alt - Event Driven               100
                                ... 
USD Inflation-Linked Bond         16
USD Moderate Allocation          527
USD Money Market                  35
USD Money Market - Short Term    288
Vietnam Equity                     2
Name: morningstar_category, Length: 261, dtype: int64

In [115]:
# Print every "count<TAB>label" pair, this time ordered by label.
for cat, freq in cat_frequency.sort_index().items():
    print(f'{freq}\t{cat}')

63	ASEAN Equity
53	Africa & Middle East Equity
46	Africa Equity
66	Alt - Currency
100	Alt - Event Driven
200	Alt - Global Macro
713	Alt - Long/Short Credit
209	Alt - Long/Short Equity - Europe
114	Alt - Long/Short Equity - Global
43	Alt - Long/Short Equity - Other
103	Alt - Long/Short Equity - UK
87	Alt - Long/Short Equity - US
303	Alt - Market Neutral - Equity
899	Alt - Multistrategy
40	Alt - Other
21	Alt - Relative Value Arbitrage
128	Alt - Systematic Futures
121	Alt - Volatility
31	Asia Allocation
142	Asia Bond
111	Asia Bond - Local Currency
52	Asia High Yield Bond
553	Asia ex-Japan Equity
143	Asia ex-Japan Small/Mid-Cap Equity
331	Asia-Pacific ex-Japan Equity
260	Asia-Pacific ex-Japan Equity Income
88	Asia-Pacific inc. Japan Equity
15	Australia & New Zealand Equity
132	BRIC Equity
105	Brazil Equity
27	CHF Aggressive Allocation
28	CHF Bond
7	CHF Bond - Short Term
50	CHF Cautious Allocation
68	CHF Moderate Allocation
16	CHF Money Market
12	Canada Equity
8	Capital Protected
381	China 

In [118]:
# Default describe(): only the numeric column of the subset is summarised.
data.describe()

Unnamed: 0,fund_return_2018
count,41580.0
mean,-4.740028
std,6.921632
min,-81.78
25%,-9.2
50%,-4.29
75%,-0.17
max,49.98


In [120]:
# Pull the 2018 return column out as a Series so the individual statistics
# below can be computed on it directly.
col = data['fund_return_2018']
col

ticker
0P00000AWF   -18.13
0P00000AYI   -14.11
0P00000BOW     3.26
0P00000ESH    -1.60
0P00000ESL    -6.79
              ...  
FOUSA088S1   -13.56
FOUSA08ML5   -18.80
FOUSA0905L    11.58
FOUSA09F2D     0.37
FOUSA09F2G     0.73
Name: fund_return_2018, Length: 49399, dtype: float64

In [125]:
# Non-null count, mean and standard deviation — the same figures describe() reports.
col.count(), col.mean(), col.std()

(41580, -4.7400283790284075, 6.921632080857115)

In [126]:
# Smallest and largest 2018 return in the column.
col.min(), col.max()

(-81.78, 49.98)

In [128]:
# Lower quartile, median and upper quartile, each computed by a separate
# scalar quantile() call and collected into a tuple.
tuple(col.quantile(q) for q in (0.25, 0.5, 0.75))

(-9.2, -4.29, -0.17)

In [129]:
# Passing a list returns all requested quantiles at once as a Series indexed by quantile.
col.quantile([0.25, 0.5, 0.75])

0.25   -9.20
0.50   -4.29
0.75   -0.17
Name: fund_return_2018, dtype: float64