Collect numbers for the diameters of these planets (heavenly bodies) and store it as a Series object. Then given these two Series objects mass and diameter, compute the density of each planet.

In [None]:
import pandas as pd
import numpy as np

In [None]:
mass = pd.Series([0.33, 4.87, 5.97, 0.642, 1898, 568, 86.8, 102, 0.0146], 
                 index=['Mercury', 'Venus', 'Earth', 'Mars', 'Jupiter', 'Saturn', 'Uranus', 'Neptune', 'Pluto'])

In [None]:
diameter = pd.Series([4879.4, 12104, 12742, 6779, 139820, 116460, 50724, 49244, 2373.6], index=['Mercury', 'Venus', 'Earth', 'Mars', 'Jupiter', 'Saturn', 'Uranus', 'Neptune', 'Pluto'])

In [None]:
mass['PlanetX'] = 6

In [None]:
diameter

Mercury      4879.4
Venus       12104.0
Earth       12742.0
Mars         6779.0
Jupiter    139820.0
Saturn     116460.0
Uranus      50724.0
Neptune     49244.0
Pluto        2373.6
dtype: float64

In [None]:
mass

Mercury       0.3300
Venus         4.8700
Earth         5.9700
Mars          0.6420
Jupiter    1898.0000
Saturn      568.0000
Uranus       86.8000
Neptune     102.0000
Pluto         0.0146
PlanetX       6.0000
dtype: float64

In [None]:
density = pd.Series()

  """Entry point for launching an IPython kernel.


In [None]:
for planet in mass.index:
  density[planet] = (mass[planet]) / (np.pi * np.power(diameter[planet], 3)/6)

NameError: ignored

In [None]:
density

Series([], dtype: float64)

In [None]:
density = mass / (np.pi * np.power(diameter, 3)/6)

In [None]:
density

Earth      5.511412e-12
Jupiter    1.326141e-12
Mars       3.935859e-12
Mercury    5.425204e-12
Neptune    1.631329e-12
PlanetX             NaN
Pluto      2.085123e-12
Saturn     6.867827e-13
Uranus     1.270225e-12
Venus      5.244977e-12
dtype: float64

Given this density Series, replace all values which NaNs with the mean density of all planets

In [None]:
density.index

Index(['Earth', 'Jupiter', 'Mars', 'Mercury', 'Neptune', 'PlanetX', 'Pluto',
       'Saturn', 'Uranus', 'Venus'],
      dtype='object')

In [None]:
density_mean = np.mean(density)

In [None]:
%%time
for key in density.index:
  if pd.isnull(density[key]):
    density[key] = density_mean

CPU times: user 0 ns, sys: 206 µs, total: 206 µs
Wall time: 210 µs


In [None]:
density

Earth      5.511412e-12
Jupiter    1.326141e-12
Mars       3.935859e-12
Mercury    5.425204e-12
Neptune    1.631329e-12
PlanetX    3.013006e-12
Pluto      2.085123e-12
Saturn     6.867827e-13
Uranus     1.270225e-12
Venus      5.244977e-12
dtype: float64

In [None]:
%%time
density[pd.isnull(density)] = np.mean(density)

CPU times: user 3.48 ms, sys: 48 µs, total: 3.53 ms
Wall time: 3.45 ms


In [None]:
print(density)

Earth      5.511412e-12
Jupiter    1.326141e-12
Mars       3.935859e-12
Mercury    5.425204e-12
Neptune    1.631329e-12
PlanetX    3.013006e-12
Pluto      2.085123e-12
Saturn     6.867827e-13
Uranus     1.270225e-12
Venus      5.244977e-12
dtype: float64


In [None]:
%%time
replaced = np.nan_to_num(density, copy=False, nan = np.mean(density)) #alternative method 

CPU times: user 652 µs, sys: 0 ns, total: 652 µs
Wall time: 627 µs


array([5.51141237e-12, 1.32614097e-12, 3.93585875e-12, 5.42520373e-12,
       1.63132909e-12, 3.01300582e-12, 2.08512278e-12, 6.86782715e-13,
       1.27022490e-12, 5.24497707e-12])

Compare Dictionary with Series: 
- checking if some key is present
- summing values
- computing std

In [None]:
import pandas as pd
import numpy as np

In [None]:
my_dict = {}
N = 10000000
for i in range(N):
  my_dict[i] = i%10

In [None]:
my_series = pd.Series(my_dict)

In [None]:
M = 10000

In [None]:
arr = np.random.randint(0, N, M)

In [None]:
%%timeit
for i in arr:
  i in my_dict

100 loops, best of 5: 14.4 ms per loop


In [None]:
%%timeit
for i in arr:
  i in my_series

100 loops, best of 5: 13.5 ms per loop


In [None]:
%%timeit
sum(my_dict.values())

10 loops, best of 5: 111 ms per loop


In [None]:
%%timeit
np.sum(my_series)

100 loops, best of 5: 14.4 ms per loop


In [None]:
%%timeit
mean = sum(my_dict.values())/N
variance = sum([(x - mean)**2 for x in my_dict.values()])
std = variance ** 0.5

1 loop, best of 5: 1.88 s per loop


In [None]:
%%timeit
np.std(my_series)

10 loops, best of 5: 38.3 ms per loop


# **Nifty Case study**

In [None]:
import numpy as np
import pandas as pd

In [None]:
nifty = pd.read_csv('nifty.csv', index_col=0).iloc[:,0]

In [None]:
nifty

Date
01-Jan-2019    10910.10
02-Jan-2019    10792.50
03-Jan-2019    10672.25
04-Jan-2019    10727.35
07-Jan-2019    10771.80
                 ...   
24-Dec-2019    12214.55
26-Dec-2019    12126.55
27-Dec-2019    12245.80
30-Dec-2019    12255.85
31-Dec-2019    12168.45
Name: Close, Length: 245, dtype: float64

In [None]:
nifty[1:]

Date
02-Jan-2019    10792.50
03-Jan-2019    10672.25
04-Jan-2019    10727.35
07-Jan-2019    10771.80
08-Jan-2019    10802.15
                 ...   
24-Dec-2019    12214.55
26-Dec-2019    12126.55
27-Dec-2019    12245.80
30-Dec-2019    12255.85
31-Dec-2019    12168.45
Name: Close, Length: 244, dtype: float64

In [None]:
nifty[:-1]

Date
01-Jan-2019    10910.10
02-Jan-2019    10792.50
03-Jan-2019    10672.25
04-Jan-2019    10727.35
07-Jan-2019    10771.80
                 ...   
23-Dec-2019    12262.75
24-Dec-2019    12214.55
26-Dec-2019    12126.55
27-Dec-2019    12245.80
30-Dec-2019    12255.85
Name: Close, Length: 244, dtype: float64

In [None]:
nifty[1:] - nifty[:-1]

Date
01-Apr-2019    0.0
01-Aug-2019    0.0
01-Feb-2019    0.0
01-Jan-2019    NaN
01-Jul-2019    0.0
              ... 
31-Dec-2019    NaN
31-Jan-2019    0.0
31-Jul-2019    0.0
31-May-2019    0.0
31-Oct-2019    0.0
Name: Close, Length: 245, dtype: float64

In [None]:
nifty.values[1:] - nifty.values[:-1]

array([-1.1760e+02, -1.2025e+02,  5.5100e+01,  4.4450e+01,  3.0350e+01,
        5.3000e+01, -3.3550e+01, -2.6650e+01, -5.7350e+01,  1.4920e+02,
        3.5000e+00,  1.4900e+01,  1.7500e+00,  5.4900e+01, -3.9100e+01,
       -9.1250e+01,  1.8300e+01, -6.9250e+01, -1.1900e+02, -9.3500e+00,
       -4.0000e-01,  1.7915e+02,  6.2700e+01,  1.8600e+01,  2.2100e+01,
        1.2810e+02,  6.9500e+00, -1.2580e+02, -5.4800e+01, -5.7400e+01,
       -3.7750e+01, -4.7600e+01, -2.1650e+01, -8.3450e+01, -3.6600e+01,
        1.3110e+02,  5.4400e+01,  1.8000e+00,  8.8450e+01, -4.4800e+01,
       -2.8650e+01, -1.4150e+01,  7.1000e+01,  1.2395e+02,  6.5550e+01,
        5.2000e+00, -2.2800e+01,  1.3265e+02,  1.3315e+02,  4.0500e+01,
        1.5500e+00,  8.3600e+01,  3.5350e+01,  7.0200e+01, -1.1350e+01,
       -6.4150e+01, -1.0265e+02,  1.2900e+02, -3.8200e+01,  1.2495e+02,
        5.3900e+01,  4.5250e+01,  4.4050e+01, -6.9250e+01, -4.5950e+01,
        6.7950e+01, -6.1450e+01,  6.7450e+01, -8.7650e+01,  1.24

In [None]:
(nifty.values[1:] - nifty.values[:-1]) > 0

array([False, False,  True,  True,  True,  True, False, False, False,
        True,  True,  True,  True,  True, False, False,  True, False,
       False, False, False,  True,  True,  True,  True,  True,  True,
       False, False, False, False, False, False, False, False,  True,
        True,  True,  True, False, False, False,  True,  True,  True,
        True, False,  True,  True,  True,  True,  True,  True,  True,
       False, False, False,  True, False,  True,  True,  True,  True,
       False, False,  True, False,  True, False,  True,  True,  True,
        True, False, False, False,  True, False,  True, False, False,
       False, False, False, False, False, False, False,  True, False,
        True,  True,  True, False,  True, False,  True,  True,  True,
       False,  True, False,  True, False, False,  True,  True,  True,
       False,  True, False, False,  True, False,  True, False, False,
        True,  True, False, False,  True,  True,  True,  True, False,
       False, False,

In [None]:
np.sum((nifty.values[1:] - nifty.values[:-1]) > 0)/len(nifty)

0.5265306122448979

1. Compute moving average of the last 5 days

2. Subset the data to include only data for Fridays

In [None]:
nifty

Date
01-Jan-2019    10910.10
02-Jan-2019    10792.50
03-Jan-2019    10672.25
04-Jan-2019    10727.35
07-Jan-2019    10771.80
                 ...   
24-Dec-2019    12214.55
26-Dec-2019    12126.55
27-Dec-2019    12245.80
30-Dec-2019    12255.85
31-Dec-2019    12168.45
Name: Close, Length: 245, dtype: float64

In [None]:
d = pd.Timestamp(nifty.index[0])

In [None]:
d.dayofweek

1

In [None]:
new_index = map(pd.Timestamp, nifty.index)

In [None]:
new_nifty = pd.Series(nifty, index=new_index)

In [None]:
new_nifty

2019-01-01    10910.10
2019-01-02    10792.50
2019-01-03    10672.25
2019-01-04    10727.35
2019-01-07    10771.80
                ...   
2019-12-24    12214.55
2019-12-26    12126.55
2019-12-27    12245.80
2019-12-30    12255.85
2019-12-31    12168.45
Name: Close, Length: 245, dtype: float64

In [None]:
new_nifty.index[0]

Timestamp('2019-01-01 00:00:00')

In [None]:
new_nifty.rolling('5d').mean()

2019-01-01    10910.100000
2019-01-02    10851.300000
2019-01-03    10791.616667
2019-01-04    10775.550000
2019-01-07    10723.800000
                  ...     
2019-12-24    12249.700000
2019-12-26    12201.283333
2019-12-27    12212.412500
2019-12-30    12209.400000
2019-12-31    12223.366667
Name: Close, Length: 245, dtype: float64

In [None]:
day_of_week.index

DatetimeIndex(['2019-01-01', '2019-01-02', '2019-01-03', '2019-01-04',
               '2019-01-07', '2019-01-08', '2019-01-09', '2019-01-10',
               '2019-01-11', '2019-01-14',
               ...
               '2019-12-17', '2019-12-18', '2019-12-19', '2019-12-20',
               '2019-12-23', '2019-12-24', '2019-12-26', '2019-12-27',
               '2019-12-30', '2019-12-31'],
              dtype='datetime64[ns]', length=245, freq=None)

In [None]:
day_of_week = new_nifty.copy()
for i in day_of_week.index:
  day_of_week[i] = i.dayofweek

In [None]:
day_of_week

2019-01-01    1.0
2019-01-02    2.0
2019-01-03    3.0
2019-01-04    4.0
2019-01-07    0.0
             ... 
2019-12-24    1.0
2019-12-26    3.0
2019-12-27    4.0
2019-12-30    0.0
2019-12-31    1.0
Name: Close, Length: 245, dtype: float64