In [2]:
import numpy as np
import pandas as pd
import math

# Create Series Objects

In [4]:
s = pd.Series([0,1,1,2,3,5,8])

In [5]:
s

0    0
1    1
2    1
3    2
4    3
5    5
6    8
dtype: int64

In [6]:
s = pd.Series([0.0,1,1,2,3,5,8])

In [7]:
s

0    0.0
1    1.0
2    1.0
3    2.0
4    3.0
5    5.0
6    8.0
dtype: float64

In [8]:
s.values

array([0., 1., 1., 2., 3., 5., 8.])

In [9]:
s.index

RangeIndex(start=0, stop=7, step=1)

In [10]:
for item in zip(s.index, s.values):
    print(item)

(0, 0.0)
(1, 1.0)
(2, 1.0)
(3, 2.0)
(4, 3.0)
(5, 5.0)
(6, 8.0)


In [11]:
s[0]

0.0

In [12]:
s[3]

2.0

In [33]:
mercury = pd.Series([0.33,57.9,4222.6], index=['mass', 'diameter', 'day length']) #Mass, diameter and data length

In [34]:
mercury

mass             0.33
diameter        57.90
day length    4222.60
dtype: float64

In [35]:
mercury['day length']

4222.6

In [36]:
mercury.mass # Dont use

0.33

In [20]:
arr = np.random.randint(0,10,10)

In [21]:
arr

array([9, 4, 4, 2, 6, 6, 6, 5, 4, 2])

In [24]:
index = np.arange(10,20)

In [25]:
randSeries = pd.Series(arr, index=index)

In [26]:
randSeries

10    9
11    4
12    4
13    2
14    6
15    6
16    6
17    5
18    4
19    2
dtype: int32

In [27]:
# mars = pd.Series([0.33,57.9,4222.6], index=['mass', 'diameter', 'day length'])
d = {}
d['mass'] = 0.33
d['diameter'] = 57.9
d['dayLength'] = 4222.6

In [28]:
d

{'mass': 0.33, 'diameter': 57.9, 'dayLength': 4222.6}

In [37]:
mercury = pd.Series(d)

In [38]:
mercury

mass            0.33
diameter       57.90
dayLength    4222.60
dtype: float64

In [39]:
mercury = pd.Series(d, index=['mass', 'dayLength']) # To filter in values in series

In [40]:
mercury

mass            0.33
dayLength    4222.60
dtype: float64

## iLOC and LOC

In [41]:
s = pd.Series([0,1,1,2,3,5,8], index=[1,2,3,4,5,6,7])

In [42]:
s

1    0
2    1
3    1
4    2
5    3
6    5
7    8
dtype: int64

In [44]:
s.loc[4]

2

In [45]:
s.iloc[4]

3

In [46]:
s.iloc[-1]

8

In [47]:
s.iloc[0:2]

1    0
2    1
dtype: int64

In [48]:
 mercury.loc[:'day length']

mass            0.33
dayLength    4222.60
dtype: float64

In [100]:
mass = pd.Series([0.33, 4.87, 5.97, 0.642, 1898, 568, 86.8, 102, 0.0146], index = ['Mercury', 'Venus', 'Earth', 'Mars', 'Jupiter', 'Saturn', 'Uranus', 'Neptune', 'Pluto'])

In [101]:
print(mass)

Mercury       0.3300
Venus         4.8700
Earth         5.9700
Mars          0.6420
Jupiter    1898.0000
Saturn      568.0000
Uranus       86.8000
Neptune     102.0000
Pluto         0.0146
dtype: float64


In [102]:
mass.iloc[1]

4.87

In [103]:
mass.loc['Earth']

5.97

In [104]:
mass['Earth': 'Jupiter']

Earth         5.970
Mars          0.642
Jupiter    1898.000
dtype: float64

In [105]:
mass.iloc[2:5]

Earth         5.970
Mars          0.642
Jupiter    1898.000
dtype: float64

In [106]:
mass>100

Mercury    False
Venus      False
Earth      False
Mars       False
Jupiter     True
Saturn      True
Uranus     False
Neptune     True
Pluto      False
dtype: bool

In [107]:
mass[mass>100]

Jupiter    1898.0
Saturn      568.0
Neptune     102.0
dtype: float64

In [108]:
mass[(mass>100) & (mass<600)]

Saturn     568.0
Neptune    102.0
dtype: float64

In [109]:
mass/10

Mercury      0.03300
Venus        0.48700
Earth        0.59700
Mars         0.06420
Jupiter    189.80000
Saturn      56.80000
Uranus       8.68000
Neptune     10.20000
Pluto        0.00146
dtype: float64

In [110]:
np.mean(mass)

296.29184444444445

In [111]:
np.amin(mass)

0.0146

In [112]:
np.amax(mass)

1898.0

In [113]:
np.median(mass)

5.97

In [114]:
mass + mass

Mercury       0.6600
Venus         9.7400
Earth        11.9400
Mars          1.2840
Jupiter    3796.0000
Saturn     1136.0000
Uranus      173.6000
Neptune     204.0000
Pluto         0.0292
dtype: float64

In [115]:
mass - mass

Mercury    0.0
Venus      0.0
Earth      0.0
Mars       0.0
Jupiter    0.0
Saturn     0.0
Uranus     0.0
Neptune    0.0
Pluto      0.0
dtype: float64

In [116]:
big_mass = mass[mass>100]

In [117]:
big_mass

Jupiter    1898.0
Saturn      568.0
Neptune     102.0
dtype: float64

In [118]:
mass

Mercury       0.3300
Venus         4.8700
Earth         5.9700
Mars          0.6420
Jupiter    1898.0000
Saturn      568.0000
Uranus       86.8000
Neptune     102.0000
Pluto         0.0146
dtype: float64

In [119]:
new_mass = mass + big_mass

In [120]:
new_mass

Earth         NaN
Jupiter    3796.0
Mars          NaN
Mercury       NaN
Neptune     204.0
Pluto         NaN
Saturn     1136.0
Uranus        NaN
Venus         NaN
dtype: float64

In [121]:
new_mass[~pd.isnull(new_mass)]

Jupiter    3796.0
Neptune     204.0
Saturn     1136.0
dtype: float64

In [122]:
mass['Moon'] = 0.7346

In [123]:
mass

Mercury       0.3300
Venus         4.8700
Earth         5.9700
Mars          0.6420
Jupiter    1898.0000
Saturn      568.0000
Uranus       86.8000
Neptune     102.0000
Pluto         0.0146
Moon          0.7346
dtype: float64

In [124]:
mass.drop(['Pluto'])

Mercury       0.3300
Venus         4.8700
Earth         5.9700
Mars          0.6420
Jupiter    1898.0000
Saturn      568.0000
Uranus       86.8000
Neptune     102.0000
Moon          0.7346
dtype: float64

**Task 1**


Collect numbers for diameter of planets and store as series and calculate density of each planet

In [130]:
diameter = pd.Series([4879,	12104,	12756,	6792,	142984,120536,	51118,	49528,	2370, 3475.0], index = ['Mercury', 'Venus', 'Earth', 'Mars', 'Jupiter', 'Saturn', 'Uranus', 'Neptune', 'Pluto', 'Moon'])

In [131]:
diameter

Mercury      4879.0
Venus       12104.0
Earth       12756.0
Mars         6792.0
Jupiter    142984.0
Saturn     120536.0
Uranus      51118.0
Neptune     49528.0
Pluto        2370.0
Moon         3475.0
dtype: float64

In [142]:
volume = (1/6) * math.pi *diameter * diameter * diameter

In [143]:
volume

Mercury    6.081225e+10
Venus      9.285074e+11
Earth      1.086781e+12
Mars       1.640558e+11
Jupiter    1.530597e+15
Saturn     9.169570e+14
Uranus     6.993912e+13
Neptune    6.361375e+13
Pluto      6.970175e+09
Moon       2.197167e+10
dtype: float64

In [164]:
density = mass/volume

In [145]:
density

Mercury    5.426538e-12
Venus      5.244977e-12
Earth      5.493286e-12
Mars       3.913302e-12
Jupiter    1.240039e-12
Saturn     6.194402e-13
Uranus     1.241079e-12
Neptune    1.603427e-12
Pluto      2.094639e-12
Moon       3.343396e-11
dtype: float64

In [146]:
density1 = pd.Series([])

In [150]:
for planet in mass.index:
    density1[planet] = mass[planet]/ (np.pi * diameter[planet] * diameter[planet] * diameter[planet] / 6)

In [151]:
density1

Mercury    5.426538e-12
Venus      5.244977e-12
Earth      5.493286e-12
Mars       3.913302e-12
Jupiter    1.240039e-12
Saturn     6.194402e-13
Uranus     1.241079e-12
Neptune    1.603427e-12
Pluto      2.094639e-12
Moon       3.343396e-11
dtype: float64

In [154]:
density_eff = mass/ (np.pi * np.power(diameter,3)/6) # Most  efficient

In [155]:
density_eff

Mercury    5.426538e-12
Venus      5.244977e-12
Earth      5.493286e-12
Mars       3.913302e-12
Jupiter    1.240039e-12
Saturn     6.194402e-13
Uranus     1.241079e-12
Neptune    1.603427e-12
Pluto      2.094639e-12
Moon       3.343396e-11
dtype: float64

**Task 2**


Given density, replace NaNs with mean density of remaining planets

In [156]:
density_mean = np.mean(density)

In [158]:
density_mean


(6.031069152893776e-12, numpy.float64)

In [163]:
mass['PlanetX'] = 500

In [165]:
density

Earth      5.493286e-12
Jupiter    1.240039e-12
Mars       3.913302e-12
Mercury    5.426538e-12
Moon       3.343396e-11
Neptune    1.603427e-12
PlanetX             NaN
Pluto      2.094639e-12
Saturn     6.194402e-13
Uranus     1.241079e-12
Venus      5.244977e-12
dtype: float64

In [166]:
density[np.isnan(density)] = density_mean

In [167]:
density

Earth      5.493286e-12
Jupiter    1.240039e-12
Mars       3.913302e-12
Mercury    5.426538e-12
Moon       3.343396e-11
Neptune    1.603427e-12
PlanetX    6.031069e-12
Pluto      2.094639e-12
Saturn     6.194402e-13
Uranus     1.241079e-12
Venus      5.244977e-12
dtype: float64

**Task 3**


Compare the series way to dictionaries
- checking presence of a key
- summing values
- computing std


In [196]:
my_dict ={}
N = 1000000
for i in range(N):
    my_dict[i] = i %10

In [197]:
my_series = pd.Series(my_dict)

In [198]:
M = 10000


In [199]:
arr = np.random.randint(0,N,M)

In [200]:
%%timeit
for i in arr:
    (i in my_dict)

9.33 ms ± 718 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [201]:
%%timeit
for i in arr:
    i in my_series

32.2 ms ± 532 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


*Series takes more time to find an item than dictionary*


In [203]:
%%timeit
sum(my_dict.values())

12.1 ms ± 820 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [204]:
%%timeit 
np.sum(my_series)

652 µs ± 101 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


*Series sums faster*

In [214]:
%%timeit
mean = sum(my_dict.values())/N
var = sum([(x-mean)**2 for x in my_dict.values()])
std = var ** 0.5

326 ms ± 7.51 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [215]:
%%timeit 
np.std(my_series)

23.3 ms ± 1.99 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


*Series find standard deviation faster*

## NIFTY Case Study

In [4]:
nifty = pd.read_csv("C:/Users/apoor/Desktop/Programs/Python Programs/nifty.csv", index_col = 0).iloc[:,0]

In [5]:
nifty

Date
01-Jan-2019    10910.10
02-Jan-2019    10792.50
03-Jan-2019    10672.25
04-Jan-2019    10727.35
07-Jan-2019    10771.80
                 ...   
24-Dec-2019    12214.55
26-Dec-2019    12126.55
27-Dec-2019    12245.80
30-Dec-2019    12255.85
31-Dec-2019    12168.45
Name: Close, Length: 245, dtype: float64

In [6]:
nifty.head(25)

Date
01-Jan-2019    10910.10
02-Jan-2019    10792.50
03-Jan-2019    10672.25
04-Jan-2019    10727.35
07-Jan-2019    10771.80
08-Jan-2019    10802.15
09-Jan-2019    10855.15
10-Jan-2019    10821.60
11-Jan-2019    10794.95
14-Jan-2019    10737.60
15-Jan-2019    10886.80
16-Jan-2019    10890.30
17-Jan-2019    10905.20
18-Jan-2019    10906.95
21-Jan-2019    10961.85
22-Jan-2019    10922.75
23-Jan-2019    10831.50
24-Jan-2019    10849.80
25-Jan-2019    10780.55
28-Jan-2019    10661.55
29-Jan-2019    10652.20
30-Jan-2019    10651.80
31-Jan-2019    10830.95
01-Feb-2019    10893.65
04-Feb-2019    10912.25
Name: Close, dtype: float64

In [7]:
nifty.tail(25)

Date
26-Nov-2019    12037.70
27-Nov-2019    12100.70
28-Nov-2019    12151.15
29-Nov-2019    12056.05
02-Dec-2019    12048.20
03-Dec-2019    11994.20
04-Dec-2019    12043.20
05-Dec-2019    12018.40
06-Dec-2019    11921.50
09-Dec-2019    11937.50
10-Dec-2019    11856.80
11-Dec-2019    11910.15
12-Dec-2019    11971.80
13-Dec-2019    12086.70
16-Dec-2019    12053.95
17-Dec-2019    12165.00
18-Dec-2019    12221.65
19-Dec-2019    12259.70
20-Dec-2019    12271.80
23-Dec-2019    12262.75
24-Dec-2019    12214.55
26-Dec-2019    12126.55
27-Dec-2019    12245.80
30-Dec-2019    12255.85
31-Dec-2019    12168.45
Name: Close, dtype: float64

In [8]:
np.mean(nifty)

11432.632244897959

In [9]:
np.median(nifty)

11512.4

In [10]:
np.std(nifty)

453.28669474598075

Fraction of days did market close higher than the previous day's close

In [11]:
nifty[0]

10910.1

In [12]:
nifty[1]

10792.5

In [13]:
nifty[1:]

Date
02-Jan-2019    10792.50
03-Jan-2019    10672.25
04-Jan-2019    10727.35
07-Jan-2019    10771.80
08-Jan-2019    10802.15
                 ...   
24-Dec-2019    12214.55
26-Dec-2019    12126.55
27-Dec-2019    12245.80
30-Dec-2019    12255.85
31-Dec-2019    12168.45
Name: Close, Length: 244, dtype: float64

In [14]:
nifty[:-1]

Date
01-Jan-2019    10910.10
02-Jan-2019    10792.50
03-Jan-2019    10672.25
04-Jan-2019    10727.35
07-Jan-2019    10771.80
                 ...   
23-Dec-2019    12262.75
24-Dec-2019    12214.55
26-Dec-2019    12126.55
27-Dec-2019    12245.80
30-Dec-2019    12255.85
Name: Close, Length: 244, dtype: float64

In [15]:
nifty[1:] - nifty[:-1]
# Wont work
# This is because during subtraction, the indices are matched and then subtraction

Date
01-Apr-2019    0.0
01-Aug-2019    0.0
01-Feb-2019    0.0
01-Jan-2019    NaN
01-Jul-2019    0.0
              ... 
31-Dec-2019    NaN
31-Jan-2019    0.0
31-Jul-2019    0.0
31-May-2019    0.0
31-Oct-2019    0.0
Name: Close, Length: 245, dtype: float64

In [18]:
nifty_diff = nifty.values[1:] - nifty.values[:-1]

In [19]:
np.sum(nifty_diff>0)/len(nifty)

0.5265306122448979

**Tasks**

1. Compute moving values of the last 5 days

2. Subset data to include data only for Friday

In [20]:
nifty.index[0]

'01-Jan-2019'

In [22]:
d = pd.Timestamp(nifty.index[0])

In [24]:
d.dayofweek # 0 is Monday, 1 is Tuesday

1

In [25]:
new_index = map(pd.Timestamp, nifty.index)

In [27]:
new_nifty = pd.Series(nifty, index=new_index)

In [28]:
new_nifty

2019-01-01    10910.10
2019-01-02    10792.50
2019-01-03    10672.25
2019-01-04    10727.35
2019-01-07    10771.80
                ...   
2019-12-24    12214.55
2019-12-26    12126.55
2019-12-27    12245.80
2019-12-30    12255.85
2019-12-31    12168.45
Name: Close, Length: 245, dtype: float64

In [30]:
new_nifty.index[5]

Timestamp('2019-01-08 00:00:00')

In [32]:
new_nifty.rolling('5d').mean()

2019-01-01    10910.100000
2019-01-02    10851.300000
2019-01-03    10791.616667
2019-01-04    10775.550000
2019-01-07    10723.800000
                  ...     
2019-12-24    12249.700000
2019-12-26    12201.283333
2019-12-27    12212.412500
2019-12-30    12209.400000
2019-12-31    12223.366667
Name: Close, Length: 245, dtype: float64

In [35]:
dow = new_nifty.copy()
for i in dow.index:
    dow[i] = i.dayofweek


In [36]:
dow

2019-01-01    1.0
2019-01-02    2.0
2019-01-03    3.0
2019-01-04    4.0
2019-01-07    0.0
             ... 
2019-12-24    1.0
2019-12-26    3.0
2019-12-27    4.0
2019-12-30    0.0
2019-12-31    1.0
Name: Close, Length: 245, dtype: float64

In [37]:
new_nifty[dow == 4]

2019-01-04    10727.35
2019-01-11    10794.95
2019-01-18    10906.95
2019-01-25    10780.55
2019-02-01    10893.65
2019-02-08    10943.60
2019-02-15    10724.40
2019-02-22    10791.65
2019-03-01    10863.50
2019-03-08    11035.40
2019-03-15    11426.85
2019-03-22    11456.90
2019-03-29    11623.90
2019-04-05    11665.95
2019-04-12    11643.45
2019-04-26    11754.65
2019-05-03    11712.25
2019-05-10    11278.90
2019-05-17    11407.15
2019-05-24    11844.10
2019-05-31    11922.80
2019-06-07    11870.65
2019-06-14    11823.30
2019-06-21    11724.10
2019-06-28    11788.85
2019-07-05    11811.15
2019-07-12    11552.50
2019-07-19    11419.25
2019-07-26    11284.30
2019-08-02    10997.35
2019-08-09    11109.65
2019-08-16    11047.80
2019-08-23    10829.35
2019-08-30    11023.25
2019-09-06    10946.20
2019-09-13    11075.90
2019-09-20    11274.20
2019-09-27    11512.40
2019-10-04    11174.75
2019-10-11    11305.05
2019-10-18    11661.85
2019-10-25    11583.90
2019-11-01    11890.60
2019-11-08 

## Test

In [44]:
data = [1,2,3,4]
s = pd.Series(data,index=[1,2,3,4])
b = (s>2).any()
b

True