In [1]:
import pandas as pd
from pandas import DataFrame, Series

## Return a single element of a Series

In [2]:
# No index is passed, so the Series gets the default integer labels 0..3;
# s[2] looks up the label 2, which here is also the third position.
s = Series([4,5,6,7])
s[2]

6

## Return the elements of a Series with labels 1 and 3

In [10]:
# Passing a list of labels returns a new Series holding just those entries.
# The selection is the cell's last expression, so the notebook displays it
# directly; wrapping it in print() is unnecessary.
s = Series([1,2,3,4])
s[[1,3]]

1    2
3    4
dtype: int64


## Creating a Series with an explicit index

In [12]:
# Label the five values with the letters 'a'..'e' instead of the default
# integer positions, then display the resulting Series.
s = Series(data=[1, 2, 3, 4, 5], index=list('abcde'))
s

a    1
b    2
c    3
d    4
e    5
dtype: int64

In [14]:
# A list of index labels selects the matching entries, in the order listed.
s[['a','c']]

a    1
c    3
dtype: int64

In [16]:
# Select by position (0-based). The index holds string labels, so integer
# keys passed to plain [] are only treated as positions through a fallback
# that pandas has deprecated; .iloc states the positional intent explicitly.
s.iloc[[0,1,3]]

a    1
b    2
d    4
dtype: int64

In [17]:
# The .index attribute exposes the labels attached to the Series' values.
s.index

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

In [21]:
# Build a DatetimeIndex covering 1-7 January 2023 inclusive; with no
# explicit frequency, date_range steps one calendar day at a time.
dates = pd.date_range(start='2023-01-01', end='2023-01-07')
dates

DatetimeIndex(['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04',
               '2023-01-05', '2023-01-06', '2023-01-07'],
              dtype='datetime64[ns]', freq='D')

In [22]:
# One reading per day, labelled by the dates built above.
temp1 = Series(data=[40, 50, 55, 57, 60, 65, 54], index=dates)
temp1

2023-01-01    40
2023-01-02    50
2023-01-03    55
2023-01-04    57
2023-01-05    60
2023-01-06    65
2023-01-07    54
Freq: D, dtype: int64

In [24]:
# Average of all seven readings in temp1, rounded to two decimal places.
round(temp1.mean(),2)

54.43

In [25]:
# A second Series on the same date index as temp1. Subtraction aligns the
# two Series on their shared labels, giving temp2 minus temp1 per day.
temp2 = Series(data=[57, 62, 48, 47, 52, 53, 63], index=dates)
temp_diff = temp2.sub(temp1)
temp_diff

2023-01-01    17
2023-01-02    12
2023-01-03    -7
2023-01-04   -10
2023-01-05    -8
2023-01-06   -12
2023-01-07     9
Freq: D, dtype: int64

In [34]:
# The date strings are matched against the labels of the DatetimeIndex,
# returning the entries for those three days.
temp_diff[['2023-01-01','2023-01-04','2023-01-06']]

2023-01-01    17
2023-01-04   -10
2023-01-06   -12
dtype: int64

In [32]:
# Select by position (third entry, then second). The index holds dates, so
# integer keys passed to plain [] are only treated as positions through a
# fallback that pandas has deprecated; .iloc makes the intent explicit.
temp_diff.iloc[[2,1]]

2023-01-03    -7
2023-01-02    12
Freq: -1D, dtype: int64

In [35]:
# Combine the two Series into one table: each keyword becomes a column
# name, and the shared date index becomes the row index.
temp_df = DataFrame(data=dict(Delhi=temp1, Mumbai=temp2))
temp_df

Unnamed: 0,Delhi,Mumbai
2023-01-01,40,57
2023-01-02,50,62
2023-01-03,55,48
2023-01-04,57,47
2023-01-05,60,52
2023-01-06,65,53
2023-01-07,54,63


In [36]:
# Bracket access with a single column name returns that column as a Series.
temp_df['Delhi']

2023-01-01    40
2023-01-02    50
2023-01-03    55
2023-01-04    57
2023-01-05    60
2023-01-06    65
2023-01-07    54
Freq: D, Name: Delhi, dtype: int64

In [38]:
# A list of column names returns a DataFrame with the columns in the
# order listed (here Mumbai first, then Delhi).
temp_df[['Mumbai','Delhi']]

Unnamed: 0,Mumbai,Delhi
2023-01-01,57,40
2023-01-02,62,50
2023-01-03,48,55
2023-01-04,47,57
2023-01-05,52,60
2023-01-06,53,65
2023-01-07,63,54


In [40]:
# Attribute-style access: returns the same Series as temp_df['Delhi'].
temp_df.Delhi

2023-01-01    40
2023-01-02    50
2023-01-03    55
2023-01-04    57
2023-01-05    60
2023-01-06    65
2023-01-07    54
Freq: D, Name: Delhi, dtype: int64

In [41]:
# Attribute-style access to the 'Mumbai' column.
temp_df.Mumbai

2023-01-01    57
2023-01-02    62
2023-01-03    48
2023-01-04    47
2023-01-05    52
2023-01-06    53
2023-01-07    63
Freq: D, Name: Mumbai, dtype: int64

In [43]:
# Per-day difference between the two columns, computed as Delhi minus
# Mumbai. Note the sign is the opposite of the earlier `temp_diff` Series,
# which was temp2 - temp1 (i.e. Mumbai minus Delhi).
temp_diffs = temp_df.Delhi.sub(temp_df.Mumbai)
temp_diffs

2023-01-01   -17
2023-01-02   -12
2023-01-03     7
2023-01-04    10
2023-01-05     8
2023-01-06    12
2023-01-07    -9
Freq: D, dtype: int64

In [44]:
# Assigning to a new column name adds the column to temp_df in place, so
# earlier displays of temp_df no longer reflect its current contents.
temp_df['temp_diff'] = temp_diffs
temp_df

Unnamed: 0,Delhi,Mumbai,temp_diff
2023-01-01,40,57,-17
2023-01-02,50,62,-12
2023-01-03,55,48,7
2023-01-04,57,47,10
2023-01-05,60,52,8
2023-01-06,65,53,12
2023-01-07,54,63,-9


In [45]:
# The column labels of the DataFrame, including the newly added column.
temp_df.columns

Index(['Delhi', 'Mumbai', 'temp_diff'], dtype='object')

In [46]:
# An integer slice on the column is positional: positions 1, 2 and 3
# (the end position 4 is excluded).
temp_df.temp_diff[1:4]

2023-01-02   -12
2023-01-03     7
2023-01-04    10
Freq: D, Name: temp_diff, dtype: int64

In [54]:
# A single row comes back as a Series whose index is the DataFrame's
# column labels.
temp_df.iloc[1].index

Index(['Delhi', 'Mumbai', 'temp_diff'], dtype='object')

## Locating a record in a DataFrame by its key

In [51]:
# .loc looks the row up by its index label; the date string is matched
# against the DatetimeIndex and the row is returned as a Series.
temp_df.loc['2023-01-03']

Delhi        55
Mumbai       48
temp_diff     7
Name: 2023-01-03 00:00:00, dtype: int64

### To access specific rows of a DataFrame, use the `.iloc` or `.loc` property (unlike a Series, plain `[]` with a label selects a column)

In [57]:
# .iloc with a list of integers selects rows by 0-based position.
temp_df.iloc[[1,2,4]]

Unnamed: 0,Delhi,Mumbai,temp_diff
2023-01-02,50,62,-12
2023-01-03,55,48,7
2023-01-05,60,52,8


In [58]:
# Pick the rows by position first, then take the temp_diff column of the
# resulting smaller DataFrame.
temp_df.iloc[[1,2,4]].temp_diff

2023-01-02   -12
2023-01-03     7
2023-01-05     8
Name: temp_diff, dtype: int64

In [60]:
# Element-wise comparison: yields a boolean Series (True where Delhi > 55)
# that can be used as a row filter.
temp_df.Delhi > 55

2023-01-01    False
2023-01-02    False
2023-01-03    False
2023-01-04     True
2023-01-05     True
2023-01-06     True
2023-01-07    False
Freq: D, Name: Delhi, dtype: bool

## Boolean Selection

In [62]:
# Keep only the rows whose Delhi value is strictly greater than 55; the
# boolean Series produced by .gt() acts as the row mask.
temp_df[temp_df.Delhi.gt(55)]

Unnamed: 0,Delhi,Mumbai,temp_diff
2023-01-04,57,47,10
2023-01-05,60,52,8
2023-01-06,65,53,12


## Loading data from a CSV file

In [3]:
# Shell escape that prints the raw CSV file. NOTE(review): `type` is a
# Windows command and the absolute path is specific to one machine, so this
# cell will not run elsewhere as written.
! type C:\Users\radha\Downloads\test1.csv

quarter,SER_REF,industry_code,industry_name,filled jobs,filled jobs revised,filled jobs diff,filled jobs % diff,total_earnings,total earnings revised,earnings diff,earnings % diff
2020.09,BDCQ.SEA1AA,A,"Agriculture, Forestry and Fishing",93195,93706,511,0.5,1205,1212,7,0.6
2020.09,BDCQ.SEA1BA,B,Mining,5340,5355,15,0.3,145,145,0,0
2020.09,BDCQ.SEA1CA,C,Manufacturing,217139,218967,1828,0.8,3791,3823,32,0.8
2020.09,BDCQ.SEA1DA,D,"Electricity, Gas, Water and Waste S",19886,20060,174,0.9,493,498,5,1
2020.09,BDCQ.SEA1EA,E,Construction,166413,168095,1682,1,2873,2906,33,1.1
2020.09,BDCQ.SEA1FA,F,Wholesale Trade,103816,104974,1158,1.1,1867,1892,25,1.3
2020.09,BDCQ.SEA1GA,G,Retail Trade,197373,198379,1006,0.5,2160,2173,13,0.6
2020.09,BDCQ.SEA1HA,H,Accommodation and Food Services,138259,139176,917,0.7,1089,1098,9,0.8
2020.09,BDCQ.SEA1IA,I,"Transport, Postal and Warehousing",84413,85499,1086,1.3,1520,1536,16,1.1
2020.09,BDCQ.SEA1JA,J,Information Media and Telecommunica,29086,30117,1031,3.5,611,634

## Loading the same information into a DataFrame object