# Pandas introduction
```sh
pip install pandas 
```

In [1]:
import pandas as pd 

In [2]:
series1 = pd.Series([1,2,3,4,5])
series1     # index and value at that index

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [3]:
s2 = pd.Series(["Sam","Elon","Arpan"])  # Zero based indexing
s2

0      Sam
1     Elon
2    Arpan
dtype: object

In [4]:
s3 = pd.Series(["Ram","Shyam","Arpan"],index=[1,2,3]) # Choice based Indexing
s3

1      Ram
2    Shyam
3    Arpan
dtype: object

In [5]:
s4 = pd.Series(["Ram","Shyam","Arpan"],index=["March","April","May"])
s4

March      Ram
April    Shyam
May      Arpan
dtype: object

In [6]:
s5 = pd.Series(["Ram","Shyam","Arpan"],index=["March","April",5])
s5

March      Ram
April    Shyam
5        Arpan
dtype: object

In [7]:
dict1 = { 
    "India" : "New Delhi", 
    "Sri Lanka" : "Columbo", 
    "Bangladesh" : "Dhaka" , 
    "Japan" : "Tokyo", 
    "USA": "Washington DC"
}
dict1

{'India': 'New Delhi',
 'Sri Lanka': 'Columbo',
 'Bangladesh': 'Dhaka',
 'Japan': 'Tokyo',
 'USA': 'Washington DC'}

In [8]:
s7 = pd.Series(dict)
s7

0    <class 'dict'>
dtype: object

## In pandas, we have 
- positional indexing (Starts from Zero )  
- labelled indexing (Done by User)


In [28]:
s8 = pd.Series([2,3,4], index=["Feb","Mar","April"])
print("s8 series :\n", s8)
print("\nValue at s8.April : ", s8['April'])
print(s8.__getitem__)


s8 series :
 Feb      2
Mar      3
April    4
dtype: int64

Value at s8.April :  4
<bound method Series.__getitem__ of Feb      2
Mar      3
April    4
dtype: int64>


In [29]:
print(s3)
print("\ns3[1:3]:\n",s3[1:3])

1      Ram
2    Shyam
3    Arpan
dtype: object

s3[1:3]:
 2    Shyam
3    Arpan
dtype: object


## Slicing

In [11]:
s9 = pd.Series(["New Delhi", "Washington DC", "London", "Paris"],index=["India","USA","UK","France"])
print(s9)
print("\nPositional Access:\n",s9[1:4])  # 1 to 3 (4-1)
print()
print("\nLabelled Access:\n",s9["USA":"France"])

India         New Delhi
USA       Washington DC
UK               London
France            Paris
dtype: object

Positional Access:
 USA       Washington DC
UK               London
France            Paris
dtype: object


Labelled Access:
 USA       Washington DC
UK               London
France            Paris
dtype: object


In [12]:
s9[::-1] # reverses the series

France            Paris
UK               London
USA       Washington DC
India         New Delhi
dtype: object

In [13]:
import numpy as np
series = pd.Series(np.arange(10,20,2))
print(series)
series11 = pd.Series(np.arange(10,20,2),index=["a","b","c","d","e"])
print(series11)

0    10
1    12
2    14
3    16
4    18
dtype: int64
a    10
b    12
c    14
d    16
e    18
dtype: int64


In [14]:
s10 = pd.Series(np.arange(10,20,2), index=np.arange(20,30,2))
s10

20    10
22    12
24    14
26    16
28    18
dtype: int64

In [15]:
print(series)
series[1:3] = 350
print(series)

0    10
1    12
2    14
3    16
4    18
dtype: int64
0     10
1    350
2    350
3     16
4     18
dtype: int64


In [16]:
series.name = "capitals" 
print(series)

newDict = pd.Series(dict1, name="capitals")
newDict

0     10
1    350
2    350
3     16
4     18
Name: capitals, dtype: int64


India             New Delhi
Sri Lanka           Columbo
Bangladesh            Dhaka
Japan                 Tokyo
USA           Washington DC
Name: capitals, dtype: object

In [17]:
newestDict = pd.Series(dict1)
newestDict.index.name = "country"
print(newestDict.values)
print(newestDict.empty)     # false

newestDict["Pakistan"] =  ""
newestDict["Afghanistan"] =  ""
newestDict2 = pd.Series()
print(newestDict2)
print(newestDict2.empty)    # true


['New Delhi' 'Columbo' 'Dhaka' 'Tokyo' 'Washington DC']
False
Series([], dtype: object)
True


In [18]:
s11 = pd.Series(np.arange(10,20,1))
print(s11)
print(s11.head())   # first 5 rows
print(s11.head(10)) # first 10 rows

0    10
1    11
2    12
3    13
4    14
5    15
6    16
7    17
8    18
9    19
dtype: int64
0    10
1    11
2    12
3    13
4    14
dtype: int64
0    10
1    11
2    12
3    13
4    14
5    15
6    16
7    17
8    18
9    19
dtype: int64


In [19]:
print(s11.count())      # Total elements
print("\nlast 5 values : \n", s11.tail())

10

last 5 values : 
 5    15
6    16
7    17
8    18
9    19
dtype: int64


In [20]:
new = pd.Series([10,'',20])
print(new.count())  # no. of element
print(new.size)     # no. of element

3
3


In [21]:
s12 = pd.Series([1,2,3,4,5],index=["a","b","c","d","e"])
s13 = pd.Series([10,20,-3,-4,-5],index=["z","v","a","c","e"])
print(s12)
print(s13)
s14 = s12 + s13
print(s14)

a    1
b    2
c    3
d    4
e    5
dtype: int64
z    10
v    20
a    -3
c    -4
e    -5
dtype: int64
a   -2.0
b    NaN
c   -1.0
d    NaN
e    0.0
v    NaN
z    NaN
dtype: float64


In [22]:
s = pd.Series([1, np.nan, 2, None, 3])
clean_s = s.dropna()        # removes nan values 
print(s)
print(clean_s)

0    1.0
1    NaN
2    2.0
3    NaN
4    3.0
dtype: float64
0    1.0
2    2.0
4    3.0
dtype: float64


In [23]:
s13 = pd.Series()
print(s13)
s13.empty

Series([], dtype: object)


True

In [24]:
data = pd.Series([10,np.nan,30])

print(data.isnull())        #identify missing values
print("\n", data.fillna(0)) # fills missing values with 0

0    False
1     True
2    False
dtype: bool

 0    10.0
1     0.0
2    30.0
dtype: float64
