# pandas 

- the name refers to "panel data" and "Python data analysis"
- developed in 2008 by Wes McKinney
- Installation:
    - if Python and pip are already installed on your machine:
        -  pip install pandas
- Note:
    - Python distributions such as Anaconda already come with pandas installed

- data structures in pandas:
    - Series
        one-dimensional
        like a column in a table
    - DataFrame
        two-dimensional
        a table with rows and columns


In [59]:
# importing pandas
import pandas as pd

In [2]:
# check the version; this also verifies that pandas is available
pd.__version__
# the output can vary depending on the version installed on your machine

'2.2.0'

# Series

- list as data

In [62]:

mydata = [11,22,33,55,11,66,77]
dataseries = pd.Series(mydata)
dataseries
# Note: the 'S' in Series is upper case
# by default the index starts at 0,
# so 0, 1, 2, etc. are displayed as the index

0    11
1    22
2    33
3    55
4    11
5    66
6    77
dtype: int64

- indexing with non-negative integers is allowed (with the default index, the labels are the positions 0, 1, 2, ...)

In [63]:
dataseries[0], dataseries[1]


(11, 22)

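- negative indexing with `[]` does not work on the default index: `dataseries[-1]` is looked up as the label -1, which does not exist, so a KeyError is raised (use `dataseries.iloc[-1]` to get the last element by position)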

In [64]:
dataseries[-1]

KeyError: -1

In [65]:
dataseries

0    11
1    22
2    33
3    55
4    11
5    66
6    77
dtype: int64

- slicing is allowed (integer slices are positional, and the stop position is excluded)
- we can access a sub-portion of the Series

In [66]:
dataseries[1:5]

1    22
2    33
3    55
4    11
dtype: int64

In [67]:
dataseries[::]  

0    11
1    22
2    33
3    55
4    11
5    66
6    77
dtype: int64

In [68]:
dataseries[1:6]  
# start at 1 stop at 6 not including stop point

1    22
2    33
3    55
4    11
5    66
dtype: int64

In [69]:
dataseries[1:6:2]  
# start at 1, stop at 6 (the stop point is not included), step by 2
# begins at 1
# add 2 to that: 1 + 2 = 3
# add 2 to the current value: 3 + 2 = 5

1    22
3    55
5    66
dtype: int64

In [23]:
# update values using slicing

In [70]:
dataseries[1:4]=[100,101,102]
dataseries
# positions 1 to 3 are updated to 100, 101 and 102

0     11
1    100
2    101
3    102
4     11
5     66
6     77
dtype: int64

In [71]:
# update using same value
dataseries[1:4] = 999
dataseries
# index 1 to 3 is updated to same value 999

0     11
1    999
2    999
3    999
4     11
5     66
6     77
dtype: int64

In [72]:
# so far we have been working with the default index
# we can customize the index by passing our own labels as the second argument (index)
mydata = [22,33,444,55]
series_with_own_index= pd.Series(mydata,['ind1','ind2','ind3','ind4'])
series_with_own_index

ind1     22
ind2     33
ind3    444
ind4     55
dtype: int64

In [None]:
# keys and values of Series

In [40]:
series_with_own_index.keys()

Index(['ind1', 'ind2', 'ind3', 'ind4'], dtype='object')

In [41]:
series_with_own_index.values

array([ 22,  33, 444,  55], dtype=int64)

In [75]:
series_with_own_index.sort_values()
# ascending order 

ind1     22
ind2     33
ind4     55
ind3    444
dtype: int64

In [78]:
# descending order
series_with_own_index.sort_values(ascending=False)

ind3    444
ind4     55
ind2     33
ind1     22
dtype: int64

In [79]:
series_with_own_index.sum()

554

In [80]:
series_with_own_index.max()

444

In [81]:
series_with_own_index.min()

22

In [82]:
series_with_own_index.mean()

138.5

In [83]:
series_with_own_index.median()

44.0

In [84]:
series_with_own_index

ind1     22
ind2     33
ind3    444
ind4     55
dtype: int64

In [86]:
# position of the highest value
# in this example 444 is the highest value; its label is ind3 and its
# position is 2 (positions are counted from 0), so argmax() returns 2

In [85]:
series_with_own_index.argmax()

2

In [87]:
test_data = [22,44,33,44,11,34,11]
user_series = pd.Series(test_data)
user_series

0    22
1    44
2    33
3    44
4    11
5    34
6    11
dtype: int64

In [90]:
# argmax returns the position of the first occurrence of the highest value

In [52]:
user_series.argmax()

1

In [92]:
user_series.count()

7

In [93]:
# dictionary as data

In [94]:
mydata = {
    'ram':77,
    'shyam':81,
    'sita':91
}

In [96]:
myseries = pd.Series(mydata)
myseries

ram      77
shyam    81
sita     91
dtype: int64

In [97]:
# use only selected keys
# if we don't want every key of the dictionary in the Series,
# we can pass just the labels we want via index:
myseries = pd.Series(mydata,index=['ram','sita'])
myseries

ram     77
sita    91
dtype: int64

In [None]:
# Have great moments ahead
# Good times