### Pandas

* Pandas is an open-source, third-party Python library (it is not part of the standard library and must be installed) used for data analysis. Pandas is heavily used for data manipulation, visualization, and building machine learning models.

* Pandas implements a number of powerful data operations familiar to users of both database frameworks and spreadsheet programs.

* There are two main data structures in pandas: `Series` and `DataFrame`. The default way to store data is a DataFrame, and manipulating DataFrames quickly is probably the most important skill for data analysis.

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Show the version string of the installed pandas package.

pd.__version__

'1.0.1'

### pandas Series

* A Series is similar to a 1-D NumPy array and normally contains values of the same type
(numeric, character, datetime, etc.). A DataFrame is simply a table in which each column is a pandas Series.

* Creating series
    * List
    * Tuple
    * Dictionary
    * Numpy
    * Date_Range
* Series indexing

In [3]:
# Build a Series from a plain Python list; pandas assigns the default
# integer index 0..n-1.
l = list(range(10, 60, 10))
p = pd.Series(l)

p

0    10
1    20
2    30
3    40
4    50
dtype: int64

In [4]:
# A tuple is accepted as Series input in the same way as a list.
t = 100, 200, 300
t1 = pd.Series(t)

t1

0    100
1    200
2    300
dtype: int64

In [6]:
# Supply custom string labels through `index=` at construction time
# (one label per value) instead of the default 0..n-1 index.
p = pd.Series(
    [90, 87, 65, 45, 23],
    index=list("abcde"),
)
p

a    90
b    87
c    65
d    45
e    23
dtype: int64

In [9]:
# A dict becomes a Series: the keys form the index and the dict values
# become the Series values.
d = dict(amar=89, balu=90, swapna=75, raj=69, ram=88)
d1 = pd.Series(d)

d1

amar      89
balu      90
swapna    75
raj       69
ram       88
dtype: int64

In [10]:
# Relabel the existing values in place by assigning new labels to `.index`
# (st1..st5); the list must contain exactly one label per value.
d1.index = [f"st{n}" for n in range(1, 6)]
d1

st1    89
st2    90
st3    75
st4    69
st5    88
dtype: int64

In [11]:
# Look up a single value by its index label.
# `.loc` is the explicit label-based accessor.
d1.loc['st1']


89

In [12]:
# Label-based lookup of the value stored under 'st4'.
d1.loc['st4']

69

In [13]:
# Slicing by position: rows at positions 1 and 2 (the stop position 3 is excluded).
# `.iloc` spells out that the numbers are positions, not labels.
d1.iloc[1:3]

st2    90
st3    75
dtype: int64

In [14]:
# Positional slice with a step: start=1, stop=4 (excluded), step=2 -> positions 1 and 3.
d1.iloc[1:4:2]

st2    90
st4    69
dtype: int64

In [19]:
# Relabel d1 in place with the integer labels 20..24; the new index must
# contain exactly one label per existing value.
# NumPy is imported together with pandas in the notebook's first code cell,
# so this cell no longer carries its own import.
d1.index = np.arange(20, 25)

In [18]:

# Display d1 to inspect its current index labels and values.
d1

20    89
21    90
22    75
23    69
24    88
dtype: int64

### Note: the number of index labels must always equal the number of data values

In [20]:
# A single scalar value is repeated for every label supplied in `index`.
a = pd.Series("APSSDC", index=list(range(1, 6)))
a

1    APSSDC
2    APSSDC
3    APSSDC
4    APSSDC
5    APSSDC
dtype: object

In [None]:
## Creating series object having square of index values
#
# index -- value
#  0 -- 0
#  1 -- 1
#  2 -- 4
#  3 -- 9
#  ...
# 10 -- 100


In [21]:
# The integers 0 through 10 as a list.
list(range(11))

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

In [22]:
# Squares of 0..10; with the default 0..10 index each label is the number
# that was squared.
s = pd.Series([n * n for n in range(11)])
s

0       0
1       1
2       4
3       9
4      16
5      25
6      36
7      49
8      64
9      81
10    100
dtype: int64

In [26]:
# Read an upper bound from standard input, then build a Series that maps each
# integer 1..num (the index) to its square (the value).
num = int(input())

l = dict((i, i ** 2) for i in range(1, num + 1))
sq = pd.Series(l)

sq

11


1       1
2       4
3       9
4      16
5      25
6      36
7      49
8      64
9      81
10    100
11    121
dtype: int64

In [27]:
# Element-wise squares of the integers 1..9 as a NumPy array.
np.arange(1, 10) ** 2

array([ 1,  4,  9, 16, 25, 36, 49, 64, 81], dtype=int32)

In [29]:
# Squares of 1..4 computed with NumPy, labelled with the numbers themselves.
s = pd.Series(np.arange(1, 5) ** 2, index=[1, 2, 3, 4])
s

1     1
2     4
3     9
4    16
dtype: int32

In [31]:
## Date Range Method
# With only a start and an end given, date_range produces one timestamp per
# calendar day, both endpoints included.
dates = pd.date_range("2020-10-5", "2020-10-12")

dates

DatetimeIndex(['2020-10-05', '2020-10-06', '2020-10-07', '2020-10-08',
               '2020-10-09', '2020-10-10', '2020-10-11', '2020-10-12'],
              dtype='datetime64[ns]', freq='D')

In [34]:
# Print the built-in documentation (signature and parameter descriptions) of pd.date_range.
help(pd.date_range)

Help on function date_range in module pandas.core.indexes.datetimes:

date_range(start=None, end=None, periods=None, freq=None, tz=None, normalize=False, name=None, closed=None, **kwargs) -> pandas.core.indexes.datetimes.DatetimeIndex
    Return a fixed frequency DatetimeIndex.
    
    Parameters
    ----------
    start : str or datetime-like, optional
        Left bound for generating dates.
    end : str or datetime-like, optional
        Right bound for generating dates.
    periods : int, optional
        Number of periods to generate.
    freq : str or DateOffset, default 'D'
        Frequency strings can have multiples, e.g. '5H'. See
        :ref:`here <timeseries.offset_aliases>` for a list of
        frequency aliases.
    tz : str or tzinfo, optional
        Time zone name for returning localized DatetimeIndex, for example
        'Asia/Hong_Kong'. By default, the resulting DatetimeIndex is
        timezone-naive.
    normalize : bool, default False
        Normalize start/

In [35]:
import calendar
import datetime
import time

In [40]:
# Values 0..9 labelled with the integers 20..29, so labels and positions differ.
num = np.arange(10)
s = pd.Series(num, index=num + 20)

s

20    0
21    1
22    2
23    3
24    4
25    5
26    6
27    7
28    8
29    9
dtype: int32

In [41]:
# First two rows by position. `.iloc` makes the positional intent explicit,
# because this Series has integer labels (20..29) that differ from positions.
s.iloc[:2]

20    0
21    1
dtype: int32

In [42]:
### Fancy indexing
# Select several rows at once by passing a list of index labels.
s.loc[[20, 27, 29]]

20    0
27    7
29    9
dtype: int32

In [43]:
# Display the full Series again; the selections above did not modify it.
s

20    0
21    1
22    2
23    3
24    4
25    5
26    6
27    7
28    8
29    9
dtype: int32

In [44]:
# Mixing numbers, a missing value (NaN) and a string gives the Series the
# generic `object` dtype.
d = dict(a=90, b=67, c=56, d=np.nan, y="abc")
dd = pd.Series(d)

dd

a     90
b     67
c     56
d    NaN
y    abc
dtype: object

In [45]:
# Read the entry stored under label 'd' (the missing value).
dd.loc['d']

nan

In [46]:
# Overwrite the missing value stored under label 'd' with 99.
dd.loc['d'] = 99

In [47]:
# Display dd to confirm that label 'd' now holds 99.
dd

a     90
b     67
c     56
d     99
y    abc
dtype: object

In [48]:
# Creating Data Frame
# A flat list produces a single-column DataFrame (column label 0).
l = list(range(12, 24, 2))
df = pd.DataFrame(l)

df

Unnamed: 0,0
0,12
1,14
2,16
3,18
4,20
5,22


In [52]:
# In a Series, each inner list is stored as one object-typed element.
l = [
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
]
s = pd.Series(l)
s

0    [1, 2, 3]
1    [4, 5, 6]
2    [7, 8, 9]
dtype: object

In [53]:
# In a DataFrame, the same nested list is read row by row into a table with
# one column per inner element.
df = pd.DataFrame(data=l)
df

Unnamed: 0,0,1,2
0,1,2,3
1,4,5,6
2,7,8,9


In [54]:
# Changing Row index
# Assign new row labels in place; one label per row is required.
df.index = list("abc")
df

Unnamed: 0,0,1,2
a,1,2,3
b,4,5,6
c,7,8,9


In [56]:
# Changing column index
# Assign new column labels in place; one label per column is required.
df.columns = [
    111,
    222,
    333,
]
df

Unnamed: 0,111,222,333
a,1,2,3
b,4,5,6
c,7,8,9


In [57]:
### Creating dataframe using series
# A single Series becomes a one-column DataFrame; the Series index is kept as
# the row labels.
df = pd.DataFrame(
    pd.Series([100, 200, 300], index=list("xyz"))
)
df

Unnamed: 0,0
x,100
y,200
z,300


In [58]:
# Relabel the DataFrame's only column as 1 (the list must have one label per column).
df.columns=[1]

In [59]:
# Display df to inspect its current column label.
df

Unnamed: 0,1
x,100
y,200
z,300


In [63]:
# creating dataframe using dictionary
# Every dict value here is a scalar, so an explicit one-element index is
# passed to tell pandas to build a single row (labelled 1).
d = dict(a=10, b=78, c=90, d=76)
df1 = pd.DataFrame(d, index=[1])

df1

Unnamed: 0,a,b,c,d
1,10,78,90,76


In [67]:
# Columns built from Series with different indexes are aligned on the union of
# their labels (1..4); cells a column has no value for are filled with NaN.
d = {
    "a": pd.Series([10, 20, 30, 40], index=[1, 2, 3, 4]),
    "b": pd.Series([10, 20, 30], index=[1, 2, 3]),
    "c": pd.Series([20, 30, 40], index=[2, 3, 4]),
}
df = pd.DataFrame(d)

df

Unnamed: 0,a,b,c
1,10,10.0,
2,20,20.0,20.0
3,30,30.0,30.0
4,40,,40.0


In [69]:
# Rename the three columns to x, y and z by assigning a new label list.
df.columns = ["x", "y", "z"]
df

Unnamed: 0,x,y,z
1,10,10.0,
2,20,20.0,20.0
3,30,30.0,30.0
4,40,,40.0


In [70]:
# Creating data frame having squares and cubes
# Goal: rows labelled by index values, columns carrying meaningful names
# that describe the data.

# Building block 1: the squares of 1..10.
[n * n for n in range(1, 11)]

[1, 4, 9, 16, 25, 36, 49, 64, 81, 100]

In [71]:
# Building block 2: the cubes of 1..10.
[n * n * n for n in range(1, 11)]

[1, 8, 27, 64, 125, 216, 343, 512, 729, 1000]

In [75]:
# One row per power ("squares", "cubes") and one column per base number 1..10.
f = [[n ** power for n in range(1, 11)] for power in (2, 3)]
df = pd.DataFrame(f, index=["squares", "cubes"], columns=np.arange(1, 11))

df

Unnamed: 0,1,2,3,4,5,6,7,8,9,10
squares,1,4,9,16,25,36,49,64,81,100
cubes,1,8,27,64,125,216,343,512,729,1000


In [76]:
# One record (dict) per number 10..20: the dict keys become the column names
# and the numbers themselves are used as the row labels.
data = [dict(squares=n ** 2, cubes=n ** 3) for n in range(10, 21)]
df1 = pd.DataFrame(data, index=np.arange(10, 21))

df1

Unnamed: 0,squares,cubes
10,100,1000
11,121,1331
12,144,1728
13,169,2197
14,196,2744
15,225,3375
16,256,4096
17,289,4913
18,324,5832
19,361,6859


In [77]:
# Two Series that share the same index (1..4) become the columns
# 'a' (squares) and 'b' (cubes) of one DataFrame.
s = [1, 2, 3, 4]
d = dict(
    a=pd.Series([n ** 2 for n in s], index=s),
    b=pd.Series([n ** 3 for n in s], index=s),
)

df = pd.DataFrame(d)
df

Unnamed: 0,a,b
1,1,1
2,4,8
3,9,27
4,16,64
