## Pandas

In [28]:
import pandas as pd
import numpy as np

## Series

In [11]:
# Series built from a plain list; pandas supplies the default 0..n-1 integer index.
a = pd.Series(list(range(1, 6)))
print(a)

0    1
1    2
2    3
3    4
4    5
dtype: int64


In [12]:
# Series built from a list, with explicit string labels instead of the default integer index.
b = pd.Series(list(range(1, 6)), index=list("abcde"))
print(b)

a    1
b    2
c    3
d    4
e    5
dtype: int64


In [13]:
# Series built from a dictionary: the keys become the index labels, the values become the data.
c = pd.Series(dict(a=1, b=2, c=3))
print(c)

a    1
b    2
c    3
dtype: int64


In [14]:
# Series built from a scalar: the single value is repeated once for every index label.
d = pd.Series(5, index=list("abcde"))
print(d)

a    5
b    5
c    5
d    5
e    5
dtype: int64


## DataFrame

In [None]:
# Column-oriented input: each key is a column name, each list holds that column's values.
data = dict(
    name=['pratham', 'john', 'henry', 'shawn'],
    age=[20, 25, 30, 35],
)

# Explicit row labels; without `index`, pandas would number the rows 0, 1, 2, 3, ...
df = pd.DataFrame(data, index=list("abcd"))
print(df)

      name  age
a  pratham   20
b     john   25
c    henry   30
d    shawn   35


In [27]:
# Rebuild the small example frame so this cell can run on its own.
data = {
    'name': ['pratham', 'john', 'henry', 'shawn'],
    'age': [20, 25, 30, 35],
}

# Explicit row labels; the default index would be 0, 1, 2, 3, ...
df = pd.DataFrame(data, index=["a", "b", "c", "d"])

# Row labels of the frame.
print("Index : ", df.index)

# Columns are selected with brackets rather than attribute access (df.name / df.age),
# so a column name can never be confused with a DataFrame attribute or method.
# The label below was previously "Index :", although the value printed is the 'name' column.
print("\nName :\n", df["name"])
print("\nAge :\n", df["age"])

# .loc[row_label, column_label] picks out a single cell by its labels.
print("\nSpecific element :\n", df.loc["a", "name"])

Index :  Index(['a', 'b', 'c', 'd'], dtype='object')

Index :
 a    pratham
b       john
c      henry
d      shawn
Name: name, dtype: object

Age :
 a    20
b    25
c    30
d    35
Name: age, dtype: int64

Specific element :
 pratham


## Inspecting and Viewing Data

In [57]:
# Sample records with missing entries (np.nan) in both columns and a few repeated names.
data = dict(
    name=[
        'pratham', 'suresh', 'raj', np.nan,
        'amit', 'neha', 'vikas', 'pooja',
        'ajay', np.nan, 'sneha', 'raj',
        'vivek', 'john', 'joker', 'kalpesh',
        'sahil', 'nikhil', 'suresh', 'hemat',
    ],
    age=[
        20, 22, 33, np.nan,
        64, 88, 23, 34,
        11, np.nan, 55, 36,
        78, 98, np.nan, 67,
        65, 87, 12, 45,
    ],
)

# No explicit index is given, so the rows get the default labels 0..19.
# To number the rows from 1 instead, pass index=range(1, len(data['name']) + 1).
df = pd.DataFrame(data)

print("Data :\n", df)
print("\nTop 5 data entries :\n", df.head(5))  # first five rows
print("\nBottom entries :\n", df.tail(3))  # last three rows
df.info()  # prints dtypes, non-null counts and memory usage directly

Data :
        name   age
0   pratham  20.0
1    suresh  22.0
2       raj  33.0
3       NaN   NaN
4      amit  64.0
5      neha  88.0
6     vikas  23.0
7     pooja  34.0
8      ajay  11.0
9       NaN   NaN
10    sneha  55.0
11      raj  36.0
12    vivek  78.0
13     john  98.0
14    joker   NaN
15  kalpesh  67.0
16    sahil  65.0
17   nikhil  87.0
18   suresh  12.0
19    hemat  45.0

Top 5 data entries :
       name   age
0  pratham  20.0
1   suresh  22.0
2      raj  33.0
3      NaN   NaN
4     amit  64.0

Bottom entries :
       name   age
17  nikhil  87.0
18  suresh  12.0
19   hemat  45.0
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   name    18 non-null     object 
 1   age     17 non-null     float64
dtypes: float64(1), object(1)
memory usage: 452.0+ bytes


In [58]:
# Number of missing values in each column.
print("\nTotal null values:\n", df.isnull().sum())

# describe() summarises numeric columns only, so just 'age' is reported here.
# (Label typo fixed: "Satistical" -> "Statistical".)
print("\nStatistical Summary :\n", df.describe())

# duplicated() flags every repeat of a value after its first occurrence.
# NOTE(review): missing names are treated as repeats of each other, so the second
# NaN in 'name' is flagged and included in the total below — use
# df['name'].dropna().duplicated() if only real names should be counted.
print("\nDuplicate entries : ", df['name'].duplicated())
print("\nTotal Duplicate entries : ", df['name'].duplicated().sum())


Total null values:
 name    2
age     3
dtype: int64

Satistical Summary :
              age
count  17.000000
mean   49.294118
std    28.252798
min    11.000000
25%    23.000000
50%    45.000000
75%    67.000000
max    98.000000

Duplicate entries :  0     False
1     False
2     False
3     False
4     False
5     False
6     False
7     False
8     False
9      True
10    False
11     True
12    False
13    False
14    False
15    False
16    False
17    False
18     True
19    False
Name: name, dtype: bool

Total Duplicate entries :  3


In [62]:
# Rows ordered by index label; the index is already ascending, so the order is unchanged.
print("\nSort by index :\n", df.sort_index())
# Rows ordered by ascending age; rows with a missing age are placed last.
print("\nSort by values :\n", df.sort_values(by='age'))


Sort by index :
        name   age
0   pratham  20.0
1    suresh  22.0
2       raj  33.0
3       NaN   NaN
4      amit  64.0
5      neha  88.0
6     vikas  23.0
7     pooja  34.0
8      ajay  11.0
9       NaN   NaN
10    sneha  55.0
11      raj  36.0
12    vivek  78.0
13     john  98.0
14    joker   NaN
15  kalpesh  67.0
16    sahil  65.0
17   nikhil  87.0
18   suresh  12.0
19    hemat  45.0

Sort by values :
        name   age
8      ajay  11.0
18   suresh  12.0
0   pratham  20.0
1    suresh  22.0
6     vikas  23.0
2       raj  33.0
7     pooja  34.0
11      raj  36.0
19    hemat  45.0
10    sneha  55.0
4      amit  64.0
16    sahil  65.0
15  kalpesh  67.0
12    vivek  78.0
17   nikhil  87.0
5      neha  88.0
13     john  98.0
3       NaN   NaN
9       NaN   NaN
14    joker   NaN


## Column Selection or Indexing

In [None]:
# Sample records with missing entries (np.nan) in both columns and a few repeated names.
data = dict(
    name=[
        'pratham', 'suresh', 'raj', np.nan,
        'amit', 'neha', 'vikas', 'pooja',
        'ajay', np.nan, 'sneha', 'raj',
        'vivek', 'john', 'joker', 'kalpesh',
        'sahil', 'nikhil', 'suresh', 'hemat',
    ],
    age=[
        20, 22, 33, np.nan,
        64, 88, 23, 34,
        11, np.nan, 55, 36,
        78, 98, np.nan, 67,
        65, 87, 12, 45,
    ],
)