In [1]:
# Series: a one-dimensional labelled array

import pandas as pd
import numpy as np

# Built from a plain list, so the index shown is the default 0..5
s = pd.Series([10, 12, 18, 22, 16, 21])
s

0    10
1    12
2    18
3    22
4    16
5    21
dtype: int64

In [2]:
# Building a Series from a dict: the keys become the index labels
dt = dict(a=10, b=20, c=30, d=40)
data = pd.Series(dt)
data

a    10
b    20
c    30
d    40
dtype: int64

In [3]:
# Explicit index labels; the single float (16.21) makes the Series float64
x = [10, 18, 22, 16.21, 30]
data1 = pd.Series(data=x, index=list('abcde'))
data1

a    10.00
b    18.00
c    22.00
d    16.21
e    30.00
dtype: float64

In [4]:
# With a dict plus an explicit index, values are picked by label: 'e' is not
# a key of dt, so it shows up as NaN and the Series is displayed as float64
data2 = pd.Series(dt, index=['a','b','c','d','e'])
data2

a    10.0
b    20.0
c    30.0
d    40.0
e     NaN
dtype: float64

In [5]:
# Website traffic: one list per column, rows aligned by position
web = {
    'Day': [1, 2, 3, 4, 5, 6],
    'Visitors': [1000, 700, 6000, 1000, 400, 350],
    'Bounce Rate': [20, 20, 23, 15, 10, 34],
}
df = pd.DataFrame(web)
df

Unnamed: 0,Day,Visitors,Bounce Rate
0,1,1000,20
1,2,700,20
2,3,6000,23
3,4,1000,15
4,5,400,10
5,6,350,34


In [6]:
df.head()  # Shows the first 5 rows (head defaults to n=5)

Unnamed: 0,Day,Visitors,Bounce Rate
0,1,1000,20
1,2,700,20
2,3,6000,23
3,4,1000,15
4,5,400,10


In [7]:
df.tail()  # Shows the last 5 rows (tail defaults to n=5)

Unnamed: 0,Day,Visitors,Bounce Rate
1,2,700,20
2,3,6000,23
3,4,1000,15
4,5,400,10
5,6,350,34


In [8]:
# Descriptive statistics of the DataFrame: count, mean, std, min,
# quartiles (25%, 50%, 75%) and max for each column
df.describe()

Unnamed: 0,Day,Visitors,Bounce Rate
count,6.0,6.0,6.0
mean,3.5,1575.0,20.333333
std,1.870829,2185.806487,8.115828
min,1.0,350.0,10.0
25%,2.25,475.0,16.25
50%,3.5,850.0,20.0
75%,4.75,1000.0,22.25
max,6.0,6000.0,34.0


In [9]:
# Sample standard deviation (the same values describe() reports as `std`)
# for every column in the DataFrame
df.std()

Day               1.870829
Visitors       2185.806487
Bounce Rate       8.115828
dtype: float64

In [10]:
# Standard deviation of the 'Day' column only, stored in `ab` and displayed
ab = df.Day.std()
ab

1.8708286933869707

In [11]:
df.Day.mean()   # Mean of the 'Day' column only

3.5

In [12]:
# Mean of every column in the DataFrame, returned as a Series
df.mean()

Day               3.500000
Visitors       1575.000000
Bounce Rate      20.333333
dtype: float64

In [39]:
# NOTE(review): this cell's execution count (39) is out of sequence with its
# neighbours; confirm the notebook reproduces under Restart & Run All.
import numpy as np
import pandas as pd

# Defining a new DataFrame. This rebinds `df`, replacing the website-traffic
# frame used by the cells above; np.nan marks a missing value in col3.
df = pd.DataFrame({
    'col1': ['Item0', 'Item0', 'Item1', 'Item1'],
    'col2': ['Gold', 'Bronze', 'Gold', 'Silver'],
    'col3': [1, 2, np.nan, 4],
})
df

Unnamed: 0,col1,col2,col3
0,Item0,Gold,1.0
1,Item0,Bronze,2.0
2,Item1,Gold,
3,Item1,Silver,4.0


In [14]:
# Data type of each column: the string columns show as object, col3 as float64
df.dtypes

col1     object
col2     object
col3    float64
dtype: object

In [15]:
# To check the number of rows and columns: shape is (rows, columns)
df.shape

(4, 3)

In [16]:
len(df)  # To check the number of rows (not columns)

4

In [17]:
df.columns    # Names of all columns in the DataFrame, as an Index

Index(['col1', 'col2', 'col3'], dtype='object')

In [18]:
# Displays descriptive statistics. Only the numeric column (col3) is
# summarised here; the string columns col1 and col2 are left out.
df.describe()

Unnamed: 0,col3
count,3.0
mean,2.333333
std,1.527525
min,1.0
25%,1.5
50%,2.0
75%,3.0
max,4.0


In [19]:
# Sorting
# sort_index sorts the DataFrame by its labels along one axis. With axis=1
# and ascending=False the column labels are put in descending order
# (col3, col2, col1); the rows are left as they are.

df.sort_index(axis=1, ascending=False)

Unnamed: 0,col3,col2,col1
0,1.0,Gold,Item0
1,2.0,Bronze,Item0
2,,Gold,Item1
3,4.0,Silver,Item1


In [20]:
# Sort rows by the values of one or more columns (here col1, descending)

df.sort_values(by=['col1'], ascending=False)

Unnamed: 0,col1,col2,col3
2,Item1,Gold,
3,Item1,Silver,4.0
0,Item0,Gold,1.0
1,Item0,Bronze,2.0


In [21]:
# Selecting/Querying
# Selects only the column named 'col1', using attribute access

df.col1

0    Item0
1    Item0
2    Item1
3    Item1
Name: col1, dtype: object

In [22]:
df['col1']  # Same column as the previous cell, selected with bracket indexing

0    Item0
1    Item0
2    Item1
3    Item1
Name: col1, dtype: object

In [23]:
# Selects two columns by passing a list of column names

df[['col1','col2']]

Unnamed: 0,col1,col2
0,Item0,Gold
1,Item0,Bronze
2,Item1,Gold
3,Item1,Silver


In [24]:
# Selects the third row: iloc positions are zero-based, so 2 is the row
# labelled 2 in the output

df.iloc[2]

col1    Item1
col2     Gold
col3      NaN
Name: 2, dtype: object

In [25]:
# Selects the rows at positions 1 and 2; the slice end (3) is excluded

df.iloc[1:3]

Unnamed: 0,col1,col2,col3
1,Item0,Bronze,2.0
2,Item1,Gold,


In [26]:
# First row, first column (row position 0, column position 0)

df.iloc[0,0]

'Item0'

In [27]:
# Element-wise comparison giving a boolean Series; the NaN in row 2
# compares as False
df.col3 > 0

0     True
1     True
2    False
3     True
Name: col3, dtype: bool

In [28]:
# Query by a single column value: keeps only the rows where col3 > 0
# (row 2, whose col3 is NaN, is filtered out)

df[df.col3 > 0]

Unnamed: 0,col1,col2,col3
0,Item0,Gold,1.0
1,Item0,Bronze,2.0
3,Item1,Silver,4.0


In [29]:
# Query by a single column against a list of preferred values
df[df['col2'].isin(['Gold','Silver'])]

Unnamed: 0,col1,col2,col3
0,Item0,Gold,1.0
2,Item1,Gold,
3,Item1,Silver,4.0


In [30]:
# True wherever a value is missing (NaN), False elsewhere
df.isnull()

Unnamed: 0,col1,col2,col3
0,False,False,False
1,False,False,False
2,False,False,True
3,False,False,False


In [31]:
# Displays a frame with the missing value replaced by 0; the result is not
# assigned, so df itself keeps its NaN
df.fillna(0)

Unnamed: 0,col1,col2,col3
0,Item0,Gold,1.0
1,Item0,Bronze,2.0
2,Item1,Gold,0.0
3,Item1,Silver,4.0


In [32]:
# df still contains the NaN: the fillna(0) result above was not assigned back
df

Unnamed: 0,col1,col2,col3
0,Item0,Gold,1.0
1,Item0,Bronze,2.0
2,Item1,Gold,
3,Item1,Silver,4.0


In [33]:
# Impute the missing col3 value with the column mean (2.333333, computed
# from the non-missing values) and write the result back into df
df['col3'] = df['col3'].fillna(df['col3'].mean())
df

Unnamed: 0,col1,col2,col3
0,Item0,Gold,1.0
1,Item0,Bronze,2.0
2,Item1,Gold,2.333333
3,Item1,Silver,4.0


In [34]:
# Rebind df to a frame whose categorical column col2 has a missing value
df = pd.DataFrame({
    'col1': ['Item0', 'Item0', 'Item1', 'Item1'],
    'col2': ['Gold', 'Bronze', 'Gold', np.nan],
    'col3': [1, 2, 3, 4],
})
df

Unnamed: 0,col1,col2,col3
0,Item0,Gold,1
1,Item0,Bronze,2
2,Item1,Gold,3
3,Item1,,4


In [35]:
# Impute the missing col2 value with the most frequent value: [0] takes the
# first entry of mode() ('Gold' here), and the result is written back to df
df['col2'] = df['col2'].fillna(df['col2'].mode()[0])
df

Unnamed: 0,col1,col2,col3
0,Item0,Gold,1
1,Item0,Bronze,2
2,Item1,Gold,3
3,Item1,Gold,4


In [36]:
# Rebuild the frame so col2 contains its NaN again (the mode imputation
# in the previous cell overwrote it)
df = pd.DataFrame({
    'col1': ['Item0', 'Item0', 'Item1', 'Item1'],
    'col2': ['Gold', 'Bronze', 'Gold', np.nan],
    'col3': [1, 2, 3, 4],
})
df

Unnamed: 0,col1,col2,col3
0,Item0,Gold,1
1,Item0,Bronze,2
2,Item1,Gold,3
3,Item1,,4


In [37]:
# True wherever a value is missing; here only col2 in row 3
df.isna()

Unnamed: 0,col1,col2,col3
0,False,False,False
1,False,False,False
2,False,False,False
3,False,True,False


In [38]:
# Number of missing values per column (sums the True entries of isna())
df.isna().sum()

col1    0
col2    1
col3    0
dtype: int64