# DATA ANALYSIS WITH PANDAS

In [7]:
import pandas as pd
import numpy as np

In [8]:
#Sample Dataframe
student = {'Name':['Gaurav','Tom','Chris','Fatima','Priya','Sudha','Francis'],
          'Marks':[89,67,78,98,87,69,79],
          'Gender':['Male','Male','Male','Female','Female','Female','Male']}
df1 = pd.DataFrame(student)
df1

Unnamed: 0,Name,Marks,Gender
0,Gaurav,89,Male
1,Tom,67,Male
2,Chris,78,Male
3,Fatima,98,Female
4,Priya,87,Female
5,Sudha,69,Female
6,Francis,79,Male


## Display Top3 Rows of the Dataset

In [10]:
df1.head(3)

Unnamed: 0,Name,Marks,Gender
0,Gaurav,89,Male
1,Tom,67,Male
2,Chris,78,Male


## Display Last3 Rows of the Dataset

In [12]:
df1.tail(3)

Unnamed: 0,Name,Marks,Gender
4,Priya,87,Female
5,Sudha,69,Female
6,Francis,79,Male


## Find the shape of your Dataset(Number of rows and Number of columns)

In [13]:
df1.shape

(7, 3)

In [14]:
print('Number of rows: ',df1.shape[0])
print('Number of columns: ',df1.shape[1])

Number of rows:  7
Number of columns:  3


## Get Information about our Dataset like Total number of rows, Total number of columns, Datatypes of each column and Memory Requiremnet

In [15]:
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7 entries, 0 to 6
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    7 non-null      object
 1   Marks   7 non-null      int64 
 2   Gender  7 non-null      object
dtypes: int64(1), object(2)
memory usage: 300.0+ bytes


## Check Null values in the Dataset

In [16]:
df1.isnull()

Unnamed: 0,Name,Marks,Gender
0,False,False,False
1,False,False,False
2,False,False,False
3,False,False,False
4,False,False,False
5,False,False,False
6,False,False,False


In [20]:
# print the count of null value for columns
df1.isnull().sum()

Name      0
Marks     0
Gender    0
dtype: int64

In [21]:
# print the count of null value for columns when axis=0
df1.isnull().sum(axis = 0)

Name      0
Marks     0
Gender    0
dtype: int64

In [23]:
#print the count of null value for rows when axis = 1
df1.isnull().sum(axis = 1)

0    0
1    0
2    0
3    0
4    0
5    0
6    0
dtype: int64

## Get Overall Statistics about the Dataframe

In [24]:
# Only numerical values
df1.describe()

Unnamed: 0,Marks
count,7.0
mean,81.0
std,11.120552
min,67.0
25%,73.5
50%,79.0
75%,88.0
max,98.0


In [25]:
# ALl values
df1.describe(include = 'all')

Unnamed: 0,Name,Marks,Gender
count,7,7.0,7
unique,7,,2
top,Gaurav,,Male
freq,1,,4
mean,,81.0,
std,,11.120552,
min,,67.0,
25%,,73.5,
50%,,79.0,
75%,,88.0,


## Find unique values from the Gender column

In [26]:
df1['Gender'].unique()

array(['Male', 'Female'], dtype=object)

## Find the Number of unique values from the Gender column

In [28]:
df1['Gender'].nunique()

2

## Display the count of unique values in Gender column

In [29]:
df1['Gender'].value_counts()

Gender
Male      4
Female    3
Name: count, dtype: int64

## Find the total number of students having marks betwwen 80 to 100 (including) using betwwen method

In [31]:
df1[df1['Marks']>=80]

Unnamed: 0,Name,Marks,Gender
0,Gaurav,89,Male
3,Fatima,98,Female
4,Priya,87,Female


In [41]:
df1[(df1['Marks']>=80) & (df1['Marks']<=100)]

Unnamed: 0,Name,Marks,Gender
0,Gaurav,89,Male
3,Fatima,98,Female
4,Priya,87,Female


In [45]:
len(df1[(df1['Marks']>=80) & (df1['Marks']<=100)])

3

In [48]:
#between method
sum(df1['Marks'].between(80,100))

3

## Find average/max/min of marks

In [50]:
df1['Marks'].mean()

81.0

In [52]:
df1['Marks'].max()

98

In [53]:
df1['Marks'].min()

67

## Apply method

In [54]:
def percentage(x):
    return print(x,' %')

In [55]:
df1['Marks'].apply(percentage)

89  %
67  %
78  %
98  %
87  %
69  %
79  %


0    None
1    None
2    None
3    None
4    None
5    None
6    None
Name: Marks, dtype: object

In [79]:
# Add new column to DataFrame
df1['Percentage']= df1['Marks'].apply(percentage)

89  %
67  %
78  %
98  %
87  %
69  %
79  %


In [57]:
df1

Unnamed: 0,Name,Marks,Gender,Percentage
0,Gaurav,89,Male,
1,Tom,67,Male,
2,Chris,78,Male,
3,Fatima,98,Female,
4,Priya,87,Female,
5,Sudha,69,Female,
6,Francis,79,Male,


In [60]:
df1['Name'].apply(len)

0    6
1    3
2    5
3    6
4    5
5    5
6    7
Name: Name, dtype: int64

## Map Function

In [61]:
df1

Unnamed: 0,Name,Marks,Gender,Percentage
0,Gaurav,89,Male,
1,Tom,67,Male,
2,Chris,78,Male,
3,Fatima,98,Female,
4,Priya,87,Female,
5,Sudha,69,Female,
6,Francis,79,Male,


In [62]:
df1['Gender'].map({'Male':1, 'Female':0})

0    1
1    1
2    1
3    0
4    0
5    0
6    1
Name: Gender, dtype: int64

In [77]:
df1['IsMale'] = df1['Gender'].map({'Male':1, 'Female':0})

In [65]:
df1

Unnamed: 0,Name,Marks,Gender,Percentage,IsMale
0,Gaurav,89,Male,,1
1,Tom,67,Male,,1
2,Chris,78,Male,,1
3,Fatima,98,Female,,0
4,Priya,87,Female,,0
5,Sudha,69,Female,,0
6,Francis,79,Male,,1


## Drop the Columns

In [67]:
df1.drop('IsMale', axis =1)

Unnamed: 0,Name,Marks,Gender,Percentage
0,Gaurav,89,Male,
1,Tom,67,Male,
2,Chris,78,Male,
3,Fatima,98,Female,
4,Priya,87,Female,
5,Sudha,69,Female,
6,Francis,79,Male,


In [80]:
df1

Unnamed: 0,Name,Marks,Gender,IsMale,Percentage
0,Gaurav,89,Male,1,
1,Tom,67,Male,1,
2,Chris,78,Male,1,
3,Fatima,98,Female,0,
4,Priya,87,Female,0,
5,Sudha,69,Female,0,
6,Francis,79,Male,1,


In [81]:
df1.drop(['IsMale','Percentage'], axis= 1, inplace = True)

In [82]:
df1

Unnamed: 0,Name,Marks,Gender
0,Gaurav,89,Male
1,Tom,67,Male
2,Chris,78,Male
3,Fatima,98,Female
4,Priya,87,Female
5,Sudha,69,Female
6,Francis,79,Male


## Print name of the columns

In [83]:
df1.columns

Index(['Name', 'Marks', 'Gender'], dtype='object')

In [84]:
df1.index

RangeIndex(start=0, stop=7, step=1)

## Sort the DataFrame as per the marks


In [87]:
df1.sort_values(by ='Marks', ascending=False)

Unnamed: 0,Name,Marks,Gender
3,Fatima,98,Female
0,Gaurav,89,Male
4,Priya,87,Female
6,Francis,79,Male
2,Chris,78,Male
5,Sudha,69,Female
1,Tom,67,Male


In [93]:
df1.sort_values(by= ['Marks','Gender'], ascending = False)

Unnamed: 0,Name,Marks,Gender
3,Fatima,98,Female
0,Gaurav,89,Male
4,Priya,87,Female
6,Francis,79,Male
2,Chris,78,Male
5,Sudha,69,Female
1,Tom,67,Male


## Display Name and Marks of the female student only

In [94]:
df1['Gender'] =='Female'

0    False
1    False
2    False
3     True
4     True
5     True
6    False
Name: Gender, dtype: bool

In [95]:
df1[df1['Gender'] == 'Female']

Unnamed: 0,Name,Marks,Gender
3,Fatima,98,Female
4,Priya,87,Female
5,Sudha,69,Female


In [96]:
df1[df1['Gender']=='Female'][['Name','Marks']]

Unnamed: 0,Name,Marks
3,Fatima,98
4,Priya,87
5,Sudha,69


In [97]:
#isin method
df1['Gender'].isin(['Female'])

0    False
1    False
2    False
3     True
4     True
5     True
6    False
Name: Gender, dtype: bool

In [98]:
df1[df1['Gender'].isin(['Female'])]

Unnamed: 0,Name,Marks,Gender
3,Fatima,98,Female
4,Priya,87,Female
5,Sudha,69,Female


In [99]:
df1[df1['Gender'].isin(['Female'])][['Name','Marks']]

Unnamed: 0,Name,Marks
3,Fatima,98
4,Priya,87
5,Sudha,69


# The End