# Viewing, Selecting, Assigning Data

In [55]:
import pandas as pd
import numpy as np

In [61]:
data = {
    'Name': ['Ankit', 'Pankaj', 'Shreya', 'Vipin', 'Priyanshi', 'Mukul', 'Abhishek'],
    'Scores': [79, 74, 85, 93, 81, 94, 67],
    'Age': [23, 21, 24, 22, 23, 22, 21],
    'Course': ['BTech', 'BCA', 'BSC', 'BTech', 'BCA', 'MCA', 'MBA']  # Added missing elements
}
print(data)
print(type(data))

df = pd.DataFrame(data)
print(df)

{'Name': ['Ankit', 'Pankaj', 'Shreya', 'Vipin', 'Priyanshi', 'Mukul', 'Abhishek'], 'Scores': [79, 74, 85, 93, 81, 94, 67], 'Age': [23, 21, 24, 22, 23, 22, 21], 'Course': ['BTech', 'BCA', 'BSC', 'BTech', 'BCA', 'MCA', 'MBA']}
<class 'dict'>
        Name  Scores  Age Course
0      Ankit      79   23  BTech
1     Pankaj      74   21    BCA
2     Shreya      85   24    BSC
3      Vipin      93   22  BTech
4  Priyanshi      81   23    BCA
5      Mukul      94   22    MCA
6   Abhishek      67   21    MBA


## Shape 

It is a property of dataframe or series that returns the number of rows and columns

In [63]:
df.shape
# no. rows = 7, no. of col = 4

(7, 4)

## Viewing columns

In [69]:
print(type(df.columns))
df.columns

<class 'pandas.core.indexes.base.Index'>


Index(['Name', 'Scores', 'Age', 'Course'], dtype='object')

## Viewing datatypes of each column

In [71]:
df

Unnamed: 0,Name,Scores,Age,Course
0,Ankit,79,23,BTech
1,Pankaj,74,21,BCA
2,Shreya,85,24,BSC
3,Vipin,93,22,BTech
4,Priyanshi,81,23,BCA
5,Mukul,94,22,MCA
6,Abhishek,67,21,MBA


In [75]:
df.dtypes

Name      object
Scores     int64
Age        int64
Course    object
dtype: object

## Functions to select rows

## 1)head(n)
Returns top 5 rows by default if n is not passed, otherwise top n rows

In [77]:
df.head()

Unnamed: 0,Name,Scores,Age,Course
0,Ankit,79,23,BTech
1,Pankaj,74,21,BCA
2,Shreya,85,24,BSC
3,Vipin,93,22,BTech
4,Priyanshi,81,23,BCA


In [79]:
df.head(2)

Unnamed: 0,Name,Scores,Age,Course
0,Ankit,79,23,BTech
1,Pankaj,74,21,BCA


## 2) tail(n)
Returns bottom 5 rows by default if n is not passed, otherwise bottom n rows

In [81]:
df.tail()

Unnamed: 0,Name,Scores,Age,Course
2,Shreya,85,24,BSC
3,Vipin,93,22,BTech
4,Priyanshi,81,23,BCA
5,Mukul,94,22,MCA
6,Abhishek,67,21,MBA


In [83]:
df.tail(2)

Unnamed: 0,Name,Scores,Age,Course
5,Mukul,94,22,MCA
6,Abhishek,67,21,MBA


## 3) sample(n)
Returns random 1 row by default if n is not passed, otherwise random n rows

In [85]:
df.sample()

Unnamed: 0,Name,Scores,Age,Course
6,Abhishek,67,21,MBA


In [87]:
df.sample(3)

Unnamed: 0,Name,Scores,Age,Course
6,Abhishek,67,21,MBA
1,Pankaj,74,21,BCA
2,Shreya,85,24,BSC


## Functions to describe dataframe

## 1)describe()
Provides statistical details about all the numerial columns in the dataframe

In [89]:
df.describe

<bound method NDFrame.describe of         Name  Scores  Age Course
0      Ankit      79   23  BTech
1     Pankaj      74   21    BCA
2     Shreya      85   24    BSC
3      Vipin      93   22  BTech
4  Priyanshi      81   23    BCA
5      Mukul      94   22    MCA
6   Abhishek      67   21    MBA>

## 2) inf0()
1. Print a concise summary of a dataframe
2. This method prints information about a dataframe including the index dtype and columns, non-null values and memory usage.

In [91]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7 entries, 0 to 6
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    7 non-null      object
 1   Scores  7 non-null      int64 
 2   Age     7 non-null      int64 
 3   Course  7 non-null      object
dtypes: int64(2), object(2)
memory usage: 356.0+ bytes


## Count Related Functions

## 1)count()
Count non-null cells for each column or row.

In [93]:
df.count() # axis = 0 , column count 

Name      7
Scores    7
Age       7
Course    7
dtype: int64

df.count(axis=1)

## 2)value_counts()
Return a series containing counts of unique rows in the dataframe

In [97]:
df['Course'].value_counts()

Course
BTech    2
BCA      2
BSC      1
MCA      1
MBA      1
Name: count, dtype: int64

## 3) unique()
Returns unique values from a series

In [99]:
df['Course'].unique()

array(['BTech', 'BCA', 'BSC', 'MCA', 'MBA'], dtype=object)

## 4) nunique()
Count distinct observations over requested axis.

In [101]:
df['Course'].nunique()

5

## Manipulate dataframe

## 1) df.iloc
1. iloc is used for indexing or selecting based on position.i.e. by row number and column number. it accepets only ineteger. it is 0 based.
2. it is used to slice row s and columns. Roes and columns must be sliced with indexes starting from 0 and onwards. negative indexing is allowed, following the rules of indexing and slicing.

syntax<br>
df.iloc[start_r,end_r,step_r,start_c,end_c,step_c]

In [103]:
df

Unnamed: 0,Name,Scores,Age,Course
0,Ankit,79,23,BTech
1,Pankaj,74,21,BCA
2,Shreya,85,24,BSC
3,Vipin,93,22,BTech
4,Priyanshi,81,23,BCA
5,Mukul,94,22,MCA
6,Abhishek,67,21,MBA


In [105]:
df.iloc[2:4,:] # start_r = 2, end_r = 4, step_r = 1

Unnamed: 0,Name,Scores,Age,Course
2,Shreya,85,24,BSC
3,Vipin,93,22,BTech


In [107]:
df.iloc[3:,:] # start-r = 3

Unnamed: 0,Name,Scores,Age,Course
3,Vipin,93,22,BTech
4,Priyanshi,81,23,BCA
5,Mukul,94,22,MCA
6,Abhishek,67,21,MBA


In [109]:
df.iloc[:3,2:] # end_r = 3, start c = 2
# r_no = 0,1,2 , col = ['Age','Course']

Unnamed: 0,Age,Course
0,23,BTech
1,21,BCA
2,24,BSC


In [111]:
df.iloc[:5,::2]

Unnamed: 0,Name,Age
0,Ankit,23
1,Pankaj,21
2,Shreya,24
3,Vipin,22
4,Priyanshi,23


In [113]:
df.iloc[1::2,2:]

Unnamed: 0,Age,Course
1,21,BCA
3,22,BTech
5,22,MCA


## df.loc
1. loc is used for indexing or selecting based on name i.e. by row name and column name (explicit index)
2. It can be used for filtering as well.
3. In left to right slicing, slicing is performed till end.

In [115]:
df.loc[:4,:]

Unnamed: 0,Name,Scores,Age,Course
0,Ankit,79,23,BTech
1,Pankaj,74,21,BCA
2,Shreya,85,24,BSC
3,Vipin,93,22,BTech
4,Priyanshi,81,23,BCA


In [117]:
df.loc[2:5,:]

Unnamed: 0,Name,Scores,Age,Course
2,Shreya,85,24,BSC
3,Vipin,93,22,BTech
4,Priyanshi,81,23,BCA
5,Mukul,94,22,MCA


In [119]:
df.loc[2:5,['Name','Scores']]

Unnamed: 0,Name,Scores
2,Shreya,85
3,Vipin,93
4,Priyanshi,81
5,Mukul,94


In [121]:
df.loc[1:5:2,:]

Unnamed: 0,Name,Scores,Age,Course
1,Pankaj,74,21,BCA
3,Vipin,93,22,BTech
5,Mukul,94,22,MCA


In [125]:
df.loc[1:5:2,:]

Unnamed: 0,Name,Scores,Age,Course
1,Pankaj,74,21,BCA
3,Vipin,93,22,BTech
5,Mukul,94,22,MCA


In [123]:
df.loc[2:5,['Name','Scores']]

Unnamed: 0,Name,Scores
2,Shreya,85
3,Vipin,93
4,Priyanshi,81
5,Mukul,94


In [127]:
df.columns

Index(['Name', 'Scores', 'Age', 'Course'], dtype='object')

In [129]:
# df.iloc[:6:2,['Name','course']] # Error
df.iloc[:6:2,::3]

Unnamed: 0,Name,Course
0,Ankit,BTech
2,Shreya,BSC
4,Priyanshi,BCA


In [131]:
# df.loc[3:5,1:3] # Error
df.loc[3:5,['Name','Scores','Age']]

Unnamed: 0,Name,Scores,Age
3,Vipin,93,22
4,Priyanshi,81,23
5,Mukul,94,22


## Select column(s) from dataframe

## 1) Select one column from a dataframe

In [133]:
df.head()

Unnamed: 0,Name,Scores,Age,Course
0,Ankit,79,23,BTech
1,Pankaj,74,21,BCA
2,Shreya,85,24,BSC
3,Vipin,93,22,BTech
4,Priyanshi,81,23,BCA


In [135]:
print(type(df['Age']))
df['Age']

<class 'pandas.core.series.Series'>


0    23
1    21
2    24
3    22
4    23
5    22
6    21
Name: Age, dtype: int64

## 2) Select multiple column from a dataframe

In [137]:
df2 = df[['Name','Age']]
print(type(df2))
df2.head()

<class 'pandas.core.frame.DataFrame'>


Unnamed: 0,Name,Age
0,Ankit,23
1,Pankaj,21
2,Shreya,24
3,Vipin,22
4,Priyanshi,23


## Rename columns

## 1) Rename all columns

In [139]:
df.head()

Unnamed: 0,Name,Scores,Age,Course
0,Ankit,79,23,BTech
1,Pankaj,74,21,BCA
2,Shreya,85,24,BSC
3,Vipin,93,22,BTech
4,Priyanshi,81,23,BCA


In [141]:
df.columns = ['Full_Name','Marks','Age_in_2022','Domian']
df.head()

Unnamed: 0,Full_Name,Marks,Age_in_2022,Domian
0,Ankit,79,23,BTech
1,Pankaj,74,21,BCA
2,Shreya,85,24,BSC
3,Vipin,93,22,BTech
4,Priyanshi,81,23,BCA


## 2) Rename specific columns

In [148]:
df.columns

Index(['FirstName', 'Marks', 'Age_onJan2022', 'Domian'], dtype='object')

In [153]:
df.rename(columns={'Age_in_2022':'Age_onJan2022','Full_Name':'FirstName'},inplace = True)
df.head()

Unnamed: 0,FirstName,Marks,Age_onJan2022,Domian
0,Ankit,79,23,BTech
1,Pankaj,74,21,BCA
2,Shreya,85,24,BSC
3,Vipin,93,22,BTech
4,Priyanshi,81,23,BCA


In [155]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7 entries, 0 to 6
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   FirstName      7 non-null      object
 1   Marks          7 non-null      int64 
 2   Age_onJan2022  7 non-null      int64 
 3   Domian         7 non-null      object
dtypes: int64(2), object(2)
memory usage: 356.0+ bytes
