### Basic working of Pandas

In [6]:
import pandas as pd
import numpy as np

In [3]:
# Build a Series of five population figures; pandas supplies a default 0..4 integer index.
pop = pd.Series(data=[12000, 11500, 23000, 34000, 45000])

In [5]:
# Give the Series a name; it is shown as "Name: Population" when the Series is displayed.
pop.name='Population'

In [6]:
pop


0    12000
1    11500
2    23000
3    34000
4    45000
Name: Population, dtype: int64

In [7]:
# Replace the default integer index with country labels (one label per value, in order).
pop.index=['India','China','Canada','Japan','USA']

In [8]:
pop

India     12000
China     11500
Canada    23000
Japan     34000
USA       45000
Name: Population, dtype: int64

In [9]:
# Data type of the values stored in the Series
pop.dtypes

dtype('int64')

In [10]:
# Summary of the Series: index range, name, non-null count, dtype and memory usage
pop.info()

<class 'pandas.core.series.Series'>
Index: 5 entries, India to USA
Series name: Population
Non-Null Count  Dtype
--------------  -----
5 non-null      int64
dtypes: int64(1)
memory usage: 252.0+ bytes


In [13]:
# .values exposes the Series data as a NumPy array
type(pop.values)

numpy.ndarray

In [14]:
# Label-based lookup: returns the value stored under the 'India' index label
pop['India']

12000

In [15]:
# Positional lookup of the fourth element. Use .iloc explicitly: on a label-indexed
# Series, pop[3] relies on the deprecated integer-position fallback of Series.__getitem__.
pop.iloc[3]

34000

In [24]:
# Positional access with iloc: position 0 is the first element, -1 is the last
pop.iloc[0], pop.iloc[-1]

(12000, 45000)

In [25]:
# Label-based slicing includes the end label ('Japan' is part of the result);
# this differs from positional slicing, where the end position is excluded
pop['India':'Japan']

India     12000
China     11500
Canada    23000
Japan     34000
Name: Population, dtype: int64

### Conditional Selection (Boolean Arrays)

In [26]:
pop

India     12000
China     11500
Canada    23000
Japan     34000
USA       45000
Name: Population, dtype: int64

In [27]:
# Element-wise comparison: returns a boolean Series that is True where the value exceeds 10000
pop>10000

India     True
China     True
Canada    True
Japan     True
USA       True
Name: Population, dtype: bool

In [28]:
# Boolean-mask selection: keeps only the entries for which the condition is True
pop[pop>10000]

India     12000
China     11500
Canada    23000
Japan     34000
USA       45000
Name: Population, dtype: int64

In [29]:
# Arithmetic mean of the values in the Series
pop.mean()

25100.0

In [32]:
# Keep only the entries whose value is below the Series mean
pop[pop < pop.mean()]

India     12000
China     11500
Canada    23000
Name: Population, dtype: int64

In [42]:
# Boolean-mask assignment: every value below 14000 is overwritten with 14000 (modifies pop)
pop.loc[pop < 14000] = 14000

In [43]:
pop

India     14000
China     14000
Canada    23000
Japan     34000
USA       45000
Name: Population, dtype: int64

### Creating a DataFrame

In [8]:
# Build a DataFrame from a dict of equal-length lists: each key becomes a column.
# `columns` states the column order explicitly.
new = pd.DataFrame(
    data={
        'Salary': [10000, 20000, 30000, 15000, 13000],
        'Company': ['TCS', 'Wipro', 'Infosys', 'Accenture', 'Mindtree'],
    },
    columns=['Salary', 'Company'],
)

In [62]:
new

Unnamed: 0,Salary,Company
0,10000,TCS
1,20000,Wipro
2,30000,Infosys
3,15000,Accenture
4,13000,Mindtree


In [32]:
# Replace the default integer index with city labels, one per row
new.index=['Bangalore','Mumbai','Chennai','Lucknow','Noida']

In [70]:
new

Unnamed: 0,Salary,Company
Bangalore,10000,TCS
Mumbai,20000,Wipro
Chennai,30000,Infosys
Lucknow,15000,Accenture
Noida,13000,Mindtree


In [72]:
#Displays the column name in dataframe

new.columns

Index(['Salary', 'Company'], dtype='object')

In [73]:
#Displays the Index values in the dataframe

new.index

Index(['Bangalore', 'Mumbai', 'Chennai', 'Lucknow', 'Noida'], dtype='object')

In [74]:
# Displays the index range, column names, non-null counts, data types and memory usage of the DataFrame

new.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, Bangalore to Noida
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Salary   5 non-null      int64 
 1   Company  5 non-null      object
dtypes: int64(1), object(1)
memory usage: 120.0+ bytes


In [76]:
# Displays summary statistics (count, mean, std, min, quartiles, max) for the numeric columns

new.describe()

Unnamed: 0,Salary
count,5.0
mean,17600.0
std,7829.431652
min,10000.0
25%,13000.0
50%,15000.0
75%,20000.0
max,30000.0


In [77]:
# Total number of elements in the DataFrame (rows x columns)
new.size

10

In [78]:
#Displays no of rows, no of columns in dataframe

new.shape

(5, 2)

In [79]:
#Displays data type of columns

new.dtypes

Salary      int64
Company    object
dtype: object

In [81]:
# Number of columns of each data type
new.dtypes.value_counts()

int64     1
object    1
dtype: int64

In [88]:
# Counts how many times each distinct value occurs in the 'Salary' column

new['Salary'].value_counts()

10000    1
20000    1
30000    1
15000    1
13000    1
Name: Salary, dtype: int64

In [89]:
#Displays the count of datatype present in Dataframe

new.dtypes.value_counts()

int64     1
object    1
dtype: int64

### Indexing, Selection and Slicing 

In [9]:
new

Unnamed: 0,Salary,Company
0,10000,TCS
1,20000,Wipro
2,30000,Infosys
3,15000,Accenture
4,13000,Mindtree


In [98]:
# loc selects rows by index label; a single label returns that row as a Series

new.loc['Bangalore']

Salary     10000
Company      TCS
Name: Bangalore, dtype: object

In [101]:
# A label slice with loc selects a range of rows and includes both end labels

new.loc['Bangalore':'Chennai']

Unnamed: 0,Salary,Company
Bangalore,10000,TCS
Mumbai,20000,Wipro
Chennai,30000,Infosys


In [102]:
#Displays Only the selected column for given index range

new.loc['Bangalore':'Chennai','Salary']

Bangalore    10000
Mumbai       20000
Chennai      30000
Name: Salary, dtype: int64

In [96]:
# iloc Displays the value based on numeric indexing

new.iloc[2]

Salary       30000
Company    Infosys
Name: Chennai, dtype: object

In [104]:
# Selects specific rows by position using a list: the first, second and last rows

new.iloc[[0,1,-1]]

Unnamed: 0,Salary,Company
Bangalore,10000,TCS
Mumbai,20000,Wipro
Noida,13000,Mindtree


In [105]:
# Positional slice with iloc: rows at positions 2 and 3 (the end position 4 is excluded)
new.iloc[2:4]

Unnamed: 0,Salary,Company
Chennai,30000,Infosys
Lucknow,15000,Accenture


In [97]:
# Selecting a single column returns it as a Series that keeps the DataFrame's index

new['Salary']

Bangalore    10000
Mumbai       20000
Chennai      30000
Lucknow      15000
Noida        13000
Name: Salary, dtype: int64

In [106]:
#Displays Boolean value of each index as True where given condition is True and False where the given condition is false

new['Salary']>15000

Bangalore    False
Mumbai        True
Chennai       True
Lucknow      False
Noida        False
Name: Salary, dtype: bool

In [107]:
# Displays the rows where the given condition is True (boolean mask passed to loc)

new.loc[new['Salary']>15000]

Unnamed: 0,Salary,Company
Mumbai,20000,Wipro
Chennai,30000,Infosys


In [108]:
# Displays only the 'Company' column for the rows where the given condition is True

new.loc[new['Salary']>15000,'Company']

Mumbai       Wipro
Chennai    Infosys
Name: Company, dtype: object

### Dropping Methods

In [110]:
# Drop a row by index label; drop returns a new DataFrame and leaves `new` unchanged
new.drop('Noida')

Unnamed: 0,Salary,Company
Bangalore,10000,TCS
Mumbai,20000,Wipro
Chennai,30000,Infosys
Lucknow,15000,Accenture


In [112]:
# Drop a column by name (returns a new DataFrame; `new` is not modified)
new.drop(columns=['Salary'])

Unnamed: 0,Company
Bangalore,TCS
Mumbai,Wipro
Chennai,Infosys
Lucknow,Accenture
Noida,Mindtree


In [113]:
new

Unnamed: 0,Salary,Company
Bangalore,10000,TCS
Mumbai,20000,Wipro
Chennai,30000,Infosys
Lucknow,15000,Accenture
Noida,13000,Mindtree


In [114]:
# Drop several rows at once by passing a list of index labels
new.drop(['Chennai','Noida'])

Unnamed: 0,Salary,Company
Bangalore,10000,TCS
Mumbai,20000,Wipro
Lucknow,15000,Accenture


### Adding a New Column to an Existing DataFrame

In [62]:
# Create a named Series of languages keyed by city; the dict keys become the index labels

langs = pd.Series(
    {
        'Bangalore': 'Kannada',
        'Mumbai': 'Marathi',
        'Chennai': 'Tamil',
        'Lucknow': 'Hindi',
        'Noida': 'Hindi',
    },
    name='Languages',
)

In [67]:
# Attach the Series to the DataFrame as a new column; values are aligned on the index labels.
# The Series is bound to the lowercase name `langs`; `Langs` is undefined and raised NameError.

new['Language']=langs

In [69]:
# Add a column from a plain list: values are assigned to the rows by position
# NOTE(review): 'Maharsthra' looks like a misspelling of 'Maharashtra', and the state
# given for Noida ('MP') should be double-checked - confirm before correcting the data
new['State']=['Karnataka','Maharsthra','Tamil Nadu','UP','MP']

In [70]:
new

Unnamed: 0,Salary,Company,State,Language
Bangalore,10000,TCS,Karnataka,Kannada
Mumbai,20000,Wipro,Maharsthra,Marathi
Chennai,30000,Infosys,Tamil Nadu,Tamil
Lucknow,15000,Accenture,UP,Hindi
Noida,13000,Mindtree,MP,Hindi


### Renaming Column name and Indices 

In [71]:
# Rename index labels and columns via old-name -> new-name mappings.
# rename returns a new DataFrame; the result is only displayed, `new` is not reassigned.
new.rename(
    index={
        'Bangalore': 'BNG',
        'Mumbai': 'MUM',
        'Chennai': 'CHE',
        'Lucknow': 'LKN',
        'Noida': 'NOI',
    },
    columns={'Salary': 'Monthly_Salary'},
)

Unnamed: 0,Monthly_Salary,Company,State,Language
BNG,10000,TCS,Karnataka,Kannada
MUM,20000,Wipro,Maharsthra,Marathi
CHE,30000,Infosys,Tamil Nadu,Tamil
LKN,15000,Accenture,UP,Hindi
NOI,13000,Mindtree,MP,Hindi


### Import Data from External Files

In [112]:
# Load the CSV file into a DataFrame (the path is relative to the notebook's working directory)

df=pd.read_csv("automobile_data.csv")

In [113]:
df

Unnamed: 0,make,fuel_type,num_of_doors,body_style,drive_wheels,engine_location,wheel_base,length,width,height,curb_weight,engine_type,num_of_cylinders,engine_size,fuel_system,compression_ratio,horsepower,city_mpg,highway_mpg,price
0,alfa-romero,gas,two,convertible,rwd,front,88.6,168.8,64.1,48.8,2548,dohc,four,130,mpfi,9.0,111,21,27,13495
1,alfa-romero,gas,two,convertible,rwd,front,88.6,168.8,64.1,48.8,2548,dohc,four,130,mpfi,9.0,111,21,27,16500
2,alfa-romero,gas,two,hatchback,rwd,front,94.5,171.2,65.5,52.4,2823,ohcv,six,152,mpfi,9.0,154,19,26,16500
3,audi,gas,four,sedan,fwd,front,99.8,176.6,66.2,54.3,2337,ohc,four,109,mpfi,70.0,102,24,30,13950
4,audi,gas,four,sedan,4wd,front,99.4,176.6,66.4,54.3,2824,ohc,five,136,mpfi,8.0,115,18,22,17450
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
198,volvo,gas,four,sedan,rwd,front,109.1,188.8,68.9,55.5,2952,ohc,four,141,mpfi,9.5,114,23,28,16845
199,volvo,gas,four,sedan,rwd,front,109.1,188.8,68.8,55.5,3049,ohc,four,141,mpfi,8.7,160,19,25,19045
200,volvo,gas,four,sedan,rwd,front,109.1,188.8,68.9,55.5,3012,ohcv,six,173,mpfi,8.8,134,18,23,21485
201,volvo,diesel,four,sedan,rwd,front,109.1,188.8,68.9,55.5,3217,ohc,six,145,idi,23.0,106,26,27,22470


In [114]:
df.shape

(203, 20)

In [115]:
# Displays the first 5 rows

df.head()

Unnamed: 0,make,fuel_type,num_of_doors,body_style,drive_wheels,engine_location,wheel_base,length,width,height,curb_weight,engine_type,num_of_cylinders,engine_size,fuel_system,compression_ratio,horsepower,city_mpg,highway_mpg,price
0,alfa-romero,gas,two,convertible,rwd,front,88.6,168.8,64.1,48.8,2548,dohc,four,130,mpfi,9.0,111,21,27,13495
1,alfa-romero,gas,two,convertible,rwd,front,88.6,168.8,64.1,48.8,2548,dohc,four,130,mpfi,9.0,111,21,27,16500
2,alfa-romero,gas,two,hatchback,rwd,front,94.5,171.2,65.5,52.4,2823,ohcv,six,152,mpfi,9.0,154,19,26,16500
3,audi,gas,four,sedan,fwd,front,99.8,176.6,66.2,54.3,2337,ohc,four,109,mpfi,70.0,102,24,30,13950
4,audi,gas,four,sedan,4wd,front,99.4,176.6,66.4,54.3,2824,ohc,five,136,mpfi,8.0,115,18,22,17450


In [116]:
#Displaying the bottom 5 rows 

df.tail()

Unnamed: 0,make,fuel_type,num_of_doors,body_style,drive_wheels,engine_location,wheel_base,length,width,height,curb_weight,engine_type,num_of_cylinders,engine_size,fuel_system,compression_ratio,horsepower,city_mpg,highway_mpg,price
198,volvo,gas,four,sedan,rwd,front,109.1,188.8,68.9,55.5,2952,ohc,four,141,mpfi,9.5,114,23,28,16845
199,volvo,gas,four,sedan,rwd,front,109.1,188.8,68.8,55.5,3049,ohc,four,141,mpfi,8.7,160,19,25,19045
200,volvo,gas,four,sedan,rwd,front,109.1,188.8,68.9,55.5,3012,ohcv,six,173,mpfi,8.8,134,18,23,21485
201,volvo,diesel,four,sedan,rwd,front,109.1,188.8,68.9,55.5,3217,ohc,six,145,idi,23.0,106,26,27,22470
202,volvo,gas,four,sedan,rwd,front,109.1,188.8,68.9,55.5,3062,ohc,four,141,mpfi,9.5,114,19,25,22625


In [118]:
df.dtypes

make                  object
fuel_type             object
num_of_doors          object
body_style            object
drive_wheels          object
engine_location       object
wheel_base           float64
length               float64
width                float64
height               float64
curb_weight            int64
engine_type           object
num_of_cylinders      object
engine_size            int64
fuel_system           object
compression_ratio    float64
horsepower             int64
city_mpg               int64
highway_mpg            int64
price                  int64
dtype: object

In [119]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 203 entries, 0 to 202
Data columns (total 20 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   make               203 non-null    object 
 1   fuel_type          203 non-null    object 
 2   num_of_doors       201 non-null    object 
 3   body_style         203 non-null    object 
 4   drive_wheels       203 non-null    object 
 5   engine_location    203 non-null    object 
 6   wheel_base         203 non-null    float64
 7   length             203 non-null    float64
 8   width              203 non-null    float64
 9   height             203 non-null    float64
 10  curb_weight        203 non-null    int64  
 11  engine_type        203 non-null    object 
 12  num_of_cylinders   203 non-null    object 
 13  engine_size        203 non-null    int64  
 14  fuel_system        203 non-null    object 
 15  compression_ratio  203 non-null    float64
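 16  horsepower         203 non-null    int64  
 17  city_mpg           203 non-null    int64  
 18  highway_mpg        203 non-null    int64  
 19  price              203 non-null    int64  
dtypes: float64(5), int64(6), object(9)
memory usage: 31.8+ KB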

In [121]:
# Use the 'make' column as the row index so rows can be selected by manufacturer with loc.
# Reassign the result instead of using inplace=True: in-place mutation blocks method
# chaining and makes the cell's effect on `df` harder to follow.
df = df.set_index('make')

In [123]:
# 'make' is the index at this point, so loc returns every row labelled 'audi'
df.loc['audi']

Unnamed: 0_level_0,fuel_type,num_of_doors,body_style,drive_wheels,engine_location,wheel_base,length,width,height,curb_weight,engine_type,num_of_cylinders,engine_size,fuel_system,compression_ratio,horsepower,city_mpg,highway_mpg,price
make,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
audi,gas,four,sedan,fwd,front,99.8,176.6,66.2,54.3,2337,ohc,four,109,mpfi,70.0,102,24,30,13950
audi,gas,four,sedan,4wd,front,99.4,176.6,66.4,54.3,2824,ohc,five,136,mpfi,8.0,115,18,22,17450
audi,gas,two,sedan,fwd,front,99.8,177.3,66.3,53.1,2507,ohc,five,136,mpfi,8.5,110,19,25,15250
audi,gas,four,sedan,fwd,front,105.8,192.7,71.4,55.7,2844,ohc,five,136,mpfi,8.5,110,19,25,17710
audi,gas,four,wagon,fwd,front,105.8,192.7,71.4,55.7,2954,ohc,five,136,mpfi,8.5,110,19,25,18920
audi,gas,four,sedan,fwd,front,105.8,192.7,71.4,55.9,3086,ohc,five,131,mpfi,8.3,140,17,20,23875
audi,gas,two,hatchback,4wd,front,99.5,178.2,67.9,52.0,3053,ohc,five,131,mpfi,7.0,160,16,22,0
